Merge tag 'batadv-next-for-davem-20191105' of git://git.open-mesh.org/linux-merge
author David S. Miller <davem@davemloft.net>
Wed, 6 Nov 2019 02:33:05 +0000 (18:33 -0800)
committer David S. Miller <davem@davemloft.net>
Wed, 6 Nov 2019 02:33:05 +0000 (18:33 -0800)
Simon Wunderlich says:

====================
This feature/cleanup patchset includes the following patches:

 - bump version strings, by Simon Wunderlich

 - Simplify batadv_v_ogm_aggr_list_free using skb_queue_purge,
   by Christophe Jaillet

 - Replace aggr_list_lock with lock free skb handlers,
   by Christophe Jaillet

 - explicitly mark fallthrough cases, by Sven Eckelmann

 - Drop lockdep.h include from soft-interface.c, by Sven Eckelmann
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
872 files changed:
Documentation/ABI/testing/sysfs-class-net-statistics
Documentation/bpf/index.rst
Documentation/bpf/prog_flow_dissector.rst
Documentation/bpf/s390.rst [new file with mode: 0644]
Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
Documentation/devicetree/bindings/net/ftgmac100.txt
Documentation/devicetree/bindings/net/lpc-eth.txt
Documentation/devicetree/bindings/net/nfc/pn532.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt [deleted file]
Documentation/devicetree/bindings/net/renesas,ether.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/net/sh_eth.txt [deleted file]
Documentation/devicetree/bindings/ptp/ptp-idtcm.yaml [new file with mode: 0644]
Documentation/networking/af_xdp.rst
Documentation/networking/device_drivers/freescale/dpaa2/index.rst
Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst [new file with mode: 0644]
Documentation/networking/devlink-params-mv88e6xxx.txt [new file with mode: 0644]
Documentation/networking/tls.rst
MAINTAINERS
arch/x86/mm/Makefile
arch/x86/mm/maccess.c [new file with mode: 0644]
arch/x86/net/bpf_jit_comp.c
drivers/atm/firestream.c
drivers/bluetooth/Kconfig
drivers/bluetooth/Makefile
drivers/bluetooth/btintel.c
drivers/bluetooth/btintel.h
drivers/bluetooth/btrtl.c
drivers/bluetooth/btusb.c
drivers/bluetooth/btwilink.c [deleted file]
drivers/bluetooth/hci_bcm.c
drivers/bluetooth/hci_ll.c
drivers/bluetooth/hci_nokia.c
drivers/bluetooth/hci_qca.c
drivers/bus/fsl-mc/dprc-driver.c
drivers/bus/fsl-mc/dprc.c
drivers/bus/fsl-mc/fsl-mc-bus.c
drivers/bus/fsl-mc/fsl-mc-private.h
drivers/crypto/chelsio/Kconfig
drivers/crypto/chelsio/chtls/chtls.h
drivers/crypto/chelsio/chtls/chtls_main.c
drivers/firmware/broadcom/Kconfig
drivers/firmware/broadcom/Makefile
drivers/firmware/broadcom/tee_bnxt_fw.c [new file with mode: 0644]
drivers/isdn/hardware/mISDN/hfcsusb.h
drivers/isdn/hardware/mISDN/mISDNisar.c
drivers/net/bonding/bond_main.c
drivers/net/caif/Kconfig
drivers/net/dsa/Kconfig
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/bcm_sf2.h
drivers/net/dsa/bcm_sf2_cfp.c
drivers/net/dsa/dsa_loop.c
drivers/net/dsa/lan9303-core.c
drivers/net/dsa/lantiq_gswip.c
drivers/net/dsa/microchip/ksz9477_i2c.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mv88e6060.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global1.h
drivers/net/dsa/mv88e6xxx/global1_atu.c
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/qca8k.c
drivers/net/dsa/realtek-smi-core.c
drivers/net/dsa/sja1105/sja1105.h
drivers/net/dsa/sja1105/sja1105_clocking.c
drivers/net/dsa/sja1105/sja1105_dynamic_config.c
drivers/net/dsa/sja1105/sja1105_ethtool.c
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/dsa/sja1105/sja1105_ptp.c
drivers/net/dsa/sja1105/sja1105_ptp.h
drivers/net/dsa/sja1105/sja1105_spi.c
drivers/net/dsa/vitesse-vsc73xx-core.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/altera/altera_tse_main.c
drivers/net/ethernet/amazon/ena/ena_ethtool.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amazon/ena/ena_netdev.h
drivers/net/ethernet/aquantia/atlantic/Makefile
drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
drivers/net/ethernet/aquantia/atlantic/aq_filters.c
drivers/net/ethernet/aquantia/atlantic/aq_hw.h
drivers/net/ethernet/aquantia/atlantic/aq_main.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.h
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/aq_phy.c [new file with mode: 0644]
drivers/net/ethernet/aquantia/atlantic/aq_phy.h [new file with mode: 0644]
drivers/net/ethernet/aquantia/atlantic/aq_ptp.c [new file with mode: 0644]
drivers/net/ethernet/aquantia/atlantic/aq_ptp.h [new file with mode: 0644]
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
drivers/net/ethernet/arc/emac_arc.c
drivers/net/ethernet/arc/emac_rockchip.c
drivers/net/ethernet/atheros/ag71xx.c
drivers/net/ethernet/aurora/nb8800.c
drivers/net/ethernet/aurora/nb8800.h
drivers/net/ethernet/broadcom/b44.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
drivers/net/ethernet/broadcom/cnic.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/calxeda/xgmac.c
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/l2t.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
drivers/net/ethernet/freescale/dpaa2/Makefile
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa2/dpmac.c [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa2/dpmac.h [new file with mode: 0644]
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fman/fman.c
drivers/net/ethernet/freescale/fman/fman_port.c
drivers/net/ethernet/freescale/fman/fman_port.h
drivers/net/ethernet/freescale/fman/mac.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/hisilicon/hip04_eth.c
drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
drivers/net/ethernet/hisilicon/hns3/hnae3.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
drivers/net/ethernet/hp/Kconfig [deleted file]
drivers/net/ethernet/hp/Makefile [deleted file]
drivers/net/ethernet/hp/hp100.c [deleted file]
drivers/net/ethernet/hp/hp100.h [deleted file]
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/ibm/emac/core.h
drivers/net/ethernet/ibm/emac/zmii.c
drivers/net/ethernet/ibm/emac/zmii.h
drivers/net/ethernet/intel/e1000e/ethtool.c
drivers/net/ethernet/intel/e1000e/hw.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/e1000e/ptp.c
drivers/net/ethernet/intel/e1000e/regs.h
drivers/net/ethernet/intel/fm10k/fm10k.h
drivers/net/ethernet/intel/fm10k/fm10k_iov.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
drivers/net/ethernet/intel/fm10k/fm10k_type.h
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_adminq.c
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40e/i40e_common.c
drivers/net/ethernet/intel/i40e/i40e_dcb.c
drivers/net/ethernet/intel/i40e/i40e_dcb.h
drivers/net/ethernet/intel/i40e/i40e_devids.h
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_nvm.c
drivers/net/ethernet/intel/i40e/i40e_prototype.h
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_type.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
drivers/net/ethernet/intel/ice/Makefile
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_base.c [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_base.h [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_dcb_lib.h
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_lib.h
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/ice/ice_txrx.h
drivers/net/ethernet/intel/ice/ice_txrx_lib.c [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_txrx_lib.h [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
drivers/net/ethernet/intel/ice/ice_xsk.c [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_xsk.h [new file with mode: 0644]
drivers/net/ethernet/intel/igb/e1000_82575.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/intel/igc/igc_defines.h
drivers/net/ethernet/intel/igc/igc_hw.h
drivers/net/ethernet/intel/igc/igc_mac.c
drivers/net/ethernet/intel/igc/igc_mac.h
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/marvell/Kconfig
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/pxa168_eth.c
drivers/net/ethernet/mediatek/mtk_eth_path.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_sgmii.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lag.h
drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/wq.c
drivers/net/ethernet/mellanox/mlx5/core/wq.h
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/core_env.c
drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
drivers/net/ethernet/mellanox/mlxsw/i2c.c
drivers/net/ethernet/mellanox/mlxsw/minimal.c
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
drivers/net/ethernet/mellanox/mlxsw/port.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/resources.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/switchib.c
drivers/net/ethernet/mellanox/mlxsw/switchx2.c
drivers/net/ethernet/mscc/ocelot_board.c
drivers/net/ethernet/netronome/nfp/bpf/jit.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/ni/nixge.c
drivers/net/ethernet/nxp/lpc_eth.c
drivers/net/ethernet/pensando/ionic/ionic.h
drivers/net/ethernet/pensando/ionic/ionic_dev.c
drivers/net/ethernet/pensando/ionic/ionic_dev.h
drivers/net/ethernet/pensando/ionic/ionic_devlink.c
drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
drivers/net/ethernet/pensando/ionic/ionic_if.h
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_main.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/qlogic/qed/qed_int.h
drivers/net/ethernet/qlogic/qed/qed_sriov.h
drivers/net/ethernet/qlogic/qede/qede_filter.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/efx.h
drivers/net/ethernet/sfc/ethtool.c
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/sfc/tx.c
drivers/net/ethernet/sgi/ioc3-eth.c
drivers/net/ethernet/socionext/netsec.c
drivers/net/ethernet/socionext/sni_ave.c
drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
drivers/net/ethernet/stmicro/stmmac/hwif.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpsw_priv.h
drivers/net/ethernet/ti/netcp_ethss.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/netdevsim/Makefile
drivers/net/netdevsim/bus.c
drivers/net/netdevsim/dev.c
drivers/net/netdevsim/fib.c
drivers/net/netdevsim/health.c [new file with mode: 0644]
drivers/net/netdevsim/netdev.c
drivers/net/netdevsim/netdevsim.h
drivers/net/phy/at803x.c
drivers/net/phy/broadcom.c
drivers/net/phy/dp83867.c
drivers/net/phy/marvell.c
drivers/net/phy/mscc.c
drivers/net/phy/phy-core.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/sfp-bus.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/cdc_ether.c
drivers/net/usb/lan78xx.c
drivers/net/usb/r8152.c
drivers/net/vxlan.c
drivers/net/wimax/i2400m/debugfs.c
drivers/net/wimax/i2400m/usb.c
drivers/net/wireless/ath/ath9k/ar9003_hw.c
drivers/net/wireless/ath/ath9k/htc_drv_main.c
drivers/net/wireless/ath/ath9k/main.c
drivers/net/wireless/ath/carl9170/main.c
drivers/net/wireless/ath/wcn36xx/main.c
drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
drivers/net/wireless/intel/iwlegacy/4965-mac.c
drivers/net/wireless/intel/iwlwifi/dvm/tx.c
drivers/net/wireless/intel/iwlwifi/iwl-fh.h
drivers/net/wireless/intel/iwlwifi/mvm/sta.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/marvell/mwl8k.c
drivers/net/wireless/mediatek/mt76/mt7603/main.c
drivers/net/wireless/mediatek/mt76/mt7615/main.c
drivers/net/wireless/mediatek/mt76/mt76x02_util.c
drivers/net/wireless/mediatek/mt7601u/main.c
drivers/net/wireless/ralink/rt2x00/rt2800lib.c
drivers/net/wireless/realtek/rtlwifi/base.c
drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
drivers/net/wireless/realtek/rtw88/mac80211.c
drivers/net/wireless/rsi/rsi_91x_mac80211.c
drivers/net/wireless/ti/wlcore/spi.c
drivers/net/xen-netback/interface.c
drivers/nfc/nfcmrvl/i2c.c
drivers/nfc/pn533/Kconfig
drivers/nfc/pn533/Makefile
drivers/nfc/pn533/i2c.c
drivers/nfc/pn533/pn533.c
drivers/nfc/pn533/pn533.h
drivers/nfc/pn533/uart.c [new file with mode: 0644]
drivers/nfc/pn533/usb.c
drivers/nfc/s3fwrn5/i2c.c
drivers/of/of_mdio.c
drivers/of/of_net.c
drivers/ptp/Kconfig
drivers/ptp/Makefile
drivers/ptp/idt8a340_reg.h [new file with mode: 0644]
drivers/ptp/ptp_clockmatrix.c [new file with mode: 0644]
drivers/ptp/ptp_clockmatrix.h [new file with mode: 0644]
drivers/ptp/ptp_dte.c
drivers/s390/cio/qdio.h
drivers/s390/cio/qdio_main.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_core_mpc.h
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3.h
drivers/s390/net/qeth_l3_main.c
drivers/soc/fsl/qbman/qman.c
drivers/spi/spi.c
drivers/staging/Kconfig
drivers/staging/Makefile
drivers/staging/hp/Kconfig [new file with mode: 0644]
drivers/staging/hp/Makefile [new file with mode: 0644]
drivers/staging/hp/hp100.c [new file with mode: 0644]
drivers/staging/hp/hp100.h [new file with mode: 0644]
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/bpf_verifier.h
include/linux/brcmphy.h
include/linux/btf.h
include/linux/dim.h
include/linux/extable.h
include/linux/filter.h
include/linux/firmware/broadcom/tee_bnxt_fw.h [new file with mode: 0644]
include/linux/fsl/mc.h
include/linux/icmp.h
include/linux/icmpv6.h
include/linux/linkmode.h
include/linux/mroute_base.h
include/linux/netdevice.h
include/linux/netfilter.h
include/linux/netfilter/ipset/ip_set.h
include/linux/netfilter/ipset/ip_set_bitmap.h
include/linux/netfilter/ipset/ip_set_getport.h
include/linux/of_net.h
include/linux/phy.h
include/linux/sfp.h
include/linux/skbuff.h
include/linux/skmsg.h
include/linux/spi/spi.h
include/linux/stmmac.h
include/linux/sxgbe_platform.h
include/linux/tcp.h
include/linux/uaccess.h
include/net/act_api.h
include/net/addrconf.h
include/net/cfg80211.h
include/net/devlink.h
include/net/dsa.h
include/net/fib_notifier.h
include/net/fib_rules.h
include/net/flow_dissector.h
include/net/gen_stats.h
include/net/genetlink.h
include/net/ip6_fib.h
include/net/ip_fib.h
include/net/ip_vs.h
include/net/ipv6.h
include/net/mac80211.h
include/net/net_namespace.h
include/net/netfilter/nf_conntrack_extend.h
include/net/netfilter/nf_flow_table.h
include/net/netfilter/nf_tables.h
include/net/netns/mib.h
include/net/sch_generic.h
include/net/sctp/ulpevent.h
include/net/smc.h
include/net/snmp.h
include/net/sock.h
include/net/tls.h
include/net/tls_toe.h [new file with mode: 0644]
include/net/xdp_sock.h
include/soc/fsl/qman.h
include/trace/bpf_probe.h
include/trace/events/bridge.h
include/trace/events/xdp.h
include/uapi/linux/bpf.h
include/uapi/linux/dcbnl.h
include/uapi/linux/devlink.h
include/uapi/linux/ethtool.h
include/uapi/linux/gen_stats.h
include/uapi/linux/if.h
include/uapi/linux/if_link.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/netfilter_arp/arp_tables.h
include/uapi/linux/netfilter_bridge/ebtables.h
include/uapi/linux/netfilter_ipv4/ip_tables.h
include/uapi/linux/netfilter_ipv6/ip6_tables.h
include/uapi/linux/nl80211.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/rtnetlink.h
include/uapi/linux/sctp.h
include/uapi/linux/snmp.h
include/uapi/linux/tcp.h
include/uapi/linux/tipc.h
include/uapi/linux/tipc_config.h
include/uapi/linux/virtio_ring.h
init/do_mounts.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/stackmap.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/bpf/xskmap.c
kernel/extable.c
kernel/trace/bpf_trace.c
lib/test_bpf.c
mm/maccess.c
net/bluetooth/hci_conn.c
net/bluetooth/hci_core.c
net/bluetooth/smp.c
net/bpf/test_run.c
net/bridge/br_fdb.c
net/bridge/br_input.c
net/bridge/br_private.h
net/bridge/br_switchdev.c
net/caif/Kconfig
net/core/dev.c
net/core/devlink.c
net/core/fib_notifier.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow_dissector.c
net/core/gen_stats.c
net/core/net-procfs.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/sock.c
net/core/xdp.c
net/dsa/dsa.c
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/port.c
net/dsa/slave.c
net/dsa/switch.c
net/dsa/tag_8021q.c
net/ieee802154/nl802154.c
net/ipv4/fib_notifier.c
net/ipv4/fib_rules.c
net/ipv4/fib_trie.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/ip_input.c
net/ipv4/ipconfig.c
net/ipv4/ipmr.c
net/ipv4/ipmr_base.c
net/ipv4/netfilter/nf_socket_ipv4.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv6/addrconf.c
net/ipv6/fib6_notifier.c
net/ipv6/fib6_rules.c
net/ipv6/icmp.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_input.c
net/ipv6/ip6mr.c
net/ipv6/netfilter/nf_tproxy_ipv6.c
net/ipv6/route.c
net/mac80211/agg-tx.c
net/mac80211/ibss.c
net/mac80211/rc80211_minstrel.c
net/mac80211/rc80211_minstrel.h
net/mac80211/rc80211_minstrel_debugfs.c
net/mac80211/rc80211_minstrel_ht.c
net/mac80211/rc80211_minstrel_ht.h
net/mac80211/rc80211_minstrel_ht_debugfs.c
net/mac80211/tx.c
net/netfilter/core.c
net/netfilter/ipset/ip_set_bitmap_gen.h
net/netfilter/ipset/ip_set_bitmap_ip.c
net/netfilter/ipset/ip_set_bitmap_ipmac.c
net/netfilter/ipset/ip_set_bitmap_port.c
net/netfilter/ipset/ip_set_core.c
net/netfilter/ipset/ip_set_getport.c
net/netfilter/ipset/ip_set_hash_gen.h
net/netfilter/ipset/ip_set_hash_ip.c
net/netfilter/ipset/ip_set_hash_ipmac.c
net/netfilter/ipset/ip_set_hash_ipmark.c
net/netfilter/ipset/ip_set_hash_ipport.c
net/netfilter/ipset/ip_set_hash_ipportip.c
net/netfilter/ipset/ip_set_hash_ipportnet.c
net/netfilter/ipset/ip_set_hash_mac.c
net/netfilter/ipset/ip_set_hash_net.c
net/netfilter/ipset/ip_set_hash_netiface.c
net/netfilter/ipset/ip_set_hash_netnet.c
net/netfilter/ipset/ip_set_hash_netport.c
net/netfilter/ipset/ip_set_hash_netportnet.c
net/netfilter/ipset/ip_set_list_set.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_ovf.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_ecache.c
net/netfilter/nf_conntrack_extend.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_icmp.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_offload.c
net/netfilter/nft_chain_filter.c
net/netfilter/xt_HMARK.c
net/netlink/genetlink.c
net/nfc/netlink.c
net/openvswitch/actions.c
net/openvswitch/conntrack.c
net/openvswitch/datapath.c
net/openvswitch/flow.c
net/openvswitch/flow.h
net/openvswitch/flow_netlink.c
net/openvswitch/flow_table.c
net/openvswitch/flow_table.h
net/qrtr/tun.c
net/rds/ib.c
net/rds/ib.h
net/rds/ib_cm.c
net/rds/ib_recv.c
net/rds/ib_send.c
net/rxrpc/peer_object.c
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_ct.c
net/sched/act_ctinfo.c
net/sched/act_gact.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_mpls.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/sch_fq.c
net/sched/sch_fq_codel.c
net/sched/sch_generic.c
net/sctp/associola.c
net/sctp/chunk.c
net/sctp/ulpevent.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_cdc.c
net/smc/smc_close.c
net/smc/smc_close.h
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_ib.c
net/smc/smc_ib.h
net/smc/smc_ism.c
net/smc/smc_llc.c
net/smc/smc_pnet.c
net/smc/smc_rx.c
net/smc/smc_tx.c
net/smc/smc_wr.c
net/tipc/core.c
net/tipc/core.h
net/tipc/discover.c
net/tipc/link.c
net/tipc/msg.c
net/tipc/msg.h
net/tipc/name_distr.c
net/tipc/netlink.c
net/tipc/netlink.h
net/tipc/netlink_compat.c
net/tipc/node.c
net/tipc/node.h
net/tipc/socket.c
net/tipc/udp_media.c
net/tls/Kconfig
net/tls/Makefile
net/tls/tls_device.c
net/tls/tls_main.c
net/tls/tls_proc.c [new file with mode: 0644]
net/tls/tls_sw.c
net/tls/tls_toe.c [new file with mode: 0644]
net/tls/trace.c [new file with mode: 0644]
net/tls/trace.h [new file with mode: 0644]
net/unix/af_unix.c
net/vmw_vsock/af_vsock.c
net/vmw_vsock/hyperv_transport.c
net/vmw_vsock/virtio_transport_common.c
net/wireless/nl80211.c
net/wireless/reg.h
net/xdp/xsk.c
samples/bpf/Makefile
samples/bpf/Makefile.target [new file with mode: 0644]
samples/bpf/README.rst
samples/bpf/hbm_kern.h
samples/bpf/map_perf_test_kern.c
samples/bpf/offwaketime_kern.c
samples/bpf/parse_ldabs.c
samples/bpf/sampleip_kern.c
samples/bpf/sockex1_kern.c
samples/bpf/sockex2_kern.c
samples/bpf/sockex3_kern.c
samples/bpf/spintest_kern.c
samples/bpf/tcbpf1_kern.c
samples/bpf/test_map_in_map_kern.c
samples/bpf/test_overhead_kprobe_kern.c
samples/bpf/test_probe_write_user_kern.c
samples/bpf/trace_event_kern.c
samples/bpf/tracex1_kern.c
samples/bpf/tracex2_kern.c
samples/bpf/tracex3_kern.c
samples/bpf/tracex4_kern.c
samples/bpf/tracex5_kern.c
samples/bpf/xdp_adjust_tail_kern.c
samples/bpf/xdp_adjust_tail_user.c
samples/bpf/xdpsock_user.c
samples/pktgen/README.rst
samples/pktgen/functions.sh
samples/pktgen/parameters.sh
samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
samples/pktgen/pktgen_sample01_simple.sh
samples/pktgen/pktgen_sample02_multiqueue.sh
samples/pktgen/pktgen_sample03_burst_single_flow.sh
samples/pktgen/pktgen_sample04_many_flows.sh
samples/pktgen/pktgen_sample05_flow_per_thread.sh
samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
scripts/bpf_helpers_doc.py
security/selinux/nlmsgtab.c
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/prog.c
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/if_link.h
tools/lib/bpf/.gitignore
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_core_read.h [new file with mode: 0644]
tools/lib/bpf/bpf_endian.h [new file with mode: 0644]
tools/lib/bpf/bpf_helpers.h [new file with mode: 0644]
tools/lib/bpf/bpf_tracing.h [new file with mode: 0644]
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/libbpf_probes.c
tools/lib/bpf/test_libbpf.c [new file with mode: 0644]
tools/lib/bpf/test_libbpf.cpp [deleted file]
tools/lib/bpf/xsk.c
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_endian.h [deleted file]
tools/testing/selftests/bpf/bpf_helpers.h [deleted file]
tools/testing/selftests/bpf/bpf_legacy.h [new file with mode: 0644]
tools/testing/selftests/bpf/cgroup_helpers.c
tools/testing/selftests/bpf/prog_tests/attach_probe.c
tools/testing/selftests/bpf/prog_tests/btf_dump.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/core_reloc.c
tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/kfree_skb.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/pinning.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/probe_user.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/rdonly_maps.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/reference_tracking.c
tools/testing/selftests/bpf/prog_tests/section_names.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/skb_ctx.c
tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
tools/testing/selftests/bpf/progs/core_reloc_types.h
tools/testing/selftests/bpf/progs/kfree_skb.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/loop1.c
tools/testing/selftests/bpf/progs/loop2.c
tools/testing/selftests/bpf/progs/loop3.c
tools/testing/selftests/bpf/progs/pyperf.h
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/strobemeta.h
tools/testing/selftests/bpf/progs/tcp_rtt.c
tools/testing/selftests/bpf/progs/test_attach_probe.c
tools/testing/selftests/bpf/progs/test_btf_haskv.c
tools/testing/selftests/bpf/progs/test_btf_newkv.c
tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
tools/testing/selftests/bpf/progs/test_core_reloc_existence.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
tools/testing/selftests/bpf/progs/test_core_reloc_ints.c
tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
tools/testing/selftests/bpf/progs/test_core_reloc_misc.c
tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c
tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c
tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
tools/testing/selftests/bpf/progs/test_perf_buffer.c
tools/testing/selftests/bpf/progs/test_pinning.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_pinning_invalid.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_probe_user.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_queue_stack_map.h [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_rdonly_maps.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
tools/testing/selftests/bpf/progs/test_skb_ctx.c
tools/testing/selftests/bpf/progs/test_stacktrace_map.c
tools/testing/selftests/bpf/progs/test_tcp_estats.c
tools/testing/selftests/bpf/test_btf_dump.c [deleted file]
tools/testing/selftests/bpf/test_flow_dissector.sh
tools/testing/selftests/bpf/test_libbpf.sh [deleted file]
tools/testing/selftests/bpf/test_libbpf_open.c [deleted file]
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_queue_stack_map.h [deleted file]
tools/testing/selftests/bpf/test_section_names.c [deleted file]
tools/testing/selftests/bpf/test_sysctl.c
tools/testing/selftests/bpf/verifier/loops1.c
tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
tools/testing/selftests/drivers/net/netdevsim/devlink.sh
tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/netfilter/Makefile
tools/testing/selftests/netfilter/ipvs.sh [new file with mode: 0755]
tools/testing/selftests/tc-testing/config
tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json

index 397118d..55db278 100644 (file)
@@ -51,6 +51,14 @@ Description:
                packet processing. See the network driver for the exact
                meaning of this value.
 
+What:          /sys/class/<iface>/statistics/rx_errors
+Date:          April 2005
+KernelVersion: 2.6.12
+Contact:       netdev@vger.kernel.org
+Description:
+               Indicates the number of receive errors on this network device.
+               See the network driver for the exact meaning of this value.
+
 What:          /sys/class/<iface>/statistics/rx_fifo_errors
 Date:          April 2005
 KernelVersion: 2.6.12
@@ -88,6 +96,14 @@ Description:
                due to lack of capacity in the receive side. See the network
                driver for the exact meaning of this value.
 
+What:          /sys/class/<iface>/statistics/rx_nohandler
+Date:          February 2016
+KernelVersion: 4.6
+Contact:       netdev@vger.kernel.org
+Description:
+               Indicates the number of received packets that were dropped on
+               an inactive device by the network core.
+
 What:          /sys/class/<iface>/statistics/rx_over_errors
 Date:          April 2005
 KernelVersion: 2.6.12
index 801a6ed..4f5410b 100644 (file)
@@ -47,6 +47,15 @@ Program types
    prog_flow_dissector
 
 
+Testing BPF
+===========
+
+.. toctree::
+   :maxdepth: 1
+
+   s390
+
+
 .. Links:
 .. _Documentation/networking/filter.txt: ../networking/filter.txt
 .. _man-pages: https://www.kernel.org/doc/man-pages/
index a78bf03..4d86780 100644 (file)
@@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel
 C-based implementation can export. Notable example is single VLAN (802.1Q)
 and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
 for a set of information that's currently can be exported from the BPF context.
+
+When BPF flow dissector is attached to the root network namespace (machine-wide
+policy), users can't override it in their child network namespaces.
diff --git a/Documentation/bpf/s390.rst b/Documentation/bpf/s390.rst
new file mode 100644 (file)
index 0000000..21ecb30
--- /dev/null
@@ -0,0 +1,205 @@
+===================
+Testing BPF on s390
+===================
+
+1. Introduction
+***************
+
+IBM Z are mainframe computers, which are descendants of IBM System/360 from
+year 1964. They are supported by the Linux kernel under the name "s390". This
+document describes how to test BPF in an s390 QEMU guest.
+
+2. One-time setup
+*****************
+
+The following is required to build and run the test suite:
+
+  * s390 GCC
+  * s390 development headers and libraries
+  * Clang with BPF support
+  * QEMU with s390 support
+  * Disk image with s390 rootfs
+
+Debian supports installing compiler and libraries for s390 out of the box.
+Users of other distros may use debootstrap in order to set up a Debian chroot::
+
+  sudo debootstrap \
+    --variant=minbase \
+    --include=sudo \
+    testing \
+    ./s390-toolchain
+  sudo mount --rbind /dev ./s390-toolchain/dev
+  sudo mount --rbind /proc ./s390-toolchain/proc
+  sudo mount --rbind /sys ./s390-toolchain/sys
+  sudo chroot ./s390-toolchain
+
+Once on Debian, the build prerequisites can be installed as follows::
+
+  sudo dpkg --add-architecture s390x
+  sudo apt-get update
+  sudo apt-get install \
+    bc \
+    bison \
+    cmake \
+    debootstrap \
+    dwarves \
+    flex \
+    g++ \
+    gcc \
+    g++-s390x-linux-gnu \
+    gcc-s390x-linux-gnu \
+    gdb-multiarch \
+    git \
+    make \
+    python3 \
+    qemu-system-misc \
+    qemu-utils \
+    rsync \
+    libcap-dev:s390x \
+    libelf-dev:s390x \
+    libncurses-dev
+
+Latest Clang targeting BPF can be installed as follows::
+
+  git clone https://github.com/llvm/llvm-project.git
+  ln -s ../../clang llvm-project/llvm/tools/
+  mkdir llvm-project-build
+  cd llvm-project-build
+  cmake \
+    -DLLVM_TARGETS_TO_BUILD=BPF \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=/opt/clang-bpf \
+    ../llvm-project/llvm
+  make
+  sudo make install
+  export PATH=/opt/clang-bpf/bin:$PATH
+
+The disk image can be prepared using a loopback mount and debootstrap::
+
+  qemu-img create -f raw ./s390.img 1G
+  sudo losetup -f ./s390.img
+  sudo mkfs.ext4 /dev/loopX
+  mkdir ./s390.rootfs
+  sudo mount /dev/loopX ./s390.rootfs
+  sudo debootstrap \
+    --foreign \
+    --arch=s390x \
+    --variant=minbase \
+    --include=" \
+      iproute2, \
+      iputils-ping, \
+      isc-dhcp-client, \
+      kmod, \
+      libcap2, \
+      libelf1, \
+      netcat, \
+      procps" \
+    testing \
+    ./s390.rootfs
+  sudo umount ./s390.rootfs
+  sudo losetup -d /dev/loopX
+
+3. Compilation
+**************
+
+In addition to the usual Kconfig options required to run the BPF test suite, it
+is also helpful to select::
+
+  CONFIG_NET_9P=y
+  CONFIG_9P_FS=y
+  CONFIG_NET_9P_VIRTIO=y
+  CONFIG_VIRTIO_PCI=y
+
+as that would enable a very easy way to share files with the s390 virtual
+machine.
+
+Compiling kernel, modules and testsuite, as well as preparing gdb scripts to
+simplify debugging, can be done using the following commands::
+
+  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- menuconfig
+  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- bzImage modules scripts_gdb
+  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- \
+    -C tools/testing/selftests \
+    TARGETS=bpf \
+    INSTALL_PATH=$PWD/tools/testing/selftests/kselftest_install \
+    install
+
+4. Running the test suite
+*************************
+
+The virtual machine can be started as follows::
+
+  qemu-system-s390x \
+    -cpu max,zpci=on \
+    -smp 2 \
+    -m 4G \
+    -kernel linux/arch/s390/boot/compressed/vmlinux \
+    -drive file=./s390.img,if=virtio,format=raw \
+    -nographic \
+    -append 'root=/dev/vda rw console=ttyS1' \
+    -virtfs local,path=./linux,security_model=none,mount_tag=linux \
+    -object rng-random,filename=/dev/urandom,id=rng0 \
+    -device virtio-rng-ccw,rng=rng0 \
+    -netdev user,id=net0 \
+    -device virtio-net-ccw,netdev=net0
+
+When using this on a real IBM Z, ``-enable-kvm`` may be added for better
+performance. When starting the virtual machine for the first time, disk image
+setup must be finalized using the following command::
+
+  /debootstrap/debootstrap --second-stage
+
+Directory with the code built on the host as well as ``/proc`` and ``/sys``
+need to be mounted as follows::
+
+  mkdir -p /linux
+  mount -t 9p linux /linux
+  mount -t proc proc /proc
+  mount -t sysfs sys /sys
+
+After that, the test suite can be run using the following commands::
+
+  cd /linux/tools/testing/selftests/kselftest_install
+  ./run_kselftest.sh
+
+As usual, tests can be also run individually::
+
+  cd /linux/tools/testing/selftests/bpf
+  ./test_verifier
+
+5. Debugging
+************
+
+It is possible to debug the s390 kernel using QEMU GDB stub, which is activated
+by passing ``-s`` to QEMU.
+
+It is preferable to turn KASLR off, so that gdb would know where to find the
+kernel image in memory, by building the kernel with::
+
+  RANDOMIZE_BASE=n
+
+GDB can then be attached using the following command::
+
+  gdb-multiarch -ex 'target remote localhost:1234' ./vmlinux
+
+6. Network
+**********
+
+In case one needs to use the network in the virtual machine in order to e.g.
+install additional packages, it can be configured using::
+
+  dhclient eth0
+
+7. Links
+********
+
+This document is a compilation of techniques, whose more comprehensive
+descriptions can be found by following these links:
+
+- `Debootstrap <https://wiki.debian.org/EmDebian/CrossDebootstrap>`_
+- `Multiarch <https://wiki.debian.org/Multiarch/HOWTO>`_
+- `Building LLVM <https://llvm.org/docs/CMake.html>`_
+- `Cross-compiling the kernel <https://wiki.gentoo.org/wiki/Embedded_Handbook/General/Cross-compiling_the_kernel>`_
+- `QEMU s390x Guest Support <https://wiki.qemu.org/Documentation/Platforms/S390X>`_
+- `Plan 9 folder sharing over Virtio <https://wiki.qemu.org/Documentation/9psetup>`_
+- `Using GDB with QEMU <https://wiki.osdev.org/Kernel_Debugging#Use_GDB_with_QEMU>`_
index b7336b9..48a7f91 100644 (file)
@@ -44,6 +44,12 @@ Optional properties:
   Admission Control Block supports reporting the number of packets in-flight in a
   switch queue
 
+- resets: a single phandle and reset identifier pair. See
+  Documentation/devicetree/bindings/reset/reset.txt for details.
+
+- reset-names: If the "reset" property is specified, this property should have
+  the value "switch" to denote the switch reset line.
+
 Port subnodes:
 
 Optional properties:
index 72e7aaf..f878c11 100644 (file)
@@ -9,6 +9,7 @@ Required properties:
 
      - "aspeed,ast2400-mac"
      - "aspeed,ast2500-mac"
+     - "aspeed,ast2600-mac"
 
 - reg: Address and length of the register set for the device
 - interrupts: Should contain ethernet controller interrupt
@@ -23,6 +24,13 @@ Optional properties:
 - no-hw-checksum: Used to disable HW checksum support. Here for backward
   compatibility as the driver now should have correct defaults based on
   the SoC.
+- clocks: In accordance with the generic clock bindings. Must describe the MAC
+  IP clock, and optionally an RMII RCLK gate for the AST2500/AST2600. The
+  required MAC clock must be the first cell.
+- clock-names:
+
+      - "MACCLK": The MAC IP clock
+      - "RCLK": Clock gate for the RMII RCLK
 
 Example:
 
index b92e927..cfe0e59 100644 (file)
@@ -10,6 +10,11 @@ Optional properties:
   absent, "rmii" is assumed.
 - use-iram: Use LPC32xx internal SRAM (IRAM) for DMA buffering
 
+Optional subnodes:
+- mdio : specifies the mdio bus, used as a container for phy nodes according to
+  phy.txt in the same directory
+
+
 Example:
 
        mac: ethernet@31060000 {
diff --git a/Documentation/devicetree/bindings/net/nfc/pn532.txt b/Documentation/devicetree/bindings/net/nfc/pn532.txt
new file mode 100644 (file)
index 0000000..a5507dc
--- /dev/null
@@ -0,0 +1,46 @@
+* NXP Semiconductors PN532 NFC Controller
+
+Required properties:
+- compatible: Should be
+    - "nxp,pn532" Place a node with this inside the devicetree node of the bus
+                  where the NFC chip is connected to.
+                  Currently the kernel has phy bindings for uart and i2c.
+    - "nxp,pn532-i2c" (DEPRECATED) only works for the i2c binding.
+    - "nxp,pn533-i2c" (DEPRECATED) only works for the i2c binding.
+
+Required properties if connected on i2c:
+- clock-frequency: I²C work frequency.
+- reg: for the I²C bus address. This is fixed at 0x24 for the PN532.
+- interrupts: GPIO interrupt to which the chip is connected
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pinctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBone with PN532 on I2C2):
+
+&i2c2 {
+
+
+       pn532: nfc@24 {
+
+               compatible = "nxp,pn532";
+
+               reg = <0x24>;
+               clock-frequency = <400000>;
+
+               interrupt-parent = <&gpio1>;
+               interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
+
+       };
+};
+
+Example (for PN532 connected via uart):
+
+uart4: serial@49042000 {
+        compatible = "ti,omap3-uart";
+
+        pn532: nfc {
+                compatible = "nxp,pn532";
+        };
+};
diff --git a/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt b/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt
deleted file mode 100644 (file)
index 2efe388..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-* NXP Semiconductors PN532 NFC Controller
-
-Required properties:
-- compatible: Should be "nxp,pn532-i2c" or "nxp,pn533-i2c".
-- clock-frequency: I²C work frequency.
-- reg: address on the bus
-- interrupts: GPIO interrupt to which the chip is connected
-
-Optional SoC Specific Properties:
-- pinctrl-names: Contains only one value - "default".
-- pintctrl-0: Specifies the pin control groups used for this controller.
-
-Example (for ARM-based BeagleBone with PN532 on I2C2):
-
-&i2c2 {
-
-
-       pn532: pn532@24 {
-
-               compatible = "nxp,pn532-i2c";
-
-               reg = <0x24>;
-               clock-frequency = <400000>;
-
-               interrupt-parent = <&gpio1>;
-               interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
-
-       };
-};
diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml
new file mode 100644 (file)
index 0000000..7f84df9
--- /dev/null
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/renesas,ether.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Electronics SH EtherMAC
+
+allOf:
+  - $ref: ethernet-controller.yaml#
+
+maintainers:
+  - Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - renesas,gether-r8a7740   # device is a part of R8A7740 SoC
+              - renesas,gether-r8a77980  # device is a part of R8A77980 SoC
+              - renesas,ether-r7s72100   # device is a part of R7S72100 SoC
+              - renesas,ether-r7s9210    # device is a part of R7S9210 SoC
+      - items:
+          - enum:
+              - renesas,ether-r8a7778    # device is a part of R8A7778 SoC
+              - renesas,ether-r8a7779    # device is a part of R8A7779 SoC
+          - enum:
+              - renesas,rcar-gen1-ether  # a generic R-Car Gen1 device
+      - items:
+          - enum:
+              - renesas,ether-r8a7745    # device is a part of R8A7745 SoC
+              - renesas,ether-r8a7743    # device is a part of R8A7743 SoC
+              - renesas,ether-r8a7790    # device is a part of R8A7790 SoC
+              - renesas,ether-r8a7791    # device is a part of R8A7791 SoC
+              - renesas,ether-r8a7793    # device is a part of R8A7793 SoC
+              - renesas,ether-r8a7794    # device is a part of R8A7794 SoC
+          - enum:
+              - renesas,rcar-gen2-ether  # a generic R-Car Gen2 or RZ/G1 device
+
+  reg:
+    items:
+       - description: E-DMAC/feLic registers
+       - description: TSU registers
+    minItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  '#address-cells':
+    description: number of address cells for the MDIO bus
+    const: 1
+
+  '#size-cells':
+    description: number of size cells on the MDIO bus
+    const: 0
+
+  clocks:
+    maxItems: 1
+
+  pinctrl-0: true
+
+  pinctrl-names: true
+
+  renesas,no-ether-link:
+    type: boolean
+    description:
+      specify when a board does not provide a proper Ether LINK signal
+
+  renesas,ether-link-active-low:
+    type: boolean
+    description:
+      specify when the Ether LINK signal is active-low instead of normal
+      active-high
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - phy-mode
+  - phy-handle
+  - '#address-cells'
+  - '#size-cells'
+  - clocks
+  - pinctrl-0
+
+examples:
+  # Lager board
+  - |
+    #include <dt-bindings/clock/r8a7790-clock.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    ethernet@ee700000 {
+        compatible = "renesas,ether-r8a7790", "renesas,rcar-gen2-ether";
+        reg = <0 0xee700000 0 0x400>;
+        interrupt-parent = <&gic>;
+        interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&mstp8_clks R8A7790_CLK_ETHER>;
+        phy-mode = "rmii";
+        phy-handle = <&phy1>;
+        pinctrl-0 = <&ether_pins>;
+        pinctrl-names = "default";
+        renesas,ether-link-active-low;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        phy1: ethernet-phy@1 {
+            reg = <1>;
+            interrupt-parent = <&irqc0>;
+            interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+            pinctrl-0 = <&phy1_pins>;
+            pinctrl-names = "default";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt
deleted file mode 100644 (file)
index abc3627..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-* Renesas Electronics SH EtherMAC
-
-This file provides information on what the device node for the SH EtherMAC
-interface contains.
-
-Required properties:
-- compatible: Must contain one or more of the following:
-             "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC.
-             "renesas,ether-r8a7743"  if the device is a part of R8A7743 SoC.
-             "renesas,ether-r8a7745"  if the device is a part of R8A7745 SoC.
-             "renesas,ether-r8a7778"  if the device is a part of R8A7778 SoC.
-             "renesas,ether-r8a7779"  if the device is a part of R8A7779 SoC.
-             "renesas,ether-r8a7790"  if the device is a part of R8A7790 SoC.
-             "renesas,ether-r8a7791"  if the device is a part of R8A7791 SoC.
-             "renesas,ether-r8a7793"  if the device is a part of R8A7793 SoC.
-             "renesas,ether-r8a7794"  if the device is a part of R8A7794 SoC.
-             "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC.
-             "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC.
-             "renesas,ether-r7s9210" if the device is a part of R7S9210 SoC.
-             "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device.
-             "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1
-                                       device.
-
-             When compatible with the generic version, nodes must list
-             the SoC-specific version corresponding to the platform
-             first followed by the generic version.
-
-- reg: offset and length of (1) the E-DMAC/feLic register block (required),
-       (2) the TSU register block (optional).
-- interrupts: interrupt specifier for the sole interrupt.
-- phy-mode: see ethernet.txt file in the same directory.
-- phy-handle: see ethernet.txt file in the same directory.
-- #address-cells: number of address cells for the MDIO bus, must be equal to 1.
-- #size-cells: number of size cells on the MDIO bus, must be equal to 0.
-- clocks: clock phandle and specifier pair.
-- pinctrl-0: phandle, referring to a default pin configuration node.
-
-Optional properties:
-- pinctrl-names: pin configuration state name ("default").
-- renesas,no-ether-link: boolean, specify when a board does not provide a proper
-                        Ether LINK signal.
-- renesas,ether-link-active-low: boolean, specify when the Ether LINK signal is
-                                active-low instead of normal active-high.
-
-Example (Lager board):
-
-       ethernet@ee700000 {
-               compatible = "renesas,ether-r8a7790",
-                            "renesas,rcar-gen2-ether";
-               reg = <0 0xee700000 0 0x400>;
-               interrupt-parent = <&gic>;
-               interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>;
-               clocks = <&mstp8_clks R8A7790_CLK_ETHER>;
-               phy-mode = "rmii";
-               phy-handle = <&phy1>;
-               pinctrl-0 = <&ether_pins>;
-               pinctrl-names = "default";
-               renesas,ether-link-active-low;
-               #address-cells = <1>;
-               #size-cells = <0>;
-
-               phy1: ethernet-phy@1 {
-                       reg = <1>;
-                       interrupt-parent = <&irqc0>;
-                       interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
-                       pinctrl-0 = <&phy1_pins>;
-                       pinctrl-names = "default";
-               };
-       };
diff --git a/Documentation/devicetree/bindings/ptp/ptp-idtcm.yaml b/Documentation/devicetree/bindings/ptp/ptp-idtcm.yaml
new file mode 100644 (file)
index 0000000..9e21b83
--- /dev/null
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ptp/ptp-idtcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: IDT ClockMatrix (TM) PTP Clock Device Tree Bindings
+
+maintainers:
+  - Vincent Cheng <vincent.cheng.xh@renesas.com>
+
+properties:
+  compatible:
+    enum:
+      # For System Synchronizer
+      - idt,8a34000
+      - idt,8a34001
+      - idt,8a34002
+      - idt,8a34003
+      - idt,8a34004
+      - idt,8a34005
+      - idt,8a34006
+      - idt,8a34007
+      - idt,8a34008
+      - idt,8a34009
+      # For Port Synchronizer
+      - idt,8a34010
+      - idt,8a34011
+      - idt,8a34012
+      - idt,8a34013
+      - idt,8a34014
+      - idt,8a34015
+      - idt,8a34016
+      - idt,8a34017
+      - idt,8a34018
+      - idt,8a34019
+      # For Universal Frequency Translator (UFT)
+      - idt,8a34040
+      - idt,8a34041
+      - idt,8a34042
+      - idt,8a34043
+      - idt,8a34044
+      - idt,8a34045
+      - idt,8a34046
+      - idt,8a34047
+      - idt,8a34048
+      - idt,8a34049
+
+  reg:
+    maxItems: 1
+    description:
+      I2C slave address of the device.
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c@1 {
+        compatible = "abc,acme-1234";
+        reg = <0x01 0x400>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        phc@5b {
+            compatible = "idt,8a34000";
+            reg = <0x5b>;
+        };
+    };
index 83f7ae5..7a4caaa 100644 (file)
@@ -40,13 +40,13 @@ allocates memory for this UMEM using whatever means it feels is most
 appropriate (malloc, mmap, huge pages, etc). This memory area is then
 registered with the kernel using the new setsockopt XDP_UMEM_REG. The
 UMEM also has two rings: the FILL ring and the COMPLETION ring. The
-fill ring is used by the application to send down addr for the kernel
+FILL ring is used by the application to send down addr for the kernel
 to fill in with RX packet data. References to these frames will then
 appear in the RX ring once each packet has been received. The
-completion ring, on the other hand, contains frame addr that the
+COMPLETION ring, on the other hand, contains frame addr that the
 kernel has transmitted completely and can now be used again by user
 space, for either TX or RX. Thus, the frame addrs appearing in the
-completion ring are addrs that were previously transmitted using the
+COMPLETION ring are addrs that were previously transmitted using the
 TX ring. In summary, the RX and FILL rings are used for the RX path
 and the TX and COMPLETION rings are used for the TX path.
 
@@ -91,11 +91,16 @@ Concepts
 ========
 
 In order to use an AF_XDP socket, a number of associated objects need
-to be setup.
+to be set up. These objects and their options are explained in the
+following sections.
 
-Jonathan Corbet has also written an excellent article on LWN,
-"Accelerating networking with AF_XDP". It can be found at
-https://lwn.net/Articles/750845/.
+For an overview on how AF_XDP works, you can also take a look at the
+Linux Plumbers paper from 2018 on the subject:
+http://vger.kernel.org/lpc_net2018_talks/lpc18_paper_af_xdp_perf-v2.pdf. Do
+NOT consult the paper from 2017 on "AF_PACKET v4", the first attempt
+at AF_XDP. Nearly everything changed since then. Jonathan Corbet has
+also written an excellent article on LWN, "Accelerating networking
+with AF_XDP". It can be found at https://lwn.net/Articles/750845/.
 
 UMEM
 ----
@@ -113,22 +118,22 @@ the next socket B can do this by setting the XDP_SHARED_UMEM flag in
 struct sockaddr_xdp member sxdp_flags, and passing the file descriptor
 of A to struct sockaddr_xdp member sxdp_shared_umem_fd.
 
-The UMEM has two single-producer/single-consumer rings, that are used
+The UMEM has two single-producer/single-consumer rings that are used
 to transfer ownership of UMEM frames between the kernel and the
 user-space application.
 
 Rings
 -----
 
-There are a four different kind of rings: Fill, Completion, RX and
+There are four different kinds of rings: FILL, COMPLETION, RX and
 TX. All rings are single-producer/single-consumer, so the user-space
 application need explicit synchronization of multiple
 processes/threads are reading/writing to them.
 
-The UMEM uses two rings: Fill and Completion. Each socket associated
+The UMEM uses two rings: FILL and COMPLETION. Each socket associated
 with the UMEM must have an RX queue, TX queue or both. Say, that there
 is a setup with four sockets (all doing TX and RX). Then there will be
-one Fill ring, one Completion ring, four TX rings and four RX rings.
+one FILL ring, one COMPLETION ring, four TX rings and four RX rings.
 
 The rings are head(producer)/tail(consumer) based rings. A producer
 writes the data ring at the index pointed out by struct xdp_ring
@@ -146,7 +151,7 @@ The size of the rings need to be of size power of two.
 UMEM Fill Ring
 ~~~~~~~~~~~~~~
 
-The Fill ring is used to transfer ownership of UMEM frames from
+The FILL ring is used to transfer ownership of UMEM frames from
 user-space to kernel-space. The UMEM addrs are passed in the ring. As
 an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has
 16 chunks and can pass addrs between 0 and 64k.
@@ -164,8 +169,8 @@ chunks mode, then the incoming addr will be left untouched.
 UMEM Completion Ring
 ~~~~~~~~~~~~~~~~~~~~
 
-The Completion Ring is used transfer ownership of UMEM frames from
-kernel-space to user-space. Just like the Fill ring, UMEM indicies are
+The COMPLETION Ring is used to transfer ownership of UMEM frames from
+kernel-space to user-space. Just like the FILL ring, UMEM indices are
 used.
 
 Frames passed from the kernel to user-space are frames that has been
@@ -181,7 +186,7 @@ The RX ring is the receiving side of a socket. Each entry in the ring
 is a struct xdp_desc descriptor. The descriptor contains UMEM offset
 (addr) and the length of the data (len).
 
-If no frames have been passed to kernel via the Fill ring, no
+If no frames have been passed to kernel via the FILL ring, no
 descriptors will (or can) appear on the RX ring.
 
 The user application consumes struct xdp_desc descriptors from this
@@ -199,8 +204,24 @@ be relaxed in the future.
 The user application produces struct xdp_desc descriptors to this
 ring.
 
+Libbpf
+======
+
+Libbpf is a helper library for eBPF and XDP that makes using these
+technologies a lot simpler. It also contains specific helper functions
+in tools/lib/bpf/xsk.h for facilitating the use of AF_XDP. It
+contains two types of functions: those that can be used to make the
+setup of AF_XDP socket easier and ones that can be used in the data
+plane to access the rings safely and quickly. To see an example on how
+to use this API, please take a look at the sample application in
+samples/bpf/xdpsock_usr.c which uses libbpf for both setup and data
+plane operations.
+
+We recommend that you use this library unless you have become a power
+user. It will make your program a lot simpler.
+
 XSKMAP / BPF_MAP_TYPE_XSKMAP
-----------------------------
+============================
 
 On XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that
 is used in conjunction with bpf_redirect_map() to pass the ingress
@@ -216,21 +237,184 @@ queue 17. Only the XDP program executing for eth0 and queue 17 will
 successfully pass data to the socket. Please refer to the sample
 application (samples/bpf/) in for an example.
 
+Configuration Flags and Socket Options
+======================================
+
+These are the various configuration flags that can be used to control
+and monitor the behavior of AF_XDP sockets.
+
+XDP_COPY and XDP_ZERO_COPY bind flags
+-------------------------------------
+
+When you bind to a socket, the kernel will first try to use zero-copy
+mode. If zero-copy is not supported, it will fall back on using copy
+mode, i.e. copying all packets out to user space. But if you would
+like to force a certain mode, you can use the following flags. If you
+pass the XDP_COPY flag to the bind call, the kernel will force the
+socket into copy mode. If it cannot use copy mode, the bind call will
+fail with an error. Conversely, the XDP_ZERO_COPY flag will force the
+socket into zero-copy mode or fail.
+
+XDP_SHARED_UMEM bind flag
+-------------------------
+
+This flag enables you to bind multiple sockets to the same UMEM, but
+only if they share the same queue id. In this mode, each socket has
+their own RX and TX rings, but the UMEM (tied to the first socket
+created) only has a single FILL ring and a single COMPLETION
+ring. To use this mode, create the first socket and bind it in the normal
+way. Create a second socket and create an RX and a TX ring, or at
+least one of them, but no FILL or COMPLETION rings as the ones from
+the first socket will be used. In the bind call, set the
+XDP_SHARED_UMEM option and provide the initial socket's fd in the
+sxdp_shared_umem_fd field. You can attach an arbitrary number of extra
+sockets this way.
+
+What socket will then a packet arrive on? This is decided by the XDP
+program. Put all the sockets in the XSK_MAP and just indicate which
+index in the array you would like to send each packet to. A simple
+round-robin example of distributing packets is shown below:
+
+.. code-block:: c
+
+   #include <linux/bpf.h>
+   #include "bpf_helpers.h"
+
+   #define MAX_SOCKS 16
+
+   struct {
+        __uint(type, BPF_MAP_TYPE_XSKMAP);
+        __uint(max_entries, MAX_SOCKS);
+        __uint(key_size, sizeof(int));
+        __uint(value_size, sizeof(int));
+   } xsks_map SEC(".maps");
+
+   static unsigned int rr;
+
+   SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+   {
+       rr = (rr + 1) & (MAX_SOCKS - 1);
+
+       return bpf_redirect_map(&xsks_map, rr, 0);
+   }
+
+Note, that since there is only a single set of FILL and COMPLETION
+rings, and they are single producer, single consumer rings, you need
+to make sure that multiple processes or threads do not use these rings
+concurrently. There are no synchronization primitives in the
+libbpf code that protects multiple users at this point in time.
+
+XDP_USE_NEED_WAKEUP bind flag
+-----------------------------
+
+This option adds support for a new flag called need_wakeup that is
+present in the FILL ring and the TX ring, the rings for which user
+space is a producer. When this option is set in the bind call, the
+need_wakeup flag will be set if the kernel needs to be explicitly
+woken up by a syscall to continue processing packets. If the flag is
+zero, no syscall is needed.
+
+If the flag is set on the FILL ring, the application needs to call
+poll() to be able to continue to receive packets on the RX ring. This
+can happen, for example, when the kernel has detected that there are no
+more buffers on the FILL ring and no buffers left on the RX HW ring of
+the NIC. In this case, interrupts are turned off as the NIC cannot
+receive any packets (as there are no buffers to put them in), and the
+need_wakeup flag is set so that user space can put buffers on the
+FILL ring and then call poll() so that the kernel driver can put these
+buffers on the HW ring and start to receive packets.
+
+If the flag is set for the TX ring, it means that the application
+needs to explicitly notify the kernel to send any packets put on the
+TX ring. This can be accomplished either by a poll() call, as in the
+RX path, or by calling sendto().
+
+An example of how to use this flag can be found in
+samples/bpf/xdpsock_user.c. An example with the use of libbpf helpers
+would look like this for the TX path:
+
+.. code-block:: c
+
+   if (xsk_ring_prod__needs_wakeup(&my_tx_ring))
+      sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+I.e., only use the syscall if the flag is set.
+
+We recommend that you always enable this mode as it usually leads to
+better performance especially if you run the application and the
+driver on the same core, but also if you use different cores for the
+application and the kernel driver, as it reduces the number of
+syscalls needed for the TX path.
+
+XDP_{RX|TX|UMEM_FILL|UMEM_COMPLETION}_RING setsockopts
+------------------------------------------------------
+
+These setsockopts set the number of descriptors that the RX, TX,
+FILL, and COMPLETION rings respectively should have. It is mandatory
+to set the size of at least one of the RX and TX rings. If you set
+both, you will be able to both receive and send traffic from your
+application, but if you only want to do one of them, you can save
+resources by only setting up one of them. Both the FILL ring and the
+COMPLETION ring are mandatory if you have a UMEM tied to your socket,
+which is the normal case. But if the XDP_SHARED_UMEM flag is used, any
+socket after the first one does not have a UMEM and should in that
+case not have any FILL or COMPLETION rings created.
+
+XDP_UMEM_REG setsockopt
+-----------------------
+
+This setsockopt registers a UMEM to a socket. This is the area that
+contains all the buffers that packets can reside in. The call takes a
+pointer to the beginning of this area and the size of it. Moreover, it
+also has a parameter called chunk_size that is the size that the UMEM is
+divided into. It can only be 2K or 4K at the moment. If you have an
+UMEM area that is 128K and a chunk size of 2K, this means that you
+will be able to hold a maximum of 128K / 2K = 64 packets in your UMEM
+area and that your largest packet size can be 2K.
+
+There is also an option to set the headroom of each single buffer in
+the UMEM. If you set this to N bytes, it means that the packet will
+start N bytes into the buffer leaving the first N bytes for the
+application to use. The final option is the flags field, but it will
+be dealt with in separate sections for each UMEM flag.
+
+XDP_STATISTICS getsockopt
+-------------------------
+
+Gets drop statistics of a socket that can be useful for debug
+purposes. The supported statistics are shown below:
+
+.. code-block:: c
+
+   struct xdp_statistics {
+         __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+         __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+         __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+   };
+
+XDP_OPTIONS getsockopt
+----------------------
+
+Gets options from an XDP socket. The only one supported so far is
+XDP_OPTIONS_ZEROCOPY which tells you if zero-copy is on or not.
+
 Usage
 =====
 
-In order to use AF_XDP sockets there are two parts needed. The
+In order to use AF_XDP sockets two parts are needed. The
 user-space application and the XDP program. For a complete setup and
 usage example, please refer to the sample application. The user-space
 side is xdpsock_user.c and the XDP side is part of libbpf.
 
-The XDP code sample included in tools/lib/bpf/xsk.c is the following::
+The XDP code sample included in tools/lib/bpf/xsk.c is the following:
+
+.. code-block:: c
 
    SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
    {
        int index = ctx->rx_queue_index;
 
-       // A set entry here means that the correspnding queue_id
+       // A set entry here means that the corresponding queue_id
        // has an active AF_XDP socket bound to it.
        if (bpf_map_lookup_elem(&xsks_map, &index))
            return bpf_redirect_map(&xsks_map, index, 0);
@@ -238,7 +422,10 @@ The XDP code sample included in tools/lib/bpf/xsk.c is the following::
        return XDP_PASS;
    }
 
-Naive ring dequeue and enqueue could look like this::
+A simple but not so performant ring dequeue and enqueue could look
+like this:
+
+.. code-block:: c
 
     // struct xdp_rxtx_ring {
     //         __u32 *producer;
@@ -287,17 +474,16 @@ Naive ring dequeue and enqueue could look like this::
         return 0;
     }
 
-
-For a more optimized version, please refer to the sample application.
+But please use the libbpf functions as they are optimized and ready to
+use. They will make your life easier.
 
 Sample application
 ==================
 
 There is a xdpsock benchmarking/test application included that
-demonstrates how to use AF_XDP sockets with both private and shared
-UMEMs. Say that you would like your UDP traffic from port 4242 to end
-up in queue 16, that we will enable AF_XDP on. Here, we use ethtool
-for this::
+demonstrates how to use AF_XDP sockets with private UMEMs. Say that
+you would like your UDP traffic from port 4242 to end up in queue 16,
+that we will enable AF_XDP on. Here, we use ethtool for this::
 
       ethtool -N p3p2 rx-flow-hash udp4 fn
       ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \
@@ -311,13 +497,18 @@ using::
 For XDP_SKB mode, use the switch "-S" instead of "-N" and all options
 can be displayed with "-h", as usual.
 
+This sample application uses libbpf to make the setup and usage of
+AF_XDP simpler. If you want to know how the raw uapi of AF_XDP is
+really used to make something more advanced, take a look at the libbpf
+code in tools/lib/bpf/xsk.[ch].
+
 FAQ
 =======
 
 Q: I am not seeing any traffic on the socket. What am I doing wrong?
 
 A: When a netdev of a physical NIC is initialized, Linux usually
-   allocates one Rx and Tx queue pair per core. So on a 8 core system,
+   allocates one RX and TX queue pair per core. So on an 8 core system,
    queue ids 0 to 7 will be allocated, one per core. In the AF_XDP
    bind call or the xsk_socket__create libbpf function call, you
    specify a specific queue id to bind to and it is only the traffic
@@ -343,9 +534,21 @@ A: When a netdev of a physical NIC is initialized, Linux usually
      sudo ethtool -N <interface> flow-type udp4 src-port 4242 dst-port \
      4242 action 2
 
-   A number of other ways are possible all up to the capabilitites of
+   A number of other ways are possible all up to the capabilities of
    the NIC you have.
 
+Q: Can I use the XSKMAP to implement a switch between different UMEMs
+   in copy mode?
+
+A: The short answer is no, that is not supported at the moment. The
+   XSKMAP can only be used to switch traffic coming in on queue id X
+   to sockets bound to the same queue id X. The XSKMAP can contain
+   sockets bound to different queue ids, for example X and Y, but only
+   traffic coming in from queue id Y can be directed to sockets bound
+   to the same queue id Y. In zero-copy mode, you should use the
+   switch, or other distribution mechanism, in your NIC to direct
+   traffic to the correct queue id and socket.
+
 Credits
 =======
 
diff --git a/Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst b/Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst
new file mode 100644 (file)
index 0000000..51e6624
--- /dev/null
@@ -0,0 +1,191 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=======================
+DPAA2 MAC / PHY support
+=======================
+
+:Copyright: |copy| 2019 NXP
+
+Overview
+--------
+
+The DPAA2 MAC / PHY support consists of a set of APIs that help DPAA2 network
+drivers (dpaa2-eth, dpaa2-ethsw) interact with the PHY library.
+
+DPAA2 Software Architecture
+---------------------------
+
+Among other DPAA2 objects, the fsl-mc bus exports DPNI objects (abstracting a
+network interface) and DPMAC objects (abstracting a MAC). The dpaa2-eth driver
+probes on the DPNI object and connects to and configures a DPMAC object with
+the help of phylink.
+
+Data connections may be established between a DPNI and a DPMAC, or between two
+DPNIs. Depending on the connection type, the netif_carrier_[on/off] is handled
+directly by the dpaa2-eth driver or by phylink.
+
+.. code-block:: none
+
+  Sources of abstracted link state information presented by the MC firmware
+
+                                               +--------------------------------------+
+  +------------+                  +---------+  |                           xgmac_mdio |
+  | net_device |                  | phylink |--|  +-----+  +-----+  +-----+  +-----+  |
+  +------------+                  +---------+  |  | PHY |  | PHY |  | PHY |  | PHY |  |
+        |                             |        |  +-----+  +-----+  +-----+  +-----+  |
+      +------------------------------------+   |                    External MDIO bus |
+      |            dpaa2-eth               |   +--------------------------------------+
+      +------------------------------------+
+        |                             |                                           Linux
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        |                             |                                     MC firmware
+        |              /|             V
+  +----------+        / |       +----------+
+  |          |       /  |       |          |
+  |          |       |  |       |          |
+  |   DPNI   |<------|  |<------|   DPMAC  |
+  |          |       |  |       |          |
+  |          |       \  |<---+  |          |
+  +----------+        \ |    |  +----------+
+                       \|    |
+                             |
+           +--------------------------------------+
+           | MC firmware polling MAC PCS for link |
+           |  +-----+  +-----+  +-----+  +-----+  |
+           |  | PCS |  | PCS |  | PCS |  | PCS |  |
+           |  +-----+  +-----+  +-----+  +-----+  |
+           |                    Internal MDIO bus |
+           +--------------------------------------+
+
+
+Depending on an MC firmware configuration setting, each MAC may be in one of two modes:
+
+- DPMAC_LINK_TYPE_FIXED: the link state management is handled exclusively by
+  the MC firmware by polling the MAC PCS. Without the need to register a
+  phylink instance, the dpaa2-eth driver will not bind to the connected dpmac
+  object at all.
+
+- DPMAC_LINK_TYPE_PHY: The MC firmware is left waiting for link state update
+  events, but those are in fact passed strictly between the dpaa2-mac (based on
+  phylink) and its attached net_device driver (dpaa2-eth, dpaa2-ethsw),
+  effectively bypassing the firmware.
+
+Implementation
+--------------
+
+At probe time or when a DPNI's endpoint is dynamically changed, the
+dpaa2-eth driver is responsible for finding out if the peer object is a
+DPMAC and if this is the
+case, to integrate it with PHYLINK using the dpaa2_mac_connect() API, which
+will do the following:
+
+ - look up the device tree for a PHYLINK-compatible binding (phy-handle)
+ - will create a PHYLINK instance associated with the received net_device
+ - connect to the PHY using phylink_of_phy_connect()
+
+The following phylink_mac_ops callbacks are implemented:
+
+ - .validate() will populate the supported linkmodes with the MAC capabilities
+   only when the phy_interface_t is RGMII_* (at the moment, this is the only
+   link type supported by the driver).
+
+ - .mac_config() will configure the MAC in the new configuration using the
+   dpmac_set_link_state() MC firmware API.
+
+ - .mac_link_up() / .mac_link_down() will update the MAC link using the same
+   API described above.
+
+At driver unbind() or when the DPNI object is disconnected from the DPMAC, the
+dpaa2-eth driver calls dpaa2_mac_disconnect() which will, in turn, disconnect
+from the PHY and destroy the PHYLINK instance.
+
+In case of a DPNI-DPMAC connection, an 'ip link set dev eth0 up' would start
+the following sequence of operations:
+
+(1) phylink_start() called from .dev_open().
+(2) The .mac_config() and .mac_link_up() callbacks are called by PHYLINK.
+(3) In order to configure the HW MAC, the MC Firmware API
+    dpmac_set_link_state() is called.
+(4) The firmware will eventually setup the HW MAC in the new configuration.
+(5) A netif_carrier_on() call is made directly from PHYLINK on the associated
+    net_device.
+(6) The dpaa2-eth driver handles the LINK_STATE_CHANGE irq in order to
+    enable/disable Rx taildrop based on the pause frame settings.
+
+.. code-block:: none
+
+  +---------+               +---------+
+  | PHYLINK |-------------->|  eth0   |
+  +---------+           (5) +---------+
+  (1) ^  |
+      |  |
+      |  v (2)
+  +-----------------------------------+
+  |             dpaa2-eth             |
+  +-----------------------------------+
+         |                    ^ (6)
+         |                    |
+         v (3)                |
+  +---------+---------------+---------+
+  |  DPMAC  |               |  DPNI   |
+  +---------+               +---------+
+  |            MC Firmware            |
+  +-----------------------------------+
+         |
+         |
+         v (4)
+  +-----------------------------------+
+  |             HW MAC                |
+  +-----------------------------------+
+
+In case of a DPNI-DPNI connection, a usual sequence of operations looks like
+the following:
+
+(1) ip link set dev eth0 up
+(2) The dpni_enable() MC API called on the associated fsl_mc_device.
+(3) ip link set dev eth1 up
+(4) The dpni_enable() MC API called on the associated fsl_mc_device.
+(5) The LINK_STATE_CHANGED irq is received by both instances of the dpaa2-eth
+    driver because now the operational link state is up.
+(6) The netif_carrier_on() is called on the exported net_device from
+    link_state_update().
+
+.. code-block:: none
+
+  +---------+               +---------+
+  |  eth0   |               |  eth1   |
+  +---------+               +---------+
+      |  ^                     ^  |
+      |  |                     |  |
+  (1) v  | (6)             (6) |  v (3)
+  +---------+               +---------+
+  |dpaa2-eth|               |dpaa2-eth|
+  +---------+               +---------+
+      |  ^                     ^  |
+      |  |                     |  |
+  (2) v  | (5)             (5) |  v (4)
+  +---------+---------------+---------+
+  |  DPNI   |               |  DPNI   |
+  +---------+               +---------+
+  |            MC Firmware            |
+  +-----------------------------------+
+
+
+Exported API
+------------
+
+Any DPAA2 driver that drives endpoints of DPMAC objects should service its
+_EVENT_ENDPOINT_CHANGED irq and connect/disconnect from the associated DPMAC
+when necessary using the below listed API::
+
+ - int dpaa2_mac_connect(struct dpaa2_mac *mac);
+ - void dpaa2_mac_disconnect(struct dpaa2_mac *mac);
+
+A phylink integration is necessary only when the partner DPMAC is not of TYPE_FIXED.
+One can check for this condition using the below API::
+
+ - bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev, struct fsl_mc_io *mc_io);
+
+Before connection to a MAC, the caller must allocate and populate the
+dpaa2_mac structure with the associated net_device, a pointer to the MC portal
+to be used and the actual fsl_mc_device structure of the DPMAC.
diff --git a/Documentation/networking/devlink-params-mv88e6xxx.txt b/Documentation/networking/devlink-params-mv88e6xxx.txt
new file mode 100644 (file)
index 0000000..21c4b35
--- /dev/null
@@ -0,0 +1,7 @@
+ATU_hash               [DEVICE, DRIVER-SPECIFIC]
+                       Select one of four possible hashing algorithms for
+                       MAC addresses in the Address Translation Unit.
+                       A value of 3 seems to work better than the default of
+                       1 when many MAC addresses have the same OUI.
+                       Configuration mode: runtime
+                       Type: u8. 0-3 valid.
index 5bcbf75..8cb2cd4 100644 (file)
@@ -213,3 +213,29 @@ A patchset to OpenSSL to use ktls as the record layer is
 of calling send directly after a handshake using gnutls.
 Since it doesn't implement a full record layer, control
 messages are not supported.
+
+Statistics
+==========
+
+TLS implementation exposes the following per-namespace statistics
+(``/proc/net/tls_stat``):
+
+- ``TlsCurrTxSw``, ``TlsCurrRxSw`` -
+  number of TX and RX sessions currently installed where host handles
+  cryptography
+
+- ``TlsCurrTxDevice``, ``TlsCurrRxDevice`` -
+  number of TX and RX sessions currently installed where NIC handles
+  cryptography
+
+- ``TlsTxSw``, ``TlsRxSw`` -
+  number of TX and RX sessions opened with host cryptography
+
+- ``TlsTxDevice``, ``TlsRxDevice`` -
+  number of TX and RX sessions opened with NIC cryptography
+
+- ``TlsDecryptError`` -
+  record decryption failed (e.g. due to incorrect authentication tag)
+
+- ``TlsDeviceRxResync`` -
+  number of RX resyncs sent to NICs handling cryptography
index cba1095..c0024b2 100644 (file)
@@ -1190,6 +1190,13 @@ Q:       http://patchwork.ozlabs.org/project/netdev/list/
 F:     drivers/net/ethernet/aquantia/atlantic/
 F:     Documentation/networking/device_drivers/aquantia/atlantic.txt
 
+AQUANTIA ETHERNET DRIVER PTP SUBSYSTEM
+M:     Egor Pomozov <epomozov@marvell.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+W:     http://www.aquantia.com
+F:     drivers/net/ethernet/aquantia/atlantic/aq_ptp*
+
 ARC FRAMEBUFFER DRIVER
 M:     Jaya Kumar <jayalk@intworks.biz>
 S:     Maintained
@@ -5047,10 +5054,14 @@ M:      Ioana Radulescu <ruxandra.radulescu@nxp.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/freescale/dpaa2/dpaa2-eth*
+F:     drivers/net/ethernet/freescale/dpaa2/dpaa2-mac*
 F:     drivers/net/ethernet/freescale/dpaa2/dpni*
+F:     drivers/net/ethernet/freescale/dpaa2/dpmac*
 F:     drivers/net/ethernet/freescale/dpaa2/dpkg.h
 F:     drivers/net/ethernet/freescale/dpaa2/Makefile
 F:     drivers/net/ethernet/freescale/dpaa2/Kconfig
+F:     Documentation/networking/device_drivers/freescale/dpaa2/ethernet-driver.rst
+F:     Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst
 
 DPAA2 ETHERNET SWITCH DRIVER
 M:     Ioana Radulescu <ruxandra.radulescu@nxp.com>
@@ -7444,8 +7455,8 @@ F:        drivers/platform/x86/tc1100-wmi.c
 
 HP100: Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series
 M:     Jaroslav Kysela <perex@perex.cz>
-S:     Maintained
-F:     drivers/net/ethernet/hp/hp100.*
+S:     Obsolete
+F:     drivers/staging/hp/hp100.*
 
 HPET:  High Precision Event Timers driver
 M:     Clemens Ladisch <clemens@ladisch.de>
@@ -9738,6 +9749,7 @@ S:        Maintained
 F:     drivers/net/dsa/mv88e6xxx/
 F:     include/linux/platform_data/mv88e6xxx.h
 F:     Documentation/devicetree/bindings/net/dsa/marvell.txt
+F:     Documentation/networking/devlink-params-mv88e6xxx.txt
 
 MARVELL ARMADA DRM SUPPORT
 M:     Russell King <linux@armlinux.org.uk>
@@ -13815,7 +13827,7 @@ R:      Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
 L:     netdev@vger.kernel.org
 L:     linux-renesas-soc@vger.kernel.org
 F:     Documentation/devicetree/bindings/net/renesas,*.txt
-F:     Documentation/devicetree/bindings/net/sh_eth.txt
+F:     Documentation/devicetree/bindings/net/renesas,*.yaml
 F:     drivers/net/ethernet/renesas/
 F:     include/linux/sh_eth.h
 
index 84373dc..bbc68a5 100644 (file)
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o  = -pg
 endif
 
 obj-y  :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-           pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+           pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644 (file)
index 0000000..f5b85bd
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+/*
+ * Fold @vaddr back into x86-64 canonical form by sign-extending bit
+ * (vaddr_bits - 1) through bit 63.
+ */
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+       return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+/* Return true if @vaddr must not be probed as kernel memory. */
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+       /*
+        * Range covering the highest possible canonical userspace address
+        * as well as non-canonical address range. For the canonical range
+        * we also need to include the userspace guard page.
+        */
+       return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+              canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+/* 32-bit: everything below the user/kernel split is not kernel memory. */
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+       return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+/*
+ * Strict variant of probe_kernel_read(): reject any @src address that
+ * lies in (or could alias) userspace before attempting the copy.
+ * Returns -EFAULT for rejected addresses, otherwise the result of
+ * __probe_kernel_read().
+ */
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+       if (unlikely(invalid_probe_range((unsigned long)src)))
+               return -EFAULT;
+
+       return __probe_kernel_read(dst, src, size);
+}
+
+/*
+ * Strict variant of strncpy_from_unsafe(): reject any @unsafe_addr that
+ * lies in (or could alias) userspace before attempting the string copy.
+ * Returns -EFAULT for rejected addresses, otherwise the result of
+ * __strncpy_from_unsafe().
+ */
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+       if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+               return -EFAULT;
+
+       return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
index 991549a..8cd23d8 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
-
+#include <asm/extable.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 
@@ -123,6 +123,19 @@ static const int reg2hex[] = {
        [AUX_REG] = 3,    /* R11 temp register */
 };
 
+/*
+ * Byte offset into struct pt_regs of the x86 register backing each BPF
+ * register. Packed into the upper bits of extable->fixup by do_jit() so
+ * ex_handler_bpf() can zero the destination register of a faulting
+ * BPF_PROBE_MEM load.
+ */
+static const int reg2pt_regs[] = {
+       [BPF_REG_0] = offsetof(struct pt_regs, ax),
+       [BPF_REG_1] = offsetof(struct pt_regs, di),
+       [BPF_REG_2] = offsetof(struct pt_regs, si),
+       [BPF_REG_3] = offsetof(struct pt_regs, dx),
+       [BPF_REG_4] = offsetof(struct pt_regs, cx),
+       [BPF_REG_5] = offsetof(struct pt_regs, r8),
+       [BPF_REG_6] = offsetof(struct pt_regs, bx),
+       [BPF_REG_7] = offsetof(struct pt_regs, r13),
+       [BPF_REG_8] = offsetof(struct pt_regs, r14),
+       [BPF_REG_9] = offsetof(struct pt_regs, r15),
+};
+
 /*
  * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
  * which need extra byte of encoding.
@@ -377,6 +390,19 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
        *pprog = prog;
 }
 
+
+/*
+ * Exception fixup handler for faulting BPF_PROBE_MEM loads. The JIT
+ * packs x->fixup as (pt_regs offset of dest reg << 8) | insn length,
+ * so recovery is: zero the destination register and skip the insn.
+ */
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+                          struct pt_regs *regs, int trapnr,
+                          unsigned long error_code, unsigned long fault_addr)
+{
+       u32 reg = x->fixup >> 8;
+
+       /* jump over faulting load and clear dest register */
+       *(unsigned long *)((void *)regs + reg) = 0;
+       regs->ip += x->fixup & 0xff;
+       return true;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
 {
@@ -384,7 +410,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
        int insn_cnt = bpf_prog->len;
        bool seen_exit = false;
        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
-       int i, cnt = 0;
+       int i, cnt = 0, excnt = 0;
        int proglen = 0;
        u8 *prog = temp;
 
@@ -778,14 +804,17 @@ stx:                      if (is_imm8(insn->off))
 
                        /* LDX: dst_reg = *(u8*)(src_reg + off) */
                case BPF_LDX | BPF_MEM | BPF_B:
+               case BPF_LDX | BPF_PROBE_MEM | BPF_B:
                        /* Emit 'movzx rax, byte ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_H:
+               case BPF_LDX | BPF_PROBE_MEM | BPF_H:
                        /* Emit 'movzx rax, word ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_W:
+               case BPF_LDX | BPF_PROBE_MEM | BPF_W:
                        /* Emit 'mov eax, dword ptr [rax+0x14]' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
@@ -793,6 +822,7 @@ stx:                        if (is_imm8(insn->off))
                                EMIT1(0x8B);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_DW:
+               case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
                        /* Emit 'mov rax, qword ptr [rax+0x14]' */
                        EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
 ldx:                   /*
@@ -805,6 +835,48 @@ ldx:                       /*
                        else
                                EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
                                            insn->off);
+                       if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+                               struct exception_table_entry *ex;
+                               u8 *_insn = image + proglen;
+                               s64 delta;
+
+                               if (!bpf_prog->aux->extable)
+                                       break;
+
+                               if (excnt >= bpf_prog->aux->num_exentries) {
+                                       pr_err("ex gen bug\n");
+                                       return -EFAULT;
+                               }
+                               ex = &bpf_prog->aux->extable[excnt++];
+
+                               delta = _insn - (u8 *)&ex->insn;
+                               if (!is_simm32(delta)) {
+                                       pr_err("extable->insn doesn't fit into 32-bit\n");
+                                       return -EFAULT;
+                               }
+                               ex->insn = delta;
+
+                               delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+                               if (!is_simm32(delta)) {
+                                       pr_err("extable->handler doesn't fit into 32-bit\n");
+                                       return -EFAULT;
+                               }
+                               ex->handler = delta;
+
+                               if (dst_reg > BPF_REG_9) {
+                                       pr_err("verifier error\n");
+                                       return -EFAULT;
+                               }
+                               /*
+                                * Compute size of x86 insn and its target dest x86 register.
+                                * ex_handler_bpf() will use lower 8 bits to adjust
+                                * pt_regs->ip to jump over this x86 instruction
+                                * and upper bits to figure out which pt_regs to zero out.
+                                * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
+                                * of 4 bytes will be ignored and rbx will be zero inited.
+                                */
+                               ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
+                       }
                        break;
 
                        /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
@@ -909,6 +981,16 @@ xadd:                      if (is_imm8(insn->off))
                case BPF_JMP32 | BPF_JSLT | BPF_K:
                case BPF_JMP32 | BPF_JSGE | BPF_K:
                case BPF_JMP32 | BPF_JSLE | BPF_K:
+                       /* test dst_reg, dst_reg to save one extra byte */
+                       if (imm32 == 0) {
+                               if (BPF_CLASS(insn->code) == BPF_JMP)
+                                       EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+                               else if (is_ereg(dst_reg))
+                                       EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+                               EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+                               goto emit_cond_jmp;
+                       }
+
                        /* cmp dst_reg, imm8/32 */
                        if (BPF_CLASS(insn->code) == BPF_JMP)
                                EMIT1(add_1mod(0x48, dst_reg));
@@ -1048,6 +1130,11 @@ emit_jmp:
                addrs[i] = proglen;
                prog = temp;
        }
+
+       if (image && excnt != bpf_prog->aux->num_exentries) {
+               pr_err("extable is not populated\n");
+               return -EFAULT;
+       }
        return proglen;
 }
 
@@ -1148,12 +1235,24 @@ out_image:
                        break;
                }
                if (proglen == oldproglen) {
-                       header = bpf_jit_binary_alloc(proglen, &image,
-                                                     1, jit_fill_hole);
+                       /*
+                        * The number of entries in extable is the number of BPF_LDX
+                        * insns that access kernel memory via "pointer to BTF type".
+                        * The verifier changed their opcode from LDX|MEM|size
+                        * to LDX|PROBE_MEM|size to make JITing easier.
+                        */
+                       u32 align = __alignof__(struct exception_table_entry);
+                       u32 extable_size = prog->aux->num_exentries *
+                               sizeof(struct exception_table_entry);
+
+                       /* allocate module memory for x86 insns and extable */
+                       header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
+                                                     &image, align, jit_fill_hole);
                        if (!header) {
                                prog = orig_prog;
                                goto out_addrs;
                        }
+                       prog->aux->extable = (void *) image + roundup(proglen, align);
                }
                oldproglen = proglen;
                cond_resched();
index 2bbab02..aad00d2 100644 (file)
@@ -1070,7 +1070,7 @@ static int fs_open(struct atm_vcc *atm_vcc)
                                        RC_FLAGS_BFPS_BFP * bfp |
                                        RC_FLAGS_RXBM_PSB, 0, 0);
                        break;
-               };
+               }
                if (IS_FS50 (dev)) {
                        submit_command (dev, &dev->hp_txq, 
                                        QE_CMD_REG_WR | QE_CMD_IMM_INQ,
index aae665a..f7aa2dc 100644 (file)
@@ -380,17 +380,6 @@ config BT_ATH3K
          Say Y here to compile support for "Atheros firmware download driver"
          into the kernel or say M to compile it as module (ath3k).
 
-config BT_WILINK
-       tristate "Texas Instruments WiLink7 driver"
-       depends on TI_ST
-       help
-         This enables the Bluetooth driver for Texas Instrument's BT/FM/GPS
-         combo devices. This makes use of shared transport line discipline
-         core driver to communicate with the BT core of the combo chip.
-
-         Say Y here to compile support for Texas Instrument's WiLink7 driver
-         into the kernel or say M to compile it as module (btwilink).
-
 config BT_MTKSDIO
        tristate "MediaTek HCI SDIO driver"
        depends on MMC
index 34887b9..1a58a3a 100644 (file)
@@ -19,7 +19,6 @@ obj-$(CONFIG_BT_INTEL)                += btintel.o
 obj-$(CONFIG_BT_ATH3K)         += ath3k.o
 obj-$(CONFIG_BT_MRVL)          += btmrvl.o
 obj-$(CONFIG_BT_MRVL_SDIO)     += btmrvl_sdio.o
-obj-$(CONFIG_BT_WILINK)                += btwilink.o
 obj-$(CONFIG_BT_MTKSDIO)       += btmtksdio.o
 obj-$(CONFIG_BT_MTKUART)       += btmtkuart.o
 obj-$(CONFIG_BT_QCOMSMD)       += btqcomsmd.o
index bb99c86..62e781a 100644 (file)
@@ -709,6 +709,51 @@ done:
 }
 EXPORT_SYMBOL_GPL(btintel_download_firmware);
 
+/**
+ * btintel_reset_to_bootloader() - reset the controller back to bootloader
+ * @hdev: HCI device
+ *
+ * Sends the vendor-specific Intel Reset command (opcode 0xfc01) with hard
+ * reset, patch enable and DDC reload selected, so that a failed firmware
+ * download can be retried after the controller re-enumerates. Waits 150ms
+ * afterwards to cover the controller's USB reset period. Errors from the
+ * command are logged but not propagated (best-effort recovery path).
+ */
+void btintel_reset_to_bootloader(struct hci_dev *hdev)
+{
+       struct intel_reset params;
+       struct sk_buff *skb;
+
+       /* Send Intel Reset command. This will result in
+        * re-enumeration of BT controller.
+        *
+        * Intel Reset parameter description:
+        * reset_type :   0x00 (Soft reset),
+        *                0x01 (Hard reset)
+        * patch_enable : 0x00 (Do not enable),
+        *                0x01 (Enable)
+        * ddc_reload :   0x00 (Do not reload),
+        *                0x01 (Reload)
+        * boot_option:   0x00 (Current image),
+        *                0x01 (Specified boot address)
+        * boot_param:    Boot address
+        *
+        */
+       params.reset_type = 0x01;
+       params.patch_enable = 0x01;
+       params.ddc_reload = 0x01;
+       params.boot_option = 0x00;
+       params.boot_param = cpu_to_le32(0x00000000);
+
+       skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(params),
+                            &params, HCI_INIT_TIMEOUT);
+       if (IS_ERR(skb)) {
+               bt_dev_err(hdev, "FW download error recovery failed (%ld)",
+                          PTR_ERR(skb));
+               return;
+       }
+       bt_dev_info(hdev, "Intel reset sent to retry FW download");
+       kfree_skb(skb);
+
+       /* Current Intel BT controllers(ThP/JfP) hold the USB reset
+        * lines for 2ms when it receives Intel Reset in bootloader mode.
+        * Whereas, the upcoming Intel BT controllers will hold USB reset
+        * for 150ms. To keep the delay generic, 150ms is chosen here.
+        */
+       msleep(150);
+}
+EXPORT_SYMBOL_GPL(btintel_reset_to_bootloader);
+
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION);
 MODULE_VERSION(VERSION);
index 3d84619..a69ea8a 100644 (file)
@@ -87,6 +87,7 @@ int btintel_read_boot_params(struct hci_dev *hdev,
                             struct intel_boot_params *params);
 int btintel_download_firmware(struct hci_dev *dev, const struct firmware *fw,
                              u32 *boot_param);
+void btintel_reset_to_bootloader(struct hci_dev *hdev);
 #else
 
 static inline int btintel_check_bdaddr(struct hci_dev *hdev)
@@ -181,4 +182,8 @@ static inline int btintel_download_firmware(struct hci_dev *dev,
 {
        return -EOPNOTSUPP;
 }
+
+static inline void btintel_reset_to_bootloader(struct hci_dev *hdev)
+{
+}
 #endif
index bf3c02b..ae9a204 100644 (file)
@@ -418,7 +418,7 @@ static int rtl_download_firmware(struct hci_dev *hdev,
                if (IS_ERR(skb)) {
                        rtl_dev_err(hdev, "download fw command failed (%ld)",
                                    PTR_ERR(skb));
-                       ret = -PTR_ERR(skb);
+                       ret = PTR_ERR(skb);
                        goto out;
                }
 
index a9c35eb..04a139e 100644 (file)
@@ -2182,8 +2182,11 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
         * loaded.
         */
        err = btintel_read_version(hdev, &ver);
-       if (err)
+       if (err) {
+               bt_dev_err(hdev, "Intel Read version failed (%d)", err);
+               btintel_reset_to_bootloader(hdev);
                return err;
+       }
 
        /* The hardware platform number has a fixed value of 0x37 and
         * for now only accept this single value.
@@ -2326,9 +2329,13 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 
        /* Start firmware downloading and get boot parameter */
        err = btintel_download_firmware(hdev, fw, &boot_param);
-       if (err < 0)
+       if (err < 0) {
+               /* When FW download fails, send Intel Reset to retry
+                * FW download.
+                */
+               btintel_reset_to_bootloader(hdev);
                goto done;
-
+       }
        set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
 
        bt_dev_info(hdev, "Waiting for firmware download to complete");
@@ -2355,6 +2362,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
        if (err) {
                bt_dev_err(hdev, "Firmware loading timeout");
                err = -ETIMEDOUT;
+               btintel_reset_to_bootloader(hdev);
                goto done;
        }
 
@@ -2381,8 +2389,11 @@ done:
        set_bit(BTUSB_BOOTING, &data->flags);
 
        err = btintel_send_intel_reset(hdev, boot_param);
-       if (err)
+       if (err) {
+               bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
+               btintel_reset_to_bootloader(hdev);
                return err;
+       }
 
        /* The bootloader will not indicate when the device is ready. This
         * is done by the operational firmware sending bootup notification.
@@ -2404,6 +2415,7 @@ done:
 
        if (err) {
                bt_dev_err(hdev, "Device boot timeout");
+               btintel_reset_to_bootloader(hdev);
                return -ETIMEDOUT;
        }
 
@@ -2432,6 +2444,13 @@ done:
         */
        btintel_set_event_mask(hdev, false);
 
+       /* Read the Intel version information after loading the FW */
+       err = btintel_read_version(hdev, &ver);
+       if (err)
+               return err;
+
+       btintel_version_info(hdev, &ver);
+
        return 0;
 }
 
@@ -2489,8 +2508,6 @@ static int btusb_shutdown_intel_new(struct hci_dev *hdev)
        return 0;
 }
 
-#ifdef CONFIG_BT_HCIBTUSB_MTK
-
 #define FIRMWARE_MT7663                "mediatek/mt7663pr2h.bin"
 #define FIRMWARE_MT7668                "mediatek/mt7668pr2h.bin"
 
@@ -3051,7 +3068,6 @@ static int btusb_mtk_shutdown(struct hci_dev *hdev)
 
 MODULE_FIRMWARE(FIRMWARE_MT7663);
 MODULE_FIRMWARE(FIRMWARE_MT7668);
-#endif
 
 #ifdef CONFIG_PM
 /* Configure an out-of-band gpio as wake-up pin, if specified in device tree */
@@ -3411,7 +3427,6 @@ static int btusb_setup_qca(struct hci_dev *hdev)
        return 0;
 }
 
-#ifdef CONFIG_BT_HCIBTUSB_BCM
 static inline int __set_diag_interface(struct hci_dev *hdev)
 {
        struct btusb_data *data = hci_get_drvdata(hdev);
@@ -3498,7 +3513,6 @@ static int btusb_bcm_set_diag(struct hci_dev *hdev, bool enable)
 
        return submit_or_queue_tx_urb(hdev, urb);
 }
-#endif
 
 #ifdef CONFIG_PM
 static irqreturn_t btusb_oob_wake_handler(int irq, void *priv)
@@ -3724,8 +3738,8 @@ static int btusb_probe(struct usb_interface *intf,
        if (id->driver_info & BTUSB_BCM92035)
                hdev->setup = btusb_setup_bcm92035;
 
-#ifdef CONFIG_BT_HCIBTUSB_BCM
-       if (id->driver_info & BTUSB_BCM_PATCHRAM) {
+       if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) &&
+           (id->driver_info & BTUSB_BCM_PATCHRAM)) {
                hdev->manufacturer = 15;
                hdev->setup = btbcm_setup_patchram;
                hdev->set_diag = btusb_bcm_set_diag;
@@ -3735,7 +3749,8 @@ static int btusb_probe(struct usb_interface *intf,
                data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2);
        }
 
-       if (id->driver_info & BTUSB_BCM_APPLE) {
+       if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) &&
+           (id->driver_info & BTUSB_BCM_APPLE)) {
                hdev->manufacturer = 15;
                hdev->setup = btbcm_setup_apple;
                hdev->set_diag = btusb_bcm_set_diag;
@@ -3743,7 +3758,6 @@ static int btusb_probe(struct usb_interface *intf,
                /* Broadcom LM_DIAG Interface numbers are hardcoded */
                data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2);
        }
-#endif
 
        if (id->driver_info & BTUSB_INTEL) {
                hdev->manufacturer = 2;
@@ -3774,14 +3788,13 @@ static int btusb_probe(struct usb_interface *intf,
        if (id->driver_info & BTUSB_MARVELL)
                hdev->set_bdaddr = btusb_set_bdaddr_marvell;
 
-#ifdef CONFIG_BT_HCIBTUSB_MTK
-       if (id->driver_info & BTUSB_MEDIATEK) {
+       if (IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) &&
+           (id->driver_info & BTUSB_MEDIATEK)) {
                hdev->setup = btusb_mtk_setup;
                hdev->shutdown = btusb_mtk_shutdown;
                hdev->manufacturer = 70;
                set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
        }
-#endif
 
        if (id->driver_info & BTUSB_SWAVE) {
                set_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks);
@@ -3807,8 +3820,8 @@ static int btusb_probe(struct usb_interface *intf,
                btusb_check_needs_reset_resume(intf);
        }
 
-#ifdef CONFIG_BT_HCIBTUSB_RTL
-       if (id->driver_info & BTUSB_REALTEK) {
+       if (IS_ENABLED(CONFIG_BT_HCIBTUSB_RTL) &&
+           (id->driver_info & BTUSB_REALTEK)) {
                hdev->setup = btrtl_setup_realtek;
                hdev->shutdown = btrtl_shutdown_realtek;
                hdev->cmd_timeout = btusb_rtl_cmd_timeout;
@@ -3819,7 +3832,6 @@ static int btusb_probe(struct usb_interface *intf,
                 */
                set_bit(BTUSB_WAKEUP_DISABLE, &data->flags);
        }
-#endif
 
        if (id->driver_info & BTUSB_AMP) {
                /* AMP controllers do not support SCO packets */
@@ -3887,15 +3899,13 @@ static int btusb_probe(struct usb_interface *intf,
                        goto out_free_dev;
        }
 
-#ifdef CONFIG_BT_HCIBTUSB_BCM
-       if (data->diag) {
+       if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) && data->diag) {
                if (!usb_driver_claim_interface(&btusb_driver,
                                                data->diag, data))
                        __set_diag_interface(hdev);
                else
                        data->diag = NULL;
        }
-#endif
 
        if (enable_autosuspend)
                usb_enable_autosuspend(data->udev);
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
deleted file mode 100644 (file)
index e55f06e..0000000
+++ /dev/null
@@ -1,337 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Texas Instrument's Bluetooth Driver For Shared Transport.
- *
- *  Bluetooth Driver acts as interface between HCI core and
- *  TI Shared Transport Layer.
- *
- *  Copyright (C) 2009-2010 Texas Instruments
- *  Author: Raja Mani <raja_mani@ti.com>
- *     Pavan Savoy <pavan_savoy@ti.com>
- */
-
-#include <linux/platform_device.h>
-#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/hci_core.h>
-#include <net/bluetooth/hci.h>
-
-#include <linux/ti_wilink_st.h>
-#include <linux/module.h>
-
-/* Bluetooth Driver Version */
-#define VERSION               "1.0"
-#define MAX_BT_CHNL_IDS                3
-
-/* Number of seconds to wait for registration completion
- * when ST returns PENDING status.
- */
-#define BT_REGISTER_TIMEOUT   6000     /* 6 sec */
-
-/**
- * struct ti_st - driver operation structure
- * @hdev: hci device pointer which binds to bt driver
- * @reg_status: ST registration callback status
- * @st_write: write function provided by the ST driver
- *     to be used by the driver during send_frame.
- * @wait_reg_completion - completion sync between ti_st_open
- *     and st_reg_completion_cb.
- */
-struct ti_st {
-       struct hci_dev *hdev;
-       int reg_status;
-       long (*st_write) (struct sk_buff *);
-       struct completion wait_reg_completion;
-};
-
-/* Increments HCI counters based on pocket ID (cmd,acl,sco) */
-static inline void ti_st_tx_complete(struct ti_st *hst, int pkt_type)
-{
-       struct hci_dev *hdev = hst->hdev;
-
-       /* Update HCI stat counters */
-       switch (pkt_type) {
-       case HCI_COMMAND_PKT:
-               hdev->stat.cmd_tx++;
-               break;
-
-       case HCI_ACLDATA_PKT:
-               hdev->stat.acl_tx++;
-               break;
-
-       case HCI_SCODATA_PKT:
-               hdev->stat.sco_tx++;
-               break;
-       }
-}
-
-/* ------- Interfaces to Shared Transport ------ */
-
-/* Called by ST layer to indicate protocol registration completion
- * status.ti_st_open() function will wait for signal from this
- * API when st_register() function returns ST_PENDING.
- */
-static void st_reg_completion_cb(void *priv_data, int data)
-{
-       struct ti_st *lhst = priv_data;
-
-       /* Save registration status for use in ti_st_open() */
-       lhst->reg_status = data;
-       /* complete the wait in ti_st_open() */
-       complete(&lhst->wait_reg_completion);
-}
-
-/* Called by Shared Transport layer when receive data is available */
-static long st_receive(void *priv_data, struct sk_buff *skb)
-{
-       struct ti_st *lhst = priv_data;
-       int err;
-
-       if (!skb)
-               return -EFAULT;
-
-       if (!lhst) {
-               kfree_skb(skb);
-               return -EFAULT;
-       }
-
-       /* Forward skb to HCI core layer */
-       err = hci_recv_frame(lhst->hdev, skb);
-       if (err < 0) {
-               BT_ERR("Unable to push skb to HCI core(%d)", err);
-               return err;
-       }
-
-       lhst->hdev->stat.byte_rx += skb->len;
-
-       return 0;
-}
-
-/* ------- Interfaces to HCI layer ------ */
-/* protocol structure registered with shared transport */
-static struct st_proto_s ti_st_proto[MAX_BT_CHNL_IDS] = {
-       {
-               .chnl_id = HCI_EVENT_PKT, /* HCI Events */
-               .hdr_len = sizeof(struct hci_event_hdr),
-               .offset_len_in_hdr = offsetof(struct hci_event_hdr, plen),
-               .len_size = 1, /* sizeof(plen) in struct hci_event_hdr */
-               .reserve = 8,
-       },
-       {
-               .chnl_id = HCI_ACLDATA_PKT, /* ACL */
-               .hdr_len = sizeof(struct hci_acl_hdr),
-               .offset_len_in_hdr = offsetof(struct hci_acl_hdr, dlen),
-               .len_size = 2,  /* sizeof(dlen) in struct hci_acl_hdr */
-               .reserve = 8,
-       },
-       {
-               .chnl_id = HCI_SCODATA_PKT, /* SCO */
-               .hdr_len = sizeof(struct hci_sco_hdr),
-               .offset_len_in_hdr = offsetof(struct hci_sco_hdr, dlen),
-               .len_size = 1, /* sizeof(dlen) in struct hci_sco_hdr */
-               .reserve = 8,
-       },
-};
-
-/* Called from HCI core to initialize the device */
-static int ti_st_open(struct hci_dev *hdev)
-{
-       unsigned long timeleft;
-       struct ti_st *hst;
-       int err, i;
-
-       BT_DBG("%s %p", hdev->name, hdev);
-
-       /* provide contexts for callbacks from ST */
-       hst = hci_get_drvdata(hdev);
-
-       for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
-               ti_st_proto[i].priv_data = hst;
-               ti_st_proto[i].max_frame_size = HCI_MAX_FRAME_SIZE;
-               ti_st_proto[i].recv = st_receive;
-               ti_st_proto[i].reg_complete_cb = st_reg_completion_cb;
-
-               /* Prepare wait-for-completion handler */
-               init_completion(&hst->wait_reg_completion);
-               /* Reset ST registration callback status flag,
-                * this value will be updated in
-                * st_reg_completion_cb()
-                * function whenever it called from ST driver.
-                */
-               hst->reg_status = -EINPROGRESS;
-
-               err = st_register(&ti_st_proto[i]);
-               if (!err)
-                       goto done;
-
-               if (err != -EINPROGRESS) {
-                       BT_ERR("st_register failed %d", err);
-                       return err;
-               }
-
-               /* ST is busy with either protocol
-                * registration or firmware download.
-                */
-               BT_DBG("waiting for registration "
-                               "completion signal from ST");
-               timeleft = wait_for_completion_timeout
-                       (&hst->wait_reg_completion,
-                        msecs_to_jiffies(BT_REGISTER_TIMEOUT));
-               if (!timeleft) {
-                       BT_ERR("Timeout(%d sec),didn't get reg "
-                                       "completion signal from ST",
-                                       BT_REGISTER_TIMEOUT / 1000);
-                       return -ETIMEDOUT;
-               }
-
-               /* Is ST registration callback
-                * called with ERROR status?
-                */
-               if (hst->reg_status != 0) {
-                       BT_ERR("ST registration completed with invalid "
-                                       "status %d", hst->reg_status);
-                       return -EAGAIN;
-               }
-
-done:
-               hst->st_write = ti_st_proto[i].write;
-               if (!hst->st_write) {
-                       BT_ERR("undefined ST write function");
-                       for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
-                               /* Undo registration with ST */
-                               err = st_unregister(&ti_st_proto[i]);
-                               if (err)
-                                       BT_ERR("st_unregister() failed with "
-                                                       "error %d", err);
-                               hst->st_write = NULL;
-                       }
-                       return -EIO;
-               }
-       }
-       return 0;
-}
-
-/* Close device */
-static int ti_st_close(struct hci_dev *hdev)
-{
-       int err, i;
-       struct ti_st *hst = hci_get_drvdata(hdev);
-
-       for (i = MAX_BT_CHNL_IDS-1; i >= 0; i--) {
-               err = st_unregister(&ti_st_proto[i]);
-               if (err)
-                       BT_ERR("st_unregister(%d) failed with error %d",
-                                       ti_st_proto[i].chnl_id, err);
-       }
-
-       hst->st_write = NULL;
-
-       return err;
-}
-
-static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
-{
-       struct ti_st *hst;
-       long len;
-       int pkt_type;
-
-       hst = hci_get_drvdata(hdev);
-
-       /* Prepend skb with frame type */
-       memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1);
-
-       BT_DBG("%s: type %d len %d", hdev->name, hci_skb_pkt_type(skb),
-              skb->len);
-
-       /* Insert skb to shared transport layer's transmit queue.
-        * Freeing skb memory is taken care in shared transport layer,
-        * so don't free skb memory here.
-        */
-       pkt_type = hci_skb_pkt_type(skb);
-       len = hst->st_write(skb);
-       if (len < 0) {
-               BT_ERR("ST write failed (%ld)", len);
-               /* Try Again, would only fail if UART has gone bad */
-               return -EAGAIN;
-       }
-
-       /* ST accepted our skb. So, Go ahead and do rest */
-       hdev->stat.byte_tx += len;
-       ti_st_tx_complete(hst, pkt_type);
-
-       return 0;
-}
-
-static int bt_ti_probe(struct platform_device *pdev)
-{
-       struct ti_st *hst;
-       struct hci_dev *hdev;
-       int err;
-
-       hst = devm_kzalloc(&pdev->dev, sizeof(struct ti_st), GFP_KERNEL);
-       if (!hst)
-               return -ENOMEM;
-
-       /* Expose "hciX" device to user space */
-       hdev = hci_alloc_dev();
-       if (!hdev)
-               return -ENOMEM;
-
-       BT_DBG("hdev %p", hdev);
-
-       hst->hdev = hdev;
-       hdev->bus = HCI_UART;
-       hci_set_drvdata(hdev, hst);
-       hdev->open = ti_st_open;
-       hdev->close = ti_st_close;
-       hdev->flush = NULL;
-       hdev->send = ti_st_send_frame;
-
-       err = hci_register_dev(hdev);
-       if (err < 0) {
-               BT_ERR("Can't register HCI device error %d", err);
-               hci_free_dev(hdev);
-               return err;
-       }
-
-       BT_DBG("HCI device registered (hdev %p)", hdev);
-
-       dev_set_drvdata(&pdev->dev, hst);
-       return 0;
-}
-
-static int bt_ti_remove(struct platform_device *pdev)
-{
-       struct hci_dev *hdev;
-       struct ti_st *hst = dev_get_drvdata(&pdev->dev);
-
-       if (!hst)
-               return -EFAULT;
-
-       BT_DBG("%s", hst->hdev->name);
-
-       hdev = hst->hdev;
-       ti_st_close(hdev);
-       hci_unregister_dev(hdev);
-
-       hci_free_dev(hdev);
-
-       dev_set_drvdata(&pdev->dev, NULL);
-       return 0;
-}
-
-static struct platform_driver btwilink_driver = {
-       .probe = bt_ti_probe,
-       .remove = bt_ti_remove,
-       .driver = {
-               .name = "btwilink",
-       },
-};
-
-module_platform_driver(btwilink_driver);
-
-/* ------ Module Info ------ */
-
-MODULE_AUTHOR("Raja Mani <raja_mani@ti.com>");
-MODULE_DESCRIPTION("Bluetooth Driver for TI Shared Transport" VERSION);
-MODULE_VERSION(VERSION);
-MODULE_LICENSE("GPL");
index 7646636..0f73f6a 100644 (file)
@@ -445,9 +445,11 @@ static int bcm_open(struct hci_uart *hu)
 
 out:
        if (bcm->dev) {
+               hci_uart_set_flow_control(hu, true);
                hu->init_speed = bcm->dev->init_speed;
                hu->oper_speed = bcm->dev->oper_speed;
                err = bcm_gpio_set_power(bcm->dev, true);
+               hci_uart_set_flow_control(hu, false);
                if (err)
                        goto err_unset_hu;
        }
index 2857066..d9a4c6c 100644 (file)
@@ -621,13 +621,6 @@ static int ll_setup(struct hci_uart *hu)
 
        serdev_device_set_flow_control(serdev, true);
 
-       if (hu->oper_speed)
-               speed = hu->oper_speed;
-       else if (hu->proto->oper_speed)
-               speed = hu->proto->oper_speed;
-       else
-               speed = 0;
-
        do {
                /* Reset the Bluetooth device */
                gpiod_set_value_cansleep(lldev->enable_gpio, 0);
@@ -639,20 +632,6 @@ static int ll_setup(struct hci_uart *hu)
                        return err;
                }
 
-               if (speed) {
-                       __le32 speed_le = cpu_to_le32(speed);
-                       struct sk_buff *skb;
-
-                       skb = __hci_cmd_sync(hu->hdev,
-                                            HCI_VS_UPDATE_UART_HCI_BAUDRATE,
-                                            sizeof(speed_le), &speed_le,
-                                            HCI_INIT_TIMEOUT);
-                       if (!IS_ERR(skb)) {
-                               kfree_skb(skb);
-                               serdev_device_set_baudrate(serdev, speed);
-                       }
-               }
-
                err = download_firmware(lldev);
                if (!err)
                        break;
@@ -677,7 +656,25 @@ static int ll_setup(struct hci_uart *hu)
        }
 
        /* Operational speed if any */
+       if (hu->oper_speed)
+               speed = hu->oper_speed;
+       else if (hu->proto->oper_speed)
+               speed = hu->proto->oper_speed;
+       else
+               speed = 0;
+
+       if (speed) {
+               __le32 speed_le = cpu_to_le32(speed);
+               struct sk_buff *skb;
 
+               skb = __hci_cmd_sync(hu->hdev, HCI_VS_UPDATE_UART_HCI_BAUDRATE,
+                                    sizeof(speed_le), &speed_le,
+                                    HCI_INIT_TIMEOUT);
+               if (!IS_ERR(skb)) {
+                       kfree_skb(skb);
+                       serdev_device_set_baudrate(serdev, speed);
+               }
+       }
 
        return 0;
 }
index 6463350..05f7f6d 100644 (file)
@@ -520,7 +520,7 @@ static int nokia_enqueue(struct hci_uart *hu, struct sk_buff *skb)
                err = skb_pad(skb, 1);
                if (err)
                        return err;
-               skb_put_u8(skb, 0x00);
+               skb_put(skb, 1);
        }
 
        skb_queue_tail(&btdev->txq, skb);
index e3164c2..c591a8b 100644 (file)
@@ -130,8 +130,6 @@ enum qca_speed_type {
  */
 struct qca_vreg {
        const char *name;
-       unsigned int min_uV;
-       unsigned int max_uV;
        unsigned int load_uA;
 };
 
@@ -146,8 +144,8 @@ struct qca_vreg_data {
  */
 struct qca_power {
        struct device *dev;
-       const struct qca_vreg_data *vreg_data;
        struct regulator_bulk_data *vreg_bulk;
+       int num_vregs;
        bool vregs_on;
 };
 
@@ -162,7 +160,8 @@ struct qca_serdev {
        const char *firmware_name;
 };
 
-static int qca_power_setup(struct hci_uart *hu, bool on);
+static int qca_regulator_enable(struct qca_serdev *qcadev);
+static void qca_regulator_disable(struct qca_serdev *qcadev);
 static void qca_power_shutdown(struct hci_uart *hu);
 static int qca_power_off(struct hci_dev *hdev);
 
@@ -518,7 +517,7 @@ static int qca_open(struct hci_uart *hu)
                } else {
                        hu->init_speed = qcadev->init_speed;
                        hu->oper_speed = qcadev->oper_speed;
-                       ret = qca_power_setup(hu, true);
+                       ret = qca_regulator_enable(qcadev);
                        if (ret) {
                                destroy_workqueue(qca->workqueue);
                                kfree_skb(qca->rx_skb);
@@ -1188,7 +1187,7 @@ static int qca_wcn3990_init(struct hci_uart *hu)
        qcadev = serdev_device_get_drvdata(hu->serdev);
        if (!qcadev->bt_power->vregs_on) {
                serdev_device_close(hu->serdev);
-               ret = qca_power_setup(hu, true);
+               ret = qca_regulator_enable(qcadev);
                if (ret)
                        return ret;
 
@@ -1332,10 +1331,10 @@ static const struct hci_uart_proto qca_proto = {
 static const struct qca_vreg_data qca_soc_data_wcn3990 = {
        .soc_type = QCA_WCN3990,
        .vregs = (struct qca_vreg []) {
-               { "vddio",   1800000, 1900000,  15000  },
-               { "vddxo",   1800000, 1900000,  80000  },
-               { "vddrf",   1300000, 1350000,  300000 },
-               { "vddch0",  3300000, 3400000,  450000 },
+               { "vddio", 15000  },
+               { "vddxo", 80000  },
+               { "vddrf", 300000 },
+               { "vddch0", 450000 },
        },
        .num_vregs = 4,
 };
@@ -1343,19 +1342,22 @@ static const struct qca_vreg_data qca_soc_data_wcn3990 = {
 static const struct qca_vreg_data qca_soc_data_wcn3998 = {
        .soc_type = QCA_WCN3998,
        .vregs = (struct qca_vreg []) {
-               { "vddio",   1800000, 1900000,  10000  },
-               { "vddxo",   1800000, 1900000,  80000  },
-               { "vddrf",   1300000, 1352000,  300000 },
-               { "vddch0",  3300000, 3300000,  450000 },
+               { "vddio", 10000  },
+               { "vddxo", 80000  },
+               { "vddrf", 300000 },
+               { "vddch0", 450000 },
        },
        .num_vregs = 4,
 };
 
 static void qca_power_shutdown(struct hci_uart *hu)
 {
+       struct qca_serdev *qcadev;
        struct qca_data *qca = hu->priv;
        unsigned long flags;
 
+       qcadev = serdev_device_get_drvdata(hu->serdev);
+
        /* From this point we go into power off state. But serial port is
         * still open, stop queueing the IBS data and flush all the buffered
         * data in skb's.
@@ -1367,7 +1369,7 @@ static void qca_power_shutdown(struct hci_uart *hu)
 
        host_set_baudrate(hu, 2400);
        qca_send_power_pulse(hu, false);
-       qca_power_setup(hu, false);
+       qca_regulator_disable(qcadev);
 }
 
 static int qca_power_off(struct hci_dev *hdev)
@@ -1383,97 +1385,71 @@ static int qca_power_off(struct hci_dev *hdev)
        return 0;
 }
 
-static int qca_enable_regulator(struct qca_vreg vregs,
-                               struct regulator *regulator)
+static int qca_regulator_enable(struct qca_serdev *qcadev)
 {
+       struct qca_power *power = qcadev->bt_power;
        int ret;
 
-       ret = regulator_set_voltage(regulator, vregs.min_uV,
-                                   vregs.max_uV);
-       if (ret)
-               return ret;
+       /* Already enabled */
+       if (power->vregs_on)
+               return 0;
 
-       if (vregs.load_uA)
-               ret = regulator_set_load(regulator,
-                                        vregs.load_uA);
+       BT_DBG("enabling %d regulators)", power->num_vregs);
 
+       ret = regulator_bulk_enable(power->num_vregs, power->vreg_bulk);
        if (ret)
                return ret;
 
-       return regulator_enable(regulator);
-
-}
-
-static void qca_disable_regulator(struct qca_vreg vregs,
-                                 struct regulator *regulator)
-{
-       regulator_disable(regulator);
-       regulator_set_voltage(regulator, 0, vregs.max_uV);
-       if (vregs.load_uA)
-               regulator_set_load(regulator, 0);
+       power->vregs_on = true;
 
+       return 0;
 }
 
-static int qca_power_setup(struct hci_uart *hu, bool on)
+static void qca_regulator_disable(struct qca_serdev *qcadev)
 {
-       struct qca_vreg *vregs;
-       struct regulator_bulk_data *vreg_bulk;
-       struct qca_serdev *qcadev;
-       int i, num_vregs, ret = 0;
+       struct qca_power *power;
 
-       qcadev = serdev_device_get_drvdata(hu->serdev);
-       if (!qcadev || !qcadev->bt_power || !qcadev->bt_power->vreg_data ||
-           !qcadev->bt_power->vreg_bulk)
-               return -EINVAL;
-
-       vregs = qcadev->bt_power->vreg_data->vregs;
-       vreg_bulk = qcadev->bt_power->vreg_bulk;
-       num_vregs = qcadev->bt_power->vreg_data->num_vregs;
-       BT_DBG("on: %d", on);
-       if (on && !qcadev->bt_power->vregs_on) {
-               for (i = 0; i < num_vregs; i++) {
-                       ret = qca_enable_regulator(vregs[i],
-                                                  vreg_bulk[i].consumer);
-                       if (ret)
-                               break;
-               }
+       if (!qcadev)
+               return;
 
-               if (ret) {
-                       BT_ERR("failed to enable regulator:%s", vregs[i].name);
-                       /* turn off regulators which are enabled */
-                       for (i = i - 1; i >= 0; i--)
-                               qca_disable_regulator(vregs[i],
-                                                     vreg_bulk[i].consumer);
-               } else {
-                       qcadev->bt_power->vregs_on = true;
-               }
-       } else if (!on && qcadev->bt_power->vregs_on) {
-               /* turn off regulator in reverse order */
-               i = qcadev->bt_power->vreg_data->num_vregs - 1;
-               for ( ; i >= 0; i--)
-                       qca_disable_regulator(vregs[i], vreg_bulk[i].consumer);
+       power = qcadev->bt_power;
 
-               qcadev->bt_power->vregs_on = false;
-       }
+       /* Already disabled? */
+       if (!power->vregs_on)
+               return;
 
-       return ret;
+       regulator_bulk_disable(power->num_vregs, power->vreg_bulk);
+       power->vregs_on = false;
 }
 
 static int qca_init_regulators(struct qca_power *qca,
                                const struct qca_vreg *vregs, size_t num_vregs)
 {
+       struct regulator_bulk_data *bulk;
+       int ret;
        int i;
 
-       qca->vreg_bulk = devm_kcalloc(qca->dev, num_vregs,
-                                     sizeof(struct regulator_bulk_data),
-                                     GFP_KERNEL);
-       if (!qca->vreg_bulk)
+       bulk = devm_kcalloc(qca->dev, num_vregs, sizeof(*bulk), GFP_KERNEL);
+       if (!bulk)
                return -ENOMEM;
 
        for (i = 0; i < num_vregs; i++)
-               qca->vreg_bulk[i].supply = vregs[i].name;
+               bulk[i].supply = vregs[i].name;
+
+       ret = devm_regulator_bulk_get(qca->dev, num_vregs, bulk);
+       if (ret < 0)
+               return ret;
 
-       return devm_regulator_bulk_get(qca->dev, num_vregs, qca->vreg_bulk);
+       for (i = 0; i < num_vregs; i++) {
+               ret = regulator_set_load(bulk[i].consumer, vregs[i].load_uA);
+               if (ret)
+                       return ret;
+       }
+
+       qca->vreg_bulk = bulk;
+       qca->num_vregs = num_vregs;
+
+       return 0;
 }
 
 static int qca_serdev_probe(struct serdev_device *serdev)
@@ -1500,7 +1476,6 @@ static int qca_serdev_probe(struct serdev_device *serdev)
                        return -ENOMEM;
 
                qcadev->bt_power->dev = &serdev->dev;
-               qcadev->bt_power->vreg_data = data;
                err = qca_init_regulators(qcadev->bt_power, data->vregs,
                                          data->num_vregs);
                if (err) {
index 52c7e15..c8b1c38 100644 (file)
@@ -104,10 +104,8 @@ static int __fsl_mc_device_match(struct device *dev, void *data)
        return fsl_mc_device_match(mc_dev, obj_desc);
 }
 
-static struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc
-                                                               *obj_desc,
-                                                 struct fsl_mc_device
-                                                               *mc_bus_dev)
+struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc *obj_desc,
+                                          struct fsl_mc_device *mc_bus_dev)
 {
        struct device *dev;
 
index 0fe3f52..602f030 100644 (file)
@@ -554,3 +554,56 @@ int dprc_get_container_id(struct fsl_mc_io *mc_io,
 
        return 0;
 }
+
+/**
+ * dprc_get_connection() - Get connected endpoint and link status if connection
+ *                     exists.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPRC object
+ * @endpoint1: Endpoint 1 configuration parameters
+ * @endpoint2: Returned endpoint 2 configuration parameters
+ * @state:     Returned link state:
+ *             1 - link is up;
+ *             0 - link is down;
+ *             -1 - no connection (endpoint2 information is irrelevant)
+ *
+ * Return:     '0' on success; -ENOTCONN if connection does not exist.
+ */
+int dprc_get_connection(struct fsl_mc_io *mc_io,
+                       u32 cmd_flags,
+                       u16 token,
+                       const struct dprc_endpoint *endpoint1,
+                       struct dprc_endpoint *endpoint2,
+                       int *state)
+{
+       struct dprc_cmd_get_connection *cmd_params;
+       struct dprc_rsp_get_connection *rsp_params;
+       struct fsl_mc_command cmd = { 0 };
+       int err, i;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_CONNECTION,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dprc_cmd_get_connection *)cmd.params;
+       cmd_params->ep1_id = cpu_to_le32(endpoint1->id);
+       cmd_params->ep1_interface_id = cpu_to_le16(endpoint1->if_id);
+       for (i = 0; i < 16; i++)
+               cmd_params->ep1_type[i] = endpoint1->type[i];
+
+       /* send command to mc */
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return -ENOTCONN;
+
+       /* retrieve response parameters */
+       rsp_params = (struct dprc_rsp_get_connection *)cmd.params;
+       endpoint2->id = le32_to_cpu(rsp_params->ep2_id);
+       endpoint2->if_id = le16_to_cpu(rsp_params->ep2_interface_id);
+       *state = le32_to_cpu(rsp_params->state);
+       for (i = 0; i < 16; i++)
+               endpoint2->type[i] = rsp_params->ep2_type[i];
+
+       return 0;
+}
index 5c9bf2e..a07cc19 100644 (file)
@@ -166,42 +166,52 @@ EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
 struct device_type fsl_mc_bus_dprc_type = {
        .name = "fsl_mc_bus_dprc"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dprc_type);
 
 struct device_type fsl_mc_bus_dpni_type = {
        .name = "fsl_mc_bus_dpni"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpni_type);
 
 struct device_type fsl_mc_bus_dpio_type = {
        .name = "fsl_mc_bus_dpio"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpio_type);
 
 struct device_type fsl_mc_bus_dpsw_type = {
        .name = "fsl_mc_bus_dpsw"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpsw_type);
 
 struct device_type fsl_mc_bus_dpbp_type = {
        .name = "fsl_mc_bus_dpbp"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpbp_type);
 
 struct device_type fsl_mc_bus_dpcon_type = {
        .name = "fsl_mc_bus_dpcon"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpcon_type);
 
 struct device_type fsl_mc_bus_dpmcp_type = {
        .name = "fsl_mc_bus_dpmcp"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpmcp_type);
 
 struct device_type fsl_mc_bus_dpmac_type = {
        .name = "fsl_mc_bus_dpmac"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpmac_type);
 
 struct device_type fsl_mc_bus_dprtc_type = {
        .name = "fsl_mc_bus_dprtc"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dprtc_type);
 
 struct device_type fsl_mc_bus_dpseci_type = {
        .name = "fsl_mc_bus_dpseci"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpseci_type);
 
 static struct device_type *fsl_mc_get_device_type(const char *type)
 {
@@ -702,6 +712,39 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev)
 }
 EXPORT_SYMBOL_GPL(fsl_mc_device_remove);
 
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
+{
+       struct fsl_mc_device *mc_bus_dev, *endpoint;
+       struct fsl_mc_obj_desc endpoint_desc = { 0 };
+       struct dprc_endpoint endpoint1 = { 0 };
+       struct dprc_endpoint endpoint2 = { 0 };
+       int state, err;
+
+       mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
+       strcpy(endpoint1.type, mc_dev->obj_desc.type);
+       endpoint1.id = mc_dev->obj_desc.id;
+
+       err = dprc_get_connection(mc_bus_dev->mc_io, 0,
+                                 mc_bus_dev->mc_handle,
+                                 &endpoint1, &endpoint2,
+                                 &state);
+
+       if (err == -ENOTCONN || state == -1)
+               return ERR_PTR(-ENOTCONN);
+
+       if (err < 0) {
+               dev_err(&mc_bus_dev->dev, "dprc_get_connection() = %d\n", err);
+               return ERR_PTR(err);
+       }
+
+       strcpy(endpoint_desc.type, endpoint2.type);
+       endpoint_desc.id = endpoint2.id;
+       endpoint = fsl_mc_device_lookup(&endpoint_desc, mc_bus_dev);
+
+       return endpoint;
+}
+EXPORT_SYMBOL_GPL(fsl_mc_get_endpoint);
+
 static int parse_mc_ranges(struct device *dev,
                           int *paddr_cells,
                           int *mc_addr_cells,
index 020fcc0..21ca8c7 100644 (file)
@@ -105,6 +105,8 @@ int dpmcp_reset(struct fsl_mc_io *mc_io,
 #define DPRC_CMDID_GET_OBJ_REG_V2               DPRC_CMD_V2(0x15E)
 #define DPRC_CMDID_SET_OBJ_IRQ                  DPRC_CMD(0x15F)
 
+#define DPRC_CMDID_GET_CONNECTION               DPRC_CMD(0x16C)
+
 struct dprc_cmd_open {
        __le32 container_id;
 };
@@ -228,6 +230,22 @@ struct dprc_cmd_set_obj_irq {
        u8 obj_type[16];
 };
 
+struct dprc_cmd_get_connection {
+       __le32 ep1_id;
+       __le16 ep1_interface_id;
+       u8 pad[2];
+       u8 ep1_type[16];
+};
+
+struct dprc_rsp_get_connection {
+       __le64 pad[3];
+       __le32 ep2_id;
+       __le16 ep2_interface_id;
+       __le16 pad1;
+       u8 ep2_type[16];
+       __le32 state;
+};
+
 /*
  * DPRC API for managing and querying DPAA resources
  */
@@ -392,6 +410,27 @@ int dprc_get_container_id(struct fsl_mc_io *mc_io,
                          u32 cmd_flags,
                          int *container_id);
 
+/**
+ * struct dprc_endpoint - Endpoint description for link connect/disconnect
+ *                     operations
+ * @type:      Endpoint object type: NULL terminated string
+ * @id:                Endpoint object ID
+ * @if_id:     Interface ID; should be set for endpoints with multiple
+ *             interfaces ("dpsw", "dpdmux"); for others, always set to 0
+ */
+struct dprc_endpoint {
+       char type[16];
+       int id;
+       u16 if_id;
+};
+
+int dprc_get_connection(struct fsl_mc_io *mc_io,
+                       u32 cmd_flags,
+                       u16 token,
+                       const struct dprc_endpoint *endpoint1,
+                       struct dprc_endpoint *endpoint2,
+                       int *state);
+
 /*
  * Data Path Buffer Pool (DPBP) API
  */
@@ -574,4 +613,7 @@ void fsl_destroy_mc_io(struct fsl_mc_io *mc_io);
 
 bool fsl_mc_is_root_dprc(struct device *dev);
 
+struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc *obj_desc,
+                                          struct fsl_mc_device *mc_bus_dev);
+
 #endif /* _FSL_MC_PRIVATE_H_ */
index 2501505..91e4243 100644 (file)
@@ -35,7 +35,7 @@ config CHELSIO_IPSEC_INLINE
 config CRYPTO_DEV_CHELSIO_TLS
         tristate "Chelsio Crypto Inline TLS Driver"
         depends on CHELSIO_T4
-        depends on TLS
+        depends on TLS_TOE
         select CRYPTO_DEV_CHELSIO
         ---help---
           Support Chelsio Inline TLS with Chelsio crypto accelerator.
index 025c831..d2bc655 100644 (file)
@@ -21,6 +21,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/tls.h>
 #include <net/tls.h>
+#include <net/tls_toe.h>
 
 #include "t4fw_api.h"
 #include "t4_msg.h"
@@ -118,7 +119,7 @@ struct tls_scmd {
 };
 
 struct chtls_dev {
-       struct tls_device tlsdev;
+       struct tls_toe_device tlsdev;
        struct list_head list;
        struct cxgb4_lld_info *lldi;
        struct pci_dev *pdev;
@@ -362,7 +363,7 @@ enum {
 #define TCP_PAGE(sk)   (sk->sk_frag.page)
 #define TCP_OFF(sk)    (sk->sk_frag.offset)
 
-static inline struct chtls_dev *to_chtls_dev(struct tls_device *tlsdev)
+static inline struct chtls_dev *to_chtls_dev(struct tls_toe_device *tlsdev)
 {
        return container_of(tlsdev, struct chtls_dev, tlsdev);
 }
index e6df5b9..1899693 100644 (file)
@@ -124,7 +124,7 @@ static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk)
        mutex_unlock(&notify_mutex);
 }
 
-static int chtls_inline_feature(struct tls_device *dev)
+static int chtls_inline_feature(struct tls_toe_device *dev)
 {
        struct net_device *netdev;
        struct chtls_dev *cdev;
@@ -140,7 +140,7 @@ static int chtls_inline_feature(struct tls_device *dev)
        return 0;
 }
 
-static int chtls_create_hash(struct tls_device *dev, struct sock *sk)
+static int chtls_create_hash(struct tls_toe_device *dev, struct sock *sk)
 {
        struct chtls_dev *cdev = to_chtls_dev(dev);
 
@@ -149,7 +149,7 @@ static int chtls_create_hash(struct tls_device *dev, struct sock *sk)
        return 0;
 }
 
-static void chtls_destroy_hash(struct tls_device *dev, struct sock *sk)
+static void chtls_destroy_hash(struct tls_toe_device *dev, struct sock *sk)
 {
        struct chtls_dev *cdev = to_chtls_dev(dev);
 
@@ -161,7 +161,7 @@ static void chtls_free_uld(struct chtls_dev *cdev)
 {
        int i;
 
-       tls_unregister_device(&cdev->tlsdev);
+       tls_toe_unregister_device(&cdev->tlsdev);
        kvfree(cdev->kmap.addr);
        idr_destroy(&cdev->hwtid_idr);
        for (i = 0; i < (1 << RSPQ_HASH_BITS); i++)
@@ -173,27 +173,27 @@ static void chtls_free_uld(struct chtls_dev *cdev)
 
 static inline void chtls_dev_release(struct kref *kref)
 {
+       struct tls_toe_device *dev;
        struct chtls_dev *cdev;
-       struct tls_device *dev;
 
-       dev = container_of(kref, struct tls_device, kref);
+       dev = container_of(kref, struct tls_toe_device, kref);
        cdev = to_chtls_dev(dev);
        chtls_free_uld(cdev);
 }
 
 static void chtls_register_dev(struct chtls_dev *cdev)
 {
-       struct tls_device *tlsdev = &cdev->tlsdev;
+       struct tls_toe_device *tlsdev = &cdev->tlsdev;
 
-       strlcpy(tlsdev->name, "chtls", TLS_DEVICE_NAME_MAX);
+       strlcpy(tlsdev->name, "chtls", TLS_TOE_DEVICE_NAME_MAX);
        strlcat(tlsdev->name, cdev->lldi->ports[0]->name,
-               TLS_DEVICE_NAME_MAX);
+               TLS_TOE_DEVICE_NAME_MAX);
        tlsdev->feature = chtls_inline_feature;
        tlsdev->hash = chtls_create_hash;
        tlsdev->unhash = chtls_destroy_hash;
        tlsdev->release = chtls_dev_release;
        kref_init(&tlsdev->kref);
-       tls_register_device(tlsdev);
+       tls_toe_register_device(tlsdev);
        cdev->cdev_state = CHTLS_CDEV_STATE_UP;
 }
 
index d03ed8e..8e3d355 100644 (file)
@@ -22,3 +22,11 @@ config BCM47XX_SPROM
          In case of SoC devices SPROM content is stored on a flash used by
          bootloader firmware CFE. This driver provides method to ssb and bcma
          drivers to read SPROM on SoC.
+
+config TEE_BNXT_FW
+       tristate "Broadcom BNXT firmware manager"
+       depends on (ARCH_BCM_IPROC && OPTEE) || (COMPILE_TEST && TEE)
+       default ARCH_BCM_IPROC
+       help
+         This module helps to manage firmware on a Broadcom BNXT device. The module
+         registers on the tee bus and invokes calls to manage firmware on the BNXT device.
index 72c7fdc..17c5061 100644 (file)
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BCM47XX_NVRAM)            += bcm47xx_nvram.o
 obj-$(CONFIG_BCM47XX_SPROM)            += bcm47xx_sprom.o
+obj-$(CONFIG_TEE_BNXT_FW)              += tee_bnxt_fw.o
diff --git a/drivers/firmware/broadcom/tee_bnxt_fw.c b/drivers/firmware/broadcom/tee_bnxt_fw.c
new file mode 100644 (file)
index 0000000..5b7ef89
--- /dev/null
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Broadcom.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include <linux/uuid.h>
+
+#include <linux/firmware/broadcom/tee_bnxt_fw.h>
+
+#define MAX_SHM_MEM_SZ SZ_4M
+
+#define MAX_TEE_PARAM_ARRY_MEMB                4
+
+enum ta_cmd {
+       /*
+        * TA_CMD_BNXT_FASTBOOT - boot bnxt device by copying f/w into sram
+        *
+        *      param[0] unused
+        *      param[1] unused
+        *      param[2] unused
+        *      param[3] unused
+        *
+        * Result:
+        *      TEE_SUCCESS - Invoke command success
+        *      TEE_ERROR_ITEM_NOT_FOUND - Corrupt f/w image found on memory
+        */
+       TA_CMD_BNXT_FASTBOOT = 0,
+
+       /*
+        * TA_CMD_BNXT_COPY_COREDUMP - copy the core dump into shm
+        *
+        *      param[0] (inout memref) - Coredump buffer memory reference
+        *      param[1] (in value) - value.a: offset to copy data from
+        *                            value.b: size of data to be copied
+        *      param[2] unused
+        *      param[3] unused
+        *
+        * Result:
+        *      TEE_SUCCESS - Invoke command success
+        *      TEE_ERROR_BAD_PARAMETERS - Incorrect input param
+        *      TEE_ERROR_ITEM_NOT_FOUND - Corrupt core dump
+        */
+       TA_CMD_BNXT_COPY_COREDUMP = 3,
+};
+
+/**
+ * struct tee_bnxt_fw_private - OP-TEE bnxt private data
+ * @dev:               OP-TEE based bnxt device.
+ * @ctx:               OP-TEE context handler.
+ * @session_id:                TA session identifier.
+ */
+struct tee_bnxt_fw_private {
+       struct device *dev;
+       struct tee_context *ctx;
+       u32 session_id;
+       struct tee_shm *fw_shm_pool;
+};
+
+static struct tee_bnxt_fw_private pvt_data;
+
+static void prepare_args(int cmd,
+                        struct tee_ioctl_invoke_arg *arg,
+                        struct tee_param *param)
+{
+       memset(arg, 0, sizeof(*arg));
+       memset(param, 0, MAX_TEE_PARAM_ARRY_MEMB * sizeof(*param));
+
+       arg->func = cmd;
+       arg->session = pvt_data.session_id;
+       arg->num_params = MAX_TEE_PARAM_ARRY_MEMB;
+
+       /* Fill invoke cmd params */
+       switch (cmd) {
+       case TA_CMD_BNXT_COPY_COREDUMP:
+               param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT;
+               param[0].u.memref.shm = pvt_data.fw_shm_pool;
+               param[0].u.memref.size = MAX_SHM_MEM_SZ;
+               param[0].u.memref.shm_offs = 0;
+               param[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+               break;
+       case TA_CMD_BNXT_FASTBOOT:
+       default:
+               /* Nothing to do */
+               break;
+       }
+}
+
+/**
+ * tee_bnxt_fw_load() - Load the bnxt firmware
+ *                 Uses an OP-TEE call to start a secure
+ *                 boot process.
+ * Returns 0 on success, negative errno otherwise.
+ */
+int tee_bnxt_fw_load(void)
+{
+       int ret = 0;
+       struct tee_ioctl_invoke_arg arg;
+       struct tee_param param[MAX_TEE_PARAM_ARRY_MEMB];
+
+       if (!pvt_data.ctx)
+               return -ENODEV;
+
+       prepare_args(TA_CMD_BNXT_FASTBOOT, &arg, param);
+
+       ret = tee_client_invoke_func(pvt_data.ctx, &arg, param);
+       if (ret < 0 || arg.ret != 0) {
+               dev_err(pvt_data.dev,
+                       "TA_CMD_BNXT_FASTBOOT invoke failed TEE err: %x, ret:%x\n",
+                       arg.ret, ret);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(tee_bnxt_fw_load);
+
+/**
+ * tee_bnxt_copy_coredump() - Copy coredump from the allocated memory
+ *                         Uses an OP-TEE call to copy coredump
+ * @buf:       destination buffer where core dump is copied into
+ * @offset:    offset from the base address of core dump area
+ * @size:      size of the dump
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+int tee_bnxt_copy_coredump(void *buf, u32 offset, u32 size)
+{
+       struct tee_ioctl_invoke_arg arg;
+       struct tee_param param[MAX_TEE_PARAM_ARRY_MEMB];
+       void *core_data;
+       u32 rbytes = size;
+       u32 nbytes = 0;
+       int ret = 0;
+
+       if (!pvt_data.ctx)
+               return -ENODEV;
+
+       prepare_args(TA_CMD_BNXT_COPY_COREDUMP, &arg, param);
+
+       while (rbytes)  {
+               nbytes = rbytes;
+
+               nbytes = min_t(u32, rbytes, param[0].u.memref.size);
+
+               /* Fill additional invoke cmd params */
+               param[1].u.value.a = offset;
+               param[1].u.value.b = nbytes;
+
+               ret = tee_client_invoke_func(pvt_data.ctx, &arg, param);
+               if (ret < 0 || arg.ret != 0) {
+                       dev_err(pvt_data.dev,
+                               "TA_CMD_BNXT_COPY_COREDUMP invoke failed TEE err: %x, ret:%x\n",
+                               arg.ret, ret);
+                       return -EINVAL;
+               }
+
+               core_data = tee_shm_get_va(pvt_data.fw_shm_pool, 0);
+               if (IS_ERR(core_data)) {
+                       dev_err(pvt_data.dev, "tee_shm_get_va failed\n");
+                       return PTR_ERR(core_data);
+               }
+
+               memcpy(buf, core_data, nbytes);
+
+               rbytes -= nbytes;
+               buf += nbytes;
+               offset += nbytes;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(tee_bnxt_copy_coredump);
+
+static int optee_ctx_match(struct tee_ioctl_version_data *ver, const void *data)
+{
+       return (ver->impl_id == TEE_IMPL_ID_OPTEE);
+}
+
+static int tee_bnxt_fw_probe(struct device *dev)
+{
+       struct tee_client_device *bnxt_device = to_tee_client_device(dev);
+       int ret, err = -ENODEV;
+       struct tee_ioctl_open_session_arg sess_arg;
+       struct tee_shm *fw_shm_pool;
+
+       memset(&sess_arg, 0, sizeof(sess_arg));
+
+       /* Open context with TEE driver */
+       pvt_data.ctx = tee_client_open_context(NULL, optee_ctx_match, NULL,
+                                              NULL);
+       if (IS_ERR(pvt_data.ctx))
+               return -ENODEV;
+
+       /* Open session with Bnxt load Trusted App */
+       memcpy(sess_arg.uuid, bnxt_device->id.uuid.b, TEE_IOCTL_UUID_LEN);
+       sess_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
+       sess_arg.num_params = 0;
+
+       ret = tee_client_open_session(pvt_data.ctx, &sess_arg, NULL);
+       if (ret < 0 || sess_arg.ret != 0) {
+               dev_err(dev, "tee_client_open_session failed, err: %x\n",
+                       sess_arg.ret);
+               err = -EINVAL;
+               goto out_ctx;
+       }
+       pvt_data.session_id = sess_arg.session;
+
+       pvt_data.dev = dev;
+
+       fw_shm_pool = tee_shm_alloc(pvt_data.ctx, MAX_SHM_MEM_SZ,
+                                   TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+       if (IS_ERR(fw_shm_pool)) {
+               tee_client_close_context(pvt_data.ctx);
+               dev_err(pvt_data.dev, "tee_shm_alloc failed\n");
+               err = PTR_ERR(fw_shm_pool);
+               goto out_sess;
+       }
+
+       pvt_data.fw_shm_pool = fw_shm_pool;
+
+       return 0;
+
+out_sess:
+       tee_client_close_session(pvt_data.ctx, pvt_data.session_id);
+out_ctx:
+       tee_client_close_context(pvt_data.ctx);
+
+       return err;
+}
+
+static int tee_bnxt_fw_remove(struct device *dev)
+{
+       tee_shm_free(pvt_data.fw_shm_pool);
+       tee_client_close_session(pvt_data.ctx, pvt_data.session_id);
+       tee_client_close_context(pvt_data.ctx);
+       pvt_data.ctx = NULL;
+
+       return 0;
+}
+
+static const struct tee_client_device_id tee_bnxt_fw_id_table[] = {
+       {UUID_INIT(0x6272636D, 0x2019, 0x0716,
+                   0x42, 0x43, 0x4D, 0x5F, 0x53, 0x43, 0x48, 0x49)},
+       {}
+};
+
+MODULE_DEVICE_TABLE(tee, tee_bnxt_fw_id_table);
+
+static struct tee_client_driver tee_bnxt_fw_driver = {
+       .id_table       = tee_bnxt_fw_id_table,
+       .driver         = {
+               .name           = KBUILD_MODNAME,
+               .bus            = &tee_bus_type,
+               .probe          = tee_bnxt_fw_probe,
+               .remove         = tee_bnxt_fw_remove,
+       },
+};
+
+static int __init tee_bnxt_fw_mod_init(void)
+{
+       return driver_register(&tee_bnxt_fw_driver.driver);
+}
+
+static void __exit tee_bnxt_fw_mod_exit(void)
+{
+       driver_unregister(&tee_bnxt_fw_driver.driver);
+}
+
+module_init(tee_bnxt_fw_mod_init);
+module_exit(tee_bnxt_fw_mod_exit);
+
+MODULE_AUTHOR("Vikas Gupta <vikas.gupta@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom bnxt firmware manager");
+MODULE_LICENSE("GPL v2");
index e4fa2a2..7e2bc50 100644 (file)
@@ -173,8 +173,8 @@ symbolic(struct hfcusb_symbolic_list list[], const int num)
 
 
 /*
- * List of all supported enpoints configiration sets, used to find the
- * best matching endpoint configuration within a devices' USB descriptor.
+ * List of all supported endpoint configuration sets, used to find the
+ * best matching endpoint configuration within a device's USB descriptor.
  * We need at least 3 RX endpoints, and 3 TX endpoints, either
  * INT-in and ISO-out, or ISO-in and ISO-out)
  * with 4 RX endpoints even E-Channel logging is possible
index 4a3e748..0303b23 100644 (file)
@@ -27,7 +27,6 @@ MODULE_VERSION(ISAR_REV);
 
 #define DEBUG_HW_FIRMWARE_FIFO 0x10000
 
-static const u8 faxmodulation_s[] = "3,24,48,72,73,74,96,97,98,121,122,145,146";
 static const u8 faxmodulation[] = {3, 24, 48, 72, 73, 74, 96, 97, 98, 121,
                                   122, 145, 146};
 #define FAXMODCNT 13
index 480f945..0059e6b 100644 (file)
@@ -200,6 +200,51 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0);
 
 unsigned int bond_net_id __read_mostly;
 
+static const struct flow_dissector_key flow_keys_bonding_keys[] = {
+       {
+               .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+               .offset = offsetof(struct flow_keys, control),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_BASIC,
+               .offset = offsetof(struct flow_keys, basic),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+               .offset = offsetof(struct flow_keys, addrs.v4addrs),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+               .offset = offsetof(struct flow_keys, addrs.v6addrs),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_TIPC,
+               .offset = offsetof(struct flow_keys, addrs.tipckey),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_PORTS,
+               .offset = offsetof(struct flow_keys, ports),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_ICMP,
+               .offset = offsetof(struct flow_keys, icmp),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_VLAN,
+               .offset = offsetof(struct flow_keys, vlan),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
+               .offset = offsetof(struct flow_keys, tags),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+               .offset = offsetof(struct flow_keys, keyid),
+       },
+};
+
+static struct flow_dissector flow_keys_bonding __read_mostly;
+
 /*-------------------------- Forward declarations ---------------------------*/
 
 static int bond_init(struct net_device *bond_dev);
@@ -3260,10 +3305,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
        const struct iphdr *iph;
        int noff, proto = -1;
 
-       if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
-               return skb_flow_dissect_flow_keys(skb, fk, 0);
+       if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) {
+               memset(fk, 0, sizeof(*fk));
+               return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
+                                         fk, NULL, 0, 0, 0, 0);
+       }
 
        fk->ports.ports = 0;
+       memset(&fk->icmp, 0, sizeof(fk->icmp));
        noff = skb_network_offset(skb);
        if (skb->protocol == htons(ETH_P_IP)) {
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
@@ -3283,8 +3332,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
        } else {
                return false;
        }
-       if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
-               fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
+       if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) {
+               if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
+                       skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
+                                             skb_transport_offset(skb),
+                                             skb_headlen(skb));
+               else
+                       fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
+       }
 
        return true;
 }
@@ -3311,10 +3366,14 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
                return bond_eth_hash(skb);
 
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
-           bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
+           bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
                hash = bond_eth_hash(skb);
-       else
-               hash = (__force u32)flow.ports.ports;
+       } else {
+               if (flow.icmp.id)
+                       memcpy(&hash, &flow.icmp, sizeof(hash));
+               else
+                       memcpy(&hash, &flow.ports.ports, sizeof(hash));
+       }
        hash ^= (__force u32)flow_get_u32_dst(&flow) ^
                (__force u32)flow_get_u32_src(&flow);
        hash ^= (hash >> 16);
@@ -4891,6 +4950,10 @@ static int __init bonding_init(void)
                        goto err;
        }
 
+       skb_flow_dissector_init(&flow_keys_bonding,
+                               flow_keys_bonding_keys,
+                               ARRAY_SIZE(flow_keys_bonding_keys));
+
        register_netdevice_notifier(&bond_netdev_notifier);
 out:
        return res;
index 2b9a2f1..96d7cef 100644 (file)
@@ -3,7 +3,13 @@
 # CAIF physical drivers
 #
 
-comment "CAIF transport drivers"
+menuconfig CAIF_DRIVERS
+       bool "CAIF transport drivers"
+       depends on CAIF
+       help
+         Enable this to see CAIF physical drivers.
+
+if CAIF_DRIVERS
 
 config CAIF_TTY
        tristate "CAIF TTY transport driver"
@@ -22,7 +28,7 @@ config CAIF_SPI_SLAVE
        The CAIF Link layer SPI Protocol driver for Slave SPI interface.
        This driver implements a platform driver to accommodate for a
        platform specific SPI device. A sample CAIF SPI Platform device is
-       provided in Documentation/networking/caif/spi_porting.txt
+       provided in <file:Documentation/networking/caif/spi_porting.txt>.
 
 config CAIF_SPI_SYNC
        bool "Next command and length in start of frame"
@@ -38,7 +44,7 @@ config CAIF_HSI
        depends on CAIF
        default n
        ---help---
-       The caif low level driver for CAIF over HSI.
+       The CAIF low level driver for CAIF over HSI.
        Be aware that if you enable this then you also need to
        enable a low-level HSI driver.
 
@@ -50,8 +56,10 @@ config CAIF_VIRTIO
        select GENERIC_ALLOCATOR
        default n
        ---help---
-       The caif driver for CAIF over Virtio.
+       The CAIF driver for CAIF over Virtio.
 
 if CAIF_VIRTIO
 source "drivers/vhost/Kconfig.vringh"
 endif
+
+endif # CAIF_DRIVERS
index f6232ce..685e12b 100644 (file)
@@ -77,6 +77,7 @@ config NET_DSA_REALTEK_SMI
 config NET_DSA_SMSC_LAN9303
        tristate
        select NET_DSA_TAG_LAN9303
+       select REGMAP
        ---help---
          This enables support for the SMSC/Microchip LAN9303 3 port ethernet
          switch chips.
index cc35363..36828f2 100644 (file)
@@ -524,7 +524,7 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
        if (!dsa_is_user_port(ds, port))
                return 0;
 
-       cpu_port = ds->ports[port].cpu_dp->index;
+       cpu_port = dsa_to_port(ds, port)->cpu_dp->index;
 
        if (dev->ops->irq_enable)
                ret = dev->ops->irq_enable(dev, port);
@@ -1503,11 +1503,25 @@ static int b53_arl_op(struct b53_device *dev, int op, int port,
                idx = 1;
        }
 
-       memset(&ent, 0, sizeof(ent));
-       ent.port = port;
+       /* For multicast address, the port is a bitmask and the validity
+        * is determined by having at least one port being still active
+        */
+       if (!is_multicast_ether_addr(addr)) {
+               ent.port = port;
+               ent.is_valid = is_valid;
+       } else {
+               if (is_valid)
+                       ent.port |= BIT(port);
+               else
+                       ent.port &= ~BIT(port);
+
+               ent.is_valid = !!(ent.port);
+       }
+
        ent.is_valid = is_valid;
        ent.vid = vid;
        ent.is_static = true;
+       ent.is_age = false;
        memcpy(ent.mac, addr, ETH_ALEN);
        b53_arl_from_entry(&mac_vid, &fwd_entry, &ent);
 
@@ -1626,10 +1640,51 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
 }
 EXPORT_SYMBOL(b53_fdb_dump);
 
+int b53_mdb_prepare(struct dsa_switch *ds, int port,
+                   const struct switchdev_obj_port_mdb *mdb)
+{
+       struct b53_device *priv = ds->priv;
+
+       /* 5325 and 5365 require some more massaging, but could
+        * be supported eventually
+        */
+       if (is5325(priv) || is5365(priv))
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+EXPORT_SYMBOL(b53_mdb_prepare);
+
+void b53_mdb_add(struct dsa_switch *ds, int port,
+                const struct switchdev_obj_port_mdb *mdb)
+{
+       struct b53_device *priv = ds->priv;
+       int ret;
+
+       ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true);
+       if (ret)
+               dev_err(ds->dev, "failed to add MDB entry\n");
+}
+EXPORT_SYMBOL(b53_mdb_add);
+
+int b53_mdb_del(struct dsa_switch *ds, int port,
+               const struct switchdev_obj_port_mdb *mdb)
+{
+       struct b53_device *priv = ds->priv;
+       int ret;
+
+       ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, false);
+       if (ret)
+               dev_err(ds->dev, "failed to delete MDB entry\n");
+
+       return ret;
+}
+EXPORT_SYMBOL(b53_mdb_del);
+
 int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br)
 {
        struct b53_device *dev = ds->priv;
-       s8 cpu_port = ds->ports[port].cpu_dp->index;
+       s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index;
        u16 pvlan, reg;
        unsigned int i;
 
@@ -1675,7 +1730,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br)
 {
        struct b53_device *dev = ds->priv;
        struct b53_vlan *vl = &dev->vlans[0];
-       s8 cpu_port = ds->ports[port].cpu_dp->index;
+       s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index;
        unsigned int i;
        u16 pvlan, reg, pvid;
 
@@ -1994,6 +2049,9 @@ static const struct dsa_switch_ops b53_switch_ops = {
        .port_fdb_del           = b53_fdb_del,
        .port_mirror_add        = b53_mirror_add,
        .port_mirror_del        = b53_mirror_del,
+       .port_mdb_prepare       = b53_mdb_prepare,
+       .port_mdb_add           = b53_mdb_add,
+       .port_mdb_del           = b53_mdb_del,
 };
 
 struct b53_chip_data {
@@ -2341,10 +2399,13 @@ struct b53_device *b53_switch_alloc(struct device *base,
        struct dsa_switch *ds;
        struct b53_device *dev;
 
-       ds = dsa_switch_alloc(base, DSA_MAX_PORTS);
+       ds = devm_kzalloc(base, sizeof(*ds), GFP_KERNEL);
        if (!ds)
                return NULL;
 
+       ds->dev = base;
+       ds->num_ports = DSA_MAX_PORTS;
+
        dev = devm_kzalloc(base, sizeof(*dev), GFP_KERNEL);
        if (!dev)
                return NULL;
index a7dd8ac..1877acf 100644 (file)
@@ -250,7 +250,7 @@ b53_build_op(write48, u64);
 b53_build_op(write64, u64);
 
 struct b53_arl_entry {
-       u8 port;
+       u16 port;
        u8 mac[ETH_ALEN];
        u16 vid;
        u8 is_valid:1;
@@ -351,6 +351,12 @@ int b53_fdb_del(struct dsa_switch *ds, int port,
                const unsigned char *addr, u16 vid);
 int b53_fdb_dump(struct dsa_switch *ds, int port,
                 dsa_fdb_dump_cb_t *cb, void *data);
+int b53_mdb_prepare(struct dsa_switch *ds, int port,
+                   const struct switchdev_obj_port_mdb *mdb);
+void b53_mdb_add(struct dsa_switch *ds, int port,
+                const struct switchdev_obj_port_mdb *mdb);
+int b53_mdb_del(struct dsa_switch *ds, int port,
+               const struct switchdev_obj_port_mdb *mdb);
 int b53_mirror_add(struct dsa_switch *ds, int port,
                   struct dsa_mall_mirror_tc_entry *mirror, bool ingress);
 enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port);
index d44651a..9ce5a0d 100644 (file)
@@ -350,6 +350,18 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv)
 {
        unsigned int timeout = 1000;
        u32 reg;
+       int ret;
+
+       /* The watchdog reset does not work on 7278, we need to hit the
+        * "external" reset line through the reset controller.
+        */
+       if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev)) {
+               ret = reset_control_assert(priv->rcdev);
+               if (ret)
+                       return ret;
+
+               return reset_control_deassert(priv->rcdev);
+       }
 
        reg = core_readl(priv, CORE_WATCHDOG_CTRL);
        reg |= SOFTWARE_RESET | EN_CHIP_RST | EN_SW_RESET;
@@ -381,8 +393,9 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
                                   struct device_node *dn)
 {
        struct device_node *port;
-       int mode;
        unsigned int port_num;
+       phy_interface_t mode;
+       int err;
 
        priv->moca_port = -1;
 
@@ -395,8 +408,8 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
                 * has completed, since they might be turned off at that
                 * time
                 */
-               mode = of_get_phy_mode(port);
-               if (mode < 0)
+               err = of_get_phy_mode(port, &mode);
+               if (err)
                        continue;
 
                if (mode == PHY_INTERFACE_MODE_INTERNAL)
@@ -668,7 +681,7 @@ static void bcm_sf2_sw_fixed_state(struct dsa_switch *ds, int port,
                 * state machine and make it go in PHY_FORCING state instead.
                 */
                if (!status->link)
-                       netif_carrier_off(ds->ports[port].slave);
+                       netif_carrier_off(dsa_to_port(ds, port)->slave);
                status->duplex = DUPLEX_FULL;
        } else {
                status->link = true;
@@ -734,7 +747,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
 static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
                               struct ethtool_wolinfo *wol)
 {
-       struct net_device *p = ds->ports[port].cpu_dp->master;
+       struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master;
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_wolinfo pwol = { };
 
@@ -758,9 +771,9 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
 static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
                              struct ethtool_wolinfo *wol)
 {
-       struct net_device *p = ds->ports[port].cpu_dp->master;
+       struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master;
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-       s8 cpu_port = ds->ports[port].cpu_dp->index;
+       s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index;
        struct ethtool_wolinfo pwol =  { };
 
        if (p->ethtool_ops->get_wol)
@@ -974,6 +987,9 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
        .set_rxnfc              = bcm_sf2_set_rxnfc,
        .port_mirror_add        = b53_mirror_add,
        .port_mirror_del        = b53_mirror_del,
+       .port_mdb_prepare       = b53_mdb_prepare,
+       .port_mdb_add           = b53_mdb_add,
+       .port_mdb_del           = b53_mdb_del,
 };
 
 struct bcm_sf2_of_data {
@@ -1088,6 +1104,11 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
        priv->core_reg_align = data->core_reg_align;
        priv->num_cfp_rules = data->num_cfp_rules;
 
+       priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev,
+                                                               "switch");
+       if (PTR_ERR(priv->rcdev) == -EPROBE_DEFER)
+               return PTR_ERR(priv->rcdev);
+
        /* Auto-detection using standard registers will not work, so
         * provide an indication of what kind of device we are for
         * b53_common to work with
@@ -1220,6 +1241,8 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
        /* Disable all ports and interrupts */
        bcm_sf2_sw_suspend(priv->dev->ds);
        bcm_sf2_mdio_unregister(priv);
+       if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev))
+               reset_control_assert(priv->rcdev);
 
        return 0;
 }
index 1df30cc..de386dd 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
+#include <linux/reset.h>
 
 #include <net/dsa.h>
 
@@ -64,6 +65,8 @@ struct bcm_sf2_priv {
        void __iomem                    *fcb;
        void __iomem                    *acb;
 
+       struct reset_control            *rcdev;
+
        /* Register offsets indirection tables */
        u32                             type;
        const u16                       *reg_offsets;
index d264776..f3f0c3f 100644 (file)
@@ -821,7 +821,7 @@ static int bcm_sf2_cfp_rule_insert(struct dsa_switch *ds, int port,
                                   struct ethtool_rx_flow_spec *fs)
 {
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-       s8 cpu_port = ds->ports[port].cpu_dp->index;
+       s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index;
        __u64 ring_cookie = fs->ring_cookie;
        unsigned int queue_num, port_num;
        int ret;
@@ -1049,7 +1049,7 @@ static int bcm_sf2_cfp_rule_get_all(struct bcm_sf2_priv *priv,
 int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port,
                      struct ethtool_rxnfc *nfc, u32 *rule_locs)
 {
-       struct net_device *p = ds->ports[port].cpu_dp->master;
+       struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master;
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        int ret = 0;
 
@@ -1092,7 +1092,7 @@ int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port,
 int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port,
                      struct ethtool_rxnfc *nfc)
 {
-       struct net_device *p = ds->ports[port].cpu_dp->master;
+       struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master;
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        int ret = 0;
 
index 925ed13..c8d7ef2 100644 (file)
@@ -286,10 +286,13 @@ static int dsa_loop_drv_probe(struct mdio_device *mdiodev)
        dev_info(&mdiodev->dev, "%s: 0x%0x\n",
                 pdata->name, pdata->enabled_ports);
 
-       ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS);
+       ds = devm_kzalloc(&mdiodev->dev, sizeof(*ds), GFP_KERNEL);
        if (!ds)
                return -ENOMEM;
 
+       ds->dev = &mdiodev->dev;
+       ds->num_ports = DSA_MAX_PORTS;
+
        ps = devm_kzalloc(&mdiodev->dev, sizeof(*ps), GFP_KERNEL);
        if (!ps)
                return -ENOMEM;
index bbec86b..e3c333a 100644 (file)
@@ -1283,10 +1283,12 @@ static int lan9303_register_switch(struct lan9303 *chip)
 {
        int base;
 
-       chip->ds = dsa_switch_alloc(chip->dev, LAN9303_NUM_PORTS);
+       chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL);
        if (!chip->ds)
                return -ENOMEM;
 
+       chip->ds->dev = chip->dev;
+       chip->ds->num_ports = LAN9303_NUM_PORTS;
        chip->ds->priv = chip;
        chip->ds->ops = &lan9303_switch_ops;
        base = chip->phy_addr_base;
index a69c9b9..9553249 100644 (file)
@@ -1854,10 +1854,12 @@ static int gswip_probe(struct platform_device *pdev)
        if (!priv->hw_info)
                return -EINVAL;
 
-       priv->ds = dsa_switch_alloc(dev, priv->hw_info->max_ports);
+       priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL);
        if (!priv->ds)
                return -ENOMEM;
 
+       priv->ds->dev = dev;
+       priv->ds->num_ports = priv->hw_info->max_ports;
        priv->ds->priv = priv;
        priv->ds->ops = &gswip_switch_ops;
        priv->dev = dev;
index fdffd9e..7d050fa 100644 (file)
@@ -87,7 +87,6 @@ MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
 static struct i2c_driver ksz9477_i2c_driver = {
        .driver = {
                .name   = "ksz9477-switch",
-               .owner  = THIS_MODULE,
                .of_match_table = of_match_ptr(ksz9477_dt_ids),
        },
        .probe  = ksz9477_i2c_probe,
index fe47180..d8fda4a 100644 (file)
@@ -398,10 +398,13 @@ struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
        struct dsa_switch *ds;
        struct ksz_device *swdev;
 
-       ds = dsa_switch_alloc(base, DSA_MAX_PORTS);
+       ds = devm_kzalloc(base, sizeof(*ds), GFP_KERNEL);
        if (!ds)
                return NULL;
 
+       ds->dev = base;
+       ds->num_ports = DSA_MAX_PORTS;
+
        swdev = devm_kzalloc(base, sizeof(*swdev), GFP_KERNEL);
        if (!swdev)
                return NULL;
@@ -419,6 +422,7 @@ EXPORT_SYMBOL(ksz_switch_alloc);
 int ksz_switch_register(struct ksz_device *dev,
                        const struct ksz_dev_ops *ops)
 {
+       phy_interface_t interface;
        int ret;
 
        if (dev->pdata)
@@ -453,9 +457,9 @@ int ksz_switch_register(struct ksz_device *dev,
         * device tree.
         */
        if (dev->dev->of_node) {
-               ret = of_get_phy_mode(dev->dev->of_node);
-               if (ret >= 0)
-                       dev->interface = ret;
+               ret = of_get_phy_mode(dev->dev->of_node, &interface);
+               if (ret == 0)
+                       dev->interface = interface;
                dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
                                                         "microchip,synclko-125");
        }
index 1d8d36d..ed1ec10 100644 (file)
@@ -862,7 +862,7 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
 
        for (i = 0; i < MT7530_NUM_PORTS; i++) {
                if (dsa_is_user_port(ds, i) &&
-                   dsa_port_is_vlan_filtering(&ds->ports[i])) {
+                   dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) {
                        all_user_ports_removed = false;
                        break;
                }
@@ -922,7 +922,7 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
                 * other port is still a VLAN-aware port.
                 */
                if (dsa_is_user_port(ds, i) && i != port &&
-                  !dsa_port_is_vlan_filtering(&ds->ports[i])) {
+                  !dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) {
                        if (dsa_to_port(ds, i)->bridge_dev != bridge)
                                continue;
                        if (priv->ports[i].enable)
@@ -1165,7 +1165,7 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port,
        /* The port is kept as VLAN-unaware if bridge with vlan_filtering not
         * being set.
         */
-       if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+       if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
                return;
 
        mutex_lock(&priv->reg_mutex);
@@ -1196,7 +1196,7 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
        /* The port is kept as VLAN-unaware if bridge with vlan_filtering not
         * being set.
         */
-       if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+       if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
                return 0;
 
        mutex_lock(&priv->reg_mutex);
@@ -1252,7 +1252,7 @@ mt7530_setup(struct dsa_switch *ds)
         * controller also is the container for two GMACs nodes representing
         * as two netdev instances.
         */
-       dn = ds->ports[MT7530_CPU_PORT].master->dev.of_node->parent;
+       dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
 
        if (priv->id == ID_MT7530) {
                priv->ethernet = syscon_node_to_regmap(dn);
@@ -1340,7 +1340,9 @@ mt7530_setup(struct dsa_switch *ds)
 
        if (!dsa_is_unused_port(ds, 5)) {
                priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
-               interface = of_get_phy_mode(ds->ports[5].dn);
+               ret = of_get_phy_mode(dsa_to_port(ds, 5)->dn, &interface);
+               if (ret && ret != -ENODEV)
+                       return ret;
        } else {
                /* Scan the ethernet nodes. look for GMAC1, lookup used phy */
                for_each_child_of_node(dn, mac_np) {
@@ -1354,7 +1356,9 @@ mt7530_setup(struct dsa_switch *ds)
 
                        phy_node = of_parse_phandle(mac_np, "phy-handle", 0);
                        if (phy_node->parent == priv->dev->of_node->parent) {
-                               interface = of_get_phy_mode(mac_np);
+                               ret = of_get_phy_mode(mac_np, &interface);
+                               if (ret && ret != -ENODEV)
+                                       return ret;
                                id = of_mdio_parse_addr(ds->dev, phy_node);
                                if (id == 0)
                                        priv->p5_intf_sel = P5_INTF_SEL_PHY_P0;
@@ -1632,10 +1636,13 @@ mt7530_probe(struct mdio_device *mdiodev)
        if (!priv)
                return -ENOMEM;
 
-       priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS);
+       priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL);
        if (!priv->ds)
                return -ENOMEM;
 
+       priv->ds->dev = &mdiodev->dev;
+       priv->ds->num_ports = DSA_MAX_PORTS;
+
        /* Use medatek,mcm property to distinguish hardware type that would
         * casues a little bit differences on power-on sequence.
         */
index 2a2489b..a5a37f4 100644 (file)
@@ -270,10 +270,12 @@ static int mv88e6060_probe(struct mdio_device *mdiodev)
 
        dev_info(dev, "switch %s detected\n", name);
 
-       ds = dsa_switch_alloc(dev, MV88E6060_PORTS);
+       ds = devm_kzalloc(dev, sizeof(*ds), GFP_KERNEL);
        if (!ds)
                return -ENOMEM;
 
+       ds->dev = dev;
+       ds->num_ports = MV88E6060_PORTS;
        ds->priv = priv;
        ds->dev = dev;
        ds->ops = &mv88e6060_switch_ops;
index 6787d56..0dbe6c8 100644 (file)
@@ -1057,35 +1057,43 @@ static int mv88e6xxx_set_mac_eee(struct dsa_switch *ds, int port,
        return 0;
 }
 
+/* Mask of the local ports allowed to receive frames from a given fabric port */
 static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
 {
-       struct dsa_switch *ds = NULL;
+       struct dsa_switch *ds = chip->ds;
+       struct dsa_switch_tree *dst = ds->dst;
        struct net_device *br;
+       struct dsa_port *dp;
+       bool found = false;
        u16 pvlan;
-       int i;
 
-       if (dev < DSA_MAX_SWITCHES)
-               ds = chip->ds->dst->ds[dev];
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dp->ds->index == dev && dp->index == port) {
+                       found = true;
+                       break;
+               }
+       }
 
        /* Prevent frames from unknown switch or port */
-       if (!ds || port >= ds->num_ports)
+       if (!found)
                return 0;
 
        /* Frames from DSA links and CPU ports can egress any local port */
-       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+       if (dp->type == DSA_PORT_TYPE_CPU || dp->type == DSA_PORT_TYPE_DSA)
                return mv88e6xxx_port_mask(chip);
 
-       br = ds->ports[port].bridge_dev;
+       br = dp->bridge_dev;
        pvlan = 0;
 
        /* Frames from user ports can egress any local DSA links and CPU ports,
         * as well as any local member of their bridge group.
         */
-       for (i = 0; i < mv88e6xxx_num_ports(chip); ++i)
-               if (dsa_is_cpu_port(chip->ds, i) ||
-                   dsa_is_dsa_port(chip->ds, i) ||
-                   (br && dsa_to_port(chip->ds, i)->bridge_dev == br))
-                       pvlan |= BIT(i);
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dp->ds == ds &&
+                   (dp->type == DSA_PORT_TYPE_CPU ||
+                    dp->type == DSA_PORT_TYPE_DSA ||
+                    (br && dp->bridge_dev == br)))
+                       pvlan |= BIT(dp->index);
 
        return pvlan;
 }
@@ -1135,6 +1143,7 @@ static int mv88e6xxx_pri_setup(struct mv88e6xxx_chip *chip)
 
 static int mv88e6xxx_devmap_setup(struct mv88e6xxx_chip *chip)
 {
+       struct dsa_switch *ds = chip->ds;
        int target, port;
        int err;
 
@@ -1143,10 +1152,9 @@ static int mv88e6xxx_devmap_setup(struct mv88e6xxx_chip *chip)
 
        /* Initialize the routing port to the 32 possible target devices */
        for (target = 0; target < 32; target++) {
-               port = 0x1f;
-               if (target < DSA_MAX_SWITCHES)
-                       if (chip->ds->rtable[target] != DSA_RTABLE_NONE)
-                               port = chip->ds->rtable[target];
+               port = dsa_routing_port(ds, target);
+               if (port == ds->num_ports)
+                       port = 0x1f;
 
                err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
                if (err)
@@ -1253,7 +1261,7 @@ static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port)
        u16 pvlan = 0;
 
        if (!mv88e6xxx_has_pvt(chip))
-               return -EOPNOTSUPP;
+               return 0;
 
        /* Skip the local source device, which uses in-chip port VLAN */
        if (dev != chip->ds->index)
@@ -1370,6 +1378,22 @@ static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid)
        return mv88e6xxx_g1_atu_flush(chip, *fid, true);
 }
 
+static int mv88e6xxx_atu_get_hash(struct mv88e6xxx_chip *chip, u8 *hash)
+{
+       if (chip->info->ops->atu_get_hash)
+               return chip->info->ops->atu_get_hash(chip, hash);
+
+       return -EOPNOTSUPP;
+}
+
+static int mv88e6xxx_atu_set_hash(struct mv88e6xxx_chip *chip, u8 hash)
+{
+       if (chip->info->ops->atu_set_hash)
+               return chip->info->ops->atu_set_hash(chip, hash);
+
+       return -EOPNOTSUPP;
+}
+
 static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
                                        u16 vid_begin, u16 vid_end)
 {
@@ -1402,7 +1426,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
                        if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i))
                                continue;
 
-                       if (!ds->ports[i].slave)
+                       if (!dsa_to_port(ds, i)->slave)
                                continue;
 
                        if (vlan.member[i] ==
@@ -1410,7 +1434,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
                                continue;
 
                        if (dsa_to_port(ds, i)->bridge_dev ==
-                           ds->ports[port].bridge_dev)
+                           dsa_to_port(ds, port)->bridge_dev)
                                break; /* same bridge, check next VLAN */
 
                        if (!dsa_to_port(ds, i)->bridge_dev)
@@ -2035,32 +2059,26 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
 static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,
                                struct net_device *br)
 {
-       struct dsa_switch *ds;
-       int port;
-       int dev;
+       struct dsa_switch *ds = chip->ds;
+       struct dsa_switch_tree *dst = ds->dst;
+       struct dsa_port *dp;
        int err;
 
-       /* Remap the Port VLAN of each local bridge group member */
-       for (port = 0; port < mv88e6xxx_num_ports(chip); ++port) {
-               if (chip->ds->ports[port].bridge_dev == br) {
-                       err = mv88e6xxx_port_vlan_map(chip, port);
-                       if (err)
-                               return err;
-               }
-       }
-
-       if (!mv88e6xxx_has_pvt(chip))
-               return 0;
-
-       /* Remap the Port VLAN of each cross-chip bridge group member */
-       for (dev = 0; dev < DSA_MAX_SWITCHES; ++dev) {
-               ds = chip->ds->dst->ds[dev];
-               if (!ds)
-                       break;
-
-               for (port = 0; port < ds->num_ports; ++port) {
-                       if (ds->ports[port].bridge_dev == br) {
-                               err = mv88e6xxx_pvt_map(chip, dev, port);
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dp->bridge_dev == br) {
+                       if (dp->ds == ds) {
+                               /* This is a local bridge group member,
+                                * remap its Port VLAN Map.
+                                */
+                               err = mv88e6xxx_port_vlan_map(chip, dp->index);
+                               if (err)
+                                       return err;
+                       } else {
+                               /* This is an external bridge group member,
+                                * remap its cross-chip Port VLAN Table entry.
+                                */
+                               err = mv88e6xxx_pvt_map(chip, dp->ds->index,
+                                                       dp->index);
                                if (err)
                                        return err;
                        }
@@ -2101,9 +2119,6 @@ static int mv88e6xxx_crosschip_bridge_join(struct dsa_switch *ds, int dev,
        struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
-       if (!mv88e6xxx_has_pvt(chip))
-               return 0;
-
        mv88e6xxx_reg_lock(chip);
        err = mv88e6xxx_pvt_map(chip, dev, port);
        mv88e6xxx_reg_unlock(chip);
@@ -2116,9 +2131,6 @@ static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds, int dev,
 {
        struct mv88e6xxx_chip *chip = ds->priv;
 
-       if (!mv88e6xxx_has_pvt(chip))
-               return;
-
        mv88e6xxx_reg_lock(chip);
        if (mv88e6xxx_pvt_map(chip, dev, port))
                dev_err(ds->dev, "failed to remap cross-chip Port VLAN\n");
@@ -2641,6 +2653,248 @@ static int mv88e6390_setup_errata(struct mv88e6xxx_chip *chip)
        return mv88e6xxx_software_reset(chip);
 }
 
+enum mv88e6xxx_devlink_param_id {
+       MV88E6XXX_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+       MV88E6XXX_DEVLINK_PARAM_ID_ATU_HASH,
+};
+
+static int mv88e6xxx_devlink_param_get(struct dsa_switch *ds, u32 id,
+                                      struct devlink_param_gset_ctx *ctx)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mv88e6xxx_reg_lock(chip);
+
+       switch (id) {
+       case MV88E6XXX_DEVLINK_PARAM_ID_ATU_HASH:
+               err = mv88e6xxx_atu_get_hash(chip, &ctx->val.vu8);
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               break;
+       }
+
+       mv88e6xxx_reg_unlock(chip);
+
+       return err;
+}
+
+static int mv88e6xxx_devlink_param_set(struct dsa_switch *ds, u32 id,
+                                      struct devlink_param_gset_ctx *ctx)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mv88e6xxx_reg_lock(chip);
+
+       switch (id) {
+       case MV88E6XXX_DEVLINK_PARAM_ID_ATU_HASH:
+               err = mv88e6xxx_atu_set_hash(chip, ctx->val.vu8);
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               break;
+       }
+
+       mv88e6xxx_reg_unlock(chip);
+
+       return err;
+}
+
+static const struct devlink_param mv88e6xxx_devlink_params[] = {
+       DSA_DEVLINK_PARAM_DRIVER(MV88E6XXX_DEVLINK_PARAM_ID_ATU_HASH,
+                                "ATU_hash", DEVLINK_PARAM_TYPE_U8,
+                                BIT(DEVLINK_PARAM_CMODE_RUNTIME)),
+};
+
+static int mv88e6xxx_setup_devlink_params(struct dsa_switch *ds)
+{
+       return dsa_devlink_params_register(ds, mv88e6xxx_devlink_params,
+                                          ARRAY_SIZE(mv88e6xxx_devlink_params));
+}
+
+static void mv88e6xxx_teardown_devlink_params(struct dsa_switch *ds)
+{
+       dsa_devlink_params_unregister(ds, mv88e6xxx_devlink_params,
+                                     ARRAY_SIZE(mv88e6xxx_devlink_params));
+}
+
+enum mv88e6xxx_devlink_resource_id {
+       MV88E6XXX_RESOURCE_ID_ATU,
+       MV88E6XXX_RESOURCE_ID_ATU_BIN_0,
+       MV88E6XXX_RESOURCE_ID_ATU_BIN_1,
+       MV88E6XXX_RESOURCE_ID_ATU_BIN_2,
+       MV88E6XXX_RESOURCE_ID_ATU_BIN_3,
+};
+
+static u64 mv88e6xxx_devlink_atu_bin_get(struct mv88e6xxx_chip *chip,
+                                        u16 bin)
+{
+       u16 occupancy = 0;
+       int err;
+
+       mv88e6xxx_reg_lock(chip);
+
+       err = mv88e6xxx_g2_atu_stats_set(chip, MV88E6XXX_G2_ATU_STATS_MODE_ALL,
+                                        bin);
+       if (err) {
+               dev_err(chip->dev, "failed to set ATU stats kind/bin\n");
+               goto unlock;
+       }
+
+       err = mv88e6xxx_g1_atu_get_next(chip, 0);
+       if (err) {
+               dev_err(chip->dev, "failed to perform ATU get next\n");
+               goto unlock;
+       }
+
+       err = mv88e6xxx_g2_atu_stats_get(chip, &occupancy);
+       if (err) {
+               dev_err(chip->dev, "failed to get ATU stats\n");
+               goto unlock;
+       }
+
+unlock:
+       mv88e6xxx_reg_unlock(chip);
+
+       return occupancy;
+}
+
+static u64 mv88e6xxx_devlink_atu_bin_0_get(void *priv)
+{
+       struct mv88e6xxx_chip *chip = priv;
+
+       return mv88e6xxx_devlink_atu_bin_get(chip,
+                                            MV88E6XXX_G2_ATU_STATS_BIN_0);
+}
+
+static u64 mv88e6xxx_devlink_atu_bin_1_get(void *priv)
+{
+       struct mv88e6xxx_chip *chip = priv;
+
+       return mv88e6xxx_devlink_atu_bin_get(chip,
+                                            MV88E6XXX_G2_ATU_STATS_BIN_1);
+}
+
+static u64 mv88e6xxx_devlink_atu_bin_2_get(void *priv)
+{
+       struct mv88e6xxx_chip *chip = priv;
+
+       return mv88e6xxx_devlink_atu_bin_get(chip,
+                                            MV88E6XXX_G2_ATU_STATS_BIN_2);
+}
+
+static u64 mv88e6xxx_devlink_atu_bin_3_get(void *priv)
+{
+       struct mv88e6xxx_chip *chip = priv;
+
+       return mv88e6xxx_devlink_atu_bin_get(chip,
+                                            MV88E6XXX_G2_ATU_STATS_BIN_3);
+}
+
+static u64 mv88e6xxx_devlink_atu_get(void *priv)
+{
+       return mv88e6xxx_devlink_atu_bin_0_get(priv) +
+               mv88e6xxx_devlink_atu_bin_1_get(priv) +
+               mv88e6xxx_devlink_atu_bin_2_get(priv) +
+               mv88e6xxx_devlink_atu_bin_3_get(priv);
+}
+
+static int mv88e6xxx_setup_devlink_resources(struct dsa_switch *ds)
+{
+       struct devlink_resource_size_params size_params;
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       devlink_resource_size_params_init(&size_params,
+                                         mv88e6xxx_num_macs(chip),
+                                         mv88e6xxx_num_macs(chip),
+                                         1, DEVLINK_RESOURCE_UNIT_ENTRY);
+
+       err = dsa_devlink_resource_register(ds, "ATU",
+                                           mv88e6xxx_num_macs(chip),
+                                           MV88E6XXX_RESOURCE_ID_ATU,
+                                           DEVLINK_RESOURCE_ID_PARENT_TOP,
+                                           &size_params);
+       if (err)
+               goto out;
+
+       devlink_resource_size_params_init(&size_params,
+                                         mv88e6xxx_num_macs(chip) / 4,
+                                         mv88e6xxx_num_macs(chip) / 4,
+                                         1, DEVLINK_RESOURCE_UNIT_ENTRY);
+
+       err = dsa_devlink_resource_register(ds, "ATU_bin_0",
+                                           mv88e6xxx_num_macs(chip) / 4,
+                                           MV88E6XXX_RESOURCE_ID_ATU_BIN_0,
+                                           MV88E6XXX_RESOURCE_ID_ATU,
+                                           &size_params);
+       if (err)
+               goto out;
+
+       err = dsa_devlink_resource_register(ds, "ATU_bin_1",
+                                           mv88e6xxx_num_macs(chip) / 4,
+                                           MV88E6XXX_RESOURCE_ID_ATU_BIN_1,
+                                           MV88E6XXX_RESOURCE_ID_ATU,
+                                           &size_params);
+       if (err)
+               goto out;
+
+       err = dsa_devlink_resource_register(ds, "ATU_bin_2",
+                                           mv88e6xxx_num_macs(chip) / 4,
+                                           MV88E6XXX_RESOURCE_ID_ATU_BIN_2,
+                                           MV88E6XXX_RESOURCE_ID_ATU,
+                                           &size_params);
+       if (err)
+               goto out;
+
+       err = dsa_devlink_resource_register(ds, "ATU_bin_3",
+                                           mv88e6xxx_num_macs(chip) / 4,
+                                           MV88E6XXX_RESOURCE_ID_ATU_BIN_3,
+                                           MV88E6XXX_RESOURCE_ID_ATU,
+                                           &size_params);
+       if (err)
+               goto out;
+
+       dsa_devlink_resource_occ_get_register(ds,
+                                             MV88E6XXX_RESOURCE_ID_ATU,
+                                             mv88e6xxx_devlink_atu_get,
+                                             chip);
+
+       dsa_devlink_resource_occ_get_register(ds,
+                                             MV88E6XXX_RESOURCE_ID_ATU_BIN_0,
+                                             mv88e6xxx_devlink_atu_bin_0_get,
+                                             chip);
+
+       dsa_devlink_resource_occ_get_register(ds,
+                                             MV88E6XXX_RESOURCE_ID_ATU_BIN_1,
+                                             mv88e6xxx_devlink_atu_bin_1_get,
+                                             chip);
+
+       dsa_devlink_resource_occ_get_register(ds,
+                                             MV88E6XXX_RESOURCE_ID_ATU_BIN_2,
+                                             mv88e6xxx_devlink_atu_bin_2_get,
+                                             chip);
+
+       dsa_devlink_resource_occ_get_register(ds,
+                                             MV88E6XXX_RESOURCE_ID_ATU_BIN_3,
+                                             mv88e6xxx_devlink_atu_bin_3_get,
+                                             chip);
+
+       return 0;
+
+out:
+       dsa_devlink_resources_unregister(ds);
+       return err;
+}
+
+static void mv88e6xxx_teardown(struct dsa_switch *ds)
+{
+       mv88e6xxx_teardown_devlink_params(ds);
+       dsa_devlink_resources_unregister(ds);
+}
+
 static int mv88e6xxx_setup(struct dsa_switch *ds)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
@@ -2757,6 +3011,22 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
 unlock:
        mv88e6xxx_reg_unlock(chip);
 
+       if (err)
+               return err;
+
+       /* Have to be called without holding the register lock, since
+        * they take the devlink lock, and we later take the locks in
+        * the reverse order when getting/setting parameters or
+        * resource occupancy.
+        */
+       err = mv88e6xxx_setup_devlink_resources(ds);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_setup_devlink_params(ds);
+       if (err)
+               dsa_devlink_resources_unregister(ds);
+
        return err;
 }
 
@@ -3117,6 +3387,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .phylink_validate = mv88e6185_phylink_validate,
@@ -3246,6 +3518,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .avb_ops = &mv88e6165_avb_ops,
@@ -3280,6 +3554,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .avb_ops = &mv88e6165_avb_ops,
@@ -3322,6 +3598,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .phylink_validate = mv88e6185_phylink_validate,
@@ -3366,6 +3644,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6352_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_get_lane = mv88e6352_serdes_get_lane,
@@ -3409,6 +3689,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .phylink_validate = mv88e6185_phylink_validate,
@@ -3453,6 +3735,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6352_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_get_lane = mv88e6352_serdes_get_lane,
@@ -3538,6 +3822,8 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6390_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -3587,6 +3873,8 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6390_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -3635,6 +3923,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6390_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -3686,6 +3976,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6352_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_get_lane = mv88e6352_serdes_get_lane,
@@ -3777,6 +4069,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6390_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -3963,6 +4257,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .phylink_validate = mv88e6185_phylink_validate,
@@ -4003,6 +4299,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .avb_ops = &mv88e6352_avb_ops,
@@ -4049,6 +4347,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6352_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_get_lane = mv88e6352_serdes_get_lane,
@@ -4105,6 +4405,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6390_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -4158,6 +4460,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .rmu_disable = mv88e6390_g1_rmu_disable,
+       .atu_get_hash = mv88e6165_g1_atu_get_hash,
+       .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -4177,6 +4481,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6097,
                .name = "Marvell 88E6085",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 10,
                .num_internal_phys = 5,
                .max_vid = 4095,
@@ -4199,6 +4504,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6095,
                .name = "Marvell 88E6095/88E6095F",
                .num_databases = 256,
+               .num_macs = 8192,
                .num_ports = 11,
                .num_internal_phys = 0,
                .max_vid = 4095,
@@ -4219,6 +4525,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6097,
                .name = "Marvell 88E6097/88E6097F",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 11,
                .num_internal_phys = 8,
                .max_vid = 4095,
@@ -4241,6 +4548,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6165,
                .name = "Marvell 88E6123",
                .num_databases = 4096,
+               .num_macs = 1024,
                .num_ports = 3,
                .num_internal_phys = 5,
                .max_vid = 4095,
@@ -4263,6 +4571,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6185,
                .name = "Marvell 88E6131",
                .num_databases = 256,
+               .num_macs = 8192,
                .num_ports = 8,
                .num_internal_phys = 0,
                .max_vid = 4095,
@@ -4283,6 +4592,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6341,
                .name = "Marvell 88E6141",
                .num_databases = 4096,
+               .num_macs = 2048,
                .num_ports = 6,
                .num_internal_phys = 5,
                .num_gpio = 11,
@@ -4306,6 +4616,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6165,
                .name = "Marvell 88E6161",
                .num_databases = 4096,
+               .num_macs = 1024,
                .num_ports = 6,
                .num_internal_phys = 5,
                .max_vid = 4095,
@@ -4329,6 +4640,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6165,
                .name = "Marvell 88E6165",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 6,
                .num_internal_phys = 0,
                .max_vid = 4095,
@@ -4352,6 +4664,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6351,
                .name = "Marvell 88E6171",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .max_vid = 4095,
@@ -4374,6 +4687,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6352,
                .name = "Marvell 88E6172",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .num_gpio = 15,
@@ -4397,6 +4711,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6351,
                .name = "Marvell 88E6175",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .max_vid = 4095,
@@ -4419,6 +4734,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6352,
                .name = "Marvell 88E6176",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .num_gpio = 15,
@@ -4442,6 +4758,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6185,
                .name = "Marvell 88E6185",
                .num_databases = 256,
+               .num_macs = 8192,
                .num_ports = 10,
                .num_internal_phys = 0,
                .max_vid = 4095,
@@ -4462,6 +4779,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6390,
                .name = "Marvell 88E6190",
                .num_databases = 4096,
+               .num_macs = 16384,
                .num_ports = 11,        /* 10 + Z80 */
                .num_internal_phys = 9,
                .num_gpio = 16,
@@ -4485,6 +4803,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6390,
                .name = "Marvell 88E6190X",
                .num_databases = 4096,
+               .num_macs = 16384,
                .num_ports = 11,        /* 10 + Z80 */
                .num_internal_phys = 9,
                .num_gpio = 16,
@@ -4508,6 +4827,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6390,
                .name = "Marvell 88E6191",
                .num_databases = 4096,
+               .num_macs = 16384,
                .num_ports = 11,        /* 10 + Z80 */
                .num_internal_phys = 9,
                .max_vid = 8191,
@@ -4558,6 +4878,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6352,
                .name = "Marvell 88E6240",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .num_gpio = 15,
@@ -4628,6 +4949,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6320,
                .name = "Marvell 88E6320",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .num_gpio = 15,
@@ -4652,6 +4974,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6320,
                .name = "Marvell 88E6321",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .num_gpio = 15,
@@ -4675,6 +4998,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6341,
                .name = "Marvell 88E6341",
                .num_databases = 4096,
+               .num_macs = 2048,
                .num_internal_phys = 5,
                .num_ports = 6,
                .num_gpio = 11,
@@ -4699,6 +5023,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6351,
                .name = "Marvell 88E6350",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .max_vid = 4095,
@@ -4721,6 +5046,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6351,
                .name = "Marvell 88E6351",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .max_vid = 4095,
@@ -4743,6 +5069,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6352,
                .name = "Marvell 88E6352",
                .num_databases = 4096,
+               .num_macs = 8192,
                .num_ports = 7,
                .num_internal_phys = 5,
                .num_gpio = 15,
@@ -4766,6 +5093,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6390,
                .name = "Marvell 88E6390",
                .num_databases = 4096,
+               .num_macs = 16384,
                .num_ports = 11,        /* 10 + Z80 */
                .num_internal_phys = 9,
                .num_gpio = 16,
@@ -4789,6 +5117,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6390,
                .name = "Marvell 88E6390X",
                .num_databases = 4096,
+               .num_macs = 16384,
                .num_ports = 11,        /* 10 + Z80 */
                .num_internal_phys = 9,
                .num_gpio = 16,
@@ -4933,6 +5262,7 @@ static int mv88e6xxx_port_egress_floods(struct dsa_switch *ds, int port,
 static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .get_tag_protocol       = mv88e6xxx_get_tag_protocol,
        .setup                  = mv88e6xxx_setup,
+       .teardown               = mv88e6xxx_teardown,
        .phylink_validate       = mv88e6xxx_validate,
        .phylink_mac_link_state = mv88e6xxx_link_state,
        .phylink_mac_config     = mv88e6xxx_mac_config,
@@ -4975,6 +5305,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_txtstamp          = mv88e6xxx_port_txtstamp,
        .port_rxtstamp          = mv88e6xxx_port_rxtstamp,
        .get_ts_info            = mv88e6xxx_get_ts_info,
+       .devlink_param_get      = mv88e6xxx_devlink_param_get,
+       .devlink_param_set      = mv88e6xxx_devlink_param_set,
 };
 
 static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip)
@@ -4982,10 +5314,12 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip)
        struct device *dev = chip->dev;
        struct dsa_switch *ds;
 
-       ds = dsa_switch_alloc(dev, mv88e6xxx_num_ports(chip));
+       ds = devm_kzalloc(dev, sizeof(*ds), GFP_KERNEL);
        if (!ds)
                return -ENOMEM;
 
+       ds->dev = dev;
+       ds->num_ports = mv88e6xxx_num_ports(chip);
        ds->priv = chip;
        ds->dev = dev;
        ds->ops = &mv88e6xxx_switch_ops;
index e9b1a1a..65ce09b 100644 (file)
@@ -94,6 +94,7 @@ struct mv88e6xxx_info {
        u16 prod_num;
        const char *name;
        unsigned int num_databases;
+       unsigned int num_macs;
        unsigned int num_ports;
        unsigned int num_internal_phys;
        unsigned int num_gpio;
@@ -497,6 +498,10 @@ struct mv88e6xxx_ops {
        int (*serdes_get_stats)(struct mv88e6xxx_chip *chip,  int port,
                                uint64_t *data);
 
+       /* Address Translation Unit operations */
+       int (*atu_get_hash)(struct mv88e6xxx_chip *chip, u8 *hash);
+       int (*atu_set_hash)(struct mv88e6xxx_chip *chip, u8 hash);
+
        /* VLAN Translation Unit operations */
        int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
                           struct mv88e6xxx_vtu_entry *entry);
@@ -609,6 +614,11 @@ static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip)
        return chip->info->num_databases;
 }
 
+static inline unsigned int mv88e6xxx_num_macs(struct  mv88e6xxx_chip *chip)
+{
+       return chip->info->num_macs;
+}
+
 static inline unsigned int mv88e6xxx_num_ports(struct mv88e6xxx_chip *chip)
 {
        return chip->info->num_ports;
index 0870fcc..3421722 100644 (file)
 /* Offset 0x0A: ATU Control Register */
 #define MV88E6XXX_G1_ATU_CTL           0x0a
 #define MV88E6XXX_G1_ATU_CTL_LEARN2ALL 0x0008
+#define MV88E6161_G1_ATU_CTL_HASH_MASK 0x0003
 
 /* Offset 0x0B: ATU Operation Register */
 #define MV88E6XXX_G1_ATU_OP                            0x0b
@@ -318,6 +319,8 @@ int mv88e6xxx_g1_atu_remove(struct mv88e6xxx_chip *chip, u16 fid, int port,
                            bool all);
 int mv88e6xxx_g1_atu_prob_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g1_atu_prob_irq_free(struct mv88e6xxx_chip *chip);
+int mv88e6165_g1_atu_get_hash(struct mv88e6xxx_chip *chip, u8 *hash);
+int mv88e6165_g1_atu_set_hash(struct mv88e6xxx_chip *chip, u8 hash);
 
 int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
                             struct mv88e6xxx_vtu_entry *entry);
@@ -338,5 +341,6 @@ int mv88e6390_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
 int mv88e6xxx_g1_vtu_flush(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g1_vtu_prob_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g1_vtu_prob_irq_free(struct mv88e6xxx_chip *chip);
+int mv88e6xxx_g1_atu_get_next(struct mv88e6xxx_chip *chip, u16 fid);
 
 #endif /* _MV88E6XXX_GLOBAL1_H */
index 792a96e..bdcd255 100644 (file)
@@ -73,6 +73,38 @@ int mv88e6xxx_g1_atu_set_age_time(struct mv88e6xxx_chip *chip,
        return 0;
 }
 
+int mv88e6165_g1_atu_get_hash(struct mv88e6xxx_chip *chip, u8 *hash)
+{
+       int err;
+       u16 val;
+
+       err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_ATU_CTL, &val);
+       if (err)
+               return err;
+
+       *hash = val & MV88E6161_G1_ATU_CTL_HASH_MASK;
+
+       return 0;
+}
+
+int mv88e6165_g1_atu_set_hash(struct mv88e6xxx_chip *chip, u8 hash)
+{
+       int err;
+       u16 val;
+
+       if (hash & ~MV88E6161_G1_ATU_CTL_HASH_MASK)
+               return -EINVAL;
+
+       err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_ATU_CTL, &val);
+       if (err)
+               return err;
+
+       val &= ~MV88E6161_G1_ATU_CTL_HASH_MASK;
+       val |= hash;
+
+       return mv88e6xxx_g1_write(chip, MV88E6XXX_G1_ATU_CTL, val);
+}
+
 /* Offset 0x0B: ATU Operation Register */
 
 static int mv88e6xxx_g1_atu_op_wait(struct mv88e6xxx_chip *chip)
@@ -122,6 +154,11 @@ static int mv88e6xxx_g1_atu_op(struct mv88e6xxx_chip *chip, u16 fid, u16 op)
        return mv88e6xxx_g1_atu_op_wait(chip);
 }
 
+int mv88e6xxx_g1_atu_get_next(struct mv88e6xxx_chip *chip, u16 fid)
+{
+       return mv88e6xxx_g1_atu_op(chip, fid, MV88E6XXX_G1_ATU_OP_GET_NEXT_DB);
+}
+
 /* Offset 0x0C: ATU Data Register */
 
 static int mv88e6xxx_g1_atu_data_read(struct mv88e6xxx_chip *chip,
index bdbb72f..87bfe7c 100644 (file)
@@ -280,6 +280,19 @@ int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
        return err;
 }
 
+/* Offset 0x0E: ATU Statistics */
+
+int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, u16 kind, u16 bin)
+{
+       return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_ATU_STATS,
+                                 kind | bin);
+}
+
+int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, u16 *stats)
+{
+       return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_ATU_STATS, stats);
+}
+
 /* Offset 0x0F: Priority Override Table */
 
 static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
index 42da4bc..d80ad20 100644 (file)
 #define MV88E6XXX_G2_SWITCH_MAC_DATA_MASK      0x00ff
 
 /* Offset 0x0E: ATU Stats Register */
-#define MV88E6XXX_G2_ATU_STATS         0x0e
+#define MV88E6XXX_G2_ATU_STATS                         0x0e
+#define MV88E6XXX_G2_ATU_STATS_BIN_0                   (0x0 << 14)
+#define MV88E6XXX_G2_ATU_STATS_BIN_1                   (0x1 << 14)
+#define MV88E6XXX_G2_ATU_STATS_BIN_2                   (0x2 << 14)
+#define MV88E6XXX_G2_ATU_STATS_BIN_3                   (0x3 << 14)
+#define MV88E6XXX_G2_ATU_STATS_MODE_ALL                        (0x0 << 12)
+#define MV88E6XXX_G2_ATU_STATS_MODE_ALL_DYNAMIC                (0x1 << 12)
+#define MV88E6XXX_G2_ATU_STATS_MODE_FID_ALL            (0x2 << 12)
+#define MV88E6XXX_G2_ATU_STATS_MODE_FID_ALL_DYNAMIC    (0x3 << 12)
+#define MV88E6XXX_G2_ATU_STATS_MASK                    0x0fff
 
 /* Offset 0x0F: Priority Override Table */
 #define MV88E6XXX_G2_PRIO_OVERRIDE             0x0f
@@ -353,6 +362,8 @@ extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
 
 int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
                                      bool external);
+int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, u16 kind, u16 bin);
+int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, u16 *stats);
 
 #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
@@ -515,6 +526,17 @@ static inline int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
        return -EOPNOTSUPP;
 }
 
+static inline int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip,
+                                            u16 kind, u16 bin)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip,
+                                            u16 *stats)
+{
+       return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 #endif /* _MV88E6XXX_GLOBAL2_H */
index b00274c..e548289 100644 (file)
@@ -639,7 +639,8 @@ static int
 qca8k_setup(struct dsa_switch *ds)
 {
        struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-       int ret, i, phy_mode = -1;
+       phy_interface_t phy_mode = PHY_INTERFACE_MODE_NA;
+       int ret, i;
        u32 mask;
 
        /* Make sure that port 0 is the cpu port */
@@ -661,10 +662,10 @@ qca8k_setup(struct dsa_switch *ds)
                return ret;
 
        /* Initialize CPU port pad mode (xMII type, delays...) */
-       phy_mode = of_get_phy_mode(ds->ports[QCA8K_CPU_PORT].dn);
-       if (phy_mode < 0) {
+       ret = of_get_phy_mode(dsa_to_port(ds, QCA8K_CPU_PORT)->dn, &phy_mode);
+       if (ret) {
                pr_err("Can't find phy-mode for master device\n");
-               return phy_mode;
+               return ret;
        }
        ret = qca8k_set_pad_ctrl(priv, QCA8K_CPU_PORT, phy_mode);
        if (ret < 0)
@@ -1077,10 +1078,13 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
        if (id != QCA8K_ID_QCA8337)
                return -ENODEV;
 
-       priv->ds = dsa_switch_alloc(&mdiodev->dev, QCA8K_NUM_PORTS);
+       priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds),
+                               GFP_KERNEL);
        if (!priv->ds)
                return -ENOMEM;
 
+       priv->ds->dev = &mdiodev->dev;
+       priv->ds->num_ports = QCA8K_NUM_PORTS;
        priv->ds->priv = priv;
        priv->ops = qca8k_switch_ops;
        priv->ds->ops = &priv->ops;
index dc0509c..fae188c 100644 (file)
@@ -444,9 +444,12 @@ static int realtek_smi_probe(struct platform_device *pdev)
                return ret;
        }
 
-       smi->ds = dsa_switch_alloc(dev, smi->num_ports);
+       smi->ds = devm_kzalloc(dev, sizeof(*smi->ds), GFP_KERNEL);
        if (!smi->ds)
                return -ENOMEM;
+
+       smi->ds->dev = dev;
+       smi->ds->num_ports = smi->num_ports;
        smi->ds->priv = smi;
 
        smi->ds->ops = var->ds_ops;
index fbb564c..91063ed 100644 (file)
@@ -21,6 +21,7 @@
 #define SJA1105_AGEING_TIME_MS(ms)     ((ms) / 10)
 
 #include "sja1105_tas.h"
+#include "sja1105_ptp.h"
 
 /* Keeps the different addresses between E/T and P/Q/R/S */
 struct sja1105_regs {
@@ -32,9 +33,8 @@ struct sja1105_regs {
        u64 config;
        u64 rmii_pll1;
        u64 ptp_control;
-       u64 ptpclk;
+       u64 ptpclkval;
        u64 ptpclkrate;
-       u64 ptptsclk;
        u64 ptpegr_ts[SJA1105_NUM_PORTS];
        u64 pad_mii_tx[SJA1105_NUM_PORTS];
        u64 pad_mii_id[SJA1105_NUM_PORTS];
@@ -71,7 +71,8 @@ struct sja1105_info {
        const struct sja1105_dynamic_table_ops *dyn_ops;
        const struct sja1105_table_ops *static_ops;
        const struct sja1105_regs *regs;
-       int (*ptp_cmd)(const void *ctx, const void *data);
+       int (*ptp_cmd)(const struct dsa_switch *ds,
+                      const struct sja1105_ptp_cmd *cmd);
        int (*reset_cmd)(const void *ctx, const void *data);
        int (*setup_rgmii_delay)(const void *ctx, int port);
        /* Prototypes from include/net/dsa.h */
@@ -91,26 +92,16 @@ struct sja1105_private {
        struct spi_device *spidev;
        struct dsa_switch *ds;
        struct sja1105_port ports[SJA1105_NUM_PORTS];
-       struct ptp_clock_info ptp_caps;
-       struct ptp_clock *clock;
-       /* The cycle counter translates the PTP timestamps (based on
-        * a free-running counter) into a software time domain.
-        */
-       struct cyclecounter tstamp_cc;
-       struct timecounter tstamp_tc;
-       struct delayed_work refresh_work;
-       /* Serializes all operations on the cycle counter */
-       struct mutex ptp_lock;
        /* Serializes transmission of management frames so that
         * the switch doesn't confuse them with one another.
         */
        struct mutex mgmt_lock;
        struct sja1105_tagger_data tagger_data;
+       struct sja1105_ptp_data ptp_data;
        struct sja1105_tas_data tas_data;
 };
 
 #include "sja1105_dynamic_config.h"
-#include "sja1105_ptp.h"
 
 struct sja1105_spi_message {
        u64 access;
@@ -127,15 +118,13 @@ typedef enum {
 int sja1105_static_config_reload(struct sja1105_private *priv);
 
 /* From sja1105_spi.c */
-int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
-                               sja1105_spi_rw_mode_t rw, u64 reg_addr,
-                               void *packed_buf, size_t size_bytes);
-int sja1105_spi_send_int(const struct sja1105_private *priv,
-                        sja1105_spi_rw_mode_t rw, u64 reg_addr,
-                        u64 *value, u64 size_bytes);
-int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv,
-                                    sja1105_spi_rw_mode_t rw, u64 base_addr,
-                                    void *packed_buf, u64 buf_len);
+int sja1105_xfer_buf(const struct sja1105_private *priv,
+                    sja1105_spi_rw_mode_t rw, u64 reg_addr,
+                    u8 *buf, size_t len);
+int sja1105_xfer_u32(const struct sja1105_private *priv,
+                    sja1105_spi_rw_mode_t rw, u64 reg_addr, u32 *value);
+int sja1105_xfer_u64(const struct sja1105_private *priv,
+                    sja1105_spi_rw_mode_t rw, u64 reg_addr, u64 *value);
 int sja1105_static_config_upload(struct sja1105_private *priv);
 int sja1105_inhibit_tx(const struct sja1105_private *priv,
                       unsigned long port_bitmap, bool tx_inhibited);
index 608126a..9082e52 100644 (file)
@@ -118,9 +118,8 @@ static int sja1105_cgu_idiv_config(struct sja1105_private *priv, int port,
        idiv.pd        = enabled ? 0 : 1; /* Power down? */
        sja1105_cgu_idiv_packing(packed_buf, &idiv, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->cgu_idiv[port], packed_buf,
-                                          SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->cgu_idiv[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static void
@@ -167,9 +166,8 @@ static int sja1105_cgu_mii_tx_clk_config(struct sja1105_private *priv,
        mii_tx_clk.pd        = 0;  /* Power Down off => enabled */
        sja1105_cgu_mii_control_packing(packed_buf, &mii_tx_clk, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->mii_tx_clk[port], packed_buf,
-                                          SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_tx_clk[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static int
@@ -192,9 +190,8 @@ sja1105_cgu_mii_rx_clk_config(struct sja1105_private *priv, int port)
        mii_rx_clk.pd        = 0;  /* Power Down off => enabled */
        sja1105_cgu_mii_control_packing(packed_buf, &mii_rx_clk, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->mii_rx_clk[port], packed_buf,
-                                          SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_rx_clk[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static int
@@ -217,9 +214,8 @@ sja1105_cgu_mii_ext_tx_clk_config(struct sja1105_private *priv, int port)
        mii_ext_tx_clk.pd        = 0; /* Power Down off => enabled */
        sja1105_cgu_mii_control_packing(packed_buf, &mii_ext_tx_clk, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->mii_ext_tx_clk[port],
-                                          packed_buf, SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_ext_tx_clk[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static int
@@ -242,9 +238,8 @@ sja1105_cgu_mii_ext_rx_clk_config(struct sja1105_private *priv, int port)
        mii_ext_rx_clk.pd        = 0; /* Power Down off => enabled */
        sja1105_cgu_mii_control_packing(packed_buf, &mii_ext_rx_clk, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->mii_ext_rx_clk[port],
-                                          packed_buf, SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_ext_rx_clk[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static int sja1105_mii_clocking_setup(struct sja1105_private *priv, int port,
@@ -337,9 +332,8 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv,
        txc.pd = 0;
        sja1105_cgu_mii_control_packing(packed_buf, &txc, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->rgmii_tx_clk[port],
-                                          packed_buf, SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgmii_tx_clk[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 /* AGU */
@@ -383,9 +377,8 @@ static int sja1105_rgmii_cfg_pad_tx_config(struct sja1105_private *priv,
        pad_mii_tx.clk_ipud  = 2; /* TX_CLK input stage (default) */
        sja1105_cfg_pad_mii_tx_packing(packed_buf, &pad_mii_tx, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->pad_mii_tx[port],
-                                          packed_buf, SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_tx[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static void
@@ -405,7 +398,7 @@ sja1105_cfg_pad_mii_id_packing(void *buf, struct sja1105_cfg_pad_mii_id *cmd,
 }
 
 /* Valid range in degrees is an integer between 73.8 and 101.7 */
-static inline u64 sja1105_rgmii_delay(u64 phase)
+static u64 sja1105_rgmii_delay(u64 phase)
 {
        /* UM11040.pdf: The delay in degree phase is 73.8 + delay_tune * 0.9.
         * To avoid floating point operations we'll multiply by 10
@@ -442,9 +435,8 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port)
        pad_mii_id.txc_pd = 1;
        sja1105_cfg_pad_mii_id_packing(packed_buf, &pad_mii_id, PACK);
 
-       rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                        regs->pad_mii_id[port],
-                                        packed_buf, SJA1105_SIZE_CGU_CMD);
+       rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_id[port],
+                             packed_buf, SJA1105_SIZE_CGU_CMD);
        if (rc < 0)
                return rc;
 
@@ -459,9 +451,8 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port)
        }
        sja1105_cfg_pad_mii_id_packing(packed_buf, &pad_mii_id, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->pad_mii_id[port],
-                                          packed_buf, SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_id[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static int sja1105_rgmii_clocking_setup(struct sja1105_private *priv, int port,
@@ -547,9 +538,8 @@ static int sja1105_cgu_rmii_ref_clk_config(struct sja1105_private *priv,
        ref_clk.pd        = 0;      /* Power Down off => enabled */
        sja1105_cgu_mii_control_packing(packed_buf, &ref_clk, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->rmii_ref_clk[port],
-                                          packed_buf, SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_ref_clk[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static int
@@ -565,9 +555,8 @@ sja1105_cgu_rmii_ext_tx_clk_config(struct sja1105_private *priv, int port)
        ext_tx_clk.pd        = 0;   /* Power Down off => enabled */
        sja1105_cgu_mii_control_packing(packed_buf, &ext_tx_clk, PACK);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE,
-                                          regs->rmii_ext_tx_clk[port],
-                                          packed_buf, SJA1105_SIZE_CGU_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_ext_tx_clk[port],
+                               packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
 static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv)
@@ -595,8 +584,8 @@ static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv)
        pll.pd        = 0x1;
 
        sja1105_cgu_pll_control_packing(packed_buf, &pll, PACK);
-       rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rmii_pll1,
-                                        packed_buf, SJA1105_SIZE_CGU_CMD);
+       rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_pll1, packed_buf,
+                             SJA1105_SIZE_CGU_CMD);
        if (rc < 0) {
                dev_err(dev, "failed to configure PLL1 for 50MHz\n");
                return rc;
@@ -606,8 +595,8 @@ static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv)
        pll.pd = 0x0;
 
        sja1105_cgu_pll_control_packing(packed_buf, &pll, PACK);
-       rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rmii_pll1,
-                                        packed_buf, SJA1105_SIZE_CGU_CMD);
+       rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_pll1, packed_buf,
+                             SJA1105_SIZE_CGU_CMD);
        if (rc < 0) {
                dev_err(dev, "failed to enable PLL1\n");
                return rc;
index 91da430..25381bd 100644 (file)
@@ -686,8 +686,8 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
                ops->entry_packing(packed_buf, entry, PACK);
 
        /* Send SPI write operation: read config table entry */
-       rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, ops->addr,
-                                        packed_buf, ops->packed_size);
+       rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
+                             ops->packed_size);
        if (rc < 0)
                return rc;
 
@@ -698,8 +698,8 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
                memset(packed_buf, 0, ops->packed_size);
 
                /* Retrieve the read operation's result */
-               rc = sja1105_spi_send_packed_buf(priv, SPI_READ, ops->addr,
-                                                packed_buf, ops->packed_size);
+               rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
+                                     ops->packed_size);
                if (rc < 0)
                        return rc;
 
@@ -771,8 +771,8 @@ int sja1105_dynamic_config_write(struct sja1105_private *priv,
                ops->entry_packing(packed_buf, entry, PACK);
 
        /* Send SPI write operation: read config table entry */
-       rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, ops->addr,
-                                        packed_buf, ops->packed_size);
+       rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
+                             ops->packed_size);
        if (rc < 0)
                return rc;
 
index ab581a2..064301c 100644 (file)
@@ -167,8 +167,8 @@ static int sja1105_port_status_get_mac(struct sja1105_private *priv,
        int rc;
 
        /* MAC area */
-       rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac[port],
-                                        packed_buf, SJA1105_SIZE_MAC_AREA);
+       rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac[port], packed_buf,
+                             SJA1105_SIZE_MAC_AREA);
        if (rc < 0)
                return rc;
 
@@ -185,8 +185,8 @@ static int sja1105_port_status_get_hl1(struct sja1105_private *priv,
        u8 packed_buf[SJA1105_SIZE_HL1_AREA] = {0};
        int rc;
 
-       rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac_hl1[port],
-                                        packed_buf, SJA1105_SIZE_HL1_AREA);
+       rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl1[port], packed_buf,
+                             SJA1105_SIZE_HL1_AREA);
        if (rc < 0)
                return rc;
 
@@ -203,8 +203,8 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv,
        u8 packed_buf[SJA1105_SIZE_QLEVEL_AREA] = {0};
        int rc;
 
-       rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac_hl2[port],
-                                        packed_buf, SJA1105_SIZE_HL2_AREA);
+       rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl2[port], packed_buf,
+                             SJA1105_SIZE_HL2_AREA);
        if (rc < 0)
                return rc;
 
@@ -215,8 +215,8 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv,
            priv->info->device_id == SJA1105T_DEVICE_ID)
                return 0;
 
-       rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->qlevel[port],
-                                        packed_buf, SJA1105_SIZE_QLEVEL_AREA);
+       rc = sja1105_xfer_buf(priv, SPI_READ, regs->qlevel[port], packed_buf,
+                             SJA1105_SIZE_QLEVEL_AREA);
        if (rc < 0)
                return rc;
 
index 7687ddc..d5dfda3 100644 (file)
@@ -382,8 +382,8 @@ static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv)
 static int sja1105_init_general_params(struct sja1105_private *priv)
 {
        struct sja1105_general_params_entry default_general_params = {
-               /* Disallow dynamic changing of the mirror port */
-               .mirr_ptacu = 0,
+               /* Allow dynamic changing of the mirror port */
+               .mirr_ptacu = true,
                .switchid = priv->ds->index,
                /* Priority queue for link-local management frames
                 * (both ingress to and egress from CPU - PTP, STP etc)
@@ -403,8 +403,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
                 * by installing a temporary 'management route'
                 */
                .host_port = dsa_upstream_port(priv->ds, 0),
-               /* Same as host port */
-               .mirr_port = dsa_upstream_port(priv->ds, 0),
+               /* Default to an invalid value */
+               .mirr_port = SJA1105_NUM_PORTS,
                /* Link-local traffic received on casc_port will be forwarded
                 * to host_port without embedding the source port and device ID
                 * info in the destination MAC address (presumably because it
@@ -458,9 +458,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 
 #define SJA1105_RATE_MBPS(speed) (((speed) * 64000) / 1000)
 
-static inline void
-sja1105_setup_policer(struct sja1105_l2_policing_entry *policing,
-                     int index)
+static void sja1105_setup_policer(struct sja1105_l2_policing_entry *policing,
+                                 int index)
 {
        policing[index].sharindx = index;
        policing[index].smax = 65535; /* Burst size in bytes */
@@ -507,39 +506,6 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
        return 0;
 }
 
-static int sja1105_init_avb_params(struct sja1105_private *priv,
-                                  bool on)
-{
-       struct sja1105_avb_params_entry *avb;
-       struct sja1105_table *table;
-
-       table = &priv->static_config.tables[BLK_IDX_AVB_PARAMS];
-
-       /* Discard previous AVB Parameters Table */
-       if (table->entry_count) {
-               kfree(table->entries);
-               table->entry_count = 0;
-       }
-
-       /* Configure the reception of meta frames only if requested */
-       if (!on)
-               return 0;
-
-       table->entries = kcalloc(SJA1105_MAX_AVB_PARAMS_COUNT,
-                                table->ops->unpacked_entry_size, GFP_KERNEL);
-       if (!table->entries)
-               return -ENOMEM;
-
-       table->entry_count = SJA1105_MAX_AVB_PARAMS_COUNT;
-
-       avb = table->entries;
-
-       avb->destmeta = SJA1105_META_DMAC;
-       avb->srcmeta  = SJA1105_META_SMAC;
-
-       return 0;
-}
-
 static int sja1105_static_config_load(struct sja1105_private *priv,
                                      struct sja1105_dt_port *ports)
 {
@@ -578,9 +544,6 @@ static int sja1105_static_config_load(struct sja1105_private *priv,
        if (rc < 0)
                return rc;
        rc = sja1105_init_general_params(priv);
-       if (rc < 0)
-               return rc;
-       rc = sja1105_init_avb_params(priv, false);
        if (rc < 0)
                return rc;
 
@@ -621,8 +584,9 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv,
 
        for_each_child_of_node(ports_node, child) {
                struct device_node *phy_node;
-               int phy_mode;
+               phy_interface_t phy_mode;
                u32 index;
+               int err;
 
                /* Get switch port number from DT */
                if (of_property_read_u32(child, "reg", &index) < 0) {
@@ -633,8 +597,8 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv,
                }
 
                /* Get PHY mode from DT */
-               phy_mode = of_get_phy_mode(child);
-               if (phy_mode < 0) {
+               err = of_get_phy_mode(child, &phy_mode);
+               if (err) {
                        dev_err(dev, "Failed to read phy-mode or "
                                "phy-interface-type property for port %d\n",
                                index);
@@ -951,7 +915,7 @@ sja1105_static_fdb_change(struct sja1105_private *priv, int port,
  * For the placement of a newly learnt FDB entry, the switch selects the bin
  * based on a hash function, and the way within that bin incrementally.
  */
-static inline int sja1105et_fdb_index(int bin, int way)
+static int sja1105et_fdb_index(int bin, int way)
 {
        return bin * SJA1105ET_FDB_BIN_SIZE + way;
 }
@@ -1095,7 +1059,7 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
        l2_lookup.vlanid = vid;
        l2_lookup.iotag = SJA1105_S_TAG;
        l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-       if (dsa_port_is_vlan_filtering(&ds->ports[port])) {
+       if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) {
                l2_lookup.mask_vlanid = VLAN_VID_MASK;
                l2_lookup.mask_iotag = BIT(0);
        } else {
@@ -1158,7 +1122,7 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
        l2_lookup.vlanid = vid;
        l2_lookup.iotag = SJA1105_S_TAG;
        l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-       if (dsa_port_is_vlan_filtering(&ds->ports[port])) {
+       if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) {
                l2_lookup.mask_vlanid = VLAN_VID_MASK;
                l2_lookup.mask_iotag = BIT(0);
        } else {
@@ -1204,7 +1168,7 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port,
         * for what gets printed in 'bridge fdb show'.  In the case of zero,
         * no VID gets printed at all.
         */
-       if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+       if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
                vid = 0;
 
        return priv->info->fdb_add_cmd(ds, port, addr, vid);
@@ -1215,7 +1179,7 @@ static int sja1105_fdb_del(struct dsa_switch *ds, int port,
 {
        struct sja1105_private *priv = ds->priv;
 
-       if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+       if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
                vid = 0;
 
        return priv->info->fdb_del_cmd(ds, port, addr, vid);
@@ -1254,7 +1218,7 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
                u64_to_ether_addr(l2_lookup.macaddr, macaddr);
 
                /* We need to hide the dsa_8021q VLANs from the user. */
-               if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+               if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
                        l2_lookup.vlanid = 0;
                cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
        }
@@ -1687,7 +1651,7 @@ static int sja1105_setup(struct dsa_switch *ds)
                return rc;
        }
 
-       rc = sja1105_ptp_clock_register(priv);
+       rc = sja1105_ptp_clock_register(ds);
        if (rc < 0) {
                dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc);
                return rc;
@@ -1729,9 +1693,7 @@ static void sja1105_teardown(struct dsa_switch *ds)
        struct sja1105_private *priv = ds->priv;
 
        sja1105_tas_teardown(ds);
-       cancel_work_sync(&priv->tagger_data.rxtstamp_work);
-       skb_queue_purge(&priv->tagger_data.skb_rxtstamp_queue);
-       sja1105_ptp_clock_unregister(priv);
+       sja1105_ptp_clock_unregister(ds);
        sja1105_static_config_free(&priv->static_config);
 }
 
@@ -1743,7 +1705,7 @@ static int sja1105_port_enable(struct dsa_switch *ds, int port,
        if (!dsa_is_user_port(ds, port))
                return 0;
 
-       slave = ds->ports[port].slave;
+       slave = dsa_to_port(ds, port)->slave;
 
        slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
 
@@ -1775,7 +1737,7 @@ static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
        }
 
        /* Transfer skb to the host port. */
-       dsa_enqueue_skb(skb, ds->ports[port].slave);
+       dsa_enqueue_skb(skb, dsa_to_port(ds, port)->slave);
 
        /* Wait until the switch has processed the frame */
        do {
@@ -1817,11 +1779,8 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
 {
        struct sja1105_private *priv = ds->priv;
        struct sja1105_port *sp = &priv->ports[port];
-       struct skb_shared_hwtstamps shwt = {0};
        int slot = sp->mgmt_slot;
        struct sk_buff *clone;
-       u64 now, ts;
-       int rc;
 
        /* The tragic fact about the switch having 4x2 slots for installing
         * management routes is that all of them except one are actually
@@ -1847,27 +1806,8 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
        if (!clone)
                goto out;
 
-       skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS;
-
-       mutex_lock(&priv->ptp_lock);
-
-       now = priv->tstamp_cc.read(&priv->tstamp_cc);
-
-       rc = sja1105_ptpegr_ts_poll(priv, slot, &ts);
-       if (rc < 0) {
-               dev_err(ds->dev, "xmit: timed out polling for tstamp\n");
-               kfree_skb(clone);
-               goto out_unlock_ptp;
-       }
-
-       ts = sja1105_tstamp_reconstruct(priv, now, ts);
-       ts = timecounter_cyc2time(&priv->tstamp_tc, ts);
+       sja1105_ptp_txtstamp_skb(ds, slot, clone);
 
-       shwt.hwtstamp = ns_to_ktime(ts);
-       skb_complete_tx_timestamp(clone, &shwt);
-
-out_unlock_ptp:
-       mutex_unlock(&priv->ptp_lock);
 out:
        mutex_unlock(&priv->mgmt_lock);
        return NETDEV_TX_OK;
@@ -1897,180 +1837,94 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds,
        return sja1105_static_config_reload(priv);
 }
 
-/* Must be called only with priv->tagger_data.state bit
- * SJA1105_HWTS_RX_EN cleared
+static int sja1105_port_setup_tc(struct dsa_switch *ds, int port,
+                                enum tc_setup_type type,
+                                void *type_data)
+{
+       switch (type) {
+       case TC_SETUP_QDISC_TAPRIO:
+               return sja1105_setup_tc_taprio(ds, port, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+/* We have a single mirror (@to) port, but can configure ingress and egress
+ * mirroring on all other (@from) ports.
+ * We need to allow mirroring rules only as long as the @to port is always the
+ * same, and we need to unset the @to port from mirr_port only when there is no
+ * mirroring rule that references it.
  */
-static int sja1105_change_rxtstamping(struct sja1105_private *priv,
-                                     bool on)
+static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to,
+                               bool ingress, bool enabled)
 {
        struct sja1105_general_params_entry *general_params;
+       struct sja1105_mac_config_entry *mac;
        struct sja1105_table *table;
+       bool already_enabled;
+       u64 new_mirr_port;
        int rc;
 
        table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
        general_params = table->entries;
-       general_params->send_meta1 = on;
-       general_params->send_meta0 = on;
 
-       rc = sja1105_init_avb_params(priv, on);
-       if (rc < 0)
-               return rc;
+       mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
 
-       /* Initialize the meta state machine to a known state */
-       if (priv->tagger_data.stampable_skb) {
-               kfree_skb(priv->tagger_data.stampable_skb);
-               priv->tagger_data.stampable_skb = NULL;
+       already_enabled = (general_params->mirr_port != SJA1105_NUM_PORTS);
+       if (already_enabled && enabled && general_params->mirr_port != to) {
+               dev_err(priv->ds->dev,
+                       "Delete mirroring rules towards port %llu first\n",
+                       general_params->mirr_port);
+               return -EBUSY;
        }
 
-       return sja1105_static_config_reload(priv);
-}
-
-static int sja1105_hwtstamp_set(struct dsa_switch *ds, int port,
-                               struct ifreq *ifr)
-{
-       struct sja1105_private *priv = ds->priv;
-       struct hwtstamp_config config;
-       bool rx_on;
-       int rc;
+       new_mirr_port = to;
+       if (!enabled) {
+               bool keep = false;
+               int port;
 
-       if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-               return -EFAULT;
-
-       switch (config.tx_type) {
-       case HWTSTAMP_TX_OFF:
-               priv->ports[port].hwts_tx_en = false;
-               break;
-       case HWTSTAMP_TX_ON:
-               priv->ports[port].hwts_tx_en = true;
-               break;
-       default:
-               return -ERANGE;
-       }
-
-       switch (config.rx_filter) {
-       case HWTSTAMP_FILTER_NONE:
-               rx_on = false;
-               break;
-       default:
-               rx_on = true;
-               break;
+               /* Anybody still referencing mirr_port? */
+               for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+                       if (mac[port].ing_mirr || mac[port].egr_mirr) {
+                               keep = true;
+                               break;
+                       }
+               }
+               /* Unset already_enabled for next time */
+               if (!keep)
+                       new_mirr_port = SJA1105_NUM_PORTS;
        }
+       if (new_mirr_port != general_params->mirr_port) {
+               general_params->mirr_port = new_mirr_port;
 
-       if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) {
-               clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state);
-
-               rc = sja1105_change_rxtstamping(priv, rx_on);
-               if (rc < 0) {
-                       dev_err(ds->dev,
-                               "Failed to change RX timestamping: %d\n", rc);
+               rc = sja1105_dynamic_config_write(priv, BLK_IDX_GENERAL_PARAMS,
+                                                 0, general_params, true);
+               if (rc < 0)
                        return rc;
-               }
-               if (rx_on)
-                       set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state);
        }
 
-       if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
-               return -EFAULT;
-       return 0;
-}
-
-static int sja1105_hwtstamp_get(struct dsa_switch *ds, int port,
-                               struct ifreq *ifr)
-{
-       struct sja1105_private *priv = ds->priv;
-       struct hwtstamp_config config;
-
-       config.flags = 0;
-       if (priv->ports[port].hwts_tx_en)
-               config.tx_type = HWTSTAMP_TX_ON;
+       if (ingress)
+               mac[from].ing_mirr = enabled;
        else
-               config.tx_type = HWTSTAMP_TX_OFF;
-       if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state))
-               config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
-       else
-               config.rx_filter = HWTSTAMP_FILTER_NONE;
-
-       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
-               -EFAULT : 0;
-}
-
-#define to_tagger(d) \
-       container_of((d), struct sja1105_tagger_data, rxtstamp_work)
-#define to_sja1105(d) \
-       container_of((d), struct sja1105_private, tagger_data)
-
-static void sja1105_rxtstamp_work(struct work_struct *work)
-{
-       struct sja1105_tagger_data *data = to_tagger(work);
-       struct sja1105_private *priv = to_sja1105(data);
-       struct sk_buff *skb;
-       u64 now;
-
-       mutex_lock(&priv->ptp_lock);
-
-       while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) {
-               struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb);
-               u64 ts;
-
-               now = priv->tstamp_cc.read(&priv->tstamp_cc);
-
-               *shwt = (struct skb_shared_hwtstamps) {0};
-
-               ts = SJA1105_SKB_CB(skb)->meta_tstamp;
-               ts = sja1105_tstamp_reconstruct(priv, now, ts);
-               ts = timecounter_cyc2time(&priv->tstamp_tc, ts);
-
-               shwt->hwtstamp = ns_to_ktime(ts);
-               netif_rx_ni(skb);
-       }
-
-       mutex_unlock(&priv->ptp_lock);
-}
+               mac[from].egr_mirr = enabled;
 
-/* Called from dsa_skb_defer_rx_timestamp */
-static bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
-                                 struct sk_buff *skb, unsigned int type)
-{
-       struct sja1105_private *priv = ds->priv;
-       struct sja1105_tagger_data *data = &priv->tagger_data;
-
-       if (!test_bit(SJA1105_HWTS_RX_EN, &data->state))
-               return false;
-
-       /* We need to read the full PTP clock to reconstruct the Rx
-        * timestamp. For that we need a sleepable context.
-        */
-       skb_queue_tail(&data->skb_rxtstamp_queue, skb);
-       schedule_work(&data->rxtstamp_work);
-       return true;
+       return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, from,
+                                           &mac[from], true);
 }
 
-/* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone
- * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit
- * callback, where we will timestamp it synchronously.
- */
-static bool sja1105_port_txtstamp(struct dsa_switch *ds, int port,
-                                 struct sk_buff *skb, unsigned int type)
+static int sja1105_mirror_add(struct dsa_switch *ds, int port,
+                             struct dsa_mall_mirror_tc_entry *mirror,
+                             bool ingress)
 {
-       struct sja1105_private *priv = ds->priv;
-       struct sja1105_port *sp = &priv->ports[port];
-
-       if (!sp->hwts_tx_en)
-               return false;
-
-       return true;
+       return sja1105_mirror_apply(ds->priv, port, mirror->to_local_port,
+                                   ingress, true);
 }
 
-static int sja1105_port_setup_tc(struct dsa_switch *ds, int port,
-                                enum tc_setup_type type,
-                                void *type_data)
+static void sja1105_mirror_del(struct dsa_switch *ds, int port,
+                              struct dsa_mall_mirror_tc_entry *mirror)
 {
-       switch (type) {
-       case TC_SETUP_QDISC_TAPRIO:
-               return sja1105_setup_tc_taprio(ds, port, type_data);
-       default:
-               return -EOPNOTSUPP;
-       }
+       sja1105_mirror_apply(ds->priv, port, mirror->to_local_port,
+                            mirror->ingress, false);
 }
 
 static const struct dsa_switch_ops sja1105_switch_ops = {
@@ -2106,6 +1960,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
        .port_rxtstamp          = sja1105_port_rxtstamp,
        .port_txtstamp          = sja1105_port_txtstamp,
        .port_setup_tc          = sja1105_port_setup_tc,
+       .port_mirror_add        = sja1105_mirror_add,
+       .port_mirror_del        = sja1105_mirror_del,
 };
 
 static int sja1105_check_device_id(struct sja1105_private *priv)
@@ -2113,23 +1969,22 @@ static int sja1105_check_device_id(struct sja1105_private *priv)
        const struct sja1105_regs *regs = priv->info->regs;
        u8 prod_id[SJA1105_SIZE_DEVICE_ID] = {0};
        struct device *dev = &priv->spidev->dev;
-       u64 device_id;
+       u32 device_id;
        u64 part_no;
        int rc;
 
-       rc = sja1105_spi_send_int(priv, SPI_READ, regs->device_id,
-                                 &device_id, SJA1105_SIZE_DEVICE_ID);
+       rc = sja1105_xfer_u32(priv, SPI_READ, regs->device_id, &device_id);
        if (rc < 0)
                return rc;
 
        if (device_id != priv->info->device_id) {
-               dev_err(dev, "Expected device ID 0x%llx but read 0x%llx\n",
+               dev_err(dev, "Expected device ID 0x%llx but read 0x%x\n",
                        priv->info->device_id, device_id);
                return -ENODEV;
        }
 
-       rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->prod_id,
-                                        prod_id, SJA1105_SIZE_DEVICE_ID);
+       rc = sja1105_xfer_buf(priv, SPI_READ, regs->prod_id, prod_id,
+                             SJA1105_SIZE_DEVICE_ID);
        if (rc < 0)
                return rc;
 
@@ -2193,32 +2048,37 @@ static int sja1105_probe(struct spi_device *spi)
 
        dev_info(dev, "Probed switch chip: %s\n", priv->info->name);
 
-       ds = dsa_switch_alloc(dev, SJA1105_NUM_PORTS);
+       ds = devm_kzalloc(dev, sizeof(*ds), GFP_KERNEL);
        if (!ds)
                return -ENOMEM;
 
+       ds->dev = dev;
+       ds->num_ports = SJA1105_NUM_PORTS;
        ds->ops = &sja1105_switch_ops;
        ds->priv = priv;
        priv->ds = ds;
 
        tagger_data = &priv->tagger_data;
-       skb_queue_head_init(&tagger_data->skb_rxtstamp_queue);
-       INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work);
-       spin_lock_init(&tagger_data->meta_lock);
+
+       mutex_init(&priv->ptp_data.lock);
+       mutex_init(&priv->mgmt_lock);
+
+       sja1105_tas_setup(ds);
+
+       rc = dsa_register_switch(priv->ds);
+       if (rc)
+               return rc;
 
        /* Connections between dsa_port and sja1105_port */
        for (i = 0; i < SJA1105_NUM_PORTS; i++) {
                struct sja1105_port *sp = &priv->ports[i];
 
-               ds->ports[i].priv = sp;
-               sp->dp = &ds->ports[i];
+               dsa_to_port(ds, i)->priv = sp;
+               sp->dp = dsa_to_port(ds, i);
                sp->data = tagger_data;
        }
-       mutex_init(&priv->mgmt_lock);
 
-       sja1105_tas_setup(ds);
-
-       return dsa_register_switch(priv->ds);
+       return 0;
 }
 
 static int sja1105_remove(struct spi_device *spi)
index d8e8dd5..7831003 100644 (file)
 #define SJA1105_MAX_ADJ_PPB            32000000
 #define SJA1105_SIZE_PTP_CMD           4
 
-/* Timestamps are in units of 8 ns clock ticks (equivalent to a fixed
- * 125 MHz clock) so the scale factor (MULT / SHIFT) needs to be 8.
- * Furthermore, wisely pick SHIFT as 28 bits, which translates
- * MULT into 2^31 (0x80000000).  This is the same value around which
- * the hardware PTPCLKRATE is centered, so the same ppb conversion
- * arithmetic can be reused.
- */
-#define SJA1105_CC_SHIFT               28
-#define SJA1105_CC_MULT                        (8 << SJA1105_CC_SHIFT)
-
-/* Having 33 bits of cycle counter left until a 64-bit overflow during delta
- * conversion, we multiply this by the 8 ns counter resolution and arrive at
- * a comfortable 68.71 second refresh interval until the delta would cause
- * an integer overflow, in absence of any other readout.
- * Approximate to 1 minute.
- */
-#define SJA1105_REFRESH_INTERVAL       (HZ * 60)
-
 /*            This range is actually +/- SJA1105_MAX_ADJ_PPB
  *            divided by 1000 (ppb -> ppm) and with a 16-bit
  *            "fractional" part (actually fixed point).
@@ -41,7 +23,7 @@
  *
  * This forgoes a "ppb" numeric representation (up to NSEC_PER_SEC)
  * and defines the scaling factor between scaled_ppm and the actual
- * frequency adjustments (both cycle counter and hardware).
+ * frequency adjustments of the PHC.
  *
  *   ptpclkrate = scaled_ppm * 2^31 / (10^6 * 2^16)
  *   simplifies to
  */
 #define SJA1105_CC_MULT_NUM            (1 << 9)
 #define SJA1105_CC_MULT_DEM            15625
+#define SJA1105_CC_MULT                        0x80000000
 
-#define ptp_to_sja1105(d) container_of((d), struct sja1105_private, ptp_caps)
-#define cc_to_sja1105(d) container_of((d), struct sja1105_private, tstamp_cc)
-#define dw_to_sja1105(d) container_of((d), struct sja1105_private, refresh_work)
-
-struct sja1105_ptp_cmd {
-       u64 resptp;       /* reset */
+enum sja1105_ptp_clk_mode {
+       PTP_ADD_MODE = 1,
+       PTP_SET_MODE = 0,
 };
 
+#define ptp_caps_to_data(d) \
+               container_of((d), struct sja1105_ptp_data, caps)
+#define ptp_data_to_sja1105(d) \
+               container_of((d), struct sja1105_private, ptp_data)
+
+static int sja1105_init_avb_params(struct sja1105_private *priv,
+                                  bool on)
+{
+       struct sja1105_avb_params_entry *avb;
+       struct sja1105_table *table;
+
+       table = &priv->static_config.tables[BLK_IDX_AVB_PARAMS];
+
+       /* Discard previous AVB Parameters Table */
+       if (table->entry_count) {
+               kfree(table->entries);
+               table->entry_count = 0;
+       }
+
+       /* Configure the reception of meta frames only if requested */
+       if (!on)
+               return 0;
+
+       table->entries = kcalloc(SJA1105_MAX_AVB_PARAMS_COUNT,
+                                table->ops->unpacked_entry_size, GFP_KERNEL);
+       if (!table->entries)
+               return -ENOMEM;
+
+       table->entry_count = SJA1105_MAX_AVB_PARAMS_COUNT;
+
+       avb = table->entries;
+
+       avb->destmeta = SJA1105_META_DMAC;
+       avb->srcmeta  = SJA1105_META_SMAC;
+
+       return 0;
+}
+
+/* Must be called only with priv->tagger_data.state bit
+ * SJA1105_HWTS_RX_EN cleared
+ */
+static int sja1105_change_rxtstamping(struct sja1105_private *priv,
+                                     bool on)
+{
+       struct sja1105_general_params_entry *general_params;
+       struct sja1105_table *table;
+       int rc;
+
+       table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
+       general_params = table->entries;
+       general_params->send_meta1 = on;
+       general_params->send_meta0 = on;
+
+       rc = sja1105_init_avb_params(priv, on);
+       if (rc < 0)
+               return rc;
+
+       /* Initialize the meta state machine to a known state */
+       if (priv->tagger_data.stampable_skb) {
+               kfree_skb(priv->tagger_data.stampable_skb);
+               priv->tagger_data.stampable_skb = NULL;
+       }
+
+       return sja1105_static_config_reload(priv);
+}
+
+int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr)
+{
+       struct sja1105_private *priv = ds->priv;
+       struct hwtstamp_config config;
+       bool rx_on;
+       int rc;
+
+       if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+               return -EFAULT;
+
+       switch (config.tx_type) {
+       case HWTSTAMP_TX_OFF:
+               priv->ports[port].hwts_tx_en = false;
+               break;
+       case HWTSTAMP_TX_ON:
+               priv->ports[port].hwts_tx_en = true;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       switch (config.rx_filter) {
+       case HWTSTAMP_FILTER_NONE:
+               rx_on = false;
+               break;
+       default:
+               rx_on = true;
+               break;
+       }
+
+       if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) {
+               clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state);
+
+               rc = sja1105_change_rxtstamping(priv, rx_on);
+               if (rc < 0) {
+                       dev_err(ds->dev,
+                               "Failed to change RX timestamping: %d\n", rc);
+                       return rc;
+               }
+               if (rx_on)
+                       set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state);
+       }
+
+       if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+               return -EFAULT;
+       return 0;
+}
+
+int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr)
+{
+       struct sja1105_private *priv = ds->priv;
+       struct hwtstamp_config config;
+
+       config.flags = 0;
+       if (priv->ports[port].hwts_tx_en)
+               config.tx_type = HWTSTAMP_TX_ON;
+       else
+               config.tx_type = HWTSTAMP_TX_OFF;
+       if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state))
+               config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+       else
+               config.rx_filter = HWTSTAMP_FILTER_NONE;
+
+       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+               -EFAULT : 0;
+}
+
 int sja1105_get_ts_info(struct dsa_switch *ds, int port,
                        struct ethtool_ts_info *info)
 {
        struct sja1105_private *priv = ds->priv;
+       struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 
        /* Called during cleanup */
-       if (!priv->clock)
+       if (!ptp_data->clock)
                return -ENODEV;
 
        info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
@@ -74,14 +188,14 @@ int sja1105_get_ts_info(struct dsa_switch *ds, int port,
                         (1 << HWTSTAMP_TX_ON);
        info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
                           (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT);
-       info->phc_index = ptp_clock_index(priv->clock);
+       info->phc_index = ptp_clock_index(ptp_data->clock);
        return 0;
 }
 
-int sja1105et_ptp_cmd(const void *ctx, const void *data)
+int sja1105et_ptp_cmd(const struct dsa_switch *ds,
+                     const struct sja1105_ptp_cmd *cmd)
 {
-       const struct sja1105_ptp_cmd *cmd = data;
-       const struct sja1105_private *priv = ctx;
+       const struct sja1105_private *priv = ds->priv;
        const struct sja1105_regs *regs = priv->info->regs;
        const int size = SJA1105_SIZE_PTP_CMD;
        u8 buf[SJA1105_SIZE_PTP_CMD] = {0};
@@ -90,15 +204,17 @@ int sja1105et_ptp_cmd(const void *ctx, const void *data)
 
        sja1105_pack(buf, &valid,           31, 31, size);
        sja1105_pack(buf, &cmd->resptp,      2,  2, size);
+       sja1105_pack(buf, &cmd->corrclk4ts,  1,  1, size);
+       sja1105_pack(buf, &cmd->ptpclkadd,   0,  0, size);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control,
-                                          buf, SJA1105_SIZE_PTP_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf,
+                               SJA1105_SIZE_PTP_CMD);
 }
 
-int sja1105pqrs_ptp_cmd(const void *ctx, const void *data)
+int sja1105pqrs_ptp_cmd(const struct dsa_switch *ds,
+                       const struct sja1105_ptp_cmd *cmd)
 {
-       const struct sja1105_ptp_cmd *cmd = data;
-       const struct sja1105_private *priv = ctx;
+       const struct sja1105_private *priv = ds->priv;
        const struct sja1105_regs *regs = priv->info->regs;
        const int size = SJA1105_SIZE_PTP_CMD;
        u8 buf[SJA1105_SIZE_PTP_CMD] = {0};
@@ -107,9 +223,11 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data)
 
        sja1105_pack(buf, &valid,           31, 31, size);
        sja1105_pack(buf, &cmd->resptp,      3,  3, size);
+       sja1105_pack(buf, &cmd->corrclk4ts,  2,  2, size);
+       sja1105_pack(buf, &cmd->ptpclkadd,   0,  0, size);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control,
-                                          buf, SJA1105_SIZE_PTP_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf,
+                               SJA1105_SIZE_PTP_CMD);
 }
 
 /* The switch returns partial timestamps (24 bits for SJA1105 E/T, which wrap
@@ -126,9 +244,10 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data)
  * Must be called within one wraparound period of the partial timestamp since
  * it was generated by the MAC.
  */
-u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now,
-                              u64 ts_partial)
+static u64 sja1105_tstamp_reconstruct(struct dsa_switch *ds, u64 now,
+                                     u64 ts_partial)
 {
+       struct sja1105_private *priv = ds->priv;
        u64 partial_tstamp_mask = CYCLECOUNTER_MASK(priv->info->ptp_ts_bits);
        u64 ts_reconstructed;
 
@@ -170,8 +289,9 @@ u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now,
  * To have common code for E/T and P/Q/R/S for reading the timestamp,
  * we need to juggle with the offset and the bit indices.
  */
-int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts)
+static int sja1105_ptpegr_ts_poll(struct dsa_switch *ds, int port, u64 *ts)
 {
+       struct sja1105_private *priv = ds->priv;
        const struct sja1105_regs *regs = priv->info->regs;
        int tstamp_bit_start, tstamp_bit_end;
        int timeout = 10;
@@ -180,10 +300,8 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts)
        int rc;
 
        do {
-               rc = sja1105_spi_send_packed_buf(priv, SPI_READ,
-                                                regs->ptpegr_ts[port],
-                                                packed_buf,
-                                                priv->info->ptpegr_ts_bytes);
+               rc = sja1105_xfer_buf(priv, SPI_READ, regs->ptpegr_ts[port],
+                                     packed_buf, priv->info->ptpegr_ts_bytes);
                if (rc < 0)
                        return rc;
 
@@ -216,22 +334,109 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts)
        return 0;
 }
 
-int sja1105_ptp_reset(struct sja1105_private *priv)
+/* Caller must hold ptp_data->lock */
+static int sja1105_ptpclkval_read(struct sja1105_private *priv, u64 *ticks)
+{
+       const struct sja1105_regs *regs = priv->info->regs;
+
+       return sja1105_xfer_u64(priv, SPI_READ, regs->ptpclkval, ticks);
+}
+
+/* Caller must hold ptp_data->lock */
+static int sja1105_ptpclkval_write(struct sja1105_private *priv, u64 ticks)
 {
+       const struct sja1105_regs *regs = priv->info->regs;
+
+       return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpclkval, &ticks);
+}
+
+#define rxtstamp_to_tagger(d) \
+       container_of((d), struct sja1105_tagger_data, rxtstamp_work)
+#define tagger_to_sja1105(d) \
+       container_of((d), struct sja1105_private, tagger_data)
+
+static void sja1105_rxtstamp_work(struct work_struct *work)
+{
+       struct sja1105_tagger_data *tagger_data = rxtstamp_to_tagger(work);
+       struct sja1105_private *priv = tagger_to_sja1105(tagger_data);
+       struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
        struct dsa_switch *ds = priv->ds;
-       struct sja1105_ptp_cmd cmd = {0};
+       struct sk_buff *skb;
+
+       mutex_lock(&ptp_data->lock);
+
+       while ((skb = skb_dequeue(&tagger_data->skb_rxtstamp_queue)) != NULL) {
+               struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb);
+               u64 ticks, ts;
+               int rc;
+
+               rc = sja1105_ptpclkval_read(priv, &ticks);
+               if (rc < 0) {
+                       dev_err(ds->dev, "Failed to read PTP clock: %d\n", rc);
+                       kfree_skb(skb);
+                       continue;
+               }
+
+               *shwt = (struct skb_shared_hwtstamps) {0};
+
+               ts = SJA1105_SKB_CB(skb)->meta_tstamp;
+               ts = sja1105_tstamp_reconstruct(ds, ticks, ts);
+
+               shwt->hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts));
+               netif_rx_ni(skb);
+       }
+
+       mutex_unlock(&ptp_data->lock);
+}
+
+/* Called from dsa_skb_defer_rx_timestamp */
+bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
+                          struct sk_buff *skb, unsigned int type)
+{
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
+
+       if (!test_bit(SJA1105_HWTS_RX_EN, &tagger_data->state))
+               return false;
+
+       /* We need to read the full PTP clock to reconstruct the Rx
+        * timestamp. For that we need a sleepable context.
+        */
+       skb_queue_tail(&tagger_data->skb_rxtstamp_queue, skb);
+       schedule_work(&tagger_data->rxtstamp_work);
+       return true;
+}
+
+/* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone
+ * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit
+ * callback, where we will timestamp it synchronously.
+ */
+bool sja1105_port_txtstamp(struct dsa_switch *ds, int port,
+                          struct sk_buff *skb, unsigned int type)
+{
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_port *sp = &priv->ports[port];
+
+       if (!sp->hwts_tx_en)
+               return false;
+
+       return true;
+}
+
+int sja1105_ptp_reset(struct dsa_switch *ds)
+{
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
+       struct sja1105_ptp_cmd cmd = ptp_data->cmd;
        int rc;
 
-       mutex_lock(&priv->ptp_lock);
+       mutex_lock(&ptp_data->lock);
 
        cmd.resptp = 1;
        dev_dbg(ds->dev, "Resetting PTP clock\n");
-       rc = priv->info->ptp_cmd(priv, &cmd);
-
-       timecounter_init(&priv->tstamp_tc, &priv->tstamp_cc,
-                        ktime_to_ns(ktime_get_real()));
+       rc = priv->info->ptp_cmd(ds, &cmd);
 
-       mutex_unlock(&priv->ptp_lock);
+       mutex_unlock(&ptp_data->lock);
 
        return rc;
 }
@@ -239,154 +444,185 @@ int sja1105_ptp_reset(struct sja1105_private *priv)
 static int sja1105_ptp_gettime(struct ptp_clock_info *ptp,
                               struct timespec64 *ts)
 {
-       struct sja1105_private *priv = ptp_to_sja1105(ptp);
-       u64 ns;
+       struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp);
+       struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data);
+       u64 ticks = 0;
+       int rc;
 
-       mutex_lock(&priv->ptp_lock);
-       ns = timecounter_read(&priv->tstamp_tc);
-       mutex_unlock(&priv->ptp_lock);
+       mutex_lock(&ptp_data->lock);
 
-       *ts = ns_to_timespec64(ns);
+       rc = sja1105_ptpclkval_read(priv, &ticks);
+       *ts = ns_to_timespec64(sja1105_ticks_to_ns(ticks));
 
-       return 0;
+       mutex_unlock(&ptp_data->lock);
+
+       return rc;
+}
+
+/* Caller must hold ptp_data->lock */
+static int sja1105_ptp_mode_set(struct sja1105_private *priv,
+                               enum sja1105_ptp_clk_mode mode)
+{
+       struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
+
+       if (ptp_data->cmd.ptpclkadd == mode)
+               return 0;
+
+       ptp_data->cmd.ptpclkadd = mode;
+
+       return priv->info->ptp_cmd(priv->ds, &ptp_data->cmd);
 }
 
+/* Write to PTPCLKVAL while PTPCLKADD is 0 */
 static int sja1105_ptp_settime(struct ptp_clock_info *ptp,
                               const struct timespec64 *ts)
 {
-       struct sja1105_private *priv = ptp_to_sja1105(ptp);
-       u64 ns = timespec64_to_ns(ts);
+       struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp);
+       struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data);
+       u64 ticks = ns_to_sja1105_ticks(timespec64_to_ns(ts));
+       int rc;
 
-       mutex_lock(&priv->ptp_lock);
-       timecounter_init(&priv->tstamp_tc, &priv->tstamp_cc, ns);
-       mutex_unlock(&priv->ptp_lock);
+       mutex_lock(&ptp_data->lock);
 
-       return 0;
+       rc = sja1105_ptp_mode_set(priv, PTP_SET_MODE);
+       if (rc < 0) {
+               dev_err(priv->ds->dev, "Failed to put PTPCLK in set mode\n");
+               goto out;
+       }
+
+       rc = sja1105_ptpclkval_write(priv, ticks);
+out:
+       mutex_unlock(&ptp_data->lock);
+
+       return rc;
 }
 
 static int sja1105_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
-       struct sja1105_private *priv = ptp_to_sja1105(ptp);
+       struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp);
+       struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data);
+       const struct sja1105_regs *regs = priv->info->regs;
+       u32 clkrate32;
        s64 clkrate;
+       int rc;
 
        clkrate = (s64)scaled_ppm * SJA1105_CC_MULT_NUM;
        clkrate = div_s64(clkrate, SJA1105_CC_MULT_DEM);
 
-       mutex_lock(&priv->ptp_lock);
-
-       /* Force a readout to update the timer *before* changing its frequency.
-        *
-        * This way, its corrected time curve can at all times be modeled
-        * as a linear "A * x + B" function, where:
-        *
-        * - B are past frequency adjustments and offset shifts, all
-        *   accumulated into the cycle_last variable.
-        *
-        * - A is the new frequency adjustments we're just about to set.
-        *
-        * Reading now makes B accumulate the correct amount of time,
-        * corrected at the old rate, before changing it.
-        *
-        * Hardware timestamps then become simple points on the curve and
-        * are approximated using the above function.  This is still better
-        * than letting the switch take the timestamps using the hardware
-        * rate-corrected clock (PTPCLKVAL) - the comparison in this case would
-        * be that we're shifting the ruler at the same time as we're taking
-        * measurements with it.
-        *
-        * The disadvantage is that it's possible to receive timestamps when
-        * a frequency adjustment took place in the near past.
-        * In this case they will be approximated using the new ppb value
-        * instead of a compound function made of two segments (one at the old
-        * and the other at the new rate) - introducing some inaccuracy.
-        */
-       timecounter_read(&priv->tstamp_tc);
+       /* Take a +/- value and re-center it around 2^31. */
+       clkrate = SJA1105_CC_MULT + clkrate;
+       WARN_ON(abs(clkrate) >= GENMASK_ULL(31, 0));
+       clkrate32 = clkrate;
 
-       priv->tstamp_cc.mult = SJA1105_CC_MULT + clkrate;
+       mutex_lock(&ptp_data->lock);
 
-       mutex_unlock(&priv->ptp_lock);
+       rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkrate, &clkrate32);
 
-       return 0;
+       mutex_unlock(&ptp_data->lock);
+
+       return rc;
 }
 
+/* Write to PTPCLKVAL while PTPCLKADD is 1 */
 static int sja1105_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
-       struct sja1105_private *priv = ptp_to_sja1105(ptp);
+       struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp);
+       struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data);
+       s64 ticks = ns_to_sja1105_ticks(delta);
+       int rc;
 
-       mutex_lock(&priv->ptp_lock);
-       timecounter_adjtime(&priv->tstamp_tc, delta);
-       mutex_unlock(&priv->ptp_lock);
+       mutex_lock(&ptp_data->lock);
 
-       return 0;
+       rc = sja1105_ptp_mode_set(priv, PTP_ADD_MODE);
+       if (rc < 0) {
+               dev_err(priv->ds->dev, "Failed to put PTPCLK in add mode\n");
+               goto out;
+       }
+
+       rc = sja1105_ptpclkval_write(priv, ticks);
+
+out:
+       mutex_unlock(&ptp_data->lock);
+
+       return rc;
 }
 
-static u64 sja1105_ptptsclk_read(const struct cyclecounter *cc)
+int sja1105_ptp_clock_register(struct dsa_switch *ds)
 {
-       struct sja1105_private *priv = cc_to_sja1105(cc);
-       const struct sja1105_regs *regs = priv->info->regs;
-       u64 ptptsclk = 0;
-       int rc;
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
+       struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
+
+       ptp_data->caps = (struct ptp_clock_info) {
+               .owner          = THIS_MODULE,
+               .name           = "SJA1105 PHC",
+               .adjfine        = sja1105_ptp_adjfine,
+               .adjtime        = sja1105_ptp_adjtime,
+               .gettime64      = sja1105_ptp_gettime,
+               .settime64      = sja1105_ptp_settime,
+               .max_adj        = SJA1105_MAX_ADJ_PPB,
+       };
 
-       rc = sja1105_spi_send_int(priv, SPI_READ, regs->ptptsclk,
-                                 &ptptsclk, 8);
-       if (rc < 0)
-               dev_err_ratelimited(priv->ds->dev,
-                                   "failed to read ptp cycle counter: %d\n",
-                                   rc);
-       return ptptsclk;
+       skb_queue_head_init(&tagger_data->skb_rxtstamp_queue);
+       INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work);
+       spin_lock_init(&tagger_data->meta_lock);
+
+       ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev);
+       if (IS_ERR_OR_NULL(ptp_data->clock))
+               return PTR_ERR(ptp_data->clock);
+
+       ptp_data->cmd.corrclk4ts = true;
+       ptp_data->cmd.ptpclkadd = PTP_SET_MODE;
+
+       return sja1105_ptp_reset(ds);
 }
 
-static void sja1105_ptp_overflow_check(struct work_struct *work)
+void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
 {
-       struct delayed_work *dw = to_delayed_work(work);
-       struct sja1105_private *priv = dw_to_sja1105(dw);
-       struct timespec64 ts;
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 
-       sja1105_ptp_gettime(&priv->ptp_caps, &ts);
+       if (IS_ERR_OR_NULL(ptp_data->clock))
+               return;
 
-       schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL);
+       cancel_work_sync(&priv->tagger_data.rxtstamp_work);
+       skb_queue_purge(&priv->tagger_data.skb_rxtstamp_queue);
+       ptp_clock_unregister(ptp_data->clock);
+       ptp_data->clock = NULL;
 }
 
-static const struct ptp_clock_info sja1105_ptp_caps = {
-       .owner          = THIS_MODULE,
-       .name           = "SJA1105 PHC",
-       .adjfine        = sja1105_ptp_adjfine,
-       .adjtime        = sja1105_ptp_adjtime,
-       .gettime64      = sja1105_ptp_gettime,
-       .settime64      = sja1105_ptp_settime,
-       .max_adj        = SJA1105_MAX_ADJ_PPB,
-};
-
-int sja1105_ptp_clock_register(struct sja1105_private *priv)
+void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
+                             struct sk_buff *skb)
 {
-       struct dsa_switch *ds = priv->ds;
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
+       struct skb_shared_hwtstamps shwt = {0};
+       u64 ticks, ts;
+       int rc;
 
-       /* Set up the cycle counter */
-       priv->tstamp_cc = (struct cyclecounter) {
-               .read = sja1105_ptptsclk_read,
-               .mask = CYCLECOUNTER_MASK(64),
-               .shift = SJA1105_CC_SHIFT,
-               .mult = SJA1105_CC_MULT,
-       };
-       mutex_init(&priv->ptp_lock);
-       priv->ptp_caps = sja1105_ptp_caps;
+       skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
-       priv->clock = ptp_clock_register(&priv->ptp_caps, ds->dev);
-       if (IS_ERR_OR_NULL(priv->clock))
-               return PTR_ERR(priv->clock);
+       mutex_lock(&ptp_data->lock);
 
-       INIT_DELAYED_WORK(&priv->refresh_work, sja1105_ptp_overflow_check);
-       schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL);
+       rc = sja1105_ptpclkval_read(priv, &ticks);
+       if (rc < 0) {
+               dev_err(ds->dev, "Failed to read PTP clock: %d\n", rc);
+               kfree_skb(skb);
+               goto out;
+       }
 
-       return sja1105_ptp_reset(priv);
-}
+       rc = sja1105_ptpegr_ts_poll(ds, slot, &ts);
+       if (rc < 0) {
+               dev_err(ds->dev, "timed out polling for tstamp\n");
+               kfree_skb(skb);
+               goto out;
+       }
 
-void sja1105_ptp_clock_unregister(struct sja1105_private *priv)
-{
-       if (IS_ERR_OR_NULL(priv->clock))
-               return;
+       ts = sja1105_tstamp_reconstruct(ds, ticks, ts);
+
+       shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts));
+       skb_complete_tx_timestamp(skb, &shwt);
 
-       cancel_delayed_work_sync(&priv->refresh_work);
-       ptp_clock_unregister(priv->clock);
-       priv->clock = NULL;
+out:
+       mutex_unlock(&ptp_data->lock);
 }
index 394e12a..243f130 100644 (file)
@@ -6,49 +6,88 @@
 
 #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
 
-int sja1105_ptp_clock_register(struct sja1105_private *priv);
+/* Timestamps are in units of 8 ns clock ticks (equivalent to
+ * a fixed 125 MHz clock).
+ */
+#define SJA1105_TICK_NS                        8
+
+static inline s64 ns_to_sja1105_ticks(s64 ns)
+{
+       return ns / SJA1105_TICK_NS;
+}
+
+static inline s64 sja1105_ticks_to_ns(s64 ticks)
+{
+       return ticks * SJA1105_TICK_NS;
+}
 
-void sja1105_ptp_clock_unregister(struct sja1105_private *priv);
+struct sja1105_ptp_cmd {
+       u64 resptp;             /* reset */
+       u64 corrclk4ts;         /* use the corrected clock for timestamps */
+       u64 ptpclkadd;          /* enum sja1105_ptp_clk_mode */
+};
 
-int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts);
+struct sja1105_ptp_data {
+       struct ptp_clock_info caps;
+       struct ptp_clock *clock;
+       struct sja1105_ptp_cmd cmd;
+       /* Serializes all operations on the PTP hardware clock */
+       struct mutex lock;
+};
 
-int sja1105et_ptp_cmd(const void *ctx, const void *data);
+int sja1105_ptp_clock_register(struct dsa_switch *ds);
 
-int sja1105pqrs_ptp_cmd(const void *ctx, const void *data);
+void sja1105_ptp_clock_unregister(struct dsa_switch *ds);
+
+int sja1105et_ptp_cmd(const struct dsa_switch *ds,
+                     const struct sja1105_ptp_cmd *cmd);
+
+int sja1105pqrs_ptp_cmd(const struct dsa_switch *ds,
+                       const struct sja1105_ptp_cmd *cmd);
 
 int sja1105_get_ts_info(struct dsa_switch *ds, int port,
                        struct ethtool_ts_info *ts);
 
-u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now,
-                              u64 ts_partial);
+void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
+                             struct sk_buff *clone);
+
+int sja1105_ptp_reset(struct dsa_switch *ds);
+
+bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
+                          struct sk_buff *skb, unsigned int type);
+
+bool sja1105_port_txtstamp(struct dsa_switch *ds, int port,
+                          struct sk_buff *skb, unsigned int type);
+
+int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr);
 
-int sja1105_ptp_reset(struct sja1105_private *priv);
+int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr);
 
 #else
 
-static inline int sja1105_ptp_clock_register(struct sja1105_private *priv)
-{
-       return 0;
-}
+struct sja1105_ptp_cmd;
 
-static inline void sja1105_ptp_clock_unregister(struct sja1105_private *priv)
-{
-       return;
-}
+/* Structures cannot be empty in C. Bah!
+ * Keep the mutex as the only element, which is a bit more difficult to
+ * refactor out of sja1105_main.c anyway.
+ */
+struct sja1105_ptp_data {
+       struct mutex lock;
+};
 
-static inline int
-sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts)
+static inline int sja1105_ptp_clock_register(struct dsa_switch *ds)
 {
        return 0;
 }
 
-static inline u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv,
-                                            u64 now, u64 ts_partial)
+static inline void sja1105_ptp_clock_unregister(struct dsa_switch *ds) { }
+
+static inline void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
+                                           struct sk_buff *clone)
 {
-       return 0;
 }
 
-static inline int sja1105_ptp_reset(struct sja1105_private *priv)
+static inline int sja1105_ptp_reset(struct dsa_switch *ds)
 {
        return 0;
 }
@@ -59,6 +98,14 @@ static inline int sja1105_ptp_reset(struct sja1105_private *priv)
 
 #define sja1105_get_ts_info NULL
 
+#define sja1105_port_rxtstamp NULL
+
+#define sja1105_port_txtstamp NULL
+
+#define sja1105_hwtstamp_get NULL
+
+#define sja1105_hwtstamp_set NULL
+
 #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
 
 #endif /* _SJA1105_PTP_H */
index 58dd37e..ed02410 100644 (file)
@@ -7,42 +7,15 @@
 #include <linux/packing.h>
 #include "sja1105.h"
 
-#define SJA1105_SIZE_PORT_CTRL         4
 #define SJA1105_SIZE_RESET_CMD         4
 #define SJA1105_SIZE_SPI_MSG_HEADER    4
 #define SJA1105_SIZE_SPI_MSG_MAXLEN    (64 * 4)
-#define SJA1105_SIZE_SPI_TRANSFER_MAX  \
-       (SJA1105_SIZE_SPI_MSG_HEADER + SJA1105_SIZE_SPI_MSG_MAXLEN)
 
-static int sja1105_spi_transfer(const struct sja1105_private *priv,
-                               const void *tx, void *rx, int size)
-{
-       struct spi_device *spi = priv->spidev;
-       struct spi_transfer transfer = {
-               .tx_buf = tx,
-               .rx_buf = rx,
-               .len = size,
-       };
-       struct spi_message msg;
-       int rc;
-
-       if (size > SJA1105_SIZE_SPI_TRANSFER_MAX) {
-               dev_err(&spi->dev, "SPI message (%d) longer than max of %d\n",
-                       size, SJA1105_SIZE_SPI_TRANSFER_MAX);
-               return -EMSGSIZE;
-       }
-
-       spi_message_init(&msg);
-       spi_message_add_tail(&transfer, &msg);
-
-       rc = spi_sync(spi, &msg);
-       if (rc < 0) {
-               dev_err(&spi->dev, "SPI transfer failed: %d\n", rc);
-               return rc;
-       }
-
-       return rc;
-}
+struct sja1105_chunk {
+       u8      *buf;
+       size_t  len;
+       u64     reg_addr;
+};
 
 static void
 sja1105_spi_message_pack(void *buf, const struct sja1105_spi_message *msg)
@@ -56,121 +29,150 @@ sja1105_spi_message_pack(void *buf, const struct sja1105_spi_message *msg)
        sja1105_pack(buf, &msg->address,    24,  4, size);
 }
 
+#define sja1105_hdr_xfer(xfers, chunk) \
+       ((xfers) + 2 * (chunk))
+#define sja1105_chunk_xfer(xfers, chunk) \
+       ((xfers) + 2 * (chunk) + 1)
+#define sja1105_hdr_buf(hdr_bufs, chunk) \
+       ((hdr_bufs) + (chunk) * SJA1105_SIZE_SPI_MSG_HEADER)
+
 /* If @rw is:
  * - SPI_WRITE: creates and sends an SPI write message at absolute
- *             address reg_addr, taking size_bytes from *packed_buf
+ *             address reg_addr, taking @len bytes from *buf
  * - SPI_READ:  creates and sends an SPI read message from absolute
- *             address reg_addr, writing size_bytes into *packed_buf
- *
- * This function should only be called if it is priorly known that
- * @size_bytes is smaller than SIZE_SPI_MSG_MAXLEN. Larger packed buffers
- * are chunked in smaller pieces by sja1105_spi_send_long_packed_buf below.
+ *             address reg_addr, writing @len bytes into *buf
  */
-int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
-                               sja1105_spi_rw_mode_t rw, u64 reg_addr,
-                               void *packed_buf, size_t size_bytes)
+int sja1105_xfer_buf(const struct sja1105_private *priv,
+                    sja1105_spi_rw_mode_t rw, u64 reg_addr,
+                    u8 *buf, size_t len)
 {
-       u8 tx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0};
-       u8 rx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0};
-       const int msg_len = size_bytes + SJA1105_SIZE_SPI_MSG_HEADER;
-       struct sja1105_spi_message msg = {0};
-       int rc;
+       struct sja1105_chunk chunk = {
+               .len = min_t(size_t, len, SJA1105_SIZE_SPI_MSG_MAXLEN),
+               .reg_addr = reg_addr,
+               .buf = buf,
+       };
+       struct spi_device *spi = priv->spidev;
+       struct spi_transfer *xfers;
+       int num_chunks;
+       int rc, i = 0;
+       u8 *hdr_bufs;
 
-       if (msg_len > SJA1105_SIZE_SPI_TRANSFER_MAX)
-               return -ERANGE;
+       num_chunks = DIV_ROUND_UP(len, SJA1105_SIZE_SPI_MSG_MAXLEN);
 
-       msg.access = rw;
-       msg.address = reg_addr;
-       if (rw == SPI_READ)
-               msg.read_count = size_bytes / 4;
+       /* One transfer for each message header, one for each message
+        * payload (chunk).
+        */
+       xfers = kcalloc(2 * num_chunks, sizeof(struct spi_transfer),
+                       GFP_KERNEL);
+       if (!xfers)
+               return -ENOMEM;
 
-       sja1105_spi_message_pack(tx_buf, &msg);
+       /* Packed buffers for the num_chunks SPI message headers,
+        * stored as a contiguous array
+        */
+       hdr_bufs = kcalloc(num_chunks, SJA1105_SIZE_SPI_MSG_HEADER,
+                          GFP_KERNEL);
+       if (!hdr_bufs) {
+               kfree(xfers);
+               return -ENOMEM;
+       }
 
-       if (rw == SPI_WRITE)
-               memcpy(tx_buf + SJA1105_SIZE_SPI_MSG_HEADER,
-                      packed_buf, size_bytes);
+       for (i = 0; i < num_chunks; i++) {
+               struct spi_transfer *chunk_xfer = sja1105_chunk_xfer(xfers, i);
+               struct spi_transfer *hdr_xfer = sja1105_hdr_xfer(xfers, i);
+               u8 *hdr_buf = sja1105_hdr_buf(hdr_bufs, i);
+               struct sja1105_spi_message msg;
+
+               /* Populate the transfer's header buffer */
+               msg.address = chunk.reg_addr;
+               msg.access = rw;
+               if (rw == SPI_READ)
+                       msg.read_count = chunk.len / 4;
+               else
+                       /* Ignored */
+                       msg.read_count = 0;
+               sja1105_spi_message_pack(hdr_buf, &msg);
+               hdr_xfer->tx_buf = hdr_buf;
+               hdr_xfer->len = SJA1105_SIZE_SPI_MSG_HEADER;
+
+               /* Populate the transfer's data buffer */
+               if (rw == SPI_READ)
+                       chunk_xfer->rx_buf = chunk.buf;
+               else
+                       chunk_xfer->tx_buf = chunk.buf;
+               chunk_xfer->len = chunk.len;
+
+               /* Calculate next chunk */
+               chunk.buf += chunk.len;
+               chunk.reg_addr += chunk.len / 4;
+               chunk.len = min_t(size_t, (ptrdiff_t)(buf + len - chunk.buf),
+                                 SJA1105_SIZE_SPI_MSG_MAXLEN);
+
+               /* De-assert the chip select after each chunk. */
+               if (chunk.len)
+                       chunk_xfer->cs_change = 1;
+       }
 
-       rc = sja1105_spi_transfer(priv, tx_buf, rx_buf, msg_len);
+       rc = spi_sync_transfer(spi, xfers, 2 * num_chunks);
        if (rc < 0)
-               return rc;
+               dev_err(&spi->dev, "SPI transfer failed: %d\n", rc);
 
-       if (rw == SPI_READ)
-               memcpy(packed_buf, rx_buf + SJA1105_SIZE_SPI_MSG_HEADER,
-                      size_bytes);
+       kfree(hdr_bufs);
+       kfree(xfers);
 
-       return 0;
+       return rc;
 }
 
 /* If @rw is:
  * - SPI_WRITE: creates and sends an SPI write message at absolute
- *             address reg_addr, taking size_bytes from *packed_buf
+ *             address reg_addr
  * - SPI_READ:  creates and sends an SPI read message from absolute
- *             address reg_addr, writing size_bytes into *packed_buf
+ *             address reg_addr
  *
  * The u64 *value is unpacked, meaning that it's stored in the native
  * CPU endianness and directly usable by software running on the core.
- *
- * This is a wrapper around sja1105_spi_send_packed_buf().
  */
-int sja1105_spi_send_int(const struct sja1105_private *priv,
-                        sja1105_spi_rw_mode_t rw, u64 reg_addr,
-                        u64 *value, u64 size_bytes)
+int sja1105_xfer_u64(const struct sja1105_private *priv,
+                    sja1105_spi_rw_mode_t rw, u64 reg_addr, u64 *value)
 {
-       u8 packed_buf[SJA1105_SIZE_SPI_MSG_MAXLEN];
+       u8 packed_buf[8];
        int rc;
 
-       if (size_bytes > SJA1105_SIZE_SPI_MSG_MAXLEN)
-               return -ERANGE;
-
        if (rw == SPI_WRITE)
-               sja1105_pack(packed_buf, value, 8 * size_bytes - 1, 0,
-                            size_bytes);
+               sja1105_pack(packed_buf, value, 63, 0, 8);
 
-       rc = sja1105_spi_send_packed_buf(priv, rw, reg_addr, packed_buf,
-                                        size_bytes);
+       rc = sja1105_xfer_buf(priv, rw, reg_addr, packed_buf, 8);
 
        if (rw == SPI_READ)
-               sja1105_unpack(packed_buf, value, 8 * size_bytes - 1, 0,
-                              size_bytes);
+               sja1105_unpack(packed_buf, value, 63, 0, 8);
 
        return rc;
 }
 
-/* Should be used if a @packed_buf larger than SJA1105_SIZE_SPI_MSG_MAXLEN
- * must be sent/received. Splitting the buffer into chunks and assembling
- * those into SPI messages is done automatically by this function.
- */
-int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv,
-                                    sja1105_spi_rw_mode_t rw, u64 base_addr,
-                                    void *packed_buf, u64 buf_len)
+/* Same as above, but transfers only a 4 byte word */
+int sja1105_xfer_u32(const struct sja1105_private *priv,
+                    sja1105_spi_rw_mode_t rw, u64 reg_addr, u32 *value)
 {
-       struct chunk {
-               void *buf_ptr;
-               int len;
-               u64 spi_address;
-       } chunk;
-       int distance_to_end;
+       u8 packed_buf[4];
+       u64 tmp;
        int rc;
 
-       /* Initialize chunk */
-       chunk.buf_ptr = packed_buf;
-       chunk.spi_address = base_addr;
-       chunk.len = min_t(int, buf_len, SJA1105_SIZE_SPI_MSG_MAXLEN);
+       if (rw == SPI_WRITE) {
+               /* The packing API only supports u64 as CPU word size,
+                * so we need to convert.
+                */
+               tmp = *value;
+               sja1105_pack(packed_buf, &tmp, 31, 0, 4);
+       }
 
-       while (chunk.len) {
-               rc = sja1105_spi_send_packed_buf(priv, rw, chunk.spi_address,
-                                                chunk.buf_ptr, chunk.len);
-               if (rc < 0)
-                       return rc;
+       rc = sja1105_xfer_buf(priv, rw, reg_addr, packed_buf, 4);
 
-               chunk.buf_ptr += chunk.len;
-               chunk.spi_address += chunk.len / 4;
-               distance_to_end = (uintptr_t)(packed_buf + buf_len -
-                                             chunk.buf_ptr);
-               chunk.len = min(distance_to_end, SJA1105_SIZE_SPI_MSG_MAXLEN);
+       if (rw == SPI_READ) {
+               sja1105_unpack(packed_buf, &tmp, 31, 0, 4);
+               *value = tmp;
        }
 
-       return 0;
+       return rc;
 }
 
 /* Back-ported structure from UM11040 Table 112.
@@ -241,8 +243,8 @@ static int sja1105et_reset_cmd(const void *ctx, const void *data)
 
        sja1105et_reset_cmd_pack(packed_buf, reset);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rgu,
-                                          packed_buf, SJA1105_SIZE_RESET_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf,
+                               SJA1105_SIZE_RESET_CMD);
 }
 
 static int sja1105pqrs_reset_cmd(const void *ctx, const void *data)
@@ -271,8 +273,8 @@ static int sja1105pqrs_reset_cmd(const void *ctx, const void *data)
 
        sja1105pqrs_reset_cmd_pack(packed_buf, reset);
 
-       return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rgu,
-                                          packed_buf, SJA1105_SIZE_RESET_CMD);
+       return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf,
+                               SJA1105_SIZE_RESET_CMD);
 }
 
 static int sja1105_cold_reset(const struct sja1105_private *priv)
@@ -287,11 +289,11 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv,
                       unsigned long port_bitmap, bool tx_inhibited)
 {
        const struct sja1105_regs *regs = priv->info->regs;
-       u64 inhibit_cmd;
+       u32 inhibit_cmd;
        int rc;
 
-       rc = sja1105_spi_send_int(priv, SPI_READ, regs->port_control,
-                                 &inhibit_cmd, SJA1105_SIZE_PORT_CTRL);
+       rc = sja1105_xfer_u32(priv, SPI_READ, regs->port_control,
+                             &inhibit_cmd);
        if (rc < 0)
                return rc;
 
@@ -300,8 +302,8 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv,
        else
                inhibit_cmd &= ~port_bitmap;
 
-       return sja1105_spi_send_int(priv, SPI_WRITE, regs->port_control,
-                                   &inhibit_cmd, SJA1105_SIZE_PORT_CTRL);
+       return sja1105_xfer_u32(priv, SPI_WRITE, regs->port_control,
+                               &inhibit_cmd);
 }
 
 struct sja1105_status {
@@ -339,9 +341,7 @@ static int sja1105_status_get(struct sja1105_private *priv,
        u8 packed_buf[4];
        int rc;
 
-       rc = sja1105_spi_send_packed_buf(priv, SPI_READ,
-                                        regs->status,
-                                        packed_buf, 4);
+       rc = sja1105_xfer_buf(priv, SPI_READ, regs->status, packed_buf, 4);
        if (rc < 0)
                return rc;
 
@@ -437,9 +437,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv)
                /* Wait for the switch to come out of reset */
                usleep_range(1000, 5000);
                /* Upload the static config to the device */
-               rc = sja1105_spi_send_long_packed_buf(priv, SPI_WRITE,
-                                                     regs->config,
-                                                     config_buf, buf_len);
+               rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->config,
+                                     config_buf, buf_len);
                if (rc < 0) {
                        dev_err(dev, "Failed to upload config, retrying...\n");
                        continue;
@@ -482,7 +481,7 @@ int sja1105_static_config_upload(struct sja1105_private *priv)
                dev_info(dev, "Succeeded after %d tried\n", RETRIES - retries);
        }
 
-       rc = sja1105_ptp_reset(priv);
+       rc = sja1105_ptp_reset(priv->ds);
        if (rc < 0)
                dev_err(dev, "Failed to reset PTP clock: %d\n", rc);
 
@@ -517,9 +516,8 @@ static struct sja1105_regs sja1105et_regs = {
        .rmii_ext_tx_clk = {0x100018, 0x10001F, 0x100026, 0x10002D, 0x100034},
        .ptpegr_ts = {0xC0, 0xC2, 0xC4, 0xC6, 0xC8},
        .ptp_control = 0x17,
-       .ptpclk = 0x18, /* Spans 0x18 to 0x19 */
+       .ptpclkval = 0x18, /* Spans 0x18 to 0x19 */
        .ptpclkrate = 0x1A,
-       .ptptsclk = 0x1B, /* Spans 0x1B to 0x1C */
 };
 
 static struct sja1105_regs sja1105pqrs_regs = {
@@ -548,9 +546,8 @@ static struct sja1105_regs sja1105pqrs_regs = {
        .qlevel = {0x604, 0x614, 0x624, 0x634, 0x644},
        .ptpegr_ts = {0xC0, 0xC4, 0xC8, 0xCC, 0xD0},
        .ptp_control = 0x18,
-       .ptpclk = 0x19,
+       .ptpclkval = 0x19,
        .ptpclkrate = 0x1B,
-       .ptptsclk = 0x1C,
 };
 
 struct sja1105_info sja1105e_info = {
index 614377e..42c1574 100644 (file)
@@ -1178,9 +1178,12 @@ int vsc73xx_probe(struct vsc73xx *vsc)
         * We allocate 8 ports and avoid access to the nonexistant
         * ports.
         */
-       vsc->ds = dsa_switch_alloc(dev, 8);
+       vsc->ds = devm_kzalloc(dev, sizeof(*vsc->ds), GFP_KERNEL);
        if (!vsc->ds)
                return -ENOMEM;
+
+       vsc->ds->dev = dev;
+       vsc->ds->num_ports = 8;
        vsc->ds->priv = vsc;
 
        vsc->ds->ops = &vsc73xx_ds_ops;
index e8e9c16..4ded81b 100644 (file)
@@ -78,7 +78,6 @@ source "drivers/net/ethernet/freescale/Kconfig"
 source "drivers/net/ethernet/fujitsu/Kconfig"
 source "drivers/net/ethernet/google/Kconfig"
 source "drivers/net/ethernet/hisilicon/Kconfig"
-source "drivers/net/ethernet/hp/Kconfig"
 source "drivers/net/ethernet/huawei/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
index 05abebc..f8f38dc 100644 (file)
@@ -41,7 +41,6 @@ obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/
 obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/
 obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/
 obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/
-obj-$(CONFIG_NET_VENDOR_HP) += hp/
 obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/
 obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
 obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
index bb032be..4cd53fc 100644 (file)
@@ -730,12 +730,12 @@ static int altera_tse_phy_get_addr_mdio_create(struct net_device *dev)
 {
        struct altera_tse_private *priv = netdev_priv(dev);
        struct device_node *np = priv->device->of_node;
-       int ret = 0;
+       int ret;
 
-       priv->phy_iface = of_get_phy_mode(np);
+       ret = of_get_phy_mode(np, &priv->phy_iface);
 
        /* Avoid get phy addr and create mdio if no phy is present */
-       if (!priv->phy_iface)
+       if (ret)
                return 0;
 
        /* try to get PHY address from device tree, use PHY autodetection if
index 16553d9..a3250dc 100644 (file)
@@ -133,7 +133,7 @@ static void ena_queue_stats(struct ena_adapter *adapter, u64 **data)
        u64 *ptr;
        int i, j;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                /* Tx stats */
                ring = &adapter->tx_ring[i];
 
@@ -205,7 +205,7 @@ int ena_get_sset_count(struct net_device *netdev, int sset)
        if (sset != ETH_SS_STATS)
                return -EOPNOTSUPP;
 
-       return  adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+       return  adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
                + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
 }
 
@@ -214,7 +214,7 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
        const struct ena_stats *ena_stats;
        int i, j;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                /* Tx stats */
                for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
                        ena_stats = &ena_stats_tx_strings[j];
@@ -333,7 +333,7 @@ static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter)
 
        val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev);
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                adapter->tx_ring[i].smoothed_interval = val;
 }
 
@@ -344,7 +344,7 @@ static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter)
 
        val = ena_com_get_nonadaptive_moderation_interval_rx(adapter->ena_dev);
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                adapter->rx_ring[i].smoothed_interval = val;
 }
 
@@ -612,7 +612,7 @@ static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
 
        switch (info->cmd) {
        case ETHTOOL_GRXRINGS:
-               info->data = adapter->num_queues;
+               info->data = adapter->num_io_queues;
                rc = 0;
                break;
        case ETHTOOL_GRXFH:
@@ -734,14 +734,20 @@ static void ena_get_channels(struct net_device *netdev,
 {
        struct ena_adapter *adapter = netdev_priv(netdev);
 
-       channels->max_rx = adapter->num_queues;
-       channels->max_tx = adapter->num_queues;
-       channels->max_other = 0;
-       channels->max_combined = 0;
-       channels->rx_count = adapter->num_queues;
-       channels->tx_count = adapter->num_queues;
-       channels->other_count = 0;
-       channels->combined_count = 0;
+       channels->max_combined = adapter->max_num_io_queues;
+       channels->combined_count = adapter->num_io_queues;
+}
+
+static int ena_set_channels(struct net_device *netdev,
+                           struct ethtool_channels *channels)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       u32 count = channels->combined_count;
+       /* The check for max value is already done in ethtool */
+       if (count < ENA_MIN_NUM_IO_QUEUES)
+               return -EINVAL;
+
+       return ena_update_queue_count(adapter, count);
 }
 
 static int ena_get_tunable(struct net_device *netdev,
@@ -807,6 +813,7 @@ static const struct ethtool_ops ena_ethtool_ops = {
        .get_rxfh               = ena_get_rxfh,
        .set_rxfh               = ena_set_rxfh,
        .get_channels           = ena_get_channels,
+       .set_channels           = ena_set_channels,
        .get_tunable            = ena_get_tunable,
        .set_tunable            = ena_set_tunable,
 };
index c487d2a..d46a912 100644 (file)
@@ -101,7 +101,7 @@ static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                adapter->rx_ring[i].mtu = mtu;
 }
 
@@ -129,10 +129,10 @@ static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
        u32 i;
        int rc;
 
-       adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
+       adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
        if (!adapter->netdev->rx_cpu_rmap)
                return -ENOMEM;
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                int irq_idx = ENA_IO_IRQ_IDX(i);
 
                rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
@@ -172,7 +172,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
 
        ena_dev = adapter->ena_dev;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                txr = &adapter->tx_ring[i];
                rxr = &adapter->rx_ring[i];
 
@@ -294,7 +294,7 @@ static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
 {
        int i, rc = 0;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                rc = ena_setup_tx_resources(adapter, i);
                if (rc)
                        goto err_setup_tx;
@@ -322,7 +322,7 @@ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                ena_free_tx_resources(adapter, i);
 }
 
@@ -428,7 +428,7 @@ static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
 {
        int i, rc = 0;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                rc = ena_setup_rx_resources(adapter, i);
                if (rc)
                        goto err_setup_rx;
@@ -456,7 +456,7 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                ena_free_rx_resources(adapter, i);
 }
 
@@ -600,7 +600,7 @@ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
        struct ena_ring *rx_ring;
        int i, rc, bufs_num;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                rx_ring = &adapter->rx_ring[i];
                bufs_num = rx_ring->ring_size - 1;
                rc = ena_refill_rx_bufs(rx_ring, bufs_num);
@@ -616,7 +616,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                ena_free_rx_bufs(adapter, i);
 }
 
@@ -688,7 +688,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
        struct ena_ring *tx_ring;
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                tx_ring = &adapter->tx_ring[i];
                ena_free_tx_bufs(tx_ring);
        }
@@ -699,7 +699,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
        u16 ena_qid;
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                ena_qid = ENA_IO_TXQ_IDX(i);
                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
        }
@@ -710,7 +710,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
        u16 ena_qid;
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                ena_qid = ENA_IO_RXQ_IDX(i);
                cancel_work_sync(&adapter->ena_napi[i].dim.work);
                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
@@ -1331,7 +1331,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
  * the number of potential io queues is the minimum of what the device
  * supports and the number of vCPUs.
  */
-static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
+static int ena_enable_msix(struct ena_adapter *adapter)
 {
        int msix_vecs, irq_cnt;
 
@@ -1342,7 +1342,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
        }
 
        /* Reserved the max msix vectors we might need */
-       msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
+       msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues);
        netif_dbg(adapter, probe, adapter->netdev,
                  "trying to enable MSI-X, vectors %d\n", msix_vecs);
 
@@ -1359,7 +1359,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
                netif_notice(adapter, probe, adapter->netdev,
                             "enable only %d MSI-X (out of %d), reduce the number of queues\n",
                             irq_cnt, msix_vecs);
-               adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
+               adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
        }
 
        if (ena_init_rx_cpu_rmap(adapter))
@@ -1397,7 +1397,7 @@ static void ena_setup_io_intr(struct ena_adapter *adapter)
 
        netdev = adapter->netdev;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                irq_idx = ENA_IO_IRQ_IDX(i);
                cpu = i % num_online_cpus();
 
@@ -1529,7 +1529,7 @@ static void ena_del_napi(struct ena_adapter *adapter)
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                netif_napi_del(&adapter->ena_napi[i].napi);
 }
 
@@ -1538,7 +1538,7 @@ static void ena_init_napi(struct ena_adapter *adapter)
        struct ena_napi *napi;
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                napi = &adapter->ena_napi[i];
 
                netif_napi_add(adapter->netdev,
@@ -1555,7 +1555,7 @@ static void ena_napi_disable_all(struct ena_adapter *adapter)
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                napi_disable(&adapter->ena_napi[i].napi);
 }
 
@@ -1563,7 +1563,7 @@ static void ena_napi_enable_all(struct ena_adapter *adapter)
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                napi_enable(&adapter->ena_napi[i].napi);
 }
 
@@ -1673,7 +1673,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
        struct ena_com_dev *ena_dev = adapter->ena_dev;
        int rc, i;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                rc = ena_create_io_tx_queue(adapter, i);
                if (rc)
                        goto create_err;
@@ -1741,7 +1741,7 @@ static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
        struct ena_com_dev *ena_dev = adapter->ena_dev;
        int rc, i;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                rc = ena_create_io_rx_queue(adapter, i);
                if (rc)
                        goto create_err;
@@ -1764,7 +1764,7 @@ static void set_io_rings_size(struct ena_adapter *adapter,
 {
        int i;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                adapter->tx_ring[i].ring_size = new_tx_size;
                adapter->rx_ring[i].ring_size = new_rx_size;
        }
@@ -1902,14 +1902,14 @@ static int ena_up(struct ena_adapter *adapter)
        set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 
        /* Enable completion queues interrupt */
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                ena_unmask_interrupt(&adapter->tx_ring[i],
                                     &adapter->rx_ring[i]);
 
        /* schedule napi in case we had pending packets
         * from the last time we disable napi
         */
-       for (i = 0; i < adapter->num_queues; i++)
+       for (i = 0; i < adapter->num_io_queues; i++)
                napi_schedule(&adapter->ena_napi[i].napi);
 
        return rc;
@@ -1984,13 +1984,13 @@ static int ena_open(struct net_device *netdev)
        int rc;
 
        /* Notify the stack of the actual queue counts. */
-       rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
+       rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
        if (rc) {
                netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
                return rc;
        }
 
-       rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
+       rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
        if (rc) {
                netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
                return rc;
@@ -2043,14 +2043,30 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
                           u32 new_tx_size,
                           u32 new_rx_size)
 {
-       bool dev_up;
+       bool dev_was_up;
 
-       dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+       dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
        ena_close(adapter->netdev);
        adapter->requested_tx_ring_size = new_tx_size;
        adapter->requested_rx_ring_size = new_rx_size;
        ena_init_io_rings(adapter);
-       return dev_up ? ena_up(adapter) : 0;
+       return dev_was_up ? ena_up(adapter) : 0;
+}
+
+int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
+{
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       bool dev_was_up;
+
+       dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+       ena_close(adapter->netdev);
+       adapter->num_io_queues = new_channel_count;
+       /* We need to destroy the rss table so that the indirection
+        * table will be reinitialized by ena_up()
+        */
+       ena_com_rss_destroy(ena_dev);
+       ena_init_io_rings(adapter);
+       return dev_was_up ? ena_open(adapter->netdev) : 0;
 }
 
 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
@@ -2495,7 +2511,7 @@ static void ena_get_stats64(struct net_device *netdev,
        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
                return;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                u64 bytes, packets;
 
                tx_ring = &adapter->tx_ring[i];
@@ -2682,14 +2698,13 @@ err_mmio_read_less:
        return rc;
 }
 
-static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
-                                                   int io_vectors)
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
 {
        struct ena_com_dev *ena_dev = adapter->ena_dev;
        struct device *dev = &adapter->pdev->dev;
        int rc;
 
-       rc = ena_enable_msix(adapter, io_vectors);
+       rc = ena_enable_msix(adapter);
        if (rc) {
                dev_err(dev, "Can not reserve msix vectors\n");
                return rc;
@@ -2782,8 +2797,7 @@ static int ena_restore_device(struct ena_adapter *adapter)
                goto err_device_destroy;
        }
 
-       rc = ena_enable_msix_and_set_admin_interrupts(adapter,
-                                                     adapter->num_queues);
+       rc = ena_enable_msix_and_set_admin_interrupts(adapter);
        if (rc) {
                dev_err(&pdev->dev, "Enable MSI-X failed\n");
                goto err_device_destroy;
@@ -2948,7 +2962,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
 
        budget = ENA_MONITORED_TX_QUEUES;
 
-       for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
+       for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) {
                tx_ring = &adapter->tx_ring[i];
                rx_ring = &adapter->rx_ring[i];
 
@@ -2965,7 +2979,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
                        break;
        }
 
-       adapter->last_monitored_tx_qid = i % adapter->num_queues;
+       adapter->last_monitored_tx_qid = i % adapter->num_io_queues;
 }
 
 /* trigger napi schedule after 2 consecutive detections */
@@ -2995,7 +3009,7 @@ static void check_for_empty_rx_ring(struct ena_adapter *adapter)
        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
                return;
 
-       for (i = 0; i < adapter->num_queues; i++) {
+       for (i = 0; i < adapter->num_io_queues; i++) {
                rx_ring = &adapter->rx_ring[i];
 
                refill_required =
@@ -3137,16 +3151,16 @@ static void ena_timer_service(struct timer_list *t)
        mod_timer(&adapter->timer_service, jiffies + HZ);
 }
 
-static int ena_calc_io_queue_num(struct pci_dev *pdev,
-                                struct ena_com_dev *ena_dev,
-                                struct ena_com_dev_get_features_ctx *get_feat_ctx)
+static int ena_calc_max_io_queue_num(struct pci_dev *pdev,
+                                    struct ena_com_dev *ena_dev,
+                                    struct ena_com_dev_get_features_ctx *get_feat_ctx)
 {
-       int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num;
+       int io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
 
        if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
                struct ena_admin_queue_ext_feature_fields *max_queue_ext =
                        &get_feat_ctx->max_queue_ext.max_queue_ext;
-               io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
+               io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
                                  max_queue_ext->max_rx_cq_num);
 
                io_tx_sq_num = max_queue_ext->max_tx_sq_num;
@@ -3156,25 +3170,25 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev,
                        &get_feat_ctx->max_queues;
                io_tx_sq_num = max_queues->max_sq_num;
                io_tx_cq_num = max_queues->max_cq_num;
-               io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
+               io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
        }
 
        /* In case of LLQ use the llq fields for the tx SQ/CQ */
        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
                io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
 
-       io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
-       io_queue_num = min_t(int, io_queue_num, io_rx_num);
-       io_queue_num = min_t(int, io_queue_num, io_tx_sq_num);
-       io_queue_num = min_t(int, io_queue_num, io_tx_cq_num);
+       max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
+       max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
+       max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
+       max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
        /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */
-       io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
-       if (unlikely(!io_queue_num)) {
+       max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
+       if (unlikely(!max_num_io_queues)) {
                dev_err(&pdev->dev, "The device doesn't have io queues\n");
                return -EFAULT;
        }
 
-       return io_queue_num;
+       return max_num_io_queues;
 }
 
 static int ena_set_queues_placement_policy(struct pci_dev *pdev,
@@ -3302,7 +3316,7 @@ static int ena_rss_init_default(struct ena_adapter *adapter)
        }
 
        for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
-               val = ethtool_rxfh_indir_default(i, adapter->num_queues);
+               val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
                rc = ena_com_indirect_table_fill_entry(ena_dev, i,
                                                       ENA_IO_RXQ_IDX(val));
                if (unlikely(rc && (rc != -EOPNOTSUPP))) {
@@ -3349,7 +3363,7 @@ static void set_default_llq_configurations(struct ena_llq_configurations *llq_co
        llq_config->llq_ring_entry_size_value = 128;
 }
 
-static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx)
+static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
 {
        struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
        struct ena_com_dev *ena_dev = ctx->ena_dev;
@@ -3358,7 +3372,7 @@ static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx)
        u32 max_tx_queue_size;
        u32 max_rx_queue_size;
 
-       if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+       if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
                struct ena_admin_queue_ext_feature_fields *max_queue_ext =
                        &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
                max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
@@ -3432,11 +3446,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct ena_llq_configurations llq_config;
        struct ena_com_dev *ena_dev = NULL;
        struct ena_adapter *adapter;
-       int io_queue_num, bars, rc;
        struct net_device *netdev;
        static int adapters_found;
+       u32 max_num_io_queues;
        char *queue_type_str;
        bool wd_state;
+       int bars, rc;
 
        dev_dbg(&pdev->dev, "%s\n", __func__);
 
@@ -3497,27 +3512,20 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        calc_queue_ctx.pdev = pdev;
 
        /* Initial Tx and RX interrupt delay. Assumes 1 usec granularity.
-       * Updated during device initialization with the real granularity
-       */
+        * Updated during device initialization with the real granularity
+        */
        ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
        ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
        ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
-       io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
-       rc = ena_calc_queue_size(&calc_queue_ctx);
-       if (rc || io_queue_num <= 0) {
+       max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
+       rc = ena_calc_io_queue_size(&calc_queue_ctx);
+       if (rc || !max_num_io_queues) {
                rc = -EFAULT;
                goto err_device_destroy;
        }
 
-       dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size. %d LLQ is %s\n",
-                io_queue_num,
-                calc_queue_ctx.rx_queue_size,
-                calc_queue_ctx.tx_queue_size,
-                (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ?
-                "ENABLED" : "DISABLED");
-
        /* dev zeroed in init_etherdev */
-       netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
+       netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues);
        if (!netdev) {
                dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
                rc = -ENOMEM;
@@ -3545,7 +3553,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
        adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
 
-       adapter->num_queues = io_queue_num;
+       adapter->num_io_queues = max_num_io_queues;
+       adapter->max_num_io_queues = max_num_io_queues;
+
        adapter->last_monitored_tx_qid = 0;
 
        adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
@@ -3569,7 +3579,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        u64_stats_init(&adapter->syncp);
 
-       rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
+       rc = ena_enable_msix_and_set_admin_interrupts(adapter);
        if (rc) {
                dev_err(&pdev->dev,
                        "Failed to enable and set the admin interrupts\n");
@@ -3611,9 +3621,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                queue_type_str = "Low Latency";
 
        dev_info(&pdev->dev,
-                "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n",
+                "%s found at mem %lx, mac addr %pM, Placement policy: %s\n",
                 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
-                netdev->dev_addr, io_queue_num, queue_type_str);
+                netdev->dev_addr, queue_type_str);
 
        set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
 
index 72ee51a..bffd778 100644 (file)
@@ -82,6 +82,8 @@
 #define ENA_DEFAULT_RING_SIZE  (1024)
 #define ENA_MIN_RING_SIZE      (256)
 
+#define ENA_MIN_NUM_IO_QUEUES  (1)
+
 #define ENA_TX_WAKEUP_THRESH           (MAX_SKB_FRAGS + 2)
 #define ENA_DEFAULT_RX_COPYBREAK       (256 - NET_IP_ALIGN)
 
@@ -161,10 +163,10 @@ struct ena_calc_queue_size_ctx {
        struct ena_com_dev_get_features_ctx *get_feat_ctx;
        struct ena_com_dev *ena_dev;
        struct pci_dev *pdev;
-       u16 tx_queue_size;
-       u16 rx_queue_size;
-       u16 max_tx_queue_size;
-       u16 max_rx_queue_size;
+       u32 tx_queue_size;
+       u32 rx_queue_size;
+       u32 max_tx_queue_size;
+       u32 max_rx_queue_size;
        u16 max_tx_sgl_size;
        u16 max_rx_sgl_size;
 };
@@ -324,7 +326,8 @@ struct ena_adapter {
        u32 rx_copybreak;
        u32 max_mtu;
 
-       int num_queues;
+       u32 num_io_queues;
+       u32 max_num_io_queues;
 
        int msix_vecs;
 
@@ -387,6 +390,7 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
 int ena_update_queue_sizes(struct ena_adapter *adapter,
                           u32 new_tx_size,
                           u32 new_rx_size);
+int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
 
 int ena_get_sset_count(struct net_device *netdev, int sset);
 
index 131cab8..0020726 100644 (file)
@@ -24,8 +24,11 @@ atlantic-objs := aq_main.o \
        aq_ethtool.o \
        aq_drvinfo.o \
        aq_filters.o \
+       aq_phy.o \
        hw_atl/hw_atl_a0.o \
        hw_atl/hw_atl_b0.o \
        hw_atl/hw_atl_utils.o \
        hw_atl/hw_atl_utils_fw2x.o \
        hw_atl/hw_atl_llh.o
+
+atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o
\ No newline at end of file
index 02f1b70..8c633ca 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_cfg.h: Definition of configuration parameters and constants. */
@@ -27,7 +27,7 @@
 
 #define AQ_CFG_INTERRUPT_MODERATION_USEC_MAX (0x1FF * 2)
 
-#define AQ_CFG_IRQ_MASK                      0x1FFU
+#define AQ_CFG_IRQ_MASK                      0x3FFU
 
 #define AQ_CFG_VECS_MAX   8U
 #define AQ_CFG_TCS_MAX    8U
index 24df132..1ae8aab 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_ethtool.c: Definition of ethertool related functions. */
@@ -9,8 +9,11 @@
 #include "aq_ethtool.h"
 #include "aq_nic.h"
 #include "aq_vec.h"
+#include "aq_ptp.h"
 #include "aq_filters.h"
 
+#include <linux/ptp_clock_kernel.h>
+
 static void aq_ethtool_get_regs(struct net_device *ndev,
                                struct ethtool_regs *regs, void *p)
 {
@@ -377,6 +380,35 @@ static int aq_ethtool_set_wol(struct net_device *ndev,
        return err;
 }
 
+static int aq_ethtool_get_ts_info(struct net_device *ndev,
+                                 struct ethtool_ts_info *info)
+{
+       struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+       ethtool_op_get_ts_info(ndev, info);
+
+       if (!aq_nic->aq_ptp)
+               return 0;
+
+       info->so_timestamping |=
+               SOF_TIMESTAMPING_TX_HARDWARE |
+               SOF_TIMESTAMPING_RX_HARDWARE |
+               SOF_TIMESTAMPING_RAW_HARDWARE;
+
+       info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+                        BIT(HWTSTAMP_TX_ON);
+
+       info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+
+       info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+                           BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+                           BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+       info->phc_index = ptp_clock_index(aq_ptp_get_ptp_clock(aq_nic->aq_ptp));
+
+       return 0;
+}
+
 static enum hw_atl_fw2x_rate eee_mask_to_ethtool_mask(u32 speed)
 {
        u32 rate = 0;
@@ -604,4 +636,5 @@ const struct ethtool_ops aq_ethtool_ops = {
        .set_link_ksettings  = aq_ethtool_set_link_ksettings,
        .get_coalesce        = aq_ethtool_get_coalesce,
        .set_coalesce        = aq_ethtool_set_coalesce,
+       .get_ts_info         = aq_ethtool_get_ts_info,
 };
index aee827f..6102251 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2014-2017 aQuantia Corporation. */
+/* Copyright (C) 2014-2019 aQuantia Corporation. */
 
 /* File aq_filters.c: RX filters related functions. */
 
@@ -89,12 +89,14 @@ static int aq_check_approve_fl3l4(struct aq_nic_s *aq_nic,
                                  struct aq_hw_rx_fltrs_s *rx_fltrs,
                                  struct ethtool_rx_flow_spec *fsp)
 {
+       u32 last_location = AQ_RX_LAST_LOC_FL3L4 -
+                           aq_nic->aq_hw_rx_fltrs.fl3l4.reserved_count;
+
        if (fsp->location < AQ_RX_FIRST_LOC_FL3L4 ||
-           fsp->location > AQ_RX_LAST_LOC_FL3L4) {
+           fsp->location > last_location) {
                netdev_err(aq_nic->ndev,
                           "ethtool: location must be in range [%d, %d]",
-                          AQ_RX_FIRST_LOC_FL3L4,
-                          AQ_RX_LAST_LOC_FL3L4);
+                          AQ_RX_FIRST_LOC_FL3L4, last_location);
                return -EINVAL;
        }
        if (rx_fltrs->fl3l4.is_ipv6 && rx_fltrs->fl3l4.active_ipv4) {
@@ -124,12 +126,15 @@ aq_check_approve_fl2(struct aq_nic_s *aq_nic,
                     struct aq_hw_rx_fltrs_s *rx_fltrs,
                     struct ethtool_rx_flow_spec *fsp)
 {
+       u32 last_location = AQ_RX_LAST_LOC_FETHERT -
+                           aq_nic->aq_hw_rx_fltrs.fet_reserved_count;
+
        if (fsp->location < AQ_RX_FIRST_LOC_FETHERT ||
-           fsp->location > AQ_RX_LAST_LOC_FETHERT) {
+           fsp->location > last_location) {
                netdev_err(aq_nic->ndev,
                           "ethtool: location must be in range [%d, %d]",
                           AQ_RX_FIRST_LOC_FETHERT,
-                          AQ_RX_LAST_LOC_FETHERT);
+                          last_location);
                return -EINVAL;
        }
 
index 53d7478..5246cf4 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_hw.h: Declaration of abstract interface for NIC hardware specific
@@ -15,6 +15,9 @@
 #include "aq_rss.h"
 #include "hw_atl/hw_atl_utils.h"
 
+#define AQ_HW_MAC_COUNTER_HZ   312500000ll
+#define AQ_HW_PHY_COUNTER_HZ   160000000ll
+
 #define AQ_RX_FIRST_LOC_FVLANID     0U
 #define AQ_RX_LAST_LOC_FVLANID    15U
 #define AQ_RX_FIRST_LOC_FETHERT    16U
@@ -94,6 +97,7 @@ struct aq_stats_s {
 #define AQ_HW_FLAG_STOPPING    0x00000008U
 #define AQ_HW_FLAG_RESETTING   0x00000010U
 #define AQ_HW_FLAG_CLOSING     0x00000020U
+#define AQ_HW_PTP_AVAILABLE    0x01000000U
 #define AQ_HW_LINK_DOWN        0x04000000U
 #define AQ_HW_FLAG_ERR_UNPLUG  0x40000000U
 #define AQ_HW_FLAG_ERR_HW      0x80000000U
@@ -135,6 +139,8 @@ struct aq_hw_s {
        u32 rpc_addr;
        u32 rpc_tid;
        struct hw_atl_utils_fw_rpc rpc;
+       s64 ptp_clk_offset;
+       u16 phy_id;
 };
 
 struct aq_ring_s;
@@ -235,6 +241,40 @@ struct aq_hw_ops {
        int (*hw_set_offload)(struct aq_hw_s *self,
                              struct aq_nic_cfg_s *aq_nic_cfg);
 
+       int (*hw_tx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode);
+
+       int (*hw_rx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode);
+
+       int (*hw_ring_hwts_rx_fill)(struct aq_hw_s *self,
+                                   struct aq_ring_s *aq_ring);
+
+       int (*hw_ring_hwts_rx_receive)(struct aq_hw_s *self,
+                                      struct aq_ring_s *ring);
+
+       void (*hw_get_ptp_ts)(struct aq_hw_s *self, u64 *stamp);
+
+       int (*hw_adj_clock_freq)(struct aq_hw_s *self, s32 delta);
+
+       int (*hw_adj_sys_clock)(struct aq_hw_s *self, s64 delta);
+
+       int (*hw_set_sys_clock)(struct aq_hw_s *self, u64 time, u64 ts);
+
+       int (*hw_ts_to_sys_clock)(struct aq_hw_s *self, u64 ts, u64 *time);
+
+       int (*hw_gpio_pulse)(struct aq_hw_s *self, u32 index, u64 start,
+                            u32 period);
+
+       int (*hw_extts_gpio_enable)(struct aq_hw_s *self, u32 index,
+                                   u32 enable);
+
+       int (*hw_get_sync_ts)(struct aq_hw_s *self, u64 *ts);
+
+       u16 (*rx_extract_ts)(struct aq_hw_s *self, u8 *p, unsigned int len,
+                            u64 *timestamp);
+
+       int (*extract_hwts)(struct aq_hw_s *self, u8 *p, unsigned int len,
+                           u64 *timestamp);
+
        int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc);
 };
 
@@ -267,6 +307,12 @@ struct aq_fw_ops {
        int (*set_power)(struct aq_hw_s *self, unsigned int power_state,
                         u8 *mac);
 
+       int (*send_fw_request)(struct aq_hw_s *self,
+                              const struct hw_fw_request_iface *fw_req,
+                              size_t size);
+
+       void (*enable_ptp)(struct aq_hw_s *self, int enable);
+
        int (*set_eee_rate)(struct aq_hw_s *self, u32 speed);
 
        int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate,
index bb65dd3..a26d4a6 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_main.c: Main file for aQuantia Linux driver. */
 #include "aq_nic.h"
 #include "aq_pci_func.h"
 #include "aq_ethtool.h"
+#include "aq_ptp.h"
 #include "aq_filters.h"
 
 #include <linux/netdevice.h>
 #include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
 
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION(AQ_CFG_DRV_VERSION);
@@ -93,6 +96,24 @@ static int aq_ndev_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct aq_nic_s *aq_nic = netdev_priv(ndev);
 
+       if (unlikely(aq_utils_obj_test(&aq_nic->flags, AQ_NIC_PTP_DPATH_UP))) {
+               /* Hardware adds the Timestamp for PTPv2 802.AS1
+                * and PTPv2 IPv4 UDP.
+                * We have to push even general 320 port messages to the ptp
+                * queue explicitly. This is a limitation of current firmware
+                * and hardware PTP design of the chip. Otherwise ptp stream
+                * will fail to sync
+                */
+               if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ||
+                   unlikely((ip_hdr(skb)->version == 4) &&
+                            (ip_hdr(skb)->protocol == IPPROTO_UDP) &&
+                            ((udp_hdr(skb)->dest == htons(319)) ||
+                             (udp_hdr(skb)->dest == htons(320)))) ||
+                   unlikely(eth_hdr(skb)->h_proto == htons(ETH_P_1588)))
+                       return aq_ptp_xmit(aq_nic, skb);
+       }
+
+       skb_tx_timestamp(skb);
        return aq_nic_xmit(aq_nic, skb);
 }
 
@@ -197,6 +218,87 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev)
        (void)aq_nic_set_multicast_list(aq_nic, ndev);
 }
 
+static int aq_ndev_config_hwtstamp(struct aq_nic_s *aq_nic,
+                                  struct hwtstamp_config *config)
+{
+       if (config->flags)
+               return -EINVAL;
+
+       switch (config->tx_type) {
+       case HWTSTAMP_TX_OFF:
+       case HWTSTAMP_TX_ON:
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       switch (config->rx_filter) {
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+               config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_NONE:
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       return aq_ptp_hwtstamp_config_set(aq_nic->aq_ptp, config);
+}
+
+static int aq_ndev_hwtstamp_set(struct aq_nic_s *aq_nic, struct ifreq *ifr)
+{
+       struct hwtstamp_config config;
+       int ret_val;
+
+       if (!aq_nic->aq_ptp)
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+               return -EFAULT;
+
+       ret_val = aq_ndev_config_hwtstamp(aq_nic, &config);
+       if (ret_val)
+               return ret_val;
+
+       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+              -EFAULT : 0;
+}
+
+static int aq_ndev_hwtstamp_get(struct aq_nic_s *aq_nic, struct ifreq *ifr)
+{
+       struct hwtstamp_config config;
+
+       if (!aq_nic->aq_ptp)
+               return -EOPNOTSUPP;
+
+       aq_ptp_hwtstamp_config_get(aq_nic->aq_ptp, &config);
+       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+              -EFAULT : 0;
+}
+
+static int aq_ndev_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+       struct aq_nic_s *aq_nic = netdev_priv(netdev);
+
+       switch (cmd) {
+       case SIOCSHWTSTAMP:
+               return aq_ndev_hwtstamp_set(aq_nic, ifr);
+
+       case SIOCGHWTSTAMP:
+               return aq_ndev_hwtstamp_get(aq_nic, ifr);
+       }
+
+       return -EOPNOTSUPP;
+}
+
 static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto,
                                  u16 vid)
 {
@@ -234,6 +336,7 @@ static const struct net_device_ops aq_ndev_ops = {
        .ndo_change_mtu = aq_ndev_change_mtu,
        .ndo_set_mac_address = aq_ndev_set_mac_address,
        .ndo_set_features = aq_ndev_set_features,
+       .ndo_do_ioctl = aq_ndev_ioctl,
        .ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid,
 };
index 137c1de..433adc0 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_nic.c: Definition of common code for NIC. */
@@ -12,6 +12,9 @@
 #include "aq_hw.h"
 #include "aq_pci_func.h"
 #include "aq_main.h"
+#include "aq_phy.h"
+#include "aq_ptp.h"
+#include "aq_filters.h"
 
 #include <linux/moduleparam.h>
 #include <linux/netdevice.h>
@@ -145,6 +148,13 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
                        self->aq_hw->aq_link_status.mbps);
                aq_nic_update_interrupt_moderation_settings(self);
 
+               if (self->aq_ptp) {
+                       aq_ptp_clock_init(self);
+                       aq_ptp_tm_offset_set(self,
+                                            self->aq_hw->aq_link_status.mbps);
+                       aq_ptp_link_change(self);
+               }
+
                /* Driver has to update flow control settings on RX block
                 * on any link event.
                 * We should query FW whether it negotiated FC.
@@ -192,6 +202,8 @@ static void aq_nic_service_task(struct work_struct *work)
                                             service_task);
        int err;
 
+       aq_ptp_service_task(self);
+
        if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAGS_IS_NOT_READY))
                return;
 
@@ -327,10 +339,27 @@ int aq_nic_init(struct aq_nic_s *self)
        if (err < 0)
                goto err_exit;
 
+       if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_TP) {
+               self->aq_hw->phy_id = HW_ATL_PHY_ID_MAX;
+               err = aq_phy_init(self->aq_hw);
+       }
+
        for (i = 0U, aq_vec = self->aq_vec[0];
                self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
                aq_vec_init(aq_vec, self->aq_hw_ops, self->aq_hw);
 
+       err = aq_ptp_init(self, self->irqvecs - 1);
+       if (err < 0)
+               goto err_exit;
+
+       err = aq_ptp_ring_alloc(self);
+       if (err < 0)
+               goto err_exit;
+
+       err = aq_ptp_ring_init(self);
+       if (err < 0)
+               goto err_exit;
+
        netif_carrier_off(self->ndev);
 
 err_exit:
@@ -361,6 +390,10 @@ int aq_nic_start(struct aq_nic_s *self)
                        goto err_exit;
        }
 
+       err = aq_ptp_ring_start(self);
+       if (err < 0)
+               goto err_exit;
+
        err = self->aq_hw_ops->hw_start(self->aq_hw);
        if (err < 0)
                goto err_exit;
@@ -388,6 +421,10 @@ int aq_nic_start(struct aq_nic_s *self)
                                goto err_exit;
                }
 
+               err = aq_ptp_irq_alloc(self);
+               if (err < 0)
+                       goto err_exit;
+
                if (self->aq_nic_cfg.link_irq_vec) {
                        int irqvec = pci_irq_vector(self->pdev,
                                                   self->aq_nic_cfg.link_irq_vec);
@@ -420,9 +457,8 @@ err_exit:
        return err;
 }
 
-static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
-                                  struct sk_buff *skb,
-                                  struct aq_ring_s *ring)
+unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
+                           struct aq_ring_s *ring)
 {
        unsigned int ret = 0U;
        unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
@@ -953,10 +989,14 @@ int aq_nic_stop(struct aq_nic_s *self)
        else
                aq_pci_func_free_irqs(self);
 
+       aq_ptp_irq_free(self);
+
        for (i = 0U, aq_vec = self->aq_vec[0];
                self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
                aq_vec_stop(aq_vec);
 
+       aq_ptp_ring_stop(self);
+
        return self->aq_hw_ops->hw_stop(self->aq_hw);
 }
 
@@ -972,6 +1012,11 @@ void aq_nic_deinit(struct aq_nic_s *self)
                self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
                aq_vec_deinit(aq_vec);
 
+       aq_ptp_unregister(self);
+       aq_ptp_ring_deinit(self);
+       aq_ptp_ring_free(self);
+       aq_ptp_free(self);
+
        if (likely(self->aq_fw_ops->deinit)) {
                mutex_lock(&self->fwreq_mutex);
                self->aq_fw_ops->deinit(self->aq_hw);
@@ -1068,3 +1113,46 @@ void aq_nic_shutdown(struct aq_nic_s *self)
 err_exit:
        rtnl_unlock();
 }
+
+u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type)
+{
+       u8 location = 0xFF;
+       u32 fltr_cnt;
+       u32 n_bit;
+
+       switch (type) {
+       case aq_rx_filter_ethertype:
+               location = AQ_RX_LAST_LOC_FETHERT - AQ_RX_FIRST_LOC_FETHERT -
+                          self->aq_hw_rx_fltrs.fet_reserved_count;
+               self->aq_hw_rx_fltrs.fet_reserved_count++;
+               break;
+       case aq_rx_filter_l3l4:
+               fltr_cnt = AQ_RX_LAST_LOC_FL3L4 - AQ_RX_FIRST_LOC_FL3L4;
+               n_bit = fltr_cnt - self->aq_hw_rx_fltrs.fl3l4.reserved_count;
+
+               self->aq_hw_rx_fltrs.fl3l4.active_ipv4 |= BIT(n_bit);
+               self->aq_hw_rx_fltrs.fl3l4.reserved_count++;
+               location = n_bit;
+               break;
+       default:
+               break;
+       }
+
+       return location;
+}
+
+void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
+                          u32 location)
+{
+       switch (type) {
+       case aq_rx_filter_ethertype:
+               self->aq_hw_rx_fltrs.fet_reserved_count--;
+               break;
+       case aq_rx_filter_l3l4:
+               self->aq_hw_rx_fltrs.fl3l4.reserved_count--;
+               self->aq_hw_rx_fltrs.fl3l4.active_ipv4 &= ~BIT(location);
+               break;
+       default:
+               break;
+       }
+}
index 255b54a..c2513b7 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_nic.h: Declaration of common code for NIC. */
@@ -17,6 +17,8 @@ struct aq_ring_s;
 struct aq_hw_ops;
 struct aq_fw_s;
 struct aq_vec_s;
+struct aq_ptp_s;
+enum aq_rx_filter_type;
 
 struct aq_nic_cfg_s {
        const struct aq_hw_caps_s *aq_hw_caps;
@@ -53,6 +55,7 @@ struct aq_nic_cfg_s {
 #define AQ_NIC_FLAG_STOPPING    0x00000008U
 #define AQ_NIC_FLAG_RESETTING   0x00000010U
 #define AQ_NIC_FLAG_CLOSING     0x00000020U
+#define AQ_NIC_PTP_DPATH_UP     0x02000000U
 #define AQ_NIC_LINK_DOWN        0x04000000U
 #define AQ_NIC_FLAG_ERR_UNPLUG  0x40000000U
 #define AQ_NIC_FLAG_ERR_HW      0x80000000U
@@ -70,6 +73,7 @@ struct aq_hw_rx_fl3l4 {
        u8   active_ipv4;
        u8   active_ipv6:2;
        u8 is_ipv6;
+       u8 reserved_count;
 };
 
 struct aq_hw_rx_fltrs_s {
@@ -77,6 +81,8 @@ struct aq_hw_rx_fltrs_s {
        u16                   active_filters;
        struct aq_hw_rx_fl2   fl2;
        struct aq_hw_rx_fl3l4 fl3l4;
+       /*filter ether type */
+       u8 fet_reserved_count;
 };
 
 struct aq_nic_s {
@@ -108,6 +114,8 @@ struct aq_nic_s {
        u32 irqvecs;
        /* mutex to serialize FW interface access operations */
        struct mutex fwreq_mutex;
+       /* PTP support */
+       struct aq_ptp_s *aq_ptp;
        struct aq_hw_rx_fltrs_s aq_hw_rx_fltrs;
 };
 
@@ -126,6 +134,8 @@ void aq_nic_cfg_start(struct aq_nic_s *self);
 int aq_nic_ndev_register(struct aq_nic_s *self);
 void aq_nic_ndev_free(struct aq_nic_s *self);
 int aq_nic_start(struct aq_nic_s *self);
+unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
+                           struct aq_ring_s *ring);
 int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb);
 int aq_nic_get_regs(struct aq_nic_s *self, struct ethtool_regs *regs, void *p);
 int aq_nic_get_regs_count(struct aq_nic_s *self);
@@ -148,5 +158,7 @@ u32 aq_nic_get_fw_version(struct aq_nic_s *self);
 int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg);
 int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self);
 void aq_nic_shutdown(struct aq_nic_s *self);
-
+u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type);
+void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
+                          u32 location);
 #endif /* AQ_NIC_H */
index 74b9f3f..e82c96b 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_pci_func.c: Definition of PCI functions. */
@@ -269,6 +269,9 @@ static int aq_pci_probe(struct pci_dev *pdev,
        numvecs = min((u8)AQ_CFG_VECS_DEF,
                      aq_nic_get_cfg(self)->aq_hw_caps->msix_irqs);
        numvecs = min(numvecs, num_online_cpus());
+       /* Request IRQ vector for PTP */
+       numvecs += 1;
+
        numvecs += AQ_HW_SERVICE_IRQS;
        /*enable interrupts */
 #if !AQ_CFG_FORCE_LEGACY_INT
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_phy.c b/drivers/net/ethernet/aquantia/atlantic/aq_phy.c
new file mode 100644 (file)
index 0000000..51ae921
--- /dev/null
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* aQuantia Corporation Network Driver
+ * Copyright (C) 2018-2019 aQuantia Corporation. All rights reserved
+ */
+
+#include "aq_phy.h"
+
+bool aq_mdio_busy_wait(struct aq_hw_s *aq_hw)
+{
+       int err = 0;
+       u32 val;
+
+       err = readx_poll_timeout_atomic(hw_atl_mdio_busy_get, aq_hw,
+                                       val, val == 0U, 10U, 100000U);
+
+       if (err < 0)
+               return false;
+
+       return true;
+}
+
+u16 aq_mdio_read_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr)
+{
+       u16 phy_addr = aq_hw->phy_id << 5 | mmd;
+
+       /* Set Address register. */
+       hw_atl_glb_mdio_iface4_set(aq_hw, (addr & HW_ATL_MDIO_ADDRESS_MSK) <<
+                                  HW_ATL_MDIO_ADDRESS_SHIFT);
+       /* Send Address command. */
+       hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+                                  (3 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+                                  ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+                                   HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+
+       aq_mdio_busy_wait(aq_hw);
+
+       /* Send Read command. */
+       hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+                                  (1 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+                                  ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+                                   HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+       /* Read result. */
+       aq_mdio_busy_wait(aq_hw);
+
+       return (u16)hw_atl_glb_mdio_iface5_get(aq_hw);
+}
+
+void aq_mdio_write_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr, u16 data)
+{
+       u16 phy_addr = aq_hw->phy_id << 5 | mmd;
+
+       /* Set Address register. */
+       hw_atl_glb_mdio_iface4_set(aq_hw, (addr & HW_ATL_MDIO_ADDRESS_MSK) <<
+                                  HW_ATL_MDIO_ADDRESS_SHIFT);
+       /* Send Address command. */
+       hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+                                  (3 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+                                  ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+                                   HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+
+       aq_mdio_busy_wait(aq_hw);
+
+       hw_atl_glb_mdio_iface3_set(aq_hw, (data & HW_ATL_MDIO_WRITE_DATA_MSK) <<
+                                  HW_ATL_MDIO_WRITE_DATA_SHIFT);
+       /* Send Write command. */
+       hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+                                  (2 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+                                  ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+                                   HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+
+       aq_mdio_busy_wait(aq_hw);
+}
+
+u16 aq_phy_read_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address)
+{
+       int err = 0;
+       u32 val;
+
+       err = readx_poll_timeout_atomic(hw_atl_sem_mdio_get, aq_hw,
+                                       val, val == 1U, 10U, 100000U);
+
+       if (err < 0) {
+               err = 0xffff;
+               goto err_exit;
+       }
+
+       err = aq_mdio_read_word(aq_hw, mmd, address);
+
+       hw_atl_reg_glb_cpu_sem_set(aq_hw, 1U, HW_ATL_FW_SM_MDIO);
+
+err_exit:
+       return err;
+}
+
+void aq_phy_write_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address, u16 data)
+{
+       int err = 0;
+       u32 val;
+
+       err = readx_poll_timeout_atomic(hw_atl_sem_mdio_get, aq_hw,
+                                       val, val == 1U, 10U, 100000U);
+       if (err < 0)
+               return;
+
+       aq_mdio_write_word(aq_hw, mmd, address, data);
+       hw_atl_reg_glb_cpu_sem_set(aq_hw, 1U, HW_ATL_FW_SM_MDIO);
+}
+
+bool aq_phy_init_phy_id(struct aq_hw_s *aq_hw)
+{
+       u16 val;
+
+       for (aq_hw->phy_id = 0; aq_hw->phy_id < HW_ATL_PHY_ID_MAX;
+            ++aq_hw->phy_id) {
+               /* PMA Standard Device Identifier 2: Address 1.3 */
+               val = aq_phy_read_reg(aq_hw, MDIO_MMD_PMAPMD, 3);
+
+               if (val != 0xffff)
+                       return true;
+       }
+
+       return false;
+}
+
+bool aq_phy_init(struct aq_hw_s *aq_hw)
+{
+       u32 dev_id;
+
+       if (aq_hw->phy_id == HW_ATL_PHY_ID_MAX)
+               if (!aq_phy_init_phy_id(aq_hw))
+                       return false;
+
+       /* PMA Standard Device Identifier:
+        * Address 1.2 = MSW,
+        * Address 1.3 = LSW
+        */
+       dev_id = aq_phy_read_reg(aq_hw, MDIO_MMD_PMAPMD, 2);
+       dev_id <<= 16;
+       dev_id |= aq_phy_read_reg(aq_hw, MDIO_MMD_PMAPMD, 3);
+
+       if (dev_id == 0xffffffff) {
+               aq_hw->phy_id = HW_ATL_PHY_ID_MAX;
+               return false;
+       }
+
+       return true;
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_phy.h b/drivers/net/ethernet/aquantia/atlantic/aq_phy.h
new file mode 100644 (file)
index 0000000..84b72ad
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* aQuantia Corporation Network Driver
+ * Copyright (C) 2018-2019 aQuantia Corporation. All rights reserved
+ */
+
+#ifndef AQ_PHY_H
+#define AQ_PHY_H
+
+#include <linux/mdio.h>
+
+#include "hw_atl/hw_atl_llh.h"
+#include "hw_atl/hw_atl_llh_internal.h"
+#include "aq_hw_utils.h"
+#include "aq_hw.h"
+
+#define HW_ATL_PHY_ID_MAX 32U
+
+bool aq_mdio_busy_wait(struct aq_hw_s *aq_hw);
+
+u16 aq_mdio_read_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr);
+
+void aq_mdio_write_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr, u16 data);
+
+u16 aq_phy_read_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address);
+
+void aq_phy_write_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address, u16 data);
+
+bool aq_phy_init_phy_id(struct aq_hw_s *aq_hw);
+
+bool aq_phy_init(struct aq_hw_s *aq_hw);
+
+#endif /* AQ_PHY_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
new file mode 100644 (file)
index 0000000..8175513
--- /dev/null
@@ -0,0 +1,1392 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Aquantia Corporation Network Driver
+ * Copyright (C) 2014-2019 Aquantia Corporation. All rights reserved
+ */
+
+/* File aq_ptp.c:
+ * Definition of functions for Linux PTP support.
+ */
+
+#include <linux/ptp_clock_kernel.h>
+#include <linux/ptp_classify.h>
+#include <linux/interrupt.h>
+#include <linux/clocksource.h>
+
+#include "aq_nic.h"
+#include "aq_ptp.h"
+#include "aq_ring.h"
+#include "aq_phy.h"
+#include "aq_filters.h"
+
+#define AQ_PTP_TX_TIMEOUT        (HZ *  10)
+
+#define POLL_SYNC_TIMER_MS 15
+
+enum ptp_speed_offsets {
+       ptp_offset_idx_10 = 0,
+       ptp_offset_idx_100,
+       ptp_offset_idx_1000,
+       ptp_offset_idx_2500,
+       ptp_offset_idx_5000,
+       ptp_offset_idx_10000,
+};
+
+struct ptp_skb_ring {
+       struct sk_buff **buff;
+       spinlock_t lock;
+       unsigned int size;
+       unsigned int head;
+       unsigned int tail;
+};
+
+struct ptp_tx_timeout {
+       spinlock_t lock;
+       bool active;
+       unsigned long tx_start;
+};
+
+struct aq_ptp_s {
+       struct aq_nic_s *aq_nic;
+       struct hwtstamp_config hwtstamp_config;
+       spinlock_t ptp_lock;
+       spinlock_t ptp_ring_lock;
+       struct ptp_clock *ptp_clock;
+       struct ptp_clock_info ptp_info;
+
+       atomic_t offset_egress;
+       atomic_t offset_ingress;
+
+       struct aq_ring_param_s ptp_ring_param;
+
+       struct ptp_tx_timeout ptp_tx_timeout;
+
+       unsigned int idx_vector;
+       struct napi_struct napi;
+
+       struct aq_ring_s ptp_tx;
+       struct aq_ring_s ptp_rx;
+       struct aq_ring_s hwts_rx;
+
+       struct ptp_skb_ring skb_ring;
+
+       struct aq_rx_filter_l3l4 udp_filter;
+       struct aq_rx_filter_l2 eth_type_filter;
+
+       struct delayed_work poll_sync;
+       u32 poll_timeout_ms;
+
+       bool extts_pin_enabled;
+       u64 last_sync1588_ts;
+};
+
+struct ptp_tm_offset {
+       unsigned int mbps;
+       int egress;
+       int ingress;
+};
+
+static struct ptp_tm_offset ptp_offset[6];
+
+void aq_ptp_tm_offset_set(struct aq_nic_s *aq_nic, unsigned int mbps)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       int i, egress, ingress;
+
+       if (!aq_ptp)
+               return;
+
+       egress = 0;
+       ingress = 0;
+
+       for (i = 0; i < ARRAY_SIZE(ptp_offset); i++) {
+               if (mbps == ptp_offset[i].mbps) {
+                       egress = ptp_offset[i].egress;
+                       ingress = ptp_offset[i].ingress;
+                       break;
+               }
+       }
+
+       atomic_set(&aq_ptp->offset_egress, egress);
+       atomic_set(&aq_ptp->offset_ingress, ingress);
+}
+
+static int __aq_ptp_skb_put(struct ptp_skb_ring *ring, struct sk_buff *skb)
+{
+       unsigned int next_head = (ring->head + 1) % ring->size;
+
+       if (next_head == ring->tail)
+               return -ENOMEM;
+
+       ring->buff[ring->head] = skb_get(skb);
+       ring->head = next_head;
+
+       return 0;
+}
+
+static int aq_ptp_skb_put(struct ptp_skb_ring *ring, struct sk_buff *skb)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&ring->lock, flags);
+       ret = __aq_ptp_skb_put(ring, skb);
+       spin_unlock_irqrestore(&ring->lock, flags);
+
+       return ret;
+}
+
+static struct sk_buff *__aq_ptp_skb_get(struct ptp_skb_ring *ring)
+{
+       struct sk_buff *skb;
+
+       if (ring->tail == ring->head)
+               return NULL;
+
+       skb = ring->buff[ring->tail];
+       ring->tail = (ring->tail + 1) % ring->size;
+
+       return skb;
+}
+
+static struct sk_buff *aq_ptp_skb_get(struct ptp_skb_ring *ring)
+{
+       unsigned long flags;
+       struct sk_buff *skb;
+
+       spin_lock_irqsave(&ring->lock, flags);
+       skb = __aq_ptp_skb_get(ring);
+       spin_unlock_irqrestore(&ring->lock, flags);
+
+       return skb;
+}
+
+static unsigned int aq_ptp_skb_buf_len(struct ptp_skb_ring *ring)
+{
+       unsigned long flags;
+       unsigned int len;
+
+       spin_lock_irqsave(&ring->lock, flags);
+       len = (ring->head >= ring->tail) ?
+       ring->head - ring->tail :
+       ring->size - ring->tail + ring->head;
+       spin_unlock_irqrestore(&ring->lock, flags);
+
+       return len;
+}
+
+static int aq_ptp_skb_ring_init(struct ptp_skb_ring *ring, unsigned int size)
+{
+       struct sk_buff **buff = kmalloc(sizeof(*buff) * size, GFP_KERNEL);
+
+       if (!buff)
+               return -ENOMEM;
+
+       spin_lock_init(&ring->lock);
+
+       ring->buff = buff;
+       ring->size = size;
+       ring->head = 0;
+       ring->tail = 0;
+
+       return 0;
+}
+
+static void aq_ptp_skb_ring_clean(struct ptp_skb_ring *ring)
+{
+       struct sk_buff *skb;
+
+       while ((skb = aq_ptp_skb_get(ring)) != NULL)
+               dev_kfree_skb_any(skb);
+}
+
+static void aq_ptp_skb_ring_release(struct ptp_skb_ring *ring)
+{
+       if (ring->buff) {
+               aq_ptp_skb_ring_clean(ring);
+               kfree(ring->buff);
+               ring->buff = NULL;
+       }
+}
+
+static void aq_ptp_tx_timeout_init(struct ptp_tx_timeout *timeout)
+{
+       spin_lock_init(&timeout->lock);
+       timeout->active = false;
+}
+
+static void aq_ptp_tx_timeout_start(struct aq_ptp_s *aq_ptp)
+{
+       struct ptp_tx_timeout *timeout = &aq_ptp->ptp_tx_timeout;
+       unsigned long flags;
+
+       spin_lock_irqsave(&timeout->lock, flags);
+       timeout->active = true;
+       timeout->tx_start = jiffies;
+       spin_unlock_irqrestore(&timeout->lock, flags);
+}
+
+static void aq_ptp_tx_timeout_update(struct aq_ptp_s *aq_ptp)
+{
+       if (!aq_ptp_skb_buf_len(&aq_ptp->skb_ring)) {
+               struct ptp_tx_timeout *timeout = &aq_ptp->ptp_tx_timeout;
+               unsigned long flags;
+
+               spin_lock_irqsave(&timeout->lock, flags);
+               timeout->active = false;
+               spin_unlock_irqrestore(&timeout->lock, flags);
+       }
+}
+
+static void aq_ptp_tx_timeout_check(struct aq_ptp_s *aq_ptp)
+{
+       struct ptp_tx_timeout *timeout = &aq_ptp->ptp_tx_timeout;
+       unsigned long flags;
+       bool timeout_flag;
+
+       timeout_flag = false;
+
+       spin_lock_irqsave(&timeout->lock, flags);
+       if (timeout->active) {
+               timeout_flag = time_is_before_jiffies(timeout->tx_start +
+                                                     AQ_PTP_TX_TIMEOUT);
+               /* reset active flag if timeout detected */
+               if (timeout_flag)
+                       timeout->active = false;
+       }
+       spin_unlock_irqrestore(&timeout->lock, flags);
+
+       if (timeout_flag) {
+               aq_ptp_skb_ring_clean(&aq_ptp->skb_ring);
+               netdev_err(aq_ptp->aq_nic->ndev,
+                          "PTP Timeout. Clearing Tx Timestamp SKBs\n");
+       }
+}
+
+/* aq_ptp_adjfine
+ * @ptp: the ptp clock structure
+ * @scaled_ppm: parts per million adjustment with a 16-bit binary fraction
+ *
+ * adjust the frequency of the ptp cycle counter by the
+ * ppb value derived from scaled_ppm (via scaled_ppm_to_ppb()).
+ */
+static int aq_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+       struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+
+       mutex_lock(&aq_nic->fwreq_mutex);
+       aq_nic->aq_hw_ops->hw_adj_clock_freq(aq_nic->aq_hw,
+                                            scaled_ppm_to_ppb(scaled_ppm));
+       mutex_unlock(&aq_nic->fwreq_mutex);
+
+       return 0;
+}
+
+/* aq_ptp_adjtime
+ * @ptp: the ptp clock structure
+ * @delta: offset to adjust the cycle counter by
+ *
+ * adjust the timer by resetting the timecounter structure.
+ */
+static int aq_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+       struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       unsigned long flags;
+
+       spin_lock_irqsave(&aq_ptp->ptp_lock, flags);
+       aq_nic->aq_hw_ops->hw_adj_sys_clock(aq_nic->aq_hw, delta);
+       spin_unlock_irqrestore(&aq_ptp->ptp_lock, flags);
+
+       return 0;
+}
+
+/* aq_ptp_gettime
+ * @ptp: the ptp clock structure
+ * @ts: timespec structure to hold the current time value
+ *
+ * read the timecounter and return the correct value on ns,
+ * after converting it into a struct timespec.
+ */
+static int aq_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+       struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       unsigned long flags;
+       u64 ns;
+
+       spin_lock_irqsave(&aq_ptp->ptp_lock, flags);
+       aq_nic->aq_hw_ops->hw_get_ptp_ts(aq_nic->aq_hw, &ns);
+       spin_unlock_irqrestore(&aq_ptp->ptp_lock, flags);
+
+       *ts = ns_to_timespec64(ns);
+
+       return 0;
+}
+
+/* aq_ptp_settime
+ * @ptp: the ptp clock structure
+ * @ts: the timespec containing the new time for the cycle counter
+ *
+ * reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ */
+static int aq_ptp_settime(struct ptp_clock_info *ptp,
+                         const struct timespec64 *ts)
+{
+       struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       unsigned long flags;
+       u64 ns = timespec64_to_ns(ts);
+       u64 now;
+
+       spin_lock_irqsave(&aq_ptp->ptp_lock, flags);
+       aq_nic->aq_hw_ops->hw_get_ptp_ts(aq_nic->aq_hw, &now);
+       aq_nic->aq_hw_ops->hw_adj_sys_clock(aq_nic->aq_hw, (s64)ns - (s64)now);
+
+       spin_unlock_irqrestore(&aq_ptp->ptp_lock, flags);
+
+       return 0;
+}
+
+static void aq_ptp_convert_to_hwtstamp(struct aq_ptp_s *aq_ptp,
+                                      struct skb_shared_hwtstamps *hwtstamp,
+                                      u64 timestamp)
+{
+       memset(hwtstamp, 0, sizeof(*hwtstamp));
+       hwtstamp->hwtstamp = ns_to_ktime(timestamp);
+}
+
+static int aq_ptp_hw_pin_conf(struct aq_nic_s *aq_nic, u32 pin_index, u64 start,
+                             u64 period)
+{
+       if (period)
+               netdev_dbg(aq_nic->ndev,
+                          "Enable GPIO %d pulsing, start time %llu, period %u\n",
+                          pin_index, start, (u32)period);
+       else
+               netdev_dbg(aq_nic->ndev,
+                          "Disable GPIO %d pulsing, start time %llu, period %u\n",
+                          pin_index, start, (u32)period);
+
+       /* Notify hardware of the request to begin sending pulses.
+        * If period is ZERO then pulsing is disabled.
+        */
+       mutex_lock(&aq_nic->fwreq_mutex);
+       aq_nic->aq_hw_ops->hw_gpio_pulse(aq_nic->aq_hw, pin_index,
+                                        start, (u32)period);
+       mutex_unlock(&aq_nic->fwreq_mutex);
+
+       return 0;
+}
+
+static int aq_ptp_perout_pin_configure(struct ptp_clock_info *ptp,
+                                      struct ptp_clock_request *rq, int on)
+{
+       struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+       struct ptp_clock_time *t = &rq->perout.period;
+       struct ptp_clock_time *s = &rq->perout.start;
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       u64 start, period;
+       u32 pin_index = rq->perout.index;
+
+       /* verify the request channel is there */
+       if (pin_index >= ptp->n_per_out)
+               return -EINVAL;
+
+       /* we cannot support periods greater
+        * than 4 seconds due to reg limit
+        */
+       if (t->sec > 4 || t->sec < 0)
+               return -ERANGE;
+
+       /* convert to unsigned 64b ns,
+        * verify we can put it in a 32b register
+        */
+       period = on ? t->sec * NSEC_PER_SEC + t->nsec : 0;
+
+       /* verify the value is in range supported by hardware */
+       if (period > U32_MAX)
+               return -ERANGE;
+       /* convert to unsigned 64b ns */
+       /* TODO convert to AQ time */
+       start = on ? s->sec * NSEC_PER_SEC + s->nsec : 0;
+
+       aq_ptp_hw_pin_conf(aq_nic, pin_index, start, period);
+
+       return 0;
+}
+
+static int aq_ptp_pps_pin_configure(struct ptp_clock_info *ptp,
+                                   struct ptp_clock_request *rq, int on)
+{
+       struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       u64 start, period;
+       u32 pin_index = 0;
+       u32 rest = 0;
+
+       /* verify the request channel is there */
+       if (pin_index >= ptp->n_per_out)
+               return -EINVAL;
+
+       aq_nic->aq_hw_ops->hw_get_ptp_ts(aq_nic->aq_hw, &start);
+       div_u64_rem(start, NSEC_PER_SEC, &rest);
+       period = on ? NSEC_PER_SEC : 0; /* PPS - pulse per second */
+       start = on ? start - rest + NSEC_PER_SEC *
+               (rest > 990000000LL ? 2 : 1) : 0;
+
+       aq_ptp_hw_pin_conf(aq_nic, pin_index, start, period);
+
+       return 0;
+}
+
+static void aq_ptp_extts_pin_ctrl(struct aq_ptp_s *aq_ptp)
+{
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       u32 enable = aq_ptp->extts_pin_enabled;
+
+       if (aq_nic->aq_hw_ops->hw_extts_gpio_enable)
+               aq_nic->aq_hw_ops->hw_extts_gpio_enable(aq_nic->aq_hw, 0,
+                                                       enable);
+}
+
+static int aq_ptp_extts_pin_configure(struct ptp_clock_info *ptp,
+                                     struct ptp_clock_request *rq, int on)
+{
+       struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+
+       u32 pin_index = rq->extts.index;
+
+       if (pin_index >= ptp->n_ext_ts)
+               return -EINVAL;
+
+       aq_ptp->extts_pin_enabled = !!on;
+       if (on) {
+               aq_ptp->poll_timeout_ms = POLL_SYNC_TIMER_MS;
+               cancel_delayed_work_sync(&aq_ptp->poll_sync);
+               schedule_delayed_work(&aq_ptp->poll_sync,
+                                     msecs_to_jiffies(aq_ptp->poll_timeout_ms));
+       }
+
+       aq_ptp_extts_pin_ctrl(aq_ptp);
+       return 0;
+}
+
+/* aq_ptp_gpio_feature_enable
+ * @ptp: the ptp clock structure
+ * @rq: the requested feature to change
+ * @on: whether to enable or disable the feature
+ */
+static int aq_ptp_gpio_feature_enable(struct ptp_clock_info *ptp,
+                                     struct ptp_clock_request *rq, int on)
+{
+       switch (rq->type) {
+       case PTP_CLK_REQ_EXTTS:
+               return aq_ptp_extts_pin_configure(ptp, rq, on);
+       case PTP_CLK_REQ_PEROUT:
+               return aq_ptp_perout_pin_configure(ptp, rq, on);
+       case PTP_CLK_REQ_PPS:
+               return aq_ptp_pps_pin_configure(ptp, rq, on);
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+/* aq_ptp_verify
+ * @ptp: the ptp clock structure
+ * @pin: index of the pin in question
+ * @func: the desired function to use
+ * @chan: the function channel index to use
+ */
+static int aq_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+                        enum ptp_pin_function func, unsigned int chan)
+{
+       /* verify the requested pin is there */
+       if (!ptp->pin_config || pin >= ptp->n_pins)
+               return -EINVAL;
+
+       /* enforce locked channels, no changing them */
+       if (chan != ptp->pin_config[pin].chan)
+               return -EINVAL;
+
+       /* we want to keep the functions locked as well */
+       if (func != ptp->pin_config[pin].func)
+               return -EINVAL;
+
+       return 0;
+}
+
+/* aq_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @aq_nic: pointer to the private nic structure
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the hwtstamps structure which
+ * is passed up the network stack
+ */
+void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       struct sk_buff *skb = aq_ptp_skb_get(&aq_ptp->skb_ring);
+       struct skb_shared_hwtstamps hwtstamp;
+
+       if (!skb) {
+               netdev_err(aq_nic->ndev, "have timestamp but tx_queues empty\n");
+               return;
+       }
+
+       timestamp += atomic_read(&aq_ptp->offset_egress);
+       aq_ptp_convert_to_hwtstamp(aq_ptp, &hwtstamp, timestamp);
+       skb_tstamp_tx(skb, &hwtstamp);
+       dev_kfree_skb_any(skb);
+
+       aq_ptp_tx_timeout_update(aq_ptp);
+}
+
+/* aq_ptp_rx_hwtstamp - utility function which checks for RX time stamp
+ * @aq_ptp: pointer to the ptp private structure
+ * @skb: particular skb to send timestamp with
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the hwtstamps structure which
+ * is passed up the network stack
+ */
+static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct sk_buff *skb,
+                              u64 timestamp)
+{
+       timestamp -= atomic_read(&aq_ptp->offset_ingress);
+       aq_ptp_convert_to_hwtstamp(aq_ptp, skb_hwtstamps(skb), timestamp);
+}
+
+void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp,
+                               struct hwtstamp_config *config)
+{
+       *config = aq_ptp->hwtstamp_config;
+}
+
+static void aq_ptp_prepare_filters(struct aq_ptp_s *aq_ptp)
+{
+       aq_ptp->udp_filter.cmd = HW_ATL_RX_ENABLE_FLTR_L3L4 |
+                              HW_ATL_RX_ENABLE_CMP_PROT_L4 |
+                              HW_ATL_RX_UDP |
+                              HW_ATL_RX_ENABLE_CMP_DEST_PORT_L4 |
+                              HW_ATL_RX_HOST << HW_ATL_RX_ACTION_FL3F4_SHIFT |
+                              HW_ATL_RX_ENABLE_QUEUE_L3L4 |
+                              aq_ptp->ptp_rx.idx << HW_ATL_RX_QUEUE_FL3L4_SHIFT;
+       aq_ptp->udp_filter.p_dst = PTP_EV_PORT;
+
+       aq_ptp->eth_type_filter.ethertype = ETH_P_1588;
+       aq_ptp->eth_type_filter.queue = aq_ptp->ptp_rx.idx;
+}
+
+int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp,
+                              struct hwtstamp_config *config)
+{
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       const struct aq_hw_ops *hw_ops;
+       int err = 0;
+
+       hw_ops = aq_nic->aq_hw_ops;
+       if (config->tx_type == HWTSTAMP_TX_ON ||
+           config->rx_filter == HWTSTAMP_FILTER_PTP_V2_EVENT) {
+               aq_ptp_prepare_filters(aq_ptp);
+               if (hw_ops->hw_filter_l3l4_set) {
+                       err = hw_ops->hw_filter_l3l4_set(aq_nic->aq_hw,
+                                                        &aq_ptp->udp_filter);
+               }
+               if (!err && hw_ops->hw_filter_l2_set) {
+                       err = hw_ops->hw_filter_l2_set(aq_nic->aq_hw,
+                                                      &aq_ptp->eth_type_filter);
+               }
+               aq_utils_obj_set(&aq_nic->flags, AQ_NIC_PTP_DPATH_UP);
+       } else {
+               aq_ptp->udp_filter.cmd &= ~HW_ATL_RX_ENABLE_FLTR_L3L4;
+               if (hw_ops->hw_filter_l3l4_set) {
+                       err = hw_ops->hw_filter_l3l4_set(aq_nic->aq_hw,
+                                                        &aq_ptp->udp_filter);
+               }
+               if (!err && hw_ops->hw_filter_l2_clear) {
+                       err = hw_ops->hw_filter_l2_clear(aq_nic->aq_hw,
+                                                       &aq_ptp->eth_type_filter);
+               }
+               aq_utils_obj_clear(&aq_nic->flags, AQ_NIC_PTP_DPATH_UP);
+       }
+
+       if (err)
+               return -EREMOTEIO;
+
+       aq_ptp->hwtstamp_config = *config;
+
+       return 0;
+}
+
+bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp)
+               return false;
+
+       return &aq_ptp->ptp_tx == ring ||
+              &aq_ptp->ptp_rx == ring || &aq_ptp->hwts_rx == ring;
+}
+
+u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p,
+                     unsigned int len)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       u64 timestamp = 0;
+       u16 ret = aq_nic->aq_hw_ops->rx_extract_ts(aq_nic->aq_hw,
+                                                  p, len, &timestamp);
+
+       if (ret > 0)
+               aq_ptp_rx_hwtstamp(aq_ptp, skb, timestamp);
+
+       return ret;
+}
+
+static int aq_ptp_poll(struct napi_struct *napi, int budget)
+{
+       struct aq_ptp_s *aq_ptp = container_of(napi, struct aq_ptp_s, napi);
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       bool was_cleaned = false;
+       int work_done = 0;
+       int err;
+
+       /* Processing PTP TX traffic */
+       err = aq_nic->aq_hw_ops->hw_ring_tx_head_update(aq_nic->aq_hw,
+                                                       &aq_ptp->ptp_tx);
+       if (err < 0)
+               goto err_exit;
+
+       if (aq_ptp->ptp_tx.sw_head != aq_ptp->ptp_tx.hw_head) {
+               aq_ring_tx_clean(&aq_ptp->ptp_tx);
+
+               was_cleaned = true;
+       }
+
+       /* Processing HW_TIMESTAMP RX traffic */
+       err = aq_nic->aq_hw_ops->hw_ring_hwts_rx_receive(aq_nic->aq_hw,
+                                                        &aq_ptp->hwts_rx);
+       if (err < 0)
+               goto err_exit;
+
+       if (aq_ptp->hwts_rx.sw_head != aq_ptp->hwts_rx.hw_head) {
+               aq_ring_hwts_rx_clean(&aq_ptp->hwts_rx, aq_nic);
+
+               err = aq_nic->aq_hw_ops->hw_ring_hwts_rx_fill(aq_nic->aq_hw,
+                                                             &aq_ptp->hwts_rx);
+               if (err < 0)
+                       goto err_exit;
+
+               was_cleaned = true;
+       }
+
+       /* Processing PTP RX traffic */
+       err = aq_nic->aq_hw_ops->hw_ring_rx_receive(aq_nic->aq_hw,
+                                                   &aq_ptp->ptp_rx);
+       if (err < 0)
+               goto err_exit;
+
+       if (aq_ptp->ptp_rx.sw_head != aq_ptp->ptp_rx.hw_head) {
+               unsigned int sw_tail_old;
+
+               err = aq_ring_rx_clean(&aq_ptp->ptp_rx, napi, &work_done, budget);
+               if (err < 0)
+                       goto err_exit;
+
+               sw_tail_old = aq_ptp->ptp_rx.sw_tail;
+               err = aq_ring_rx_fill(&aq_ptp->ptp_rx);
+               if (err < 0)
+                       goto err_exit;
+
+               err = aq_nic->aq_hw_ops->hw_ring_rx_fill(aq_nic->aq_hw,
+                                                        &aq_ptp->ptp_rx,
+                                                        sw_tail_old);
+               if (err < 0)
+                       goto err_exit;
+       }
+
+       if (was_cleaned)
+               work_done = budget;
+
+       if (work_done < budget) {
+               napi_complete_done(napi, work_done);
+               aq_nic->aq_hw_ops->hw_irq_enable(aq_nic->aq_hw,
+                                       BIT_ULL(aq_ptp->ptp_ring_param.vec_idx));
+       }
+
+err_exit:
+       return work_done;
+}
+
+static irqreturn_t aq_ptp_isr(int irq, void *private)
+{
+       struct aq_ptp_s *aq_ptp = private;
+       int err = 0;
+
+       if (!aq_ptp) {
+               err = -EINVAL;
+               goto err_exit;
+       }
+       napi_schedule(&aq_ptp->napi);
+
+err_exit:
+       return err >= 0 ? IRQ_HANDLED : IRQ_NONE;
+}
+
+int aq_ptp_xmit(struct aq_nic_s *aq_nic, struct sk_buff *skb)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       struct aq_ring_s *ring = &aq_ptp->ptp_tx;
+       unsigned long irq_flags;
+       int err = NETDEV_TX_OK;
+       unsigned int frags;
+
+       if (skb->len <= 0) {
+               dev_kfree_skb_any(skb);
+               goto err_exit;
+       }
+
+       frags = skb_shinfo(skb)->nr_frags + 1;
+       /* Frags cannot be bigger than 16KB
+        * because PTP usually works
+        * without Jumbo frames even in the background
+        */
+       if (frags > AQ_CFG_SKB_FRAGS_MAX || frags > aq_ring_avail_dx(ring)) {
+               /* Drop packet because it doesn't make sense to delay it */
+               dev_kfree_skb_any(skb);
+               goto err_exit;
+       }
+
+       err = aq_ptp_skb_put(&aq_ptp->skb_ring, skb);
+       if (err) {
+               netdev_err(aq_nic->ndev, "SKB Ring is overflow (%u)!\n",
+                          ring->size);
+               return NETDEV_TX_BUSY;
+       }
+       skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+       aq_ptp_tx_timeout_start(aq_ptp);
+       skb_tx_timestamp(skb);
+
+       spin_lock_irqsave(&aq_nic->aq_ptp->ptp_ring_lock, irq_flags);
+       frags = aq_nic_map_skb(aq_nic, skb, ring);
+
+       if (likely(frags)) {
+               err = aq_nic->aq_hw_ops->hw_ring_tx_xmit(aq_nic->aq_hw,
+                                                      ring, frags);
+               if (err >= 0) {
+                       ++ring->stats.tx.packets;
+                       ring->stats.tx.bytes += skb->len;
+               }
+       } else {
+               err = NETDEV_TX_BUSY;
+       }
+       spin_unlock_irqrestore(&aq_nic->aq_ptp->ptp_ring_lock, irq_flags);
+
+err_exit:
+       return err;
+}
+
+void aq_ptp_service_task(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp)
+               return;
+
+       aq_ptp_tx_timeout_check(aq_ptp);
+}
+
+int aq_ptp_irq_alloc(struct aq_nic_s *aq_nic)
+{
+       struct pci_dev *pdev = aq_nic->pdev;
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       int err = 0;
+
+       if (!aq_ptp)
+               return 0;
+
+       if (pdev->msix_enabled || pdev->msi_enabled) {
+               err = request_irq(pci_irq_vector(pdev, aq_ptp->idx_vector),
+                                 aq_ptp_isr, 0, aq_nic->ndev->name, aq_ptp);
+       } else {
+               err = -EINVAL;
+               goto err_exit;
+       }
+
+err_exit:
+       return err;
+}
+
+void aq_ptp_irq_free(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       struct pci_dev *pdev = aq_nic->pdev;
+
+       if (!aq_ptp)
+               return;
+
+       free_irq(pci_irq_vector(pdev, aq_ptp->idx_vector), aq_ptp);
+}
+
+int aq_ptp_ring_init(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       int err = 0;
+
+       if (!aq_ptp)
+               return 0;
+
+       err = aq_ring_init(&aq_ptp->ptp_tx);
+       if (err < 0)
+               goto err_exit;
+       err = aq_nic->aq_hw_ops->hw_ring_tx_init(aq_nic->aq_hw,
+                                                &aq_ptp->ptp_tx,
+                                                &aq_ptp->ptp_ring_param);
+       if (err < 0)
+               goto err_exit;
+
+       err = aq_ring_init(&aq_ptp->ptp_rx);
+       if (err < 0)
+               goto err_exit;
+       err = aq_nic->aq_hw_ops->hw_ring_rx_init(aq_nic->aq_hw,
+                                                &aq_ptp->ptp_rx,
+                                                &aq_ptp->ptp_ring_param);
+       if (err < 0)
+               goto err_exit;
+
+       err = aq_ring_rx_fill(&aq_ptp->ptp_rx);
+       if (err < 0)
+               goto err_rx_free;
+       err = aq_nic->aq_hw_ops->hw_ring_rx_fill(aq_nic->aq_hw,
+                                                &aq_ptp->ptp_rx,
+                                                0U);
+       if (err < 0)
+               goto err_rx_free;
+
+       err = aq_ring_init(&aq_ptp->hwts_rx);
+       if (err < 0)
+               goto err_rx_free;
+       err = aq_nic->aq_hw_ops->hw_ring_rx_init(aq_nic->aq_hw,
+                                                &aq_ptp->hwts_rx,
+                                                &aq_ptp->ptp_ring_param);
+       if (err < 0)
+               goto err_exit;
+       err = aq_nic->aq_hw_ops->hw_ring_hwts_rx_fill(aq_nic->aq_hw,
+                                                     &aq_ptp->hwts_rx);
+       if (err < 0)
+               goto err_exit;
+
+       return err;
+
+err_rx_free:
+       aq_ring_rx_deinit(&aq_ptp->ptp_rx);
+err_exit:
+       return err;
+}
+
+int aq_ptp_ring_start(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       int err = 0;
+
+       if (!aq_ptp)
+               return 0;
+
+       err = aq_nic->aq_hw_ops->hw_ring_tx_start(aq_nic->aq_hw, &aq_ptp->ptp_tx);
+       if (err < 0)
+               goto err_exit;
+
+       err = aq_nic->aq_hw_ops->hw_ring_rx_start(aq_nic->aq_hw, &aq_ptp->ptp_rx);
+       if (err < 0)
+               goto err_exit;
+
+       err = aq_nic->aq_hw_ops->hw_ring_rx_start(aq_nic->aq_hw,
+                                                 &aq_ptp->hwts_rx);
+       if (err < 0)
+               goto err_exit;
+
+       napi_enable(&aq_ptp->napi);
+
+err_exit:
+       return err;
+}
+
+void aq_ptp_ring_stop(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp)
+               return;
+
+       aq_nic->aq_hw_ops->hw_ring_tx_stop(aq_nic->aq_hw, &aq_ptp->ptp_tx);
+       aq_nic->aq_hw_ops->hw_ring_rx_stop(aq_nic->aq_hw, &aq_ptp->ptp_rx);
+
+       aq_nic->aq_hw_ops->hw_ring_rx_stop(aq_nic->aq_hw, &aq_ptp->hwts_rx);
+
+       napi_disable(&aq_ptp->napi);
+}
+
+void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp || !aq_ptp->ptp_tx.aq_nic || !aq_ptp->ptp_rx.aq_nic)
+               return;
+
+       aq_ring_tx_clean(&aq_ptp->ptp_tx);
+       aq_ring_rx_deinit(&aq_ptp->ptp_rx);
+}
+
+#define PTP_8TC_RING_IDX             8
+#define PTP_4TC_RING_IDX            16
+#define PTP_HWST_RING_IDX           31
+
+int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       unsigned int tx_ring_idx, rx_ring_idx;
+       struct aq_ring_s *hwts;
+       u32 tx_tc_mode, rx_tc_mode;
+       struct aq_ring_s *ring;
+       int err;
+
+       if (!aq_ptp)
+               return 0;
+
+       /* Index must be 8 (8 TCs) or 16 (4 TCs).
+        * It depends on the Traffic Class mode.
+        */
+       aq_nic->aq_hw_ops->hw_tx_tc_mode_get(aq_nic->aq_hw, &tx_tc_mode);
+       if (tx_tc_mode == 0)
+               tx_ring_idx = PTP_8TC_RING_IDX;
+       else
+               tx_ring_idx = PTP_4TC_RING_IDX;
+
+       ring = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic,
+                               tx_ring_idx, &aq_nic->aq_nic_cfg);
+       if (!ring) {
+               err = -ENOMEM;
+               goto err_exit;
+       }
+
+       aq_nic->aq_hw_ops->hw_rx_tc_mode_get(aq_nic->aq_hw, &rx_tc_mode);
+       if (rx_tc_mode == 0)
+               rx_ring_idx = PTP_8TC_RING_IDX;
+       else
+               rx_ring_idx = PTP_4TC_RING_IDX;
+
+       ring = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic,
+                               rx_ring_idx, &aq_nic->aq_nic_cfg);
+       if (!ring) {
+               err = -ENOMEM;
+               goto err_exit_ptp_tx;
+       }
+
+       hwts = aq_ring_hwts_rx_alloc(&aq_ptp->hwts_rx, aq_nic, PTP_HWST_RING_IDX,
+                                    aq_nic->aq_nic_cfg.rxds,
+                                    aq_nic->aq_nic_cfg.aq_hw_caps->rxd_size);
+       if (!hwts) {
+               err = -ENOMEM;
+               goto err_exit_ptp_rx;
+       }
+
+       err = aq_ptp_skb_ring_init(&aq_ptp->skb_ring, aq_nic->aq_nic_cfg.rxds);
+       if (err != 0) {
+               err = -ENOMEM;
+               goto err_exit_hwts_rx;
+       }
+
+       aq_ptp->ptp_ring_param.vec_idx = aq_ptp->idx_vector;
+       aq_ptp->ptp_ring_param.cpu = aq_ptp->ptp_ring_param.vec_idx +
+                       aq_nic_get_cfg(aq_nic)->aq_rss.base_cpu_number;
+       cpumask_set_cpu(aq_ptp->ptp_ring_param.cpu,
+                       &aq_ptp->ptp_ring_param.affinity_mask);
+
+       return 0;
+
+err_exit_hwts_rx:
+       aq_ring_free(&aq_ptp->hwts_rx);
+err_exit_ptp_rx:
+       aq_ring_free(&aq_ptp->ptp_rx);
+err_exit_ptp_tx:
+       aq_ring_free(&aq_ptp->ptp_tx);
+err_exit:
+       return err;
+}
+
+void aq_ptp_ring_free(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp)
+               return;
+
+       aq_ring_free(&aq_ptp->ptp_tx);
+       aq_ring_free(&aq_ptp->ptp_rx);
+       aq_ring_free(&aq_ptp->hwts_rx);
+
+       aq_ptp_skb_ring_release(&aq_ptp->skb_ring);
+}
+
+#define MAX_PTP_GPIO_COUNT 4
+
+static struct ptp_clock_info aq_ptp_clock = {
+       .owner          = THIS_MODULE,
+       .name           = "atlantic ptp",
+       .max_adj        = 999999999,
+       .n_ext_ts       = 0,
+       .pps            = 0,
+       .adjfine        = aq_ptp_adjfine,
+       .adjtime        = aq_ptp_adjtime,
+       .gettime64      = aq_ptp_gettime,
+       .settime64      = aq_ptp_settime,
+       .n_per_out      = 0,
+       .enable         = aq_ptp_gpio_feature_enable,
+       .n_pins         = 0,
+       .verify         = aq_ptp_verify,
+       .pin_config     = NULL,
+};
+
+#define ptp_offset_init(__idx, __mbps, __egress, __ingress)   do { \
+               ptp_offset[__idx].mbps = (__mbps); \
+               ptp_offset[__idx].egress = (__egress); \
+               ptp_offset[__idx].ingress = (__ingress); } \
+               while (0)
+
+static void aq_ptp_offset_init_from_fw(const struct hw_aq_ptp_offset *offsets)
+{
+       int i;
+
+       /* Load offsets for PTP */
+       for (i = 0; i < ARRAY_SIZE(ptp_offset); i++) {
+               switch (i) {
+               /* 100M */
+               case ptp_offset_idx_100:
+                       ptp_offset_init(i, 100,
+                                       offsets->egress_100,
+                                       offsets->ingress_100);
+                       break;
+               /* 1G */
+               case ptp_offset_idx_1000:
+                       ptp_offset_init(i, 1000,
+                                       offsets->egress_1000,
+                                       offsets->ingress_1000);
+                       break;
+               /* 2.5G */
+               case ptp_offset_idx_2500:
+                       ptp_offset_init(i, 2500,
+                                       offsets->egress_2500,
+                                       offsets->ingress_2500);
+                       break;
+               /* 5G */
+               case ptp_offset_idx_5000:
+                       ptp_offset_init(i, 5000,
+                                       offsets->egress_5000,
+                                       offsets->ingress_5000);
+                       break;
+               /* 10G */
+               case ptp_offset_idx_10000:
+                       ptp_offset_init(i, 10000,
+                                       offsets->egress_10000,
+                                       offsets->ingress_10000);
+                       break;
+               }
+       }
+}
+
+static void aq_ptp_offset_init(const struct hw_aq_ptp_offset *offsets)
+{
+       memset(ptp_offset, 0, sizeof(ptp_offset));
+
+       aq_ptp_offset_init_from_fw(offsets);
+}
+
+static void aq_ptp_gpio_init(struct ptp_clock_info *info,
+                            struct hw_aq_info *hw_info)
+{
+       struct ptp_pin_desc pin_desc[MAX_PTP_GPIO_COUNT];
+       u32 extts_pin_cnt = 0;
+       u32 out_pin_cnt = 0;
+       u32 i;
+
+       memset(pin_desc, 0, sizeof(pin_desc));
+
+       for (i = 0; i < MAX_PTP_GPIO_COUNT - 1; i++) {
+               if (hw_info->gpio_pin[i] ==
+                   (GPIO_PIN_FUNCTION_PTP0 + out_pin_cnt)) {
+                       snprintf(pin_desc[out_pin_cnt].name,
+                                sizeof(pin_desc[out_pin_cnt].name),
+                                "AQ_GPIO%d", i);
+                       pin_desc[out_pin_cnt].index = out_pin_cnt;
+                       pin_desc[out_pin_cnt].chan = out_pin_cnt;
+                       pin_desc[out_pin_cnt++].func = PTP_PF_PEROUT;
+               }
+       }
+
+       info->n_per_out = out_pin_cnt;
+
+       if (hw_info->caps_ex & BIT(CAPS_EX_PHY_CTRL_TS_PIN)) {
+               extts_pin_cnt += 1;
+
+               snprintf(pin_desc[out_pin_cnt].name,
+                        sizeof(pin_desc[out_pin_cnt].name),
+                         "AQ_GPIO%d", out_pin_cnt);
+               pin_desc[out_pin_cnt].index = out_pin_cnt;
+               pin_desc[out_pin_cnt].chan = 0;
+               pin_desc[out_pin_cnt].func = PTP_PF_EXTTS;
+       }
+
+       info->n_pins = out_pin_cnt + extts_pin_cnt;
+       info->n_ext_ts = extts_pin_cnt;
+
+       if (!info->n_pins)
+               return;
+
+       info->pin_config = kcalloc(info->n_pins, sizeof(struct ptp_pin_desc),
+                                  GFP_KERNEL);
+
+       if (!info->pin_config)
+               return;
+
+       memcpy(info->pin_config, &pin_desc,
+              sizeof(struct ptp_pin_desc) * info->n_pins);
+}
+
+void aq_ptp_clock_init(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+       struct timespec64 ts;
+
+       ktime_get_real_ts64(&ts);
+       aq_ptp_settime(&aq_ptp->ptp_info, &ts);
+}
+
+static void aq_ptp_poll_sync_work_cb(struct work_struct *w);
+
+int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec)
+{
+       struct hw_atl_utils_mbox mbox;
+       struct ptp_clock *clock;
+       struct aq_ptp_s *aq_ptp;
+       int err = 0;
+
+       if (!aq_nic->aq_hw_ops->hw_get_ptp_ts) {
+               aq_nic->aq_ptp = NULL;
+               return 0;
+       }
+
+       if (!aq_nic->aq_fw_ops->enable_ptp) {
+               aq_nic->aq_ptp = NULL;
+               return 0;
+       }
+
+       hw_atl_utils_mpi_read_stats(aq_nic->aq_hw, &mbox);
+
+       if (!(mbox.info.caps_ex & BIT(CAPS_EX_PHY_PTP_EN))) {
+               aq_nic->aq_ptp = NULL;
+               return 0;
+       }
+
+       aq_ptp_offset_init(&mbox.info.ptp_offset);
+
+       aq_ptp = kzalloc(sizeof(*aq_ptp), GFP_KERNEL);
+       if (!aq_ptp) {
+               err = -ENOMEM;
+               goto err_exit;
+       }
+
+       aq_ptp->aq_nic = aq_nic;
+
+       spin_lock_init(&aq_ptp->ptp_lock);
+       spin_lock_init(&aq_ptp->ptp_ring_lock);
+
+       aq_ptp->ptp_info = aq_ptp_clock;
+       aq_ptp_gpio_init(&aq_ptp->ptp_info, &mbox.info);
+       clock = ptp_clock_register(&aq_ptp->ptp_info, &aq_nic->ndev->dev);
+       if (!clock || IS_ERR(clock)) {
+               netdev_err(aq_nic->ndev, "ptp_clock_register failed\n");
+               err = PTR_ERR(clock);
+               goto err_exit;
+       }
+       aq_ptp->ptp_clock = clock;
+       aq_ptp_tx_timeout_init(&aq_ptp->ptp_tx_timeout);
+
+       atomic_set(&aq_ptp->offset_egress, 0);
+       atomic_set(&aq_ptp->offset_ingress, 0);
+
+       netif_napi_add(aq_nic_get_ndev(aq_nic), &aq_ptp->napi,
+                      aq_ptp_poll, AQ_CFG_NAPI_WEIGHT);
+
+       aq_ptp->idx_vector = idx_vec;
+
+       aq_nic->aq_ptp = aq_ptp;
+
+       /* enable ptp counter */
+       aq_utils_obj_set(&aq_nic->aq_hw->flags, AQ_HW_PTP_AVAILABLE);
+       mutex_lock(&aq_nic->fwreq_mutex);
+       aq_nic->aq_fw_ops->enable_ptp(aq_nic->aq_hw, 1);
+       aq_ptp_clock_init(aq_nic);
+       mutex_unlock(&aq_nic->fwreq_mutex);
+
+       INIT_DELAYED_WORK(&aq_ptp->poll_sync, &aq_ptp_poll_sync_work_cb);
+       aq_ptp->eth_type_filter.location =
+                       aq_nic_reserve_filter(aq_nic, aq_rx_filter_ethertype);
+       aq_ptp->udp_filter.location =
+                       aq_nic_reserve_filter(aq_nic, aq_rx_filter_l3l4);
+
+       return 0;
+
+err_exit:
+       if (aq_ptp)
+               kfree(aq_ptp->ptp_info.pin_config);
+       kfree(aq_ptp);
+       aq_nic->aq_ptp = NULL;
+       return err;
+}
+
+void aq_ptp_unregister(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp)
+               return;
+
+       ptp_clock_unregister(aq_ptp->ptp_clock);
+}
+
+void aq_ptp_free(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp)
+               return;
+
+       aq_nic_release_filter(aq_nic, aq_rx_filter_ethertype,
+                             aq_ptp->eth_type_filter.location);
+       aq_nic_release_filter(aq_nic, aq_rx_filter_l3l4,
+                             aq_ptp->udp_filter.location);
+       cancel_delayed_work_sync(&aq_ptp->poll_sync);
+       /* disable ptp */
+       mutex_lock(&aq_nic->fwreq_mutex);
+       aq_nic->aq_fw_ops->enable_ptp(aq_nic->aq_hw, 0);
+       mutex_unlock(&aq_nic->fwreq_mutex);
+
+       kfree(aq_ptp->ptp_info.pin_config);
+
+       netif_napi_del(&aq_ptp->napi);
+       kfree(aq_ptp);
+       aq_nic->aq_ptp = NULL;
+}
+
+struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp)
+{
+       return aq_ptp->ptp_clock;
+}
+
+/* PTP external GPIO nanoseconds count */
+static uint64_t aq_ptp_get_sync1588_ts(struct aq_nic_s *aq_nic)
+{
+       u64 ts = 0;
+
+       if (aq_nic->aq_hw_ops->hw_get_sync_ts)
+               aq_nic->aq_hw_ops->hw_get_sync_ts(aq_nic->aq_hw, &ts);
+
+       return ts;
+}
+
+static void aq_ptp_start_work(struct aq_ptp_s *aq_ptp)
+{
+       if (aq_ptp->extts_pin_enabled) {
+               aq_ptp->poll_timeout_ms = POLL_SYNC_TIMER_MS;
+               aq_ptp->last_sync1588_ts =
+                               aq_ptp_get_sync1588_ts(aq_ptp->aq_nic);
+               schedule_delayed_work(&aq_ptp->poll_sync,
+                                     msecs_to_jiffies(aq_ptp->poll_timeout_ms));
+       }
+}
+
+int aq_ptp_link_change(struct aq_nic_s *aq_nic)
+{
+       struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+       if (!aq_ptp)
+               return 0;
+
+       if (aq_nic->aq_hw->aq_link_status.mbps)
+               aq_ptp_start_work(aq_ptp);
+       else
+               cancel_delayed_work_sync(&aq_ptp->poll_sync);
+
+       return 0;
+}
+
+static bool aq_ptp_sync_ts_updated(struct aq_ptp_s *aq_ptp, u64 *new_ts)
+{
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       u64 sync_ts2;
+       u64 sync_ts;
+
+       sync_ts = aq_ptp_get_sync1588_ts(aq_nic);
+
+       if (sync_ts != aq_ptp->last_sync1588_ts) {
+               sync_ts2 = aq_ptp_get_sync1588_ts(aq_nic);
+               if (sync_ts != sync_ts2) {
+                       sync_ts = sync_ts2;
+                       sync_ts2 = aq_ptp_get_sync1588_ts(aq_nic);
+                       if (sync_ts != sync_ts2) {
+                               netdev_err(aq_nic->ndev,
+                                          "%s: Unable to get correct GPIO TS",
+                                          __func__);
+                               sync_ts = 0;
+                       }
+               }
+
+               *new_ts = sync_ts;
+               return true;
+       }
+       return false;
+}
+
+static int aq_ptp_check_sync1588(struct aq_ptp_s *aq_ptp)
+{
+       struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+       u64 sync_ts;
+
+       /* Sync1588 pin was triggered */
+       if (aq_ptp_sync_ts_updated(aq_ptp, &sync_ts)) {
+               if (aq_ptp->extts_pin_enabled) {
+                       struct ptp_clock_event ptp_event;
+                       u64 time = 0;
+
+                       aq_nic->aq_hw_ops->hw_ts_to_sys_clock(aq_nic->aq_hw,
+                                                             sync_ts, &time);
+                       ptp_event.index = aq_ptp->ptp_info.n_pins - 1;
+                       ptp_event.timestamp = time;
+
+                       ptp_event.type = PTP_CLOCK_EXTTS;
+                       ptp_clock_event(aq_ptp->ptp_clock, &ptp_event);
+               }
+
+               aq_ptp->last_sync1588_ts = sync_ts;
+       }
+
+       return 0;
+}
+
+static void aq_ptp_poll_sync_work_cb(struct work_struct *w)
+{
+       struct delayed_work *dw = to_delayed_work(w);
+       struct aq_ptp_s *aq_ptp = container_of(dw, struct aq_ptp_s, poll_sync);
+
+       aq_ptp_check_sync1588(aq_ptp);
+
+       if (aq_ptp->extts_pin_enabled) {
+               unsigned long timeout = msecs_to_jiffies(aq_ptp->poll_timeout_ms);
+
+               schedule_delayed_work(&aq_ptp->poll_sync, timeout);
+       }
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h
new file mode 100644 (file)
index 0000000..2319064
--- /dev/null
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Aquantia Corporation Network Driver
+ * Copyright (C) 2014-2019 Aquantia Corporation. All rights reserved
+ */
+
+/* File aq_ptp.h: Declaration of PTP functions.
+ */
+#ifndef AQ_PTP_H
+#define AQ_PTP_H
+
+#include <linux/net_tstamp.h>
+
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
+
+/* Common functions */
+int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec);
+
+void aq_ptp_unregister(struct aq_nic_s *aq_nic);
+void aq_ptp_free(struct aq_nic_s *aq_nic);
+
+int aq_ptp_irq_alloc(struct aq_nic_s *aq_nic);
+void aq_ptp_irq_free(struct aq_nic_s *aq_nic);
+
+int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic);
+void aq_ptp_ring_free(struct aq_nic_s *aq_nic);
+
+int aq_ptp_ring_init(struct aq_nic_s *aq_nic);
+int aq_ptp_ring_start(struct aq_nic_s *aq_nic);
+void aq_ptp_ring_stop(struct aq_nic_s *aq_nic);
+void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic);
+
+void aq_ptp_service_task(struct aq_nic_s *aq_nic);
+
+void aq_ptp_tm_offset_set(struct aq_nic_s *aq_nic, unsigned int mbps);
+
+void aq_ptp_clock_init(struct aq_nic_s *aq_nic);
+
+/* Traffic processing functions */
+int aq_ptp_xmit(struct aq_nic_s *aq_nic, struct sk_buff *skb);
+void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp);
+
+/* Caller must check that PTP is available before calling these */
+void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp,
+                               struct hwtstamp_config *config);
+int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp,
+                              struct hwtstamp_config *config);
+
+/* Return whether the ring belongs to PTP or not */
+bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring);
+
+u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p,
+                     unsigned int len);
+
+struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp);
+
+int aq_ptp_link_change(struct aq_nic_s *aq_nic);
+
+#else
+
+static inline int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec)
+{
+       return 0;
+}
+
+static inline void aq_ptp_unregister(struct aq_nic_s *aq_nic) {}
+
+static inline void aq_ptp_free(struct aq_nic_s *aq_nic)
+{
+}
+
+static inline int aq_ptp_irq_alloc(struct aq_nic_s *aq_nic)
+{
+       return 0;
+}
+
+static inline void aq_ptp_irq_free(struct aq_nic_s *aq_nic)
+{
+}
+
+static inline int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
+{
+       return 0;
+}
+
+static inline void aq_ptp_ring_free(struct aq_nic_s *aq_nic) {}
+
+static inline int aq_ptp_ring_init(struct aq_nic_s *aq_nic)
+{
+       return 0;
+}
+
+static inline int aq_ptp_ring_start(struct aq_nic_s *aq_nic)
+{
+       return 0;
+}
+
+static inline void aq_ptp_ring_stop(struct aq_nic_s *aq_nic) {}
+static inline void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic) {}
+static inline void aq_ptp_service_task(struct aq_nic_s *aq_nic) {}
+static inline void aq_ptp_tm_offset_set(struct aq_nic_s *aq_nic,
+                                       unsigned int mbps) {}
+static inline void aq_ptp_clock_init(struct aq_nic_s *aq_nic) {}
+static inline int aq_ptp_xmit(struct aq_nic_s *aq_nic, struct sk_buff *skb)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp) {}
+static inline void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp,
+                                             struct hwtstamp_config *config) {}
+static inline int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp,
+                                            struct hwtstamp_config *config)
+{
+       return 0;
+}
+
+static inline bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring)
+{
+       return false;
+}
+
+static inline u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic,
+                                   struct sk_buff *skb, u8 *p,
+                                   unsigned int len)
+{
+       return 0;
+}
+
+static inline struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp)
+{
+       return NULL;
+}
+
+static inline int aq_ptp_link_change(struct aq_nic_s *aq_nic)
+{
+       return 0;
+}
+#endif
+
+#endif /* AQ_PTP_H */
index 76bdbe1..f756cc0 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_ring.c: Definition of functions for Rx/Tx rings. */
@@ -10,6 +10,7 @@
 #include "aq_nic.h"
 #include "aq_hw.h"
 #include "aq_hw_utils.h"
+#include "aq_ptp.h"
 
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -177,6 +178,30 @@ err_exit:
        return self;
 }
 
+struct aq_ring_s *
+aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic,
+                     unsigned int idx, unsigned int size, unsigned int dx_size)
+{
+       struct device *dev = aq_nic_get_dev(aq_nic);
+       size_t sz = size * dx_size + AQ_CFG_RXDS_DEF;
+
+       memset(self, 0, sizeof(*self));
+
+       self->aq_nic = aq_nic;
+       self->idx = idx;
+       self->size = size;
+       self->dx_size = dx_size;
+
+       self->dx_ring = dma_alloc_coherent(dev, sz, &self->dx_ring_pa,
+                                          GFP_KERNEL);
+       if (!self->dx_ring) {
+               aq_ring_free(self);
+               return NULL;
+       }
+
+       return self;
+}
+
 int aq_ring_init(struct aq_ring_s *self)
 {
        self->hw_head = 0;
@@ -290,6 +315,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                self->sw_head = aq_ring_next_dx(self, self->sw_head),
                --budget, ++(*work_done)) {
                struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
+               bool is_ptp_ring = aq_ptp_ring(self->aq_nic, self);
                struct aq_ring_buff_s *buff_ = NULL;
                struct sk_buff *skb = NULL;
                unsigned int next_ = 0U;
@@ -354,6 +380,11 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                                err = -ENOMEM;
                                goto err_exit;
                        }
+                       if (is_ptp_ring)
+                               buff->len -=
+                                       aq_ptp_extract_ts(self->aq_nic, skb,
+                                               aq_buf_vaddr(&buff->rxdata),
+                                               buff->len);
                        skb_put(skb, buff->len);
                        page_ref_inc(buff->rxdata.page);
                } else {
@@ -362,6 +393,11 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                                err = -ENOMEM;
                                goto err_exit;
                        }
+                       if (is_ptp_ring)
+                               buff->len -=
+                                       aq_ptp_extract_ts(self->aq_nic, skb,
+                                               aq_buf_vaddr(&buff->rxdata),
+                                               buff->len);
 
                        hdr_len = buff->len;
                        if (hdr_len > AQ_CFG_RX_HDR_SIZE)
@@ -421,8 +457,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                skb_set_hash(skb, buff->rss_hash,
                             buff->is_hash_l4 ? PKT_HASH_TYPE_L4 :
                             PKT_HASH_TYPE_NONE);
-
-               skb_record_rx_queue(skb, self->idx);
+               /* Send all PTP traffic to 0 queue */
+               skb_record_rx_queue(skb, is_ptp_ring ? 0 : self->idx);
 
                ++self->stats.rx.packets;
                self->stats.rx.bytes += skb->len;
@@ -434,6 +470,21 @@ err_exit:
        return err;
 }
 
+void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic)
+{
+       while (self->sw_head != self->hw_head) {
+               u64 ns;
+
+               aq_nic->aq_hw_ops->extract_hwts(aq_nic->aq_hw,
+                                               self->dx_ring +
+                                               (self->sw_head * self->dx_size),
+                                               self->dx_size, &ns);
+               aq_ptp_tx_hwtstamp(aq_nic, ns);
+
+               self->sw_head = aq_ring_next_dx(self, self->sw_head);
+       }
+}
+
 int aq_ring_rx_fill(struct aq_ring_s *self)
 {
        unsigned int page_order = self->page_order;
index 47abd09..be3702a 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_ring.h: Declaration of functions for Rx/Tx rings. */
@@ -174,4 +174,9 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                     int budget);
 int aq_ring_rx_fill(struct aq_ring_s *self);
 
+struct aq_ring_s *aq_ring_hwts_rx_alloc(struct aq_ring_s *self,
+               struct aq_nic_s *aq_nic, unsigned int idx,
+               unsigned int size, unsigned int dx_size);
+void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic);
+
 #endif /* AQ_RING_H */
index 2ad3fa6..c7297ca 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_b0.c: Definition of Atlantic hardware specific functions. */
@@ -10,6 +10,7 @@
 #include "../aq_hw_utils.h"
 #include "../aq_ring.h"
 #include "../aq_nic.h"
+#include "../aq_phy.h"
 #include "hw_atl_b0.h"
 #include "hw_atl_utils.h"
 #include "hw_atl_llh.h"
@@ -49,6 +50,8 @@
        .mac_regs_count = 88,             \
        .hw_alive_check_addr = 0x10U
 
+#define FRAC_PER_NS 0x100000000LL
+
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc100 = {
        DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
        .media_type = AQ_HW_MEDIA_TYPE_FIBRE,
@@ -124,13 +127,16 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
        hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
        hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
-       hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, 0U);
-       hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, 0U);
-       hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, 0U);
-       hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, 0U);
+       tc = 0;
+
+       /* TX Packet Scheduler Data TC0 */
+       hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc);
+       hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc);
+       hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
+       hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
 
-       /* Tx buf size */
-       buff_size = HW_ATL_B0_TXBUF_MAX;
+       /* Tx buf size TC0 */
+       buff_size = HW_ATL_B0_TXBUF_MAX - HW_ATL_B0_PTP_TXBUF_SIZE;
 
        hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, buff_size, tc);
        hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self,
@@ -141,10 +147,15 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
                                                   (buff_size *
                                                   (1024 / 32U) * 50U) /
                                                   100U, tc);
+       /* Init TC2 for PTP_TX */
+       tc = 2;
+
+       hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_TXBUF_SIZE,
+                                              tc);
 
        /* QoS Rx buf size per TC */
        tc = 0;
-       buff_size = HW_ATL_B0_RXBUF_MAX;
+       buff_size = HW_ATL_B0_RXBUF_MAX - HW_ATL_B0_PTP_RXBUF_SIZE;
 
        hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc);
        hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self,
@@ -158,6 +169,14 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 
        hw_atl_b0_set_fc(self, self->aq_nic_cfg->flow_control, tc);
 
+       /* Init TC2 for PTP_RX */
+       tc = 2;
+
+       hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_RXBUF_SIZE,
+                                              tc);
+       /* No flow control for PTP */
+       hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, tc);
+
        /* QoS 802.1p priority -> TC mapping */
        for (i_priority = 8U; i_priority--;)
                hw_atl_rpf_rpb_user_priority_tc_map_set(self, i_priority, 0U);
@@ -664,6 +683,46 @@ static int hw_atl_b0_hw_ring_rx_fill(struct aq_hw_s *self,
        return aq_hw_err_from_flags(self);
 }
 
+static int hw_atl_b0_hw_ring_hwts_rx_fill(struct aq_hw_s *self,
+                                         struct aq_ring_s *ring)
+{
+       unsigned int i;
+
+       for (i = aq_ring_avail_dx(ring); i--;
+                       ring->sw_tail = aq_ring_next_dx(ring, ring->sw_tail)) {
+               struct hw_atl_rxd_s *rxd =
+                       (struct hw_atl_rxd_s *)
+                       &ring->dx_ring[ring->sw_tail * HW_ATL_B0_RXD_SIZE];
+
+               rxd->buf_addr = ring->dx_ring_pa + ring->size * ring->dx_size;
+               rxd->hdr_addr = 0U;
+       }
+       /* Make sure descriptors are updated before bumping tail */
+       wmb();
+
+       hw_atl_reg_rx_dma_desc_tail_ptr_set(self, ring->sw_tail, ring->idx);
+
+       return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_hw_ring_hwts_rx_receive(struct aq_hw_s *self,
+                                            struct aq_ring_s *ring)
+{
+       while (ring->hw_head != ring->sw_tail) {
+               struct hw_atl_rxd_hwts_wb_s *hwts_wb =
+                       (struct hw_atl_rxd_hwts_wb_s *)
+                       (ring->dx_ring + (ring->hw_head * HW_ATL_B0_RXD_SIZE));
+
+               /* RxD is not done */
+               if (!(hwts_wb->sec_lw0 & 0x1U))
+                       break;
+
+               ring->hw_head = aq_ring_next_dx(ring, ring->hw_head);
+       }
+
+       return aq_hw_err_from_flags(self);
+}
+
 static int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
                                            struct aq_ring_s *ring)
 {
@@ -1005,6 +1064,227 @@ static int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self,
        return aq_hw_err_from_flags(self);
 }
 
+static int hw_atl_b0_tx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode)
+{
+       *tc_mode = hw_atl_rpb_tps_tx_tc_mode_get(self);
+       return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_rx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode)
+{
+       *tc_mode = hw_atl_rpb_rpf_rx_traf_class_mode_get(self);
+       return aq_hw_err_from_flags(self);
+}
+
+#define get_ptp_ts_val_u64(self, indx) \
+       ((u64)(hw_atl_pcs_ptp_clock_get(self, indx) & 0xffff))
+
+static void hw_atl_b0_get_ptp_ts(struct aq_hw_s *self, u64 *stamp)
+{
+       u64 ns;
+
+       hw_atl_pcs_ptp_clock_read_enable(self, 1);
+       hw_atl_pcs_ptp_clock_read_enable(self, 0);
+       ns = (get_ptp_ts_val_u64(self, 0) +
+             (get_ptp_ts_val_u64(self, 1) << 16)) * NSEC_PER_SEC +
+            (get_ptp_ts_val_u64(self, 3) +
+             (get_ptp_ts_val_u64(self, 4) << 16));
+
+       *stamp = ns + self->ptp_clk_offset;
+}
+
+static void hw_atl_b0_adj_params_get(u64 freq, s64 adj, u32 *ns, u32 *fns)
+{
+       /* For accuracy, the digit is extended */
+       s64 base_ns = ((adj + NSEC_PER_SEC) * NSEC_PER_SEC);
+       u64 nsi_frac = 0;
+       u64 nsi;
+
+       base_ns = div64_s64(base_ns, freq);
+       nsi = div64_u64(base_ns, NSEC_PER_SEC);
+
+       if (base_ns != nsi * NSEC_PER_SEC) {
+               s64 divisor = div64_s64((s64)NSEC_PER_SEC * NSEC_PER_SEC,
+                                       base_ns - nsi * NSEC_PER_SEC);
+               nsi_frac = div64_s64(FRAC_PER_NS * NSEC_PER_SEC, divisor);
+       }
+
+       *ns = (u32)nsi;
+       *fns = (u32)nsi_frac;
+}
+
+static void
+hw_atl_b0_mac_adj_param_calc(struct hw_fw_request_ptp_adj_freq *ptp_adj_freq,
+                            u64 phyfreq, u64 macfreq)
+{
+       s64 adj_fns_val;
+       s64 fns_in_sec_phy = phyfreq * (ptp_adj_freq->fns_phy +
+                                       FRAC_PER_NS * ptp_adj_freq->ns_phy);
+       s64 fns_in_sec_mac = macfreq * (ptp_adj_freq->fns_mac +
+                                       FRAC_PER_NS * ptp_adj_freq->ns_mac);
+       s64 fault_in_sec_phy = FRAC_PER_NS * NSEC_PER_SEC - fns_in_sec_phy;
+       s64 fault_in_sec_mac = FRAC_PER_NS * NSEC_PER_SEC - fns_in_sec_mac;
+       /* MAC MCP counter freq is macfreq / 4 */
+       s64 diff_in_mcp_overflow = (fault_in_sec_mac - fault_in_sec_phy) *
+                                  4 * FRAC_PER_NS;
+
+       diff_in_mcp_overflow = div64_s64(diff_in_mcp_overflow,
+                                        AQ_HW_MAC_COUNTER_HZ);
+       adj_fns_val = (ptp_adj_freq->fns_mac + FRAC_PER_NS *
+                      ptp_adj_freq->ns_mac) + diff_in_mcp_overflow;
+
+       ptp_adj_freq->mac_ns_adj = div64_s64(adj_fns_val, FRAC_PER_NS);
+       ptp_adj_freq->mac_fns_adj = adj_fns_val - ptp_adj_freq->mac_ns_adj *
+                                   FRAC_PER_NS;
+}
+
+static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta)
+{
+       self->ptp_clk_offset += delta;
+
+       return 0;
+}
+
+static int hw_atl_b0_set_sys_clock(struct aq_hw_s *self, u64 time, u64 ts)
+{
+       s64 delta = time - (self->ptp_clk_offset + ts);
+
+       return hw_atl_b0_adj_sys_clock(self, delta);
+}
+
+static int hw_atl_b0_ts_to_sys_clock(struct aq_hw_s *self, u64 ts, u64 *time)
+{
+       *time = self->ptp_clk_offset + ts;
+       return 0;
+}
+
+static int hw_atl_b0_adj_clock_freq(struct aq_hw_s *self, s32 ppb)
+{
+       struct hw_fw_request_iface fwreq;
+       size_t size;
+
+       memset(&fwreq, 0, sizeof(fwreq));
+
+       fwreq.msg_id = HW_AQ_FW_REQUEST_PTP_ADJ_FREQ;
+       hw_atl_b0_adj_params_get(AQ_HW_MAC_COUNTER_HZ, ppb,
+                                &fwreq.ptp_adj_freq.ns_mac,
+                                &fwreq.ptp_adj_freq.fns_mac);
+       hw_atl_b0_adj_params_get(AQ_HW_PHY_COUNTER_HZ, ppb,
+                                &fwreq.ptp_adj_freq.ns_phy,
+                                &fwreq.ptp_adj_freq.fns_phy);
+       hw_atl_b0_mac_adj_param_calc(&fwreq.ptp_adj_freq,
+                                    AQ_HW_PHY_COUNTER_HZ,
+                                    AQ_HW_MAC_COUNTER_HZ);
+
+       size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_adj_freq);
+       return self->aq_fw_ops->send_fw_request(self, &fwreq, size);
+}
+
+static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index,
+                               u64 start, u32 period)
+{
+       struct hw_fw_request_iface fwreq;
+       size_t size;
+
+       memset(&fwreq, 0, sizeof(fwreq));
+
+       fwreq.msg_id = HW_AQ_FW_REQUEST_PTP_GPIO_CTRL;
+       fwreq.ptp_gpio_ctrl.index = index;
+       fwreq.ptp_gpio_ctrl.period = period;
+       /* Apply time offset */
+       fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset;
+
+       size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl);
+       return self->aq_fw_ops->send_fw_request(self, &fwreq, size);
+}
+
+static int hw_atl_b0_extts_gpio_enable(struct aq_hw_s *self, u32 index,
+                                      u32 enable)
+{
+       /* Enable/disable Sync1588 GPIO Timestamping */
+       aq_phy_write_reg(self, MDIO_MMD_PCS, 0xc611, enable ? 0x71 : 0);
+
+       return 0;
+}
+
+static int hw_atl_b0_get_sync_ts(struct aq_hw_s *self, u64 *ts)
+{
+       u64 sec_l;
+       u64 sec_h;
+       u64 nsec_l;
+       u64 nsec_h;
+
+       if (!ts)
+               return -1;
+
+       /* PTP external GPIO clock seconds count 15:0 */
+       sec_l = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc914);
+       /* PTP external GPIO clock seconds count 31:16 */
+       sec_h = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc915);
+       /* PTP external GPIO clock nanoseconds count 15:0 */
+       nsec_l = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc916);
+       /* PTP external GPIO clock nanoseconds count 31:16 */
+       nsec_h = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc917);
+
+       *ts = (nsec_h << 16) + nsec_l + ((sec_h << 16) + sec_l) * NSEC_PER_SEC;
+
+       return 0;
+}
+
+static u16 hw_atl_b0_rx_extract_ts(struct aq_hw_s *self, u8 *p,
+                                  unsigned int len, u64 *timestamp)
+{
+       unsigned int offset = 14;
+       struct ethhdr *eth;
+       __be64 sec;
+       __be32 ns;
+       u8 *ptr;
+
+       if (len <= offset || !timestamp)
+               return 0;
+
+       /* The TIMESTAMP in the end of package has following format:
+        * (big-endian)
+        *   struct {
+        *     uint64_t sec;
+        *     uint32_t ns;
+        *     uint16_t stream_id;
+        *   };
+        */
+       ptr = p + (len - offset);
+       memcpy(&sec, ptr, sizeof(sec));
+       ptr += sizeof(sec);
+       memcpy(&ns, ptr, sizeof(ns));
+
+       *timestamp = (be64_to_cpu(sec) & 0xffffffffffffllu) * NSEC_PER_SEC +
+                    be32_to_cpu(ns) + self->ptp_clk_offset;
+
+       eth = (struct ethhdr *)p;
+
+       return (eth->h_proto == htons(ETH_P_1588)) ? 12 : 14;
+}
+
+static int hw_atl_b0_extract_hwts(struct aq_hw_s *self, u8 *p, unsigned int len,
+                                 u64 *timestamp)
+{
+       struct hw_atl_rxd_hwts_wb_s *hwts_wb = (struct hw_atl_rxd_hwts_wb_s *)p;
+       u64 tmp, sec, ns;
+
+       sec = 0;
+       tmp = (hwts_wb->sec_lw0 >> 2) & 0x3ff;
+       sec += tmp;
+       tmp = (u64)((hwts_wb->sec_lw1 >> 16) & 0xffff) << 10;
+       sec += tmp;
+       tmp = (u64)(hwts_wb->sec_hw & 0xfff) << 26;
+       sec += tmp;
+       tmp = (u64)((hwts_wb->sec_hw >> 22) & 0x3ff) << 38;
+       sec += tmp;
+       ns = sec * NSEC_PER_SEC + hwts_wb->ns;
+       if (timestamp)
+               *timestamp = ns + self->ptp_clk_offset;
+       return 0;
+}
+
 static int hw_atl_b0_hw_fl3l4_clear(struct aq_hw_s *self,
                                    struct aq_rx_filter_l3l4 *data)
 {
@@ -1038,7 +1318,8 @@ static int hw_atl_b0_hw_fl3l4_set(struct aq_hw_s *self,
 
        hw_atl_b0_hw_fl3l4_clear(self, data);
 
-       if (data->cmd) {
+       if (data->cmd & (HW_ATL_RX_ENABLE_CMP_DEST_ADDR_L3 |
+                        HW_ATL_RX_ENABLE_CMP_SRC_ADDR_L3)) {
                if (!data->is_ipv6) {
                        hw_atl_rpfl3l4_ipv4_dest_addr_set(self,
                                                          location,
@@ -1055,8 +1336,13 @@ static int hw_atl_b0_hw_fl3l4_set(struct aq_hw_s *self,
                                                         data->ip_src);
                }
        }
-       hw_atl_rpf_l4_dpd_set(self, data->p_dst, location);
-       hw_atl_rpf_l4_spd_set(self, data->p_src, location);
+
+       if (data->cmd & (HW_ATL_RX_ENABLE_CMP_DEST_PORT_L4 |
+                        HW_ATL_RX_ENABLE_CMP_SRC_PORT_L4)) {
+               hw_atl_rpf_l4_dpd_set(self, data->p_dst, location);
+               hw_atl_rpf_l4_spd_set(self, data->p_src, location);
+       }
+
        hw_atl_rpfl3l4_cmd_set(self, location, data->cmd);
 
        return aq_hw_err_from_flags(self);
@@ -1177,6 +1463,23 @@ const struct aq_hw_ops hw_atl_ops_b0 = {
        .hw_get_regs                 = hw_atl_utils_hw_get_regs,
        .hw_get_hw_stats             = hw_atl_utils_get_hw_stats,
        .hw_get_fw_version           = hw_atl_utils_get_fw_version,
-       .hw_set_offload              = hw_atl_b0_hw_offload_set,
+
+       .hw_tx_tc_mode_get       = hw_atl_b0_tx_tc_mode_get,
+       .hw_rx_tc_mode_get       = hw_atl_b0_rx_tc_mode_get,
+
+       .hw_ring_hwts_rx_fill        = hw_atl_b0_hw_ring_hwts_rx_fill,
+       .hw_ring_hwts_rx_receive     = hw_atl_b0_hw_ring_hwts_rx_receive,
+
+       .hw_get_ptp_ts           = hw_atl_b0_get_ptp_ts,
+       .hw_adj_sys_clock        = hw_atl_b0_adj_sys_clock,
+       .hw_set_sys_clock        = hw_atl_b0_set_sys_clock,
+       .hw_ts_to_sys_clock      = hw_atl_b0_ts_to_sys_clock,
+       .hw_adj_clock_freq       = hw_atl_b0_adj_clock_freq,
+       .hw_gpio_pulse           = hw_atl_b0_gpio_pulse,
+       .hw_extts_gpio_enable    = hw_atl_b0_extts_gpio_enable,
+       .hw_get_sync_ts          = hw_atl_b0_get_sync_ts,
+       .rx_extract_ts           = hw_atl_b0_rx_extract_ts,
+       .extract_hwts            = hw_atl_b0_extract_hwts,
+       .hw_set_offload          = hw_atl_b0_hw_offload_set,
        .hw_set_fc                   = hw_atl_b0_set_fc,
 };
index 808d8cd..7ab23a1 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_b0_internal.h: Definition of Atlantic B0 chip specific
 #define HW_ATL_B0_MPI_SPEED_MSK         0xFFFFU
 #define HW_ATL_B0_MPI_SPEED_SHIFT       16U
 
-#define HW_ATL_B0_TXBUF_MAX  160U
-#define HW_ATL_B0_RXBUF_MAX  320U
+#define HW_ATL_B0_TXBUF_MAX              160U
+#define HW_ATL_B0_PTP_TXBUF_SIZE           8U
+
+#define HW_ATL_B0_RXBUF_MAX              320U
+#define HW_ATL_B0_PTP_RXBUF_SIZE          16U
 
 #define HW_ATL_B0_RSS_REDIRECTION_MAX 64U
 #define HW_ATL_B0_RSS_REDIRECTION_BITS 3U
index 6f34069..6cadc90 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_llh.c: Definitions of bitfield and register access functions for
@@ -572,6 +572,13 @@ void hw_atl_rpb_rpf_rx_traf_class_mode_set(struct aq_hw_s *aq_hw,
                            rx_traf_class_mode);
 }
 
+u32 hw_atl_rpb_rpf_rx_traf_class_mode_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_RPB_RPF_RX_TC_MODE_ADR,
+                       HW_ATL_RPB_RPF_RX_TC_MODE_MSK,
+                       HW_ATL_RPB_RPF_RX_TC_MODE_SHIFT);
+}
+
 void hw_atl_rpb_rx_buff_en_set(struct aq_hw_s *aq_hw, u32 rx_buff_en)
 {
        aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RX_BUF_EN_ADR,
@@ -636,8 +643,8 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
                            rx_pkt_buff_size_per_tc);
 }
 
-void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
-                                     u32 buffer)
+void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw,
+                                     u32 rx_xoff_en_per_tc, u32 buffer)
 {
        aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RXBXOFF_EN_ADR(buffer),
                            HW_ATL_RPB_RXBXOFF_EN_MSK,
@@ -1290,6 +1297,13 @@ void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en)
                            HW_ATL_TPB_TX_BUF_EN_SHIFT, tx_buff_en);
 }
 
+u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
+                       HW_ATL_TPB_TX_TC_MODE_MSK,
+                       HW_ATL_TPB_TX_TC_MODE_SHIFT);
+}
+
 void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
                                   u32 tx_traf_class_mode)
 {
@@ -1526,6 +1540,20 @@ void hw_atl_reg_glb_cpu_scratch_scp_set(struct aq_hw_s *aq_hw,
                        glb_cpu_scratch_scp);
 }
 
+void hw_atl_pcs_ptp_clock_read_enable(struct aq_hw_s *aq_hw,
+                                     u32 ptp_clock_read_enable)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_ADR,
+                           HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_MSK,
+                           HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_SHIFT,
+                           ptp_clock_read_enable);
+}
+
+u32 hw_atl_pcs_ptp_clock_get(struct aq_hw_s *aq_hw, u32 index)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_PCS_PTP_TS_VAL_ADDR(index));
+}
+
 void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr)
 {
        aq_hw_write_reg_bit(aq_hw, HW_ATL_MCP_UP_FORCE_INTERRUPT_ADR,
@@ -1616,6 +1644,11 @@ u32 hw_atl_sem_ram_get(struct aq_hw_s *self)
        return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
 }
 
+u32 hw_atl_sem_mdio_get(struct aq_hw_s *self)
+{
+       return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_MDIO);
+}
+
 u32 hw_atl_scrpad_get(struct aq_hw_s *aq_hw, u32 scratch_scp)
 {
        return aq_hw_read_reg(aq_hw,
@@ -1631,3 +1664,60 @@ u32 hw_atl_scrpad25_get(struct aq_hw_s *self)
 {
        return hw_atl_scrpad_get(self, 0x18);
 }
+
+void hw_atl_glb_mdio_iface1_set(struct aq_hw_s *aq_hw, u32 value)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(1), value);
+}
+
+u32 hw_atl_glb_mdio_iface1_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(1));
+}
+
+void hw_atl_glb_mdio_iface2_set(struct aq_hw_s *aq_hw, u32 value)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(2), value);
+}
+
+u32 hw_atl_glb_mdio_iface2_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(2));
+}
+
+void hw_atl_glb_mdio_iface3_set(struct aq_hw_s *aq_hw, u32 value)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(3), value);
+}
+
+u32 hw_atl_glb_mdio_iface3_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(3));
+}
+
+void hw_atl_glb_mdio_iface4_set(struct aq_hw_s *aq_hw, u32 value)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(4), value);
+}
+
+u32 hw_atl_glb_mdio_iface4_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(4));
+}
+
+void hw_atl_glb_mdio_iface5_set(struct aq_hw_s *aq_hw, u32 value)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(5), value);
+}
+
+u32 hw_atl_glb_mdio_iface5_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(5));
+}
+
+u32 hw_atl_mdio_busy_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_MDIO_BUSY_ADR,
+                                 HW_ATL_MDIO_BUSY_MSK,
+                                 HW_ATL_MDIO_BUSY_SHIFT);
+}
index c3ee278..5750b0c 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_llh.h: Declarations of bitfield and register access functions for
@@ -292,6 +292,9 @@ void hw_atl_rpb_dma_sys_lbk_set(struct aq_hw_s *aq_hw, u32 dma_sys_lbk);
 void hw_atl_rpb_rpf_rx_traf_class_mode_set(struct aq_hw_s *aq_hw,
                                           u32 rx_traf_class_mode);
 
+/* get rx traffic class mode */
+u32 hw_atl_rpb_rpf_rx_traf_class_mode_get(struct aq_hw_s *aq_hw);
+
 /* set rx buffer enable */
 void hw_atl_rpb_rx_buff_en_set(struct aq_hw_s *aq_hw, u32 rx_buff_en);
 
@@ -306,7 +309,8 @@ void hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(struct aq_hw_s *aq_hw,
                                         u32 buffer);
 
 /* set rx flow control mode */
-void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode);
+void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw,
+                                    u32 rx_flow_ctl_mode);
 
 /* set rx packet buffer size (per tc) */
 void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
@@ -320,7 +324,8 @@ void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw);
 u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw);
 
 /* set rx xoff enable (per tc) */
-void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
+void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw,
+                                     u32 rx_xoff_en_per_tc,
                                      u32 buffer);
 
 /* rpf */
@@ -605,6 +610,9 @@ void hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(struct aq_hw_s *aq_hw,
 void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
                                   u32 tx_traf_class_mode);
 
+/* get TX Traffic Class Mode */
+u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw);
+
 /* set tx buffer enable */
 void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en);
 
@@ -623,7 +631,8 @@ void hw_atl_tpb_tx_dma_sys_lbk_en_set(struct aq_hw_s *aq_hw, u32 tx_dma_sys_lbk_
 
 /* set tx packet buffer size (per tc) */
 void hw_atl_tpb_tx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
-                                           u32 tx_pkt_buff_size_per_tc, u32 buffer);
+                                           u32 tx_pkt_buff_size_per_tc,
+                                           u32 buffer);
 
 /* set tx path pad insert enable */
 void hw_atl_tpb_tx_path_scp_ins_en_set(struct aq_hw_s *aq_hw, u32 tx_path_scp_ins_en);
@@ -715,6 +724,12 @@ void hw_atl_msm_reg_wr_strobe_set(struct aq_hw_s *aq_hw, u32 reg_wr_strobe);
 /* set pci register reset disable */
 void hw_atl_pci_pci_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 pci_reg_res_dis);
 
+/* pcs */
+void hw_atl_pcs_ptp_clock_read_enable(struct aq_hw_s *aq_hw,
+                                     u32 ptp_clock_read_enable);
+
+u32 hw_atl_pcs_ptp_clock_get(struct aq_hw_s *aq_hw, u32 index);
+
 /* set uP Force Interrupt */
 void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr);
 
@@ -752,9 +767,44 @@ void hw_atl_rpfl3l4_ipv6_src_addr_set(struct aq_hw_s *aq_hw, u8 location,
 void hw_atl_rpfl3l4_ipv6_dest_addr_set(struct aq_hw_s *aq_hw, u8 location,
                                       u32 *ipv6_dest);
 
+/* set Global MDIO Interface 1 */
+void hw_atl_glb_mdio_iface1_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 1 */
+u32 hw_atl_glb_mdio_iface1_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 2 */
+void hw_atl_glb_mdio_iface2_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 2 */
+u32 hw_atl_glb_mdio_iface2_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 3 */
+void hw_atl_glb_mdio_iface3_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 3 */
+u32 hw_atl_glb_mdio_iface3_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 4 */
+void hw_atl_glb_mdio_iface4_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 4 */
+u32 hw_atl_glb_mdio_iface4_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 5 */
+void hw_atl_glb_mdio_iface5_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 5 */
+u32 hw_atl_glb_mdio_iface5_get(struct aq_hw_s *hw);
+
+u32 hw_atl_mdio_busy_get(struct aq_hw_s *aq_hw);
+
 /* get global microprocessor ram semaphore */
 u32 hw_atl_sem_ram_get(struct aq_hw_s *self);
 
+/* get global microprocessor mdio semaphore */
+u32 hw_atl_sem_mdio_get(struct aq_hw_s *self);
+
 /* get global microprocessor scratch pad register */
 u32 hw_atl_scrpad_get(struct aq_hw_s *aq_hw, u32 scratch_scp);
 
index 35887ad..ec3bcdc 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_llh_internal.h: Preprocessor definitions
 /* default value of bitfield et_val{f}[f:0] */
 #define HW_ATL_RPF_ET_VALF_DEFAULT 0x0
 
+/* RX l3_l4_en{F} Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_l4_en{F}".
+ * Parameter: filter {F} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_l4_en_i[0]"
+ */
+
+#define HW_ATL_RPF_L3_REG_CTRL_ADR(filter) (0x00005380 + (filter) * 0x4)
+
+/* RX rpf_l3_sa{D}[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_sa{D}[1F:0]".
+ * Parameter: location {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_sa0_i[31:0]"
+ */
+
+/* Register address for bitfield pif_rpf_l3_sa0_i[31:0] */
+#define HW_ATL_RPF_L3_SRCA_ADR(filter) (0x000053B0 + (filter) * 0x4)
+/* Bitmask for bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_MSKN 0xFFFFFFFFu
+/* Lower bit position of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_SHIFT 0
+/* Width of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_WIDTH 32
+/* Default value of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_DEFAULT 0x0
+
+/* RX rpf_l3_da{D}[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_da{D}[1F:0]".
+ * Parameter: location {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_da0_i[31:0]"
+ */
+
+ /* Register address for bitfield pif_rpf_l3_da0_i[31:0] */
+#define HW_ATL_RPF_L3_DSTA_ADR(filter) (0x000053B0 + (filter) * 0x4)
+/* Bitmask for bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_MSKN 0xFFFFFFFFu
+/* Lower bit position of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_SHIFT 0
+/* Width of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_WIDTH 32
+/* Default value of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_DEFAULT 0x0
+
 /* RX l4_sp{D}[F:0] Bitfield Definitions
  * Preprocessor definitions for the bitfield "l4_sp{D}[F:0]".
  * Parameter: srcport {D} | stride size 0x4 | range [0, 7]
 /* default value of bitfield register write strobe */
 #define HW_ATL_MSM_REG_WR_STROBE_DEFAULT 0x0
 
+/* register address for bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_ADR 0x00004628
+/* bitmask for bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_MSK 0x00000010
+/* inverted bitmask for bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_MSKN 0xFFFFFFEF
+/* lower bit position of bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_SHIFT 4
+/* width of bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_WIDTH 1
+/* default value of bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_DEFAULT 0x0
+
+/* register address for ptp counter reading */
+#define HW_ATL_PCS_PTP_TS_VAL_ADDR(index) (0x00004900 + (index) * 0x4)
+
 /* mif soft reset bitfield definitions
  * preprocessor definitions for the bitfield "soft reset".
  * port="pif_glb_res_i"
 /* default value of bitfield uP Force Interrupt */
 #define HW_ATL_MCP_UP_FORCE_INTERRUPT_DEFAULT 0x0
 
-#define HW_ATL_RX_CTRL_ADDR_BEGIN_FL3L4   0x00005380
-#define HW_ATL_RX_SRCA_ADDR_BEGIN_FL3L4   0x000053B0
-#define HW_ATL_RX_DESTA_ADDR_BEGIN_FL3L4  0x000053D0
-
-#define HW_ATL_RPF_L3_REG_CTRL_ADR(location) (0x00005380 + (location) * 0x4)
-
-/* RX rpf_l3_sa{D}[1F:0] Bitfield Definitions
- * Preprocessor definitions for the bitfield "l3_sa{D}[1F:0]".
- * Parameter: location {D} | stride size 0x4 | range [0, 7]
- * PORT="pif_rpf_l3_sa0_i[31:0]"
- */
-
-/* Register address for bitfield pif_rpf_l3_sa0_i[31:0] */
-#define HW_ATL_RPF_L3_SRCA_ADR(location) (0x000053B0 + (location) * 0x4)
-/* Bitmask for bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_MSK 0xFFFFFFFFu
-/* Inverted bitmask for bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_MSKN 0xFFFFFFFFu
-/* Lower bit position of bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_SHIFT 0
-/* Width of bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_WIDTH 32
-/* Default value of bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_DEFAULT 0x0
-
-/* RX rpf_l3_da{D}[1F:0] Bitfield Definitions
- * Preprocessor definitions for the bitfield "l3_da{D}[1F:0]".
- * Parameter: location {D} | stride size 0x4 | range [0, 7]
- * PORT="pif_rpf_l3_da0_i[31:0]"
- */
-
- /* Register address for bitfield pif_rpf_l3_da0_i[31:0] */
-#define HW_ATL_RPF_L3_DSTA_ADR(location) (0x000053B0 + (location) * 0x4)
-/* Bitmask for bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_MSK 0xFFFFFFFFu
-/* Inverted bitmask for bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_MSKN 0xFFFFFFFFu
-/* Lower bit position of bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_SHIFT 0
-/* Width of bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_WIDTH 32
-/* Default value of bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_DEFAULT 0x0
-
+/* Preprocessor definitions for Global MDIO Interfaces
+ * Address: 0x00000280 + 0x4 * Number of interface
+ */
+#define HW_ATL_GLB_MDIO_IFACE_ADDR_BEGIN   0x00000280u
+
+#define HW_ATL_GLB_MDIO_IFACE_N_ADR(number) \
+       (HW_ATL_GLB_MDIO_IFACE_ADDR_BEGIN + (((number) - 1) * 0x4))
+
+/* MIF MDIO Busy Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO Busy".
+ * PORT="mdio_pif_busy_o"
+ */
+
+/* Register address for bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_ADR 0x00000284
+/* Bitmask for bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_MSK 0x80000000
+/* Inverted bitmask for bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_MSKN 0x7FFFFFFF
+/* Lower bit position of bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_SHIFT 31
+/* Width of bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_WIDTH 1
+
+/* MIF MDIO Execute Operation Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO Execute Operation".
+ * PORT="pif_mdio_op_start_i"
+ */
+
+/* Register address for bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_ADR 0x00000284
+/* Bitmask for bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_MSK 0x00008000
+/* Inverted bitmask for bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_MSKN 0xFFFF7FFF
+/* Lower bit position of bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_SHIFT 15
+/* Width of bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_WIDTH 1
+/* Default value of bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_DEFAULT 0x0
+
+/* MIF Op Mode [1:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "Op Mode [1:0]".
+ * PORT="pif_mdio_mode_i[1:0]"
+ */
+
+/* Register address for bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_ADR 0x00000284
+/* Bitmask for bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_MSK 0x00003000
+/* Inverted bitmask for bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_MSKN 0xFFFFCFFF
+/* Lower bit position of bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_SHIFT 12
+/* Width of bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_WIDTH 2
+/* Default value of bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_DEFAULT 0x0
+
+/* MIF PHY address Bitfield Definitions
+ * Preprocessor definitions for the bitfield "PHY address".
+ * PORT="pif_mdio_phy_addr_i[9:0]"
+ */
+
+/* Register address for bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_ADR 0x00000284
+/* Bitmask for bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_MSK 0x000003FF
+/* Inverted bitmask for bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_MSKN 0xFFFFFC00
+/* Lower bit position of bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_SHIFT 0
+/* Width of bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_WIDTH 10
+/* Default value of bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_DEFAULT 0x0
+
+/* MIF MDIO WriteData [F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO WriteData [F:0]".
+ * PORT="pif_mdio_wdata_i[15:0]"
+ */
+
+/* Register address for bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_ADR 0x00000288
+/* Bitmask for bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_MSK 0x0000FFFF
+/* Inverted bitmask for bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_MSKN 0xFFFF0000
+/* Lower bit position of bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_SHIFT 0
+/* Width of bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_WIDTH 16
+/* Default value of bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_DEFAULT 0x0
+
+/* MIF MDIO Address [F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO Address [F:0]".
+ * PORT="pif_mdio_addr_i[15:0]"
+ */
+
+/* Register address for bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_ADR 0x0000028C
+/* Bitmask for bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_MSK 0x0000FFFF
+/* Inverted bitmask for bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_MSKN 0xFFFF0000
+/* Lower bit position of bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_SHIFT 0
+/* Width of bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_WIDTH 16
+/* Default value of bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_DEFAULT 0x0
+
+#define HW_ATL_FW_SM_MDIO       0x0U
 #define HW_ATL_FW_SM_RAM        0x2U
 
 #endif /* HW_ATL_LLH_INTERNAL_H */
index 5264685..6fc5640 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_utils.c: Definition of common functions for Atlantic hardware
@@ -327,8 +327,7 @@ err_exit:
        return err;
 }
 
-static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p,
-                                        u32 cnt)
+int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p, u32 cnt)
 {
        u32 val;
        int err = 0;
@@ -964,4 +963,6 @@ const struct aq_fw_ops aq_fw_1x_ops = {
        .set_eee_rate = NULL,
        .get_eee_rate = NULL,
        .set_flow_control = NULL,
+       .send_fw_request = NULL,
+       .enable_ptp = NULL,
 };
index 692bed7..ee11b10 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_utils.h: Declaration of common functions for Atlantic hardware
@@ -41,7 +41,15 @@ struct __packed hw_atl_rxd_wb_s {
        u16 status;
        u16 pkt_len;
        u16 next_desc_ptr;
-       u16 vlan;
+       __le16 vlan;
+};
+
+/* Hardware rx HW TIMESTAMP writeback */
+struct __packed hw_atl_rxd_hwts_wb_s {
+       u32 sec_hw;
+       u32 ns;
+       u32 sec_lw0;
+       u32 sec_lw1;
 };
 
 struct __packed hw_atl_stats_s {
@@ -168,6 +176,34 @@ struct __packed hw_atl_utils_mbox_header {
        u32 error;
 };
 
+struct __packed hw_aq_ptp_offset {
+       u16 ingress_100;
+       u16 egress_100;
+       u16 ingress_1000;
+       u16 egress_1000;
+       u16 ingress_2500;
+       u16 egress_2500;
+       u16 ingress_5000;
+       u16 egress_5000;
+       u16 ingress_10000;
+       u16 egress_10000;
+};
+
+enum gpio_pin_function {
+       GPIO_PIN_FUNCTION_NC,
+       GPIO_PIN_FUNCTION_VAUX_ENABLE,
+       GPIO_PIN_FUNCTION_EFUSE_BURN_ENABLE,
+       GPIO_PIN_FUNCTION_SFP_PLUS_DETECT,
+       GPIO_PIN_FUNCTION_TX_DISABLE,
+       GPIO_PIN_FUNCTION_RATE_SEL_0,
+       GPIO_PIN_FUNCTION_RATE_SEL_1,
+       GPIO_PIN_FUNCTION_TX_FAULT,
+       GPIO_PIN_FUNCTION_PTP0,
+       GPIO_PIN_FUNCTION_PTP1,
+       GPIO_PIN_FUNCTION_PTP2,
+       GPIO_PIN_FUNCTION_SIZE
+};
+
 struct __packed hw_aq_info {
        u8 reserved[6];
        u16 phy_fault_code;
@@ -175,9 +211,23 @@ struct __packed hw_aq_info {
        u8 cable_len;
        u8 reserved1;
        u32 cable_diag_data[4];
-       u8 reserved2[32];
+       struct hw_aq_ptp_offset ptp_offset;
+       u8 reserved2[12];
        u32 caps_lo;
        u32 caps_hi;
+       u32 reserved_datapath;
+       u32 reserved3[7];
+       u32 reserved_simpleresp[3];
+       u32 reserved_linkstat[7];
+       u32 reserved_wakes_count;
+       u32 reserved_eee_stat[12];
+       u32 tx_stuck_cnt;
+       u32 setting_address;
+       u32 setting_length;
+       u32 caps_ex;
+       enum gpio_pin_function gpio_pin[3];
+       u32 pcie_aer_dump[18];
+       u16 snr_margin[4];
 };
 
 struct __packed hw_atl_utils_mbox {
@@ -237,6 +287,42 @@ struct __packed offload_info {
        u8 buf[0];
 };
 
+/* Mailbox FW Request interface */
+struct __packed hw_fw_request_ptp_gpio_ctrl {
+       u32 index;
+       u32 period;
+       u64 start;
+};
+
+struct __packed hw_fw_request_ptp_adj_freq {
+       u32 ns_mac;
+       u32 fns_mac;
+       u32 ns_phy;
+       u32 fns_phy;
+       u32 mac_ns_adj;
+       u32 mac_fns_adj;
+};
+
+struct __packed hw_fw_request_ptp_adj_clock {
+       u32 ns;
+       u32 sec;
+       int sign;
+};
+
+#define HW_AQ_FW_REQUEST_PTP_GPIO_CTRL          0x11
+#define HW_AQ_FW_REQUEST_PTP_ADJ_FREQ           0x12
+#define HW_AQ_FW_REQUEST_PTP_ADJ_CLOCK          0x13
+
+struct __packed hw_fw_request_iface {
+       u32 msg_id;
+       union {
+               /* PTP FW Request */
+               struct hw_fw_request_ptp_gpio_ctrl ptp_gpio_ctrl;
+               struct hw_fw_request_ptp_adj_freq ptp_adj_freq;
+               struct hw_fw_request_ptp_adj_clock ptp_adj_clock;
+       };
+};
+
 enum hw_atl_rx_action_with_traffic {
        HW_ATL_RX_DISCARD,
        HW_ATL_RX_HOST,
@@ -344,91 +430,135 @@ enum hw_atl_fw2x_rate {
        FW2X_RATE_10G     = 0x800,
 };
 
+/* 0x370
+ * Link capabilities resolution register
+ */
 enum hw_atl_fw2x_caps_lo {
-       CAPS_LO_10BASET_HD = 0x00,
+       CAPS_LO_10BASET_HD        = 0,
        CAPS_LO_10BASET_FD,
        CAPS_LO_100BASETX_HD,
        CAPS_LO_100BASET4_HD,
        CAPS_LO_100BASET2_HD,
-       CAPS_LO_100BASETX_FD,
+       CAPS_LO_100BASETX_FD      = 5,
        CAPS_LO_100BASET2_FD,
        CAPS_LO_1000BASET_HD,
        CAPS_LO_1000BASET_FD,
        CAPS_LO_2P5GBASET_FD,
-       CAPS_LO_5GBASET_FD,
+       CAPS_LO_5GBASET_FD        = 10,
        CAPS_LO_10GBASET_FD,
 };
 
+/* 0x374
+ * Status register
+ */
 enum hw_atl_fw2x_caps_hi {
-       CAPS_HI_RESERVED1 = 0x00,
+       CAPS_HI_RESERVED1         = 0,
        CAPS_HI_10BASET_EEE,
        CAPS_HI_RESERVED2,
        CAPS_HI_PAUSE,
        CAPS_HI_ASYMMETRIC_PAUSE,
-       CAPS_HI_100BASETX_EEE,
+       CAPS_HI_100BASETX_EEE     = 5,
        CAPS_HI_RESERVED3,
        CAPS_HI_RESERVED4,
        CAPS_HI_1000BASET_FD_EEE,
        CAPS_HI_2P5GBASET_FD_EEE,
-       CAPS_HI_5GBASET_FD_EEE,
+       CAPS_HI_5GBASET_FD_EEE    = 10,
        CAPS_HI_10GBASET_FD_EEE,
-       CAPS_HI_RESERVED5,
+       CAPS_HI_FW_REQUEST,
        CAPS_HI_RESERVED6,
        CAPS_HI_RESERVED7,
-       CAPS_HI_RESERVED8,
+       CAPS_HI_RESERVED8         = 15,
        CAPS_HI_RESERVED9,
        CAPS_HI_CABLE_DIAG,
        CAPS_HI_TEMPERATURE,
        CAPS_HI_DOWNSHIFT,
-       CAPS_HI_PTP_AVB_EN,
+       CAPS_HI_PTP_AVB_EN_FW2X   = 20,
        CAPS_HI_MEDIA_DETECT,
        CAPS_HI_LINK_DROP,
        CAPS_HI_SLEEP_PROXY,
        CAPS_HI_WOL,
-       CAPS_HI_MAC_STOP,
+       CAPS_HI_MAC_STOP          = 25,
        CAPS_HI_EXT_LOOPBACK,
        CAPS_HI_INT_LOOPBACK,
        CAPS_HI_EFUSE_AGENT,
        CAPS_HI_WOL_TIMER,
-       CAPS_HI_STATISTICS,
+       CAPS_HI_STATISTICS        = 30,
        CAPS_HI_TRANSACTION_ID,
 };
 
+/* 0x36C
+ * Control register
+ */
 enum hw_atl_fw2x_ctrl {
-       CTRL_RESERVED1 = 0x00,
+       CTRL_RESERVED1            = 0,
        CTRL_RESERVED2,
        CTRL_RESERVED3,
        CTRL_PAUSE,
        CTRL_ASYMMETRIC_PAUSE,
-       CTRL_RESERVED4,
+       CTRL_RESERVED4            = 5,
        CTRL_RESERVED5,
        CTRL_RESERVED6,
        CTRL_1GBASET_FD_EEE,
        CTRL_2P5GBASET_FD_EEE,
-       CTRL_5GBASET_FD_EEE,
+       CTRL_5GBASET_FD_EEE       = 10,
        CTRL_10GBASET_FD_EEE,
        CTRL_THERMAL_SHUTDOWN,
        CTRL_PHY_LOGS,
        CTRL_EEE_AUTO_DISABLE,
-       CTRL_PFC,
+       CTRL_PFC                  = 15,
        CTRL_WAKE_ON_LINK,
        CTRL_CABLE_DIAG,
        CTRL_TEMPERATURE,
        CTRL_DOWNSHIFT,
-       CTRL_PTP_AVB,
+       CTRL_PTP_AVB              = 20,
        CTRL_RESERVED7,
        CTRL_LINK_DROP,
        CTRL_SLEEP_PROXY,
        CTRL_WOL,
-       CTRL_MAC_STOP,
+       CTRL_MAC_STOP             = 25,
        CTRL_EXT_LOOPBACK,
        CTRL_INT_LOOPBACK,
        CTRL_RESERVED8,
        CTRL_WOL_TIMER,
-       CTRL_STATISTICS,
+       CTRL_STATISTICS           = 30,
        CTRL_FORCE_RECONNECT,
 };
 
+enum hw_atl_caps_ex {
+       CAPS_EX_LED_CONTROL       =  0,
+       CAPS_EX_LED0_MODE_LO,
+       CAPS_EX_LED0_MODE_HI,
+       CAPS_EX_LED1_MODE_LO,
+       CAPS_EX_LED1_MODE_HI,
+       CAPS_EX_LED2_MODE_LO      =  5,
+       CAPS_EX_LED2_MODE_HI,
+       CAPS_EX_RESERVED07,
+       CAPS_EX_RESERVED08,
+       CAPS_EX_RESERVED09,
+       CAPS_EX_RESERVED10        = 10,
+       CAPS_EX_RESERVED11,
+       CAPS_EX_RESERVED12,
+       CAPS_EX_RESERVED13,
+       CAPS_EX_RESERVED14,
+       CAPS_EX_RESERVED15        = 15,
+       CAPS_EX_PHY_PTP_EN,
+       CAPS_EX_MAC_PTP_EN,
+       CAPS_EX_EXT_CLK_EN,
+       CAPS_EX_SCHED_DMA_EN,
+       CAPS_EX_PTP_GPIO_EN       = 20,
+       CAPS_EX_UPDATE_SETTINGS,
+       CAPS_EX_PHY_CTRL_TS_PIN,
+       CAPS_EX_SNR_OPERATING_MARGIN,
+       CAPS_EX_RESERVED24,
+       CAPS_EX_RESERVED25        = 25,
+       CAPS_EX_RESERVED26,
+       CAPS_EX_RESERVED27,
+       CAPS_EX_RESERVED28,
+       CAPS_EX_RESERVED29,
+       CAPS_EX_RESERVED30        = 30,
+       CAPS_EX_RESERVED31
+};
+
 struct aq_hw_s;
 struct aq_fw_ops;
 struct aq_hw_caps_s;
@@ -475,6 +605,8 @@ struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self);
 int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
                                  u32 *p, u32 cnt);
 
+int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p, u32 cnt);
+
 int hw_atl_utils_fw_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac);
 
 int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size);
index 7bc51f8..f649ac9 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_utils_fw2x.c: Definition of firmware 2.x functions for
 #include "hw_atl_utils.h"
 #include "hw_atl_llh.h"
 
-#define HW_ATL_FW2X_MPI_RPC_ADDR        0x334
+#define HW_ATL_FW2X_MPI_RPC_ADDR         0x334
 
-#define HW_ATL_FW2X_MPI_MBOX_ADDR       0x360
-#define HW_ATL_FW2X_MPI_EFUSE_ADDR     0x364
-#define HW_ATL_FW2X_MPI_CONTROL_ADDR   0x368
-#define HW_ATL_FW2X_MPI_CONTROL2_ADDR  0x36C
-#define HW_ATL_FW2X_MPI_STATE_ADDR     0x370
-#define HW_ATL_FW2X_MPI_STATE2_ADDR     0x374
+#define HW_ATL_FW2X_MPI_MBOX_ADDR        0x360
+#define HW_ATL_FW2X_MPI_EFUSE_ADDR       0x364
+#define HW_ATL_FW2X_MPI_CONTROL_ADDR     0x368
+#define HW_ATL_FW2X_MPI_CONTROL2_ADDR    0x36C
+#define HW_ATL_FW2X_MPI_STATE_ADDR       0x370
+#define HW_ATL_FW2X_MPI_STATE2_ADDR      0x374
+
+#define HW_ATL_FW3X_EXT_CONTROL_ADDR     0x378
+#define HW_ATL_FW3X_EXT_STATE_ADDR       0x37c
 
 #define HW_ATL_FW2X_CAP_PAUSE            BIT(CAPS_HI_PAUSE)
 #define HW_ATL_FW2X_CAP_ASYM_PAUSE       BIT(CAPS_HI_ASYMMETRIC_PAUSE)
@@ -444,6 +447,54 @@ err_exit:
        return err;
 }
 
+static int aq_fw2x_send_fw_request(struct aq_hw_s *self,
+                                  const struct hw_fw_request_iface *fw_req,
+                                  size_t size)
+{
+       u32 ctrl2, orig_ctrl2;
+       u32 dword_cnt;
+       int err = 0;
+       u32 val;
+
+       /* Write data to drvIface Mailbox */
+       dword_cnt = size / sizeof(u32);
+       if (size % sizeof(u32))
+               dword_cnt++;
+       err = hw_atl_utils_fw_upload_dwords(self, aq_fw2x_rpc_get(self),
+                                           (void *)fw_req, dword_cnt);
+       if (err < 0)
+               goto err_exit;
+
+       /* Toggle statistics bit for FW to update */
+       ctrl2 = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+       orig_ctrl2 = ctrl2 & BIT(CAPS_HI_FW_REQUEST);
+       ctrl2 = ctrl2 ^ BIT(CAPS_HI_FW_REQUEST);
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, ctrl2);
+
+       /* Wait FW to report back */
+       err = readx_poll_timeout_atomic(aq_fw2x_state2_get, self, val,
+                                       orig_ctrl2 != (val &
+                                                      BIT(CAPS_HI_FW_REQUEST)),
+                                       1U, 10000U);
+
+err_exit:
+       return err;
+}
+
+static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable)
+{
+       u32 ptp_opts = aq_hw_read_reg(self, HW_ATL_FW3X_EXT_STATE_ADDR);
+       u32 all_ptp_features = BIT(CAPS_EX_PHY_PTP_EN) |
+                                                  BIT(CAPS_EX_PTP_GPIO_EN);
+
+       if (enable)
+               ptp_opts |= all_ptp_features;
+       else
+               ptp_opts &= ~all_ptp_features;
+
+       aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts);
+}
+
 static int aq_fw2x_set_eee_rate(struct aq_hw_s *self, u32 speed)
 {
        u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
@@ -534,19 +585,21 @@ static u32 aq_fw2x_state2_get(struct aq_hw_s *self)
 }
 
 const struct aq_fw_ops aq_fw_2x_ops = {
-       .init = aq_fw2x_init,
-       .deinit = aq_fw2x_deinit,
-       .reset = NULL,
-       .renegotiate = aq_fw2x_renegotiate,
-       .get_mac_permanent = aq_fw2x_get_mac_permanent,
-       .set_link_speed = aq_fw2x_set_link_speed,
-       .set_state = aq_fw2x_set_state,
+       .init               = aq_fw2x_init,
+       .deinit             = aq_fw2x_deinit,
+       .reset              = NULL,
+       .renegotiate        = aq_fw2x_renegotiate,
+       .get_mac_permanent  = aq_fw2x_get_mac_permanent,
+       .set_link_speed     = aq_fw2x_set_link_speed,
+       .set_state          = aq_fw2x_set_state,
        .update_link_status = aq_fw2x_update_link_status,
-       .update_stats = aq_fw2x_update_stats,
-       .get_phy_temp = aq_fw2x_get_phy_temp,
-       .set_power = aq_fw2x_set_power,
-       .set_eee_rate = aq_fw2x_set_eee_rate,
-       .get_eee_rate = aq_fw2x_get_eee_rate,
-       .set_flow_control = aq_fw2x_set_flow_control,
-       .get_flow_control = aq_fw2x_get_flow_control
+       .update_stats       = aq_fw2x_update_stats,
+       .get_phy_temp       = aq_fw2x_get_phy_temp,
+       .set_power          = aq_fw2x_set_power,
+       .set_eee_rate       = aq_fw2x_set_eee_rate,
+       .get_eee_rate       = aq_fw2x_get_eee_rate,
+       .set_flow_control   = aq_fw2x_set_flow_control,
+       .get_flow_control   = aq_fw2x_get_flow_control,
+       .send_fw_request    = aq_fw2x_send_fw_request,
+       .enable_ptp         = aq_fw3x_enable_ptp,
 };
index 78e52d2..5391661 100644 (file)
 static int emac_arc_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
-       struct net_device *ndev;
        struct arc_emac_priv *priv;
-       int interface, err;
+       phy_interface_t interface;
+       struct net_device *ndev;
+       int err;
 
        if (!dev->of_node)
                return -ENODEV;
@@ -37,9 +38,13 @@ static int emac_arc_probe(struct platform_device *pdev)
        priv->drv_name = DRV_NAME;
        priv->drv_version = DRV_VERSION;
 
-       interface = of_get_phy_mode(dev->of_node);
-       if (interface < 0)
-               interface = PHY_INTERFACE_MODE_MII;
+       err = of_get_phy_mode(dev->of_node, &interface);
+       if (err) {
+               if (err == -ENODEV)
+                       interface = PHY_INTERFACE_MODE_MII;
+               else
+                       goto out_netdev;
+       }
 
        priv->clk = devm_clk_get(dev, "hclk");
        if (IS_ERR(priv->clk)) {
index 664d664..aae231c 100644 (file)
@@ -97,8 +97,9 @@ static int emac_rockchip_probe(struct platform_device *pdev)
        struct net_device *ndev;
        struct rockchip_priv_data *priv;
        const struct of_device_id *match;
+       phy_interface_t interface;
        u32 data;
-       int err, interface;
+       int err;
 
        if (!pdev->dev.of_node)
                return -ENODEV;
@@ -114,7 +115,9 @@ static int emac_rockchip_probe(struct platform_device *pdev)
        priv->emac.drv_version = DRV_VERSION;
        priv->emac.set_mac_speed = emac_rockchip_set_mac_speed;
 
-       interface = of_get_phy_mode(dev->of_node);
+       err = of_get_phy_mode(dev->of_node, &interface);
+       if (err)
+               goto out_netdev;
 
        /* RK3036/RK3066/RK3188 SoCs only support RMII */
        if (interface != PHY_INTERFACE_MODE_RMII) {
index 1b1a090..8f50210 100644 (file)
@@ -1744,10 +1744,9 @@ static int ag71xx_probe(struct platform_device *pdev)
                eth_random_addr(ndev->dev_addr);
        }
 
-       ag->phy_if_mode = of_get_phy_mode(np);
-       if (ag->phy_if_mode < 0) {
+       err = of_get_phy_mode(np, ag->phy_if_mode);
+       if (err) {
                netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
-               err = ag->phy_if_mode;
                goto err_free;
        }
 
index 37752d9..30b4550 100644 (file)
@@ -1371,8 +1371,8 @@ static int nb8800_probe(struct platform_device *pdev)
        priv = netdev_priv(dev);
        priv->base = base;
 
-       priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-       if (priv->phy_mode < 0)
+       ret = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode);
+       if (ret)
                priv->phy_mode = PHY_INTERFACE_MODE_RGMII;
 
        priv->clk = devm_clk_get(&pdev->dev, NULL);
index aacc3cc..40941fb 100644 (file)
@@ -287,7 +287,7 @@ struct nb8800_priv {
        struct device_node              *phy_node;
 
        /* PHY connection type from DT */
-       int                             phy_mode;
+       phy_interface_t                 phy_mode;
 
        /* Current link status */
        int                             speed;
index 97ab0dd..035dbb1 100644 (file)
@@ -511,9 +511,6 @@ static void b44_stats_update(struct b44 *bp)
                *val++ += br32(bp, reg);
        }
 
-       /* Pad */
-       reg += 8*4UL;
-
        for (reg = B44_RX_GOOD_O; reg <= B44_RX_NPAUSE; reg += 4UL) {
                *val++ += br32(bp, reg);
        }
index a977a45..825af70 100644 (file)
@@ -2479,9 +2479,9 @@ static int bcm_sysport_probe(struct platform_device *pdev)
        priv->netdev = dev;
        priv->pdev = pdev;
 
-       priv->phy_interface = of_get_phy_mode(dn);
+       ret = of_get_phy_mode(dn, &priv->phy_interface);
        /* Default to GMII interface mode */
-       if ((int)priv->phy_interface < 0)
+       if (ret)
                priv->phy_interface = PHY_INTERFACE_MODE_GMII;
 
        /* In the case of a fixed PHY, the DT node associated
index d10b421..5e037a3 100644 (file)
@@ -1934,7 +1934,8 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
        }
 
        /* select a non-FCoE queue */
-       return netdev_pick_tx(dev, skb, NULL) % (BNX2X_NUM_ETH_QUEUES(bp));
+       return netdev_pick_tx(dev, skb, NULL) %
+                       (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
index 226ab29..3f84352 100644 (file)
        * IRO[142].m2) + ((sbId) * IRO[142].m3))
 #define CSTORM_IGU_MODE_OFFSET (IRO[161].base)
 #define CSTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \
-       (IRO[323].base + ((pfId) * IRO[323].m1))
-#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
        (IRO[324].base + ((pfId) * IRO[324].m1))
+#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
+       (IRO[325].base + ((pfId) * IRO[325].m1))
 #define CSTORM_ISCSI_EQ_CONS_OFFSET(pfId, iscsiEqId) \
-       (IRO[316].base + ((pfId) * IRO[316].m1) + ((iscsiEqId) * IRO[316].m2))
+       (IRO[317].base + ((pfId) * IRO[317].m1) + ((iscsiEqId) * IRO[317].m2))
 #define CSTORM_ISCSI_EQ_NEXT_EQE_ADDR_OFFSET(pfId, iscsiEqId) \
-       (IRO[318].base + ((pfId) * IRO[318].m1) + ((iscsiEqId) * IRO[318].m2))
+       (IRO[319].base + ((pfId) * IRO[319].m1) + ((iscsiEqId) * IRO[319].m2))
 #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_OFFSET(pfId, iscsiEqId) \
-       (IRO[317].base + ((pfId) * IRO[317].m1) + ((iscsiEqId) * IRO[317].m2))
+       (IRO[318].base + ((pfId) * IRO[318].m1) + ((iscsiEqId) * IRO[318].m2))
 #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_VALID_OFFSET(pfId, iscsiEqId) \
-       (IRO[319].base + ((pfId) * IRO[319].m1) + ((iscsiEqId) * IRO[319].m2))
+       (IRO[320].base + ((pfId) * IRO[320].m1) + ((iscsiEqId) * IRO[320].m2))
 #define CSTORM_ISCSI_EQ_PROD_OFFSET(pfId, iscsiEqId) \
-       (IRO[315].base + ((pfId) * IRO[315].m1) + ((iscsiEqId) * IRO[315].m2))
+       (IRO[316].base + ((pfId) * IRO[316].m1) + ((iscsiEqId) * IRO[316].m2))
 #define CSTORM_ISCSI_EQ_SB_INDEX_OFFSET(pfId, iscsiEqId) \
-       (IRO[321].base + ((pfId) * IRO[321].m1) + ((iscsiEqId) * IRO[321].m2))
+       (IRO[322].base + ((pfId) * IRO[322].m1) + ((iscsiEqId) * IRO[322].m2))
 #define CSTORM_ISCSI_EQ_SB_NUM_OFFSET(pfId, iscsiEqId) \
-       (IRO[320].base + ((pfId) * IRO[320].m1) + ((iscsiEqId) * IRO[320].m2))
+       (IRO[321].base + ((pfId) * IRO[321].m1) + ((iscsiEqId) * IRO[321].m2))
 #define CSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \
-       (IRO[322].base + ((pfId) * IRO[322].m1))
+       (IRO[323].base + ((pfId) * IRO[323].m1))
 #define CSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-       (IRO[314].base + ((pfId) * IRO[314].m1))
+       (IRO[315].base + ((pfId) * IRO[315].m1))
 #define CSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-       (IRO[313].base + ((pfId) * IRO[313].m1))
+       (IRO[314].base + ((pfId) * IRO[314].m1))
 #define CSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-       (IRO[312].base + ((pfId) * IRO[312].m1))
+       (IRO[313].base + ((pfId) * IRO[313].m1))
 #define CSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
        (IRO[155].base + ((funcId) * IRO[155].m1))
 #define CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(pfId) \
 #define TSTORM_FUNC_EN_OFFSET(funcId) \
        (IRO[107].base + ((funcId) * IRO[107].m1))
 #define TSTORM_ISCSI_ERROR_BITMAP_OFFSET(pfId) \
-       (IRO[278].base + ((pfId) * IRO[278].m1))
-#define TSTORM_ISCSI_L2_ISCSI_OOO_CID_TABLE_OFFSET(pfId) \
        (IRO[279].base + ((pfId) * IRO[279].m1))
-#define TSTORM_ISCSI_L2_ISCSI_OOO_CLIENT_ID_TABLE_OFFSET(pfId) \
+#define TSTORM_ISCSI_L2_ISCSI_OOO_CID_TABLE_OFFSET(pfId) \
        (IRO[280].base + ((pfId) * IRO[280].m1))
-#define TSTORM_ISCSI_L2_ISCSI_OOO_PROD_OFFSET(pfId) \
+#define TSTORM_ISCSI_L2_ISCSI_OOO_CLIENT_ID_TABLE_OFFSET(pfId) \
        (IRO[281].base + ((pfId) * IRO[281].m1))
+#define TSTORM_ISCSI_L2_ISCSI_OOO_PROD_OFFSET(pfId) \
+       (IRO[282].base + ((pfId) * IRO[282].m1))
 #define TSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-       (IRO[277].base + ((pfId) * IRO[277].m1))
+       (IRO[278].base + ((pfId) * IRO[278].m1))
 #define TSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-       (IRO[276].base + ((pfId) * IRO[276].m1))
+       (IRO[277].base + ((pfId) * IRO[277].m1))
 #define TSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-       (IRO[275].base + ((pfId) * IRO[275].m1))
+       (IRO[276].base + ((pfId) * IRO[276].m1))
 #define TSTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \
-       (IRO[274].base + ((pfId) * IRO[274].m1))
+       (IRO[275].base + ((pfId) * IRO[275].m1))
 #define TSTORM_ISCSI_TCP_LOCAL_ADV_WND_OFFSET(pfId) \
-       (IRO[284].base + ((pfId) * IRO[284].m1))
+       (IRO[285].base + ((pfId) * IRO[285].m1))
 #define TSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \
-       (IRO[270].base + ((pfId) * IRO[270].m1))
-#define TSTORM_ISCSI_TCP_VARS_LSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
        (IRO[271].base + ((pfId) * IRO[271].m1))
-#define TSTORM_ISCSI_TCP_VARS_MID_LOCAL_MAC_ADDR_OFFSET(pfId) \
+#define TSTORM_ISCSI_TCP_VARS_LSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
        (IRO[272].base + ((pfId) * IRO[272].m1))
-#define TSTORM_ISCSI_TCP_VARS_MSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
+#define TSTORM_ISCSI_TCP_VARS_MID_LOCAL_MAC_ADDR_OFFSET(pfId) \
        (IRO[273].base + ((pfId) * IRO[273].m1))
+#define TSTORM_ISCSI_TCP_VARS_MSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
+       (IRO[274].base + ((pfId) * IRO[274].m1))
 #define TSTORM_MAC_FILTER_CONFIG_OFFSET(pfId) \
        (IRO[206].base + ((pfId) * IRO[206].m1))
 #define TSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
        (IRO[109].base + ((funcId) * IRO[109].m1))
 #define TSTORM_TCP_MAX_CWND_OFFSET(pfId) \
-       (IRO[223].base + ((pfId) * IRO[223].m1))
+       (IRO[224].base + ((pfId) * IRO[224].m1))
 #define TSTORM_VF_TO_PF_OFFSET(funcId) \
        (IRO[108].base + ((funcId) * IRO[108].m1))
-#define USTORM_AGG_DATA_OFFSET (IRO[212].base)
-#define USTORM_AGG_DATA_SIZE (IRO[212].size)
+#define USTORM_AGG_DATA_OFFSET (IRO[213].base)
+#define USTORM_AGG_DATA_SIZE (IRO[213].size)
 #define USTORM_ASSERT_LIST_INDEX_OFFSET        (IRO[181].base)
 #define USTORM_ASSERT_LIST_OFFSET(assertListEntry) \
        (IRO[180].base + ((assertListEntry) * IRO[180].m1))
 #define USTORM_ETH_PAUSE_ENABLED_OFFSET(portId) \
        (IRO[187].base + ((portId) * IRO[187].m1))
 #define USTORM_FCOE_EQ_PROD_OFFSET(pfId) \
-       (IRO[325].base + ((pfId) * IRO[325].m1))
+       (IRO[326].base + ((pfId) * IRO[326].m1))
 #define USTORM_FUNC_EN_OFFSET(funcId) \
        (IRO[182].base + ((funcId) * IRO[182].m1))
 #define USTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \
-       (IRO[289].base + ((pfId) * IRO[289].m1))
-#define USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
        (IRO[290].base + ((pfId) * IRO[290].m1))
+#define USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
+       (IRO[291].base + ((pfId) * IRO[291].m1))
 #define USTORM_ISCSI_ERROR_BITMAP_OFFSET(pfId) \
-       (IRO[294].base + ((pfId) * IRO[294].m1))
+       (IRO[295].base + ((pfId) * IRO[295].m1))
 #define USTORM_ISCSI_GLOBAL_BUF_PHYS_ADDR_OFFSET(pfId) \
-       (IRO[291].base + ((pfId) * IRO[291].m1))
+       (IRO[292].base + ((pfId) * IRO[292].m1))
 #define USTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-       (IRO[287].base + ((pfId) * IRO[287].m1))
+       (IRO[288].base + ((pfId) * IRO[288].m1))
 #define USTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-       (IRO[286].base + ((pfId) * IRO[286].m1))
+       (IRO[287].base + ((pfId) * IRO[287].m1))
 #define USTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-       (IRO[285].base + ((pfId) * IRO[285].m1))
+       (IRO[286].base + ((pfId) * IRO[286].m1))
 #define USTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \
-       (IRO[288].base + ((pfId) * IRO[288].m1))
+       (IRO[289].base + ((pfId) * IRO[289].m1))
 #define USTORM_ISCSI_RQ_BUFFER_SIZE_OFFSET(pfId) \
-       (IRO[292].base + ((pfId) * IRO[292].m1))
-#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \
        (IRO[293].base + ((pfId) * IRO[293].m1))
+#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \
+       (IRO[294].base + ((pfId) * IRO[294].m1))
 #define USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(pfId) \
        (IRO[186].base + ((pfId) * IRO[186].m1))
 #define USTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
        (IRO[184].base + ((funcId) * IRO[184].m1))
 #define USTORM_RX_PRODS_E1X_OFFSET(portId, clientId) \
-       (IRO[215].base + ((portId) * IRO[215].m1) + ((clientId) * \
-       IRO[215].m2))
+       (IRO[216].base + ((portId) * IRO[216].m1) + ((clientId) * \
+       IRO[216].m2))
 #define USTORM_RX_PRODS_E2_OFFSET(qzoneId) \
-       (IRO[216].base + ((qzoneId) * IRO[216].m1))
-#define USTORM_TPA_BTR_OFFSET (IRO[213].base)
-#define USTORM_TPA_BTR_SIZE (IRO[213].size)
+       (IRO[217].base + ((qzoneId) * IRO[217].m1))
+#define USTORM_TPA_BTR_OFFSET (IRO[214].base)
+#define USTORM_TPA_BTR_SIZE (IRO[214].size)
 #define USTORM_VF_TO_PF_OFFSET(funcId) \
        (IRO[183].base + ((funcId) * IRO[183].m1))
 #define XSTORM_AGG_INT_FINAL_CLEANUP_COMP_TYPE (IRO[67].base)
 #define XSTORM_FUNC_EN_OFFSET(funcId) \
        (IRO[47].base + ((funcId) * IRO[47].m1))
 #define XSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \
-       (IRO[302].base + ((pfId) * IRO[302].m1))
+       (IRO[303].base + ((pfId) * IRO[303].m1))
 #define XSTORM_ISCSI_LOCAL_MAC_ADDR0_OFFSET(pfId) \
-       (IRO[305].base + ((pfId) * IRO[305].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \
        (IRO[306].base + ((pfId) * IRO[306].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \
        (IRO[307].base + ((pfId) * IRO[307].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \
        (IRO[308].base + ((pfId) * IRO[308].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \
        (IRO[309].base + ((pfId) * IRO[309].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \
        (IRO[310].base + ((pfId) * IRO[310].m1))
-#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \
        (IRO[311].base + ((pfId) * IRO[311].m1))
+#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \
+       (IRO[312].base + ((pfId) * IRO[312].m1))
 #define XSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-       (IRO[301].base + ((pfId) * IRO[301].m1))
+       (IRO[302].base + ((pfId) * IRO[302].m1))
 #define XSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-       (IRO[300].base + ((pfId) * IRO[300].m1))
+       (IRO[301].base + ((pfId) * IRO[301].m1))
 #define XSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-       (IRO[299].base + ((pfId) * IRO[299].m1))
+       (IRO[300].base + ((pfId) * IRO[300].m1))
 #define XSTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \
-       (IRO[304].base + ((pfId) * IRO[304].m1))
+       (IRO[305].base + ((pfId) * IRO[305].m1))
 #define XSTORM_ISCSI_SQ_SIZE_OFFSET(pfId) \
-       (IRO[303].base + ((pfId) * IRO[303].m1))
+       (IRO[304].base + ((pfId) * IRO[304].m1))
 #define XSTORM_ISCSI_TCP_VARS_ADV_WND_SCL_OFFSET(pfId) \
-       (IRO[298].base + ((pfId) * IRO[298].m1))
+       (IRO[299].base + ((pfId) * IRO[299].m1))
 #define XSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \
-       (IRO[297].base + ((pfId) * IRO[297].m1))
+       (IRO[298].base + ((pfId) * IRO[298].m1))
 #define XSTORM_ISCSI_TCP_VARS_TOS_OFFSET(pfId) \
-       (IRO[296].base + ((pfId) * IRO[296].m1))
+       (IRO[297].base + ((pfId) * IRO[297].m1))
 #define XSTORM_ISCSI_TCP_VARS_TTL_OFFSET(pfId) \
-       (IRO[295].base + ((pfId) * IRO[295].m1))
+       (IRO[296].base + ((pfId) * IRO[296].m1))
 #define XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(pfId) \
        (IRO[44].base + ((pfId) * IRO[44].m1))
 #define XSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
 #define XSTORM_SPQ_PROD_OFFSET(funcId) \
        (IRO[31].base + ((funcId) * IRO[31].m1))
 #define XSTORM_TCP_GLOBAL_DEL_ACK_COUNTER_ENABLED_OFFSET(portId) \
-       (IRO[217].base + ((portId) * IRO[217].m1))
-#define XSTORM_TCP_GLOBAL_DEL_ACK_COUNTER_MAX_COUNT_OFFSET(portId) \
        (IRO[218].base + ((portId) * IRO[218].m1))
+#define XSTORM_TCP_GLOBAL_DEL_ACK_COUNTER_MAX_COUNT_OFFSET(portId) \
+       (IRO[219].base + ((portId) * IRO[219].m1))
 #define XSTORM_TCP_TX_SWS_TIMER_VAL_OFFSET(pfId) \
-       (IRO[220].base + (((pfId)>>1) * IRO[220].m1) + (((pfId)&1) * \
-       IRO[220].m2))
+       (IRO[221].base + (((pfId)>>1) * IRO[221].m1) + (((pfId)&1) * \
+       IRO[221].m2))
 #define XSTORM_VF_TO_PF_OFFSET(funcId) \
        (IRO[48].base + ((funcId) * IRO[48].m1))
 #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
index 78326a6..622fadc 100644 (file)
@@ -3024,7 +3024,7 @@ struct afex_stats {
 
 #define BCM_5710_FW_MAJOR_VERSION                      7
 #define BCM_5710_FW_MINOR_VERSION                      13
-#define BCM_5710_FW_REVISION_VERSION           11
+#define BCM_5710_FW_REVISION_VERSION           15
 #define BCM_5710_FW_ENGINEERING_VERSION                0
 #define BCM_5710_FW_COMPILE_FLAGS                      1
 
index 0edbb0a..5097a44 100644 (file)
@@ -2397,15 +2397,21 @@ static int bnx2x_set_pf_tx_switching(struct bnx2x *bp, bool enable)
        /* send the ramrod on all the queues of the PF */
        for_each_eth_queue(bp, i) {
                struct bnx2x_fastpath *fp = &bp->fp[i];
+               int tx_idx;
 
                /* Set the appropriate Queue object */
                q_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj;
 
-               /* Update the Queue state */
-               rc = bnx2x_queue_state_change(bp, &q_params);
-               if (rc) {
-                       BNX2X_ERR("Failed to configure Tx switching\n");
-                       return rc;
+               for (tx_idx = FIRST_TX_COS_INDEX;
+                    tx_idx < fp->max_cos; tx_idx++) {
+                       q_params.params.update.cid_index = tx_idx;
+
+                       /* Update the Queue state */
+                       rc = bnx2x_queue_state_change(bp, &q_params);
+                       if (rc) {
+                               BNX2X_ERR("Failed to configure Tx switching\n");
+                               return rc;
+                       }
                }
        }
 
index 04ec909..c071724 100644 (file)
@@ -8762,6 +8762,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
        }
        if (resc_reinit || fw_reset) {
                if (fw_reset) {
+                       if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+                               bnxt_ulp_stop(bp);
                        rc = bnxt_fw_init_one(bp);
                        if (rc) {
                                set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
@@ -9224,13 +9226,16 @@ static int bnxt_open(struct net_device *dev)
        if (rc) {
                bnxt_hwrm_if_change(bp, false);
        } else {
-               if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state) &&
-                   BNXT_PF(bp)) {
-                       struct bnxt_pf_info *pf = &bp->pf;
-                       int n = pf->active_vfs;
+               if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) {
+                       if (BNXT_PF(bp)) {
+                               struct bnxt_pf_info *pf = &bp->pf;
+                               int n = pf->active_vfs;
 
-                       if (n)
-                               bnxt_cfg_hw_sriov(bp, &n, true);
+                               if (n)
+                                       bnxt_cfg_hw_sriov(bp, &n, true);
+                       }
+                       if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+                               bnxt_ulp_start(bp, 0);
                }
                bnxt_hwmon_open(bp);
        }
@@ -9927,12 +9932,15 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent)
        if (netif_running(bp->dev)) {
                int rc;
 
-               if (!silent)
+               if (silent) {
+                       bnxt_close_nic(bp, false, false);
+                       bnxt_open_nic(bp, false, false);
+               } else {
                        bnxt_ulp_stop(bp);
-               bnxt_close_nic(bp, false, false);
-               rc = bnxt_open_nic(bp, false, false);
-               if (!silent && !rc)
-                       bnxt_ulp_start(bp);
+                       bnxt_close_nic(bp, true, false);
+                       rc = bnxt_open_nic(bp, true, false);
+                       bnxt_ulp_start(bp, rc);
+               }
        }
 }
 
@@ -10004,7 +10012,7 @@ static void bnxt_timer(struct timer_list *t)
 
        if (bp->link_info.phy_retry) {
                if (time_after(jiffies, bp->link_info.phy_retry_expires)) {
-                       bp->link_info.phy_retry = 0;
+                       bp->link_info.phy_retry = false;
                        netdev_warn(bp->dev, "failed to update phy settings after maximum retries.\n");
                } else {
                        set_bit(BNXT_UPDATE_PHY_SP_EVENT, &bp->sp_event);
@@ -10048,8 +10056,8 @@ static void bnxt_reset(struct bnxt *bp, bool silent)
 
 static void bnxt_fw_reset_close(struct bnxt *bp)
 {
+       bnxt_ulp_stop(bp);
        __bnxt_close_nic(bp, true, false);
-       bnxt_ulp_irq_stop(bp);
        bnxt_clear_int_mode(bp);
        bnxt_hwrm_func_drv_unrgtr(bp);
        bnxt_free_ctx_mem(bp);
@@ -10582,14 +10590,23 @@ static void bnxt_fw_reset_writel(struct bnxt *bp, int reg_idx)
 static void bnxt_reset_all(struct bnxt *bp)
 {
        struct bnxt_fw_health *fw_health = bp->fw_health;
-       int i;
+       int i, rc;
+
+       if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) {
+#ifdef CONFIG_TEE_BNXT_FW
+               rc = tee_bnxt_fw_load();
+               if (rc)
+                       netdev_err(bp->dev, "Unable to reset FW rc=%d\n", rc);
+               bp->fw_reset_timestamp = jiffies;
+#endif
+               return;
+       }
 
        if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_HOST) {
                for (i = 0; i < fw_health->fw_reset_seq_cnt; i++)
                        bnxt_fw_reset_writel(bp, i);
        } else if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) {
                struct hwrm_fw_reset_input req = {0};
-               int rc;
 
                bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
                req.resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
@@ -10720,13 +10737,13 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                        clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
                        dev_close(bp->dev);
                }
-               bnxt_ulp_irq_restart(bp, rc);
-               rtnl_unlock();
 
                bp->fw_reset_state = 0;
                /* Make sure fw_reset_state is 0 before clearing the flag */
                smp_mb__before_atomic();
                clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+               bnxt_ulp_start(bp, rc);
+               rtnl_unlock();
                break;
        }
        return;
@@ -10934,7 +10951,7 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
        }
 }
 
-static LIST_HEAD(bnxt_block_cb_list);
+LIST_HEAD(bnxt_block_cb_list);
 
 static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
                         void *type_data)
@@ -11882,6 +11899,7 @@ static int bnxt_suspend(struct device *device)
        int rc = 0;
 
        rtnl_lock();
+       bnxt_ulp_stop(bp);
        if (netif_running(dev)) {
                netif_device_detach(dev);
                rc = bnxt_close(dev);
@@ -11915,6 +11933,7 @@ static int bnxt_resume(struct device *device)
        }
 
 resume_exit:
+       bnxt_ulp_start(bp, rc);
        rtnl_unlock();
        return rc;
 }
@@ -11994,10 +12013,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
                if (!err && netif_running(netdev))
                        err = bnxt_open(netdev);
 
-               if (!err) {
+               if (!err)
                        result = PCI_ERS_RESULT_RECOVERED;
-                       bnxt_ulp_start(bp);
-               }
+               bnxt_ulp_start(bp, err);
        }
 
        if (result != PCI_ERS_RESULT_RECOVERED && netif_running(netdev))
index d333589..a3545c8 100644 (file)
 #include <net/dst_metadata.h>
 #include <net/xdp.h>
 #include <linux/dim.h>
+#ifdef CONFIG_TEE_BNXT_FW
+#include <linux/firmware/broadcom/tee_bnxt_fw.h>
+#endif
+
+extern struct list_head bnxt_block_cb_list;
 
 struct page_pool;
 
@@ -1241,6 +1246,14 @@ struct bnxt_tc_flow_stats {
        u64             bytes;
 };
 
+#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
+struct bnxt_flower_indr_block_cb_priv {
+       struct net_device *tunnel_netdev;
+       struct bnxt *bp;
+       struct list_head list;
+};
+#endif
+
 struct bnxt_tc_info {
        bool                            enabled;
 
@@ -1804,6 +1817,9 @@ struct bnxt {
 
        u8                      num_leds;
        struct bnxt_led_info    leds[BNXT_MAX_LED];
+       u16                     dump_flag;
+#define BNXT_DUMP_LIVE         0
+#define BNXT_DUMP_CRASH                1
 
        struct bpf_prog         *xdp_prog;
 
@@ -1815,6 +1831,8 @@ struct bnxt {
        u16                     *cfa_code_map; /* cfa_code -> vf_idx map */
        u8                      switch_id[8];
        struct bnxt_tc_info     *tc_info;
+       struct list_head        tc_indr_block_list;
+       struct notifier_block   tc_netdev_nb;
        struct dentry           *debugfs_pdev;
        struct device           *hwmon_dev;
 };
index 7151244..ae4ddf3 100644 (file)
@@ -16,7 +16,8 @@
 #include "bnxt_devlink.h"
 
 static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
-                                    struct devlink_fmsg *fmsg)
+                                    struct devlink_fmsg *fmsg,
+                                    struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = devlink_health_reporter_priv(reporter);
        struct bnxt_fw_health *health = bp->fw_health;
@@ -61,7 +62,8 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
 };
 
 static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter,
-                                void *priv_ctx)
+                                void *priv_ctx,
+                                struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = devlink_health_reporter_priv(reporter);
 
@@ -79,7 +81,8 @@ struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = {
 };
 
 static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter,
-                                void *priv_ctx)
+                                void *priv_ctx,
+                                struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = devlink_health_reporter_priv(reporter);
        struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
index 51c1404..f2220b8 100644 (file)
@@ -3311,6 +3311,24 @@ err:
        return rc;
 }
 
+static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump)
+{
+       struct bnxt *bp = netdev_priv(dev);
+
+       if (dump->flag > BNXT_DUMP_CRASH) {
+               netdev_info(dev, "Supports only Live(0) and Crash(1) dumps.\n");
+               return -EINVAL;
+       }
+
+       if (!IS_ENABLED(CONFIG_TEE_BNXT_FW) && dump->flag == BNXT_DUMP_CRASH) {
+               netdev_info(dev, "Cannot collect crash dump as TEE_BNXT_FW config option is not enabled.\n");
+               return -EOPNOTSUPP;
+       }
+
+       bp->dump_flag = dump->flag;
+       return 0;
+}
+
 static int bnxt_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump)
 {
        struct bnxt *bp = netdev_priv(dev);
@@ -3323,7 +3341,12 @@ static int bnxt_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump)
                        bp->ver_resp.hwrm_fw_bld_8b << 8 |
                        bp->ver_resp.hwrm_fw_rsvd_8b;
 
-       return bnxt_get_coredump(bp, NULL, &dump->len);
+       dump->flag = bp->dump_flag;
+       if (bp->dump_flag == BNXT_DUMP_CRASH)
+               dump->len = BNXT_CRASH_DUMP_LEN;
+       else
+               bnxt_get_coredump(bp, NULL, &dump->len);
+       return 0;
 }
 
 static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump,
@@ -3336,7 +3359,16 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump,
 
        memset(buf, 0, dump->len);
 
-       return bnxt_get_coredump(bp, buf, &dump->len);
+       dump->flag = bp->dump_flag;
+       if (dump->flag == BNXT_DUMP_CRASH) {
+#ifdef CONFIG_TEE_BNXT_FW
+               return tee_bnxt_copy_coredump(buf, 0, dump->len);
+#endif
+       } else {
+               return bnxt_get_coredump(bp, buf, &dump->len);
+       }
+
+       return 0;
 }
 
 void bnxt_ethtool_init(struct bnxt *bp)
@@ -3446,6 +3478,7 @@ const struct ethtool_ops bnxt_ethtool_ops = {
        .set_phys_id            = bnxt_set_phys_id,
        .self_test              = bnxt_self_test,
        .reset                  = bnxt_reset,
+       .set_dump               = bnxt_set_dump,
        .get_dump_flag          = bnxt_get_dump_flag,
        .get_dump_data          = bnxt_get_dump_data,
 };
index b5b65b3..01de7e7 100644 (file)
@@ -59,6 +59,8 @@ struct hwrm_dbg_cmn_output {
        #define HWRM_DBG_CMN_FLAGS_MORE 1
 };
 
+#define BNXT_CRASH_DUMP_LEN    (8 << 20)
+
 #define BNXT_LED_DFLT_ENA                              \
        (PORT_LED_CFG_REQ_ENABLES_LED0_ID |             \
         PORT_LED_CFG_REQ_ENABLES_LED0_STATE |          \
index c8062d0..174412a 100644 (file)
@@ -16,7 +16,9 @@
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_tunnel_key.h>
+#include <net/vxlan.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -36,6 +38,8 @@
 #define is_vid_exactmatch(vlan_tci_mask)       \
        ((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)
 
+static bool is_wildcard(void *mask, int len);
+static bool is_exactmatch(void *mask, int len);
 /* Return the dst fid of the func for flow forwarding
  * For PFs: src_fid is the fid of the PF
  * For VF-reps: src_fid the fid of the VF
@@ -111,10 +115,182 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
        return 0;
 }
 
+/* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
+ * each(u32).
+ * This routine consolidates such multiple unaligned values into one
+ * field each for Key & Mask (for src and dst macs separately)
+ * For example,
+ *                     Mask/Key        Offset  Iteration
+ *                     ==========      ======  =========
+ *     dst mac         0xffffffff      0       1
+ *     dst mac         0x0000ffff      4       2
+ *
+ *     src mac         0xffff0000      4       1
+ *     src mac         0xffffffff      8       2
+ *
+ * The above combination coming from the stack will be consolidated as
+ *                     Mask/Key
+ *                     ==============
+ *     src mac:        0xffffffffffff
+ *     dst mac:        0xffffffffffff
+ */
+static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
+                                u8 *actual_key, u8 *actual_mask)
+{
+       u32 key = get_unaligned((u32 *)actual_key);
+       u32 mask = get_unaligned((u32 *)actual_mask);
+
+       part_key &= part_mask;
+       part_key |= key & ~part_mask;
+
+       put_unaligned(mask | part_mask, (u32 *)actual_mask);
+       put_unaligned(part_key, (u32 *)actual_key);
+}
+
+static int
+bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
+                           u16 *eth_addr, u16 *eth_addr_mask)
+{
+       u16 *p;
+       int j;
+
+       if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
+               return -EINVAL;
+
+       if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
+               if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
+                       return -EINVAL;
+               /* FW expects dmac to be in u16 array format */
+               p = eth_addr;
+               for (j = 0; j < 3; j++)
+                       actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
+       }
+
+       if (!is_wildcard(&eth_addr_mask[ETH_ALEN], ETH_ALEN)) {
+               if (!is_exactmatch(&eth_addr_mask[ETH_ALEN], ETH_ALEN))
+                       return -EINVAL;
+               /* FW expects smac to be in u16 array format */
+               p = &eth_addr[ETH_ALEN / 2];
+               for (j = 0; j < 3; j++)
+                       actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
+       }
+
+       return 0;
+}
+
+static int
+bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
+                   struct flow_action_entry *act, int act_idx, u8 *eth_addr,
+                   u8 *eth_addr_mask)
+{
+       size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
+       size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
+       u32 mask, val, offset, idx;
+       u8 htype;
+
+       offset = act->mangle.offset;
+       htype = act->mangle.htype;
+       mask = ~act->mangle.mask;
+       val = act->mangle.val;
+
+       switch (htype) {
+       case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+               if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
+                       netdev_err(bp->dev,
+                                  "%s: eth_hdr: Invalid pedit field\n",
+                                  __func__);
+                       return -EINVAL;
+               }
+               actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
+
+               bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
+                                    &eth_addr_mask[offset]);
+               break;
+       case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+               actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
+               actions->nat.l3_is_ipv4 = true;
+               if (offset ==  offsetof(struct iphdr, saddr)) {
+                       actions->nat.src_xlate = true;
+                       actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
+               } else if (offset ==  offsetof(struct iphdr, daddr)) {
+                       actions->nat.src_xlate = false;
+                       actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
+               } else {
+                       netdev_err(bp->dev,
+                                  "%s: IPv4_hdr: Invalid pedit field\n",
+                                  __func__);
+                       return -EINVAL;
+               }
+
+               netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
+                          actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
+                          &actions->nat.l3.ipv4.daddr);
+               break;
+
+       case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+               actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
+               actions->nat.l3_is_ipv4 = false;
+               if (offset >= offsetof(struct ipv6hdr, saddr) &&
+                   offset < offset_of_ip6_daddr) {
+                       /* 16 byte IPv6 address comes in 4 iterations of
+                        * 4byte chunks each
+                        */
+                       actions->nat.src_xlate = true;
+                       idx = (offset - offset_of_ip6_saddr) / 4;
+                       /* First 4bytes will be copied to idx 0 and so on */
+                       actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
+               } else if (offset >= offset_of_ip6_daddr &&
+                          offset < offset_of_ip6_daddr + 16) {
+                       actions->nat.src_xlate = false;
+                       idx = (offset - offset_of_ip6_daddr) / 4;
+                       actions->nat.l3.ipv6.daddr.s6_addr32[idx] = htonl(val);
+               } else {
+                       netdev_err(bp->dev,
+                                  "%s: IPv6_hdr: Invalid pedit field\n",
+                                  __func__);
+                       return -EINVAL;
+               }
+               break;
+       case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+       case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+               /* HW does not support L4 rewrite alone without L3
+                * rewrite
+                */
+               if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
+                       netdev_err(bp->dev,
+                                  "Need to specify L3 rewrite as well\n");
+                       return -EINVAL;
+               }
+               if (actions->nat.src_xlate)
+                       actions->nat.l4.ports.sport = htons(val);
+               else
+                       actions->nat.l4.ports.dport = htons(val);
+               netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
+                          actions->nat.l4.ports.sport,
+                          actions->nat.l4.ports.dport);
+               break;
+       default:
+               netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
+                          __func__);
+               return -EINVAL;
+       }
+       return 0;
+}
+
 static int bnxt_tc_parse_actions(struct bnxt *bp,
                                 struct bnxt_tc_actions *actions,
                                 struct flow_action *flow_action)
 {
+       /* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
+        * smac (6 bytes) if rewrite of both is specified, otherwise either
+        * dmac or smac
+        */
+       u16 eth_addr_mask[ETH_ALEN] = { 0 };
+       /* Used to store the L2 rewrite key for dmac (6 bytes) followed by
+        * smac (6 bytes) if rewrite of both is specified, otherwise either
+        * dmac or smac
+        */
+       u16 eth_addr[ETH_ALEN] = { 0 };
        struct flow_action_entry *act;
        int i, rc;
 
@@ -148,11 +324,26 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
                case FLOW_ACTION_TUNNEL_DECAP:
                        actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
                        break;
+               /* Packet edit: L2 rewrite, NAT, NAPT */
+               case FLOW_ACTION_MANGLE:
+                       rc = bnxt_tc_parse_pedit(bp, actions, act, i,
+                                                (u8 *)eth_addr,
+                                                (u8 *)eth_addr_mask);
+                       if (rc)
+                               return rc;
+                       break;
                default:
                        break;
                }
        }
 
+       if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
+               rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
+                                                eth_addr_mask);
+               if (rc)
+                       return rc;
+       }
+
        if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
                if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
                        /* dst_fid is PF's fid */
@@ -401,6 +592,76 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
        req.src_fid = cpu_to_le16(flow->src_fid);
        req.ref_flow_handle = ref_flow_handle;
 
+       if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
+               memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
+                      ETH_ALEN);
+               memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
+                      ETH_ALEN);
+               action_flags |=
+                       CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
+       }
+
+       if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
+               if (actions->nat.l3_is_ipv4) {
+                       action_flags |=
+                               CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
+
+                       if (actions->nat.src_xlate) {
+                               action_flags |=
+                                       CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
+                               /* L3 source rewrite */
+                               req.nat_ip_address[0] =
+                                       actions->nat.l3.ipv4.saddr.s_addr;
+                               /* L4 source port */
+                               if (actions->nat.l4.ports.sport)
+                                       req.nat_port =
+                                               actions->nat.l4.ports.sport;
+                       } else {
+                               action_flags |=
+                                       CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
+                               /* L3 destination rewrite */
+                               req.nat_ip_address[0] =
+                                       actions->nat.l3.ipv4.daddr.s_addr;
+                               /* L4 destination port */
+                               if (actions->nat.l4.ports.dport)
+                                       req.nat_port =
+                                               actions->nat.l4.ports.dport;
+                       }
+                       netdev_dbg(bp->dev,
+                                  "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
+                                  req.nat_ip_address, actions->nat.src_xlate,
+                                  req.nat_port);
+               } else {
+                       if (actions->nat.src_xlate) {
+                               action_flags |=
+                                       CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
+                               /* L3 source rewrite */
+                               memcpy(req.nat_ip_address,
+                                      actions->nat.l3.ipv6.saddr.s6_addr32,
+                                      sizeof(req.nat_ip_address));
+                               /* L4 source port */
+                               if (actions->nat.l4.ports.sport)
+                                       req.nat_port =
+                                               actions->nat.l4.ports.sport;
+                       } else {
+                               action_flags |=
+                                       CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
+                               /* L3 destination rewrite */
+                               memcpy(req.nat_ip_address,
+                                      actions->nat.l3.ipv6.daddr.s6_addr32,
+                                      sizeof(req.nat_ip_address));
+                               /* L4 destination port */
+                               if (actions->nat.l4.ports.dport)
+                                       req.nat_port =
+                                               actions->nat.l4.ports.dport;
+                       }
+                       netdev_dbg(bp->dev,
+                                  "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
+                                  req.nat_ip_address, actions->nat.src_xlate,
+                                  req.nat_port);
+               }
+       }
+
        if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
            actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
                req.tunnel_handle = tunnel_handle;
@@ -1274,7 +1535,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 
        if (!bnxt_tc_can_offload(bp, flow)) {
                rc = -EOPNOTSUPP;
-               goto free_node;
+               kfree_rcu(new_node, rcu);
+               return rc;
        }
 
        /* If a flow exists with the same cookie, delete it */
@@ -1580,6 +1842,147 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
        }
 }
 
+static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
+                                      void *type_data, void *cb_priv)
+{
+       struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
+       struct flow_cls_offload *flower = type_data;
+       struct bnxt *bp = priv->bp;
+
+       if (flower->common.chain_index)
+               return -EOPNOTSUPP;
+
+       switch (type) {
+       case TC_SETUP_CLSFLOWER:
+               return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static struct bnxt_flower_indr_block_cb_priv *
+bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
+{
+       struct bnxt_flower_indr_block_cb_priv *cb_priv;
+
+       /* All callback list access should be protected by RTNL. */
+       ASSERT_RTNL();
+
+       list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
+               if (cb_priv->tunnel_netdev == netdev)
+                       return cb_priv;
+
+       return NULL;
+}
+
+static void bnxt_tc_setup_indr_rel(void *cb_priv)
+{
+       struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
+
+       list_del(&priv->list);
+       kfree(priv);
+}
+
+static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
+                                   struct flow_block_offload *f)
+{
+       struct bnxt_flower_indr_block_cb_priv *cb_priv;
+       struct flow_block_cb *block_cb;
+
+       if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+
+       switch (f->command) {
+       case FLOW_BLOCK_BIND:
+               cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
+               if (!cb_priv)
+                       return -ENOMEM;
+
+               cb_priv->tunnel_netdev = netdev;
+               cb_priv->bp = bp;
+               list_add(&cb_priv->list, &bp->tc_indr_block_list);
+
+               block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
+                                              cb_priv, cb_priv,
+                                              bnxt_tc_setup_indr_rel);
+               if (IS_ERR(block_cb)) {
+                       list_del(&cb_priv->list);
+                       kfree(cb_priv);
+                       return PTR_ERR(block_cb);
+               }
+
+               flow_block_cb_add(block_cb, f);
+               list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
+               break;
+       case FLOW_BLOCK_UNBIND:
+               cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
+               if (!cb_priv)
+                       return -ENOENT;
+
+               block_cb = flow_block_cb_lookup(f->block,
+                                               bnxt_tc_setup_indr_block_cb,
+                                               cb_priv);
+               if (!block_cb)
+                       return -ENOENT;
+
+               flow_block_cb_remove(block_cb, f);
+               list_del(&block_cb->driver_list);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
+                                enum tc_setup_type type, void *type_data)
+{
+       switch (type) {
+       case TC_SETUP_BLOCK:
+               return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
+{
+       return netif_is_vxlan(netdev);
+}
+
+static int bnxt_tc_indr_block_event(struct notifier_block *nb,
+                                   unsigned long event, void *ptr)
+{
+       struct net_device *netdev;
+       struct bnxt *bp;
+       int rc;
+
+       netdev = netdev_notifier_info_to_dev(ptr);
+       if (!bnxt_is_netdev_indr_offload(netdev))
+               return NOTIFY_OK;
+
+       bp = container_of(nb, struct bnxt, tc_netdev_nb);
+
+       switch (event) {
+       case NETDEV_REGISTER:
+               rc = __flow_indr_block_cb_register(netdev, bp,
+                                                  bnxt_tc_setup_indr_cb,
+                                                  bp);
+               if (rc)
+                       netdev_info(bp->dev,
+                                   "Failed to register indirect blk: dev: %s",
+                                   netdev->name);
+               break;
+       case NETDEV_UNREGISTER:
+               __flow_indr_block_cb_unregister(netdev,
+                                               bnxt_tc_setup_indr_cb,
+                                               bp);
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
        .head_offset = offsetof(struct bnxt_tc_flow_node, node),
        .key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
@@ -1663,7 +2066,15 @@ int bnxt_init_tc(struct bnxt *bp)
        bp->dev->hw_features |= NETIF_F_HW_TC;
        bp->dev->features |= NETIF_F_HW_TC;
        bp->tc_info = tc_info;
-       return 0;
+
+       /* init indirect block notifications */
+       INIT_LIST_HEAD(&bp->tc_indr_block_list);
+       bp->tc_netdev_nb.notifier_call = bnxt_tc_indr_block_event;
+       rc = register_netdevice_notifier(&bp->tc_netdev_nb);
+       if (!rc)
+               return 0;
+
+       rhashtable_destroy(&tc_info->encap_table);
 
 destroy_decap_table:
        rhashtable_destroy(&tc_info->decap_table);
@@ -1685,6 +2096,7 @@ void bnxt_shutdown_tc(struct bnxt *bp)
        if (!bnxt_tc_flower_enabled(bp))
                return;
 
+       unregister_netdevice_notifier(&bp->tc_netdev_nb);
        rhashtable_destroy(&tc_info->flow_table);
        rhashtable_destroy(&tc_info->l2_table);
        rhashtable_destroy(&tc_info->decap_l2_table);
index 4f05305..2867549 100644 (file)
@@ -62,6 +62,12 @@ struct bnxt_tc_tunnel_key {
        __be32                  id;
 };
 
+#define bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)                \
+       ((is_wildcard(&(eth_addr)[0], ETH_ALEN) &&                      \
+        is_wildcard(&(eth_addr)[ETH_ALEN], ETH_ALEN)) ||               \
+       (is_wildcard(&(eth_addr_mask)[0], ETH_ALEN) &&                  \
+        is_wildcard(&(eth_addr_mask)[ETH_ALEN], ETH_ALEN)))
+
 struct bnxt_tc_actions {
        u32                             flags;
 #define BNXT_TC_ACTION_FLAG_FWD                        BIT(0)
@@ -71,6 +77,8 @@ struct bnxt_tc_actions {
 #define BNXT_TC_ACTION_FLAG_DROP               BIT(5)
 #define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP       BIT(6)
 #define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP       BIT(7)
+#define BNXT_TC_ACTION_FLAG_L2_REWRITE         BIT(8)
+#define BNXT_TC_ACTION_FLAG_NAT_XLATE          BIT(9)
 
        u16                             dst_fid;
        struct net_device               *dst_dev;
@@ -79,6 +87,18 @@ struct bnxt_tc_actions {
 
        /* tunnel encap */
        struct ip_tunnel_key            tun_encap_key;
+#define        PEDIT_OFFSET_SMAC_LAST_4_BYTES          0x8
+       __be16                          l2_rewrite_dmac[3];
+       __be16                          l2_rewrite_smac[3];
+       struct {
+               bool src_xlate;  /* true => translate src,
+                                 * false => translate dst
+                                 * Mutually exclusive, i.e cannot set both
+                                 */
+               bool l3_is_ipv4; /* false means L3 is ipv6 */
+               struct bnxt_tc_l3_key l3;
+               struct bnxt_tc_l4_key l4;
+       } nat;
 };
 
 struct bnxt_tc_flow {
index b2c1609..077fd10 100644 (file)
@@ -182,7 +182,7 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
 
        edev->ulp_tbl[ulp_id].msix_requested = 0;
        edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED;
-       if (netif_running(dev)) {
+       if (netif_running(dev) && !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) {
                bnxt_close_nic(bp, true, false);
                bnxt_open_nic(bp, true, false);
        }
@@ -266,6 +266,7 @@ void bnxt_ulp_stop(struct bnxt *bp)
        if (!edev)
                return;
 
+       edev->flags |= BNXT_EN_FLAG_ULP_STOPPED;
        for (i = 0; i < BNXT_MAX_ULP; i++) {
                struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
 
@@ -276,7 +277,7 @@ void bnxt_ulp_stop(struct bnxt *bp)
        }
 }
 
-void bnxt_ulp_start(struct bnxt *bp)
+void bnxt_ulp_start(struct bnxt *bp, int err)
 {
        struct bnxt_en_dev *edev = bp->edev;
        struct bnxt_ulp_ops *ops;
@@ -285,6 +286,11 @@ void bnxt_ulp_start(struct bnxt *bp)
        if (!edev)
                return;
 
+       edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED;
+
+       if (err)
+               return;
+
        for (i = 0; i < BNXT_MAX_ULP; i++) {
                struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
 
index cd78453..9895406 100644 (file)
@@ -64,6 +64,7 @@ struct bnxt_en_dev {
        #define BNXT_EN_FLAG_ROCE_CAP           (BNXT_EN_FLAG_ROCEV1_CAP | \
                                                 BNXT_EN_FLAG_ROCEV2_CAP)
        #define BNXT_EN_FLAG_MSIX_REQUESTED     0x4
+       #define BNXT_EN_FLAG_ULP_STOPPED        0x8
        const struct bnxt_en_ops        *en_ops;
        struct bnxt_ulp                 ulp_tbl[BNXT_MAX_ULP];
 };
@@ -92,7 +93,7 @@ int bnxt_get_ulp_msix_num(struct bnxt *bp);
 int bnxt_get_ulp_msix_base(struct bnxt *bp);
 int bnxt_get_ulp_stat_ctxs(struct bnxt *bp);
 void bnxt_ulp_stop(struct bnxt *bp);
-void bnxt_ulp_start(struct bnxt *bp);
+void bnxt_ulp_start(struct bnxt *bp, int err);
 void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
 void bnxt_ulp_shutdown(struct bnxt *bp);
 void bnxt_ulp_irq_stop(struct bnxt *bp);
index 155599d..61ab7d2 100644 (file)
@@ -5208,6 +5208,8 @@ static void cnic_init_rings(struct cnic_dev *dev)
                cnic_init_bnx2x_tx_ring(dev, data);
                cnic_init_bnx2x_rx_ring(dev, data);
 
+               data->general.fp_hsi_ver =  ETH_FP_HSI_VERSION;
+
                l5_data.phy_address.lo = udev->l2_buf_map & 0xffffffff;
                l5_data.phy_address.hi = (u64) udev->l2_buf_map >> 32;
 
index 0f13828..4f689fb 100644 (file)
@@ -3472,16 +3472,10 @@ static int bcmgenet_probe(struct platform_device *pdev)
                goto err;
        }
 
-       if (dn) {
+       if (dn)
                macaddr = of_get_mac_address(dn);
-               if (IS_ERR(macaddr)) {
-                       dev_err(&pdev->dev, "can't find MAC address\n");
-                       err = -EINVAL;
-                       goto err;
-               }
-       } else {
+       else
                macaddr = pd->mac_address;
-       }
 
        priv->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->base)) {
@@ -3493,7 +3487,12 @@ static int bcmgenet_probe(struct platform_device *pdev)
 
        SET_NETDEV_DEV(dev, &pdev->dev);
        dev_set_drvdata(&pdev->dev, dev);
-       ether_addr_copy(dev->dev_addr, macaddr);
+       if (IS_ERR_OR_NULL(macaddr) || !is_valid_ether_addr(macaddr)) {
+               dev_warn(&pdev->dev, "using random Ethernet MAC\n");
+               eth_hw_addr_random(dev);
+       } else {
+               ether_addr_copy(dev->dev_addr, macaddr);
+       }
        dev->watchdog_timeo = 2 * HZ;
        dev->ethtool_ops = &bcmgenet_ethtool_ops;
        dev->netdev_ops = &bcmgenet_netdev_ops;
@@ -3608,6 +3607,11 @@ static int bcmgenet_remove(struct platform_device *pdev)
        return 0;
 }
 
+static void bcmgenet_shutdown(struct platform_device *pdev)
+{
+       bcmgenet_remove(pdev);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int bcmgenet_resume(struct device *d)
 {
@@ -3726,6 +3730,7 @@ static SIMPLE_DEV_PM_OPS(bcmgenet_pm_ops, bcmgenet_suspend, bcmgenet_resume);
 static struct platform_driver bcmgenet_driver = {
        .probe  = bcmgenet_probe,
        .remove = bcmgenet_remove,
+       .shutdown = bcmgenet_shutdown,
        .driver = {
                .name   = "bcmgenet",
                .of_match_table = bcmgenet_match,
index 17bb8d6..b797a7e 100644 (file)
@@ -436,7 +436,7 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
        struct device_node *dn = priv->pdev->dev.of_node;
        struct device *kdev = &priv->pdev->dev;
        struct phy_device *phydev;
-       int phy_mode;
+       phy_interface_t phy_mode;
        int ret;
 
        /* Fetch the PHY phandle */
@@ -454,10 +454,10 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
        }
 
        /* Get the link mode */
-       phy_mode = of_get_phy_mode(dn);
-       if (phy_mode < 0) {
+       ret = of_get_phy_mode(dn, &phy_mode);
+       if (ret) {
                dev_err(kdev, "invalid PHY mode property\n");
-               return phy_mode;
+               return ret;
        }
 
        priv->phy_interface = phy_mode;
index 1e1b774..b884cf7 100644 (file)
@@ -4182,6 +4182,7 @@ static int macb_probe(struct platform_device *pdev)
        unsigned int queue_mask, num_queues;
        bool native_io;
        struct phy_device *phydev;
+       phy_interface_t interface;
        struct net_device *dev;
        struct resource *regs;
        void __iomem *mem;
@@ -4308,12 +4309,12 @@ static int macb_probe(struct platform_device *pdev)
                macb_get_hwaddr(bp);
        }
 
-       err = of_get_phy_mode(np);
-       if (err < 0)
+       err = of_get_phy_mode(np, &interface);
+       if (err)
                /* not found in DT, MII by default */
                bp->phy_interface = PHY_INTERFACE_MODE_MII;
        else
-               bp->phy_interface = err;
+               bp->phy_interface = interface;
 
        /* IP specific init */
        err = init(pdev);
index f96a42a..af04a2c 100644 (file)
@@ -1914,10 +1914,10 @@ static struct platform_driver xgmac_driver = {
        .driver = {
                .name = "calxedaxgmac",
                .of_match_table = xgmac_of_match,
+               .pm = &xgmac_pm_ops,
        },
        .probe = xgmac_probe,
        .remove = xgmac_remove,
-       .driver.pm = &xgmac_pm_ops,
 };
 
 module_platform_driver(xgmac_driver);
index acb0168..1e09fdb 100644 (file)
@@ -1007,14 +1007,14 @@ static void bgx_poll_for_link(struct work_struct *work)
 
        if ((spu_link & SPU_STATUS1_RCV_LNK) &&
            !(smu_link & SMU_RX_CTL_STATUS)) {
-               lmac->link_up = 1;
+               lmac->link_up = true;
                if (lmac->lmac_type == BGX_MODE_XLAUI)
                        lmac->last_speed = SPEED_40000;
                else
                        lmac->last_speed = SPEED_10000;
                lmac->last_duplex = DUPLEX_FULL;
        } else {
-               lmac->link_up = 0;
+               lmac->link_up = false;
                lmac->last_speed = SPEED_UNKNOWN;
                lmac->last_duplex = DUPLEX_UNKNOWN;
        }
@@ -1023,7 +1023,7 @@ static void bgx_poll_for_link(struct work_struct *work)
                if (lmac->link_up) {
                        if (bgx_xaui_check_link(lmac)) {
                                /* Errors, clear link_up state */
-                               lmac->link_up = 0;
+                               lmac->link_up = false;
                                lmac->last_speed = SPEED_UNKNOWN;
                                lmac->last_duplex = DUPLEX_UNKNOWN;
                        }
@@ -1055,11 +1055,11 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
        if ((lmac->lmac_type == BGX_MODE_SGMII) ||
            (lmac->lmac_type == BGX_MODE_QSGMII) ||
            (lmac->lmac_type == BGX_MODE_RGMII)) {
-               lmac->is_sgmii = 1;
+               lmac->is_sgmii = true;
                if (bgx_lmac_sgmii_init(bgx, lmac))
                        return -1;
        } else {
-               lmac->is_sgmii = 0;
+               lmac->is_sgmii = false;
                if (bgx_lmac_xaui_init(bgx, lmac))
                        return -1;
        }
@@ -1304,7 +1304,7 @@ static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid)
 {
        if ((lmac->lmac_type != BGX_MODE_10G_KR) &&
            (lmac->lmac_type != BGX_MODE_40G_KR)) {
-               lmac->use_training = 0;
+               lmac->use_training = false;
                return;
        }
 
index 3802487..33a923c 100644 (file)
@@ -184,6 +184,8 @@ static struct dentry *cxgb4_debugfs_root;
 LIST_HEAD(adapter_list);
 DEFINE_MUTEX(uld_mutex);
 
+static int cfg_queues(struct adapter *adap);
+
 static void link_report(struct net_device *dev)
 {
        if (!netif_carrier_ok(dev))
@@ -4286,14 +4288,14 @@ static struct fw_info *find_fw_info(int chip)
 /*
  * Phase 0 of initialization: contact FW, obtain config, perform basic init.
  */
-static int adap_init0(struct adapter *adap)
+static int adap_init0(struct adapter *adap, int vpd_skip)
 {
-       int ret;
-       u32 v, port_vec;
-       enum dev_state state;
-       u32 params[7], val[7];
        struct fw_caps_config_cmd caps_cmd;
+       u32 params[7], val[7];
+       enum dev_state state;
+       u32 v, port_vec;
        int reset = 1;
+       int ret;
 
        /* Grab Firmware Device Log parameters as early as possible so we have
         * access to it for debugging, etc.
@@ -4448,9 +4450,11 @@ static int adap_init0(struct adapter *adap)
         * could have FLASHed a new VPD which won't be read by the firmware
         * until we do the RESET ...
         */
-       ret = t4_get_vpd_params(adap, &adap->params.vpd);
-       if (ret < 0)
-               goto bye;
+       if (!vpd_skip) {
+               ret = t4_get_vpd_params(adap, &adap->params.vpd);
+               if (ret < 0)
+                       goto bye;
+       }
 
        /* Find out what ports are available to us.  Note that we need to do
         * this before calling adap_init0_no_config() since it needs nports
@@ -5050,10 +5054,93 @@ static void eeh_resume(struct pci_dev *pdev)
        rtnl_unlock();
 }
 
+static void eeh_reset_prepare(struct pci_dev *pdev)
+{
+       struct adapter *adapter = pci_get_drvdata(pdev);
+       int i;
+
+       if (adapter->pf != 4)
+               return;
+
+       adapter->flags &= ~CXGB4_FW_OK;
+
+       notify_ulds(adapter, CXGB4_STATE_DOWN);
+
+       for_each_port(adapter, i)
+               if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+                       cxgb_close(adapter->port[i]);
+
+       disable_interrupts(adapter);
+       cxgb4_free_mps_ref_entries(adapter);
+
+       adap_free_hma_mem(adapter);
+
+       if (adapter->flags & CXGB4_FULL_INIT_DONE)
+               cxgb_down(adapter);
+}
+
+static void eeh_reset_done(struct pci_dev *pdev)
+{
+       struct adapter *adapter = pci_get_drvdata(pdev);
+       int err, i;
+
+       if (adapter->pf != 4)
+               return;
+
+       err = t4_wait_dev_ready(adapter->regs);
+       if (err < 0) {
+               dev_err(adapter->pdev_dev,
+                       "Device not ready, err %d", err);
+               return;
+       }
+
+       setup_memwin(adapter);
+
+       err = adap_init0(adapter, 1);
+       if (err) {
+               dev_err(adapter->pdev_dev,
+                       "Adapter init failed, err %d", err);
+               return;
+       }
+
+       setup_memwin_rdma(adapter);
+
+       if (adapter->flags & CXGB4_FW_OK) {
+               err = t4_port_init(adapter, adapter->pf, adapter->pf, 0);
+               if (err) {
+                       dev_err(adapter->pdev_dev,
+                               "Port init failed, err %d", err);
+                       return;
+               }
+       }
+
+       err = cfg_queues(adapter);
+       if (err) {
+               dev_err(adapter->pdev_dev,
+                       "Config queues failed, err %d", err);
+               return;
+       }
+
+       cxgb4_init_mps_ref_entries(adapter);
+
+       err = setup_fw_sge_queues(adapter);
+       if (err) {
+               dev_err(adapter->pdev_dev,
+                       "FW sge queue allocation failed, err %d", err);
+               return;
+       }
+
+       for_each_port(adapter, i)
+               if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+                       cxgb_open(adapter->port[i]);
+}
+
 static const struct pci_error_handlers cxgb4_eeh = {
        .error_detected = eeh_err_detected,
        .slot_reset     = eeh_slot_reset,
        .resume         = eeh_resume,
+       .reset_prepare  = eeh_reset_prepare,
+       .reset_done     = eeh_reset_done,
 };
 
 /* Return true if the Link Configuration supports "High Speeds" (those greater
@@ -5837,7 +5924,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
 
        setup_memwin(adapter);
-       err = adap_init0(adapter);
+       err = adap_init0(adapter, 0);
 #ifdef CONFIG_DEBUG_FS
        bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz);
 #endif
index 1a407d3..e9e4500 100644 (file)
@@ -351,15 +351,13 @@ exists:
 static void _t4_l2e_free(struct l2t_entry *e)
 {
        struct l2t_data *d;
-       struct sk_buff *skb;
 
        if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
                if (e->neigh) {
                        neigh_release(e->neigh);
                        e->neigh = NULL;
                }
-               while ((skb = __skb_dequeue(&e->arpq)) != NULL)
-                       kfree_skb(skb);
+               __skb_queue_purge(&e->arpq);
        }
 
        d = container_of(e, struct l2t_data, l2tab[e->idx]);
@@ -370,7 +368,6 @@ static void _t4_l2e_free(struct l2t_entry *e)
 static void t4_l2e_free(struct l2t_entry *e)
 {
        struct l2t_data *d;
-       struct sk_buff *skb;
 
        spin_lock_bh(&e->lock);
        if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
@@ -378,8 +375,7 @@ static void t4_l2e_free(struct l2t_entry *e)
                        neigh_release(e->neigh);
                        e->neigh = NULL;
                }
-               while ((skb = __skb_dequeue(&e->arpq)) != NULL)
-                       kfree_skb(skb);
+               __skb_queue_purge(&e->arpq);
        }
        spin_unlock_bh(&e->lock);
 
index 96e9565..a6f2063 100644 (file)
@@ -90,6 +90,9 @@ struct ftgmac100 {
        struct mii_bus *mii_bus;
        struct clk *clk;
 
+       /* AST2500/AST2600 RMII ref clock gate */
+       struct clk *rclk;
+
        /* Link management */
        int cur_speed;
        int cur_duplex;
@@ -1609,7 +1612,7 @@ static int ftgmac100_setup_mdio(struct net_device *netdev)
 {
        struct ftgmac100 *priv = netdev_priv(netdev);
        struct platform_device *pdev = to_platform_device(priv->dev);
-       int phy_intf = PHY_INTERFACE_MODE_RGMII;
+       phy_interface_t phy_intf = PHY_INTERFACE_MODE_RGMII;
        struct device_node *np = pdev->dev.of_node;
        int i, err = 0;
        u32 reg;
@@ -1634,8 +1637,8 @@ static int ftgmac100_setup_mdio(struct net_device *netdev)
        /* Get PHY mode from device-tree */
        if (np) {
                /* Default to RGMII. It's a gigabit part after all */
-               phy_intf = of_get_phy_mode(np);
-               if (phy_intf < 0)
+               err = of_get_phy_mode(np, &phy_intf);
+               if (err)
                        phy_intf = PHY_INTERFACE_MODE_RGMII;
 
                /* Aspeed only supports these. I don't know about other IP
@@ -1717,20 +1720,41 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd)
                   nd->link_up ? "up" : "down");
 }
 
-static void ftgmac100_setup_clk(struct ftgmac100 *priv)
+static int ftgmac100_setup_clk(struct ftgmac100 *priv)
 {
-       priv->clk = devm_clk_get(priv->dev, NULL);
-       if (IS_ERR(priv->clk))
-               return;
+       struct clk *clk;
+       int rc;
 
-       clk_prepare_enable(priv->clk);
+       clk = devm_clk_get(priv->dev, NULL /* MACCLK */);
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
+       priv->clk = clk;
+       rc = clk_prepare_enable(priv->clk);
+       if (rc)
+               return rc;
 
        /* Aspeed specifies a 100MHz clock is required for up to
         * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz
         * is sufficient
         */
-       clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ :
-                       FTGMAC_100MHZ);
+       rc = clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ :
+                         FTGMAC_100MHZ);
+       if (rc)
+               goto cleanup_clk;
+
+       /* RCLK is for RMII, typically used for NCSI. Optional because its not
+        * necessary if it's the AST2400 MAC, or the MAC is configured for
+        * RGMII, or the controller is not an ASPEED-based controller.
+        */
+       priv->rclk = devm_clk_get_optional(priv->dev, "RCLK");
+       rc = clk_prepare_enable(priv->rclk);
+       if (!rc)
+               return 0;
+
+cleanup_clk:
+       clk_disable_unprepare(priv->clk);
+
+       return rc;
 }
 
 static int ftgmac100_probe(struct platform_device *pdev)
@@ -1852,8 +1876,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
                        goto err_setup_mdio;
        }
 
-       if (priv->is_aspeed)
-               ftgmac100_setup_clk(priv);
+       if (priv->is_aspeed) {
+               err = ftgmac100_setup_clk(priv);
+               if (err)
+                       goto err_ncsi_dev;
+       }
 
        /* Default ring sizes */
        priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES;
@@ -1885,8 +1912,10 @@ static int ftgmac100_probe(struct platform_device *pdev)
 
        return 0;
 
-err_ncsi_dev:
 err_register_netdev:
+       clk_disable_unprepare(priv->rclk);
+       clk_disable_unprepare(priv->clk);
+err_ncsi_dev:
        ftgmac100_destroy_mdio(netdev);
 err_setup_mdio:
        iounmap(priv->base);
@@ -1908,6 +1937,7 @@ static int ftgmac100_remove(struct platform_device *pdev)
 
        unregister_netdev(netdev);
 
+       clk_disable_unprepare(priv->rclk);
        clk_disable_unprepare(priv->clk);
 
        /* There's a small chance the reset task will have been re-queued,
index b4b82b9..6a9d12d 100644 (file)
@@ -178,31 +178,9 @@ struct fm_port_fqs {
 /* All the dpa bps in use at any moment */
 static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];
 
-/* The raw buffer size must be cacheline aligned */
 #define DPAA_BP_RAW_SIZE 4096
-/* When using more than one buffer pool, the raw sizes are as follows:
- * 1 bp: 4KB
- * 2 bp: 2KB, 4KB
- * 3 bp: 1KB, 2KB, 4KB
- * 4 bp: 1KB, 2KB, 4KB, 8KB
- */
-static inline size_t bpool_buffer_raw_size(u8 index, u8 cnt)
-{
-       size_t res = DPAA_BP_RAW_SIZE / 4;
-       u8 i;
-
-       for (i = (cnt < 3) ? cnt : 3; i < 3 + index; i++)
-               res *= 2;
-       return res;
-}
 
-/* FMan-DMA requires 16-byte alignment for Rx buffers, but SKB_DATA_ALIGN is
- * even stronger (SMP_CACHE_BYTES-aligned), so we just get away with that,
- * via SKB_WITH_OVERHEAD(). We can't rely on netdev_alloc_frag() giving us
- * half-page-aligned buffers, so we reserve some more space for start-of-buffer
- * alignment.
- */
-#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD((raw_size) - SMP_CACHE_BYTES)
+#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size)
 
 static int dpaa_max_frm;
 
@@ -288,7 +266,7 @@ static int dpaa_stop(struct net_device *net_dev)
        /* Allow the Fman (Tx) port to process in-flight frames before we
         * try switching it off.
         */
-       usleep_range(5000, 10000);
+       msleep(200);
 
        err = mac_dev->stop(mac_dev);
        if (err < 0)
@@ -305,6 +283,8 @@ static int dpaa_stop(struct net_device *net_dev)
                phy_disconnect(net_dev->phydev);
        net_dev->phydev = NULL;
 
+       msleep(200);
+
        return err;
 }
 
@@ -596,10 +576,7 @@ static void dpaa_bp_free(struct dpaa_bp *dpaa_bp)
 
 static void dpaa_bps_free(struct dpaa_priv *priv)
 {
-       int i;
-
-       for (i = 0; i < DPAA_BPS_NUM; i++)
-               dpaa_bp_free(priv->dpaa_bps[i]);
+       dpaa_bp_free(priv->dpaa_bp);
 }
 
 /* Use multiple WQs for FQ assignment:
@@ -773,7 +750,7 @@ static void dpaa_release_channel(void)
        qman_release_pool(rx_pool_channel);
 }
 
-static void dpaa_eth_add_channel(u16 channel)
+static void dpaa_eth_add_channel(u16 channel, struct device *dev)
 {
        u32 pool = QM_SDQCR_CHANNELS_POOL_CONV(channel);
        const cpumask_t *cpus = qman_affine_cpus();
@@ -783,6 +760,7 @@ static void dpaa_eth_add_channel(u16 channel)
        for_each_cpu_and(cpu, cpus, cpu_online_mask) {
                portal = qman_get_affine_portal(cpu);
                qman_p_static_dequeue_add(portal, pool);
+               qman_start_using_portal(portal, dev);
        }
 }
 
@@ -901,7 +879,7 @@ static void dpaa_fq_setup(struct dpaa_priv *priv,
 
        if (num_portals == 0)
                dev_err(priv->net_dev->dev.parent,
-                       "No Qman software (affine) channels found");
+                       "No Qman software (affine) channels found\n");
 
        /* Initialize each FQ in the list */
        list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
@@ -1197,15 +1175,15 @@ static int dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
        return err;
 }
 
-static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
-                                size_t count, struct dpaa_fq *errq,
+static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp *bp,
+                                struct dpaa_fq *errq,
                                 struct dpaa_fq *defq, struct dpaa_fq *pcdq,
                                 struct dpaa_buffer_layout *buf_layout)
 {
        struct fman_buffer_prefix_content buf_prefix_content;
        struct fman_port_rx_params *rx_p;
        struct fman_port_params params;
-       int i, err;
+       int err;
 
        memset(&params, 0, sizeof(params));
        memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
@@ -1224,12 +1202,9 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
                rx_p->pcd_fqs_count = DPAA_ETH_PCD_RXQ_NUM;
        }
 
-       count = min(ARRAY_SIZE(rx_p->ext_buf_pools.ext_buf_pool), count);
-       rx_p->ext_buf_pools.num_of_pools_used = (u8)count;
-       for (i = 0; i < count; i++) {
-               rx_p->ext_buf_pools.ext_buf_pool[i].id =  bps[i]->bpid;
-               rx_p->ext_buf_pools.ext_buf_pool[i].size = (u16)bps[i]->size;
-       }
+       rx_p->ext_buf_pools.num_of_pools_used = 1;
+       rx_p->ext_buf_pools.ext_buf_pool[0].id =  bp->bpid;
+       rx_p->ext_buf_pools.ext_buf_pool[0].size = (u16)bp->size;
 
        err = fman_port_config(port, &params);
        if (err) {
@@ -1252,7 +1227,7 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
 }
 
 static int dpaa_eth_init_ports(struct mac_device *mac_dev,
-                              struct dpaa_bp **bps, size_t count,
+                              struct dpaa_bp *bp,
                               struct fm_port_fqs *port_fqs,
                               struct dpaa_buffer_layout *buf_layout,
                               struct device *dev)
@@ -1266,7 +1241,7 @@ static int dpaa_eth_init_ports(struct mac_device *mac_dev,
        if (err)
                return err;
 
-       err = dpaa_eth_init_rx_port(rxport, bps, count, port_fqs->rx_errq,
+       err = dpaa_eth_init_rx_port(rxport, bp, port_fqs->rx_errq,
                                    port_fqs->rx_defq, port_fqs->rx_pcdq,
                                    &buf_layout[RX]);
 
@@ -1335,15 +1310,16 @@ static void dpaa_fd_release(const struct net_device *net_dev,
                vaddr = phys_to_virt(qm_fd_addr(fd));
                sgt = vaddr + qm_fd_get_offset(fd);
 
-               dma_unmap_single(dpaa_bp->dev, qm_fd_addr(fd), dpaa_bp->size,
-                                DMA_FROM_DEVICE);
+               dma_unmap_page(dpaa_bp->priv->rx_dma_dev, qm_fd_addr(fd),
+                              DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
 
                dpaa_release_sgt_members(sgt);
 
-               addr = dma_map_single(dpaa_bp->dev, vaddr, dpaa_bp->size,
-                                     DMA_FROM_DEVICE);
-               if (dma_mapping_error(dpaa_bp->dev, addr)) {
-                       dev_err(dpaa_bp->dev, "DMA mapping failed");
+               addr = dma_map_page(dpaa_bp->priv->rx_dma_dev,
+                                   virt_to_page(vaddr), 0, DPAA_BP_RAW_SIZE,
+                                   DMA_FROM_DEVICE);
+               if (dma_mapping_error(dpaa_bp->priv->rx_dma_dev, addr)) {
+                       netdev_err(net_dev, "DMA mapping failed\n");
                        return;
                }
                bm_buffer_set64(&bmb, addr);
@@ -1396,7 +1372,7 @@ static void count_ern(struct dpaa_percpu_priv *percpu_priv,
 static int dpaa_enable_tx_csum(struct dpaa_priv *priv,
                               struct sk_buff *skb,
                               struct qm_fd *fd,
-                              char *parse_results)
+                              void *parse_results)
 {
        struct fman_prs_result *parse_result;
        u16 ethertype = ntohs(skb->protocol);
@@ -1488,25 +1464,24 @@ return_error:
 
 static int dpaa_bp_add_8_bufs(const struct dpaa_bp *dpaa_bp)
 {
-       struct device *dev = dpaa_bp->dev;
+       struct net_device *net_dev = dpaa_bp->priv->net_dev;
        struct bm_buffer bmb[8];
        dma_addr_t addr;
-       void *new_buf;
+       struct page *p;
        u8 i;
 
        for (i = 0; i < 8; i++) {
-               new_buf = netdev_alloc_frag(dpaa_bp->raw_size);
-               if (unlikely(!new_buf)) {
-                       dev_err(dev, "netdev_alloc_frag() failed, size %zu\n",
-                               dpaa_bp->raw_size);
+               p = dev_alloc_pages(0);
+               if (unlikely(!p)) {
+                       netdev_err(net_dev, "dev_alloc_pages() failed\n");
                        goto release_previous_buffs;
                }
-               new_buf = PTR_ALIGN(new_buf, SMP_CACHE_BYTES);
 
-               addr = dma_map_single(dev, new_buf,
-                                     dpaa_bp->size, DMA_FROM_DEVICE);
-               if (unlikely(dma_mapping_error(dev, addr))) {
-                       dev_err(dpaa_bp->dev, "DMA map failed");
+               addr = dma_map_page(dpaa_bp->priv->rx_dma_dev, p, 0,
+                                   DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
+               if (unlikely(dma_mapping_error(dpaa_bp->priv->rx_dma_dev,
+                                              addr))) {
+                       netdev_err(net_dev, "DMA map failed\n");
                        goto release_previous_buffs;
                }
 
@@ -1581,17 +1556,16 @@ static int dpaa_eth_refill_bpools(struct dpaa_priv *priv)
 {
        struct dpaa_bp *dpaa_bp;
        int *countptr;
-       int res, i;
+       int res;
+
+       dpaa_bp = priv->dpaa_bp;
+       if (!dpaa_bp)
+               return -EINVAL;
+       countptr = this_cpu_ptr(dpaa_bp->percpu_count);
+       res  = dpaa_eth_refill_bpool(dpaa_bp, countptr);
+       if (res)
+               return res;
 
-       for (i = 0; i < DPAA_BPS_NUM; i++) {
-               dpaa_bp = priv->dpaa_bps[i];
-               if (!dpaa_bp)
-                       return -EINVAL;
-               countptr = this_cpu_ptr(dpaa_bp->percpu_count);
-               res  = dpaa_eth_refill_bpool(dpaa_bp, countptr);
-               if (res)
-                       return res;
-       }
        return 0;
 }
 
@@ -1600,68 +1574,74 @@ static int dpaa_eth_refill_bpools(struct dpaa_priv *priv)
  * Skb freeing is not handled here.
  *
  * This function may be called on error paths in the Tx function, so guard
- * against cases when not all fd relevant fields were filled in.
+ * against cases when not all fd relevant fields were filled in. To avoid
+ * reading the invalid transmission timestamp for the error paths set ts to
+ * false.
  *
  * Return the skb backpointer, since for S/G frames the buffer containing it
  * gets freed here.
  */
 static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
-                                         const struct qm_fd *fd)
+                                         const struct qm_fd *fd, bool ts)
 {
        const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
        struct device *dev = priv->net_dev->dev.parent;
        struct skb_shared_hwtstamps shhwtstamps;
        dma_addr_t addr = qm_fd_addr(fd);
+       void *vaddr = phys_to_virt(addr);
        const struct qm_sg_entry *sgt;
-       struct sk_buff **skbh, *skb;
-       int nr_frags, i;
+       struct sk_buff *skb;
        u64 ns;
-
-       skbh = (struct sk_buff **)phys_to_virt(addr);
-       skb = *skbh;
-
-       if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
-               memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-
-               if (!fman_port_get_tstamp(priv->mac_dev->port[TX], (void *)skbh,
-                                         &ns)) {
-                       shhwtstamps.hwtstamp = ns_to_ktime(ns);
-                       skb_tstamp_tx(skb, &shhwtstamps);
-               } else {
-                       dev_warn(dev, "fman_port_get_tstamp failed!\n");
-               }
-       }
+       int i;
 
        if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
-               nr_frags = skb_shinfo(skb)->nr_frags;
-               dma_unmap_single(dev, addr,
-                                qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
-                                dma_dir);
+               dma_unmap_page(priv->tx_dma_dev, addr,
+                              qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
+                              dma_dir);
 
                /* The sgt buffer has been allocated with netdev_alloc_frag(),
                 * it's from lowmem.
                 */
-               sgt = phys_to_virt(addr + qm_fd_get_offset(fd));
+               sgt = vaddr + qm_fd_get_offset(fd);
 
                /* sgt[0] is from lowmem, was dma_map_single()-ed */
-               dma_unmap_single(dev, qm_sg_addr(&sgt[0]),
+               dma_unmap_single(priv->tx_dma_dev, qm_sg_addr(&sgt[0]),
                                 qm_sg_entry_get_len(&sgt[0]), dma_dir);
 
                /* remaining pages were mapped with skb_frag_dma_map() */
-               for (i = 1; i <= nr_frags; i++) {
+               for (i = 1; (i < DPAA_SGT_MAX_ENTRIES) &&
+                    !qm_sg_entry_is_final(&sgt[i - 1]); i++) {
                        WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
 
-                       dma_unmap_page(dev, qm_sg_addr(&sgt[i]),
+                       dma_unmap_page(priv->tx_dma_dev, qm_sg_addr(&sgt[i]),
                                       qm_sg_entry_get_len(&sgt[i]), dma_dir);
                }
-
-               /* Free the page frag that we allocated on Tx */
-               skb_free_frag(phys_to_virt(addr));
        } else {
-               dma_unmap_single(dev, addr,
-                                skb_tail_pointer(skb) - (u8 *)skbh, dma_dir);
+               dma_unmap_single(priv->tx_dma_dev, addr,
+                                priv->tx_headroom + qm_fd_get_length(fd),
+                                dma_dir);
+       }
+
+       skb = *(struct sk_buff **)vaddr;
+
+       /* DMA unmapping is required before accessing the HW provided info */
+       if (ts && priv->tx_tstamp &&
+           skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+               memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+
+               if (!fman_port_get_tstamp(priv->mac_dev->port[TX], vaddr,
+                                         &ns)) {
+                       shhwtstamps.hwtstamp = ns_to_ktime(ns);
+                       skb_tstamp_tx(skb, &shhwtstamps);
+               } else {
+                       dev_warn(dev, "fman_port_get_tstamp failed!\n");
+               }
        }
 
+       if (qm_fd_get_format(fd) == qm_fd_sg)
+               /* Free the page that we allocated on Tx for the SGT */
+               free_pages((unsigned long)vaddr, 0);
+
        return skb;
 }
 
@@ -1715,7 +1695,7 @@ static struct sk_buff *contig_fd_to_skb(const struct dpaa_priv *priv,
        return skb;
 
 free_buffer:
-       skb_free_frag(vaddr);
+       free_pages((unsigned long)vaddr, 0);
        return NULL;
 }
 
@@ -1762,8 +1742,8 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
                        goto free_buffers;
 
                count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
-               dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size,
-                                DMA_FROM_DEVICE);
+               dma_unmap_page(priv->rx_dma_dev, sg_addr,
+                              DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
                if (!skb) {
                        sz = dpaa_bp->size +
                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -1815,7 +1795,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
        WARN_ONCE(i == DPAA_SGT_MAX_ENTRIES, "No final bit on SGT\n");
 
        /* free the SG table buffer */
-       skb_free_frag(vaddr);
+       free_pages((unsigned long)vaddr, 0);
 
        return skb;
 
@@ -1832,7 +1812,7 @@ free_buffers:
        for (i = 0; i < DPAA_SGT_MAX_ENTRIES ; i++) {
                sg_addr = qm_sg_addr(&sgt[i]);
                sg_vaddr = phys_to_virt(sg_addr);
-               skb_free_frag(sg_vaddr);
+               free_pages((unsigned long)sg_vaddr, 0);
                dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
                if (dpaa_bp) {
                        count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
@@ -1843,7 +1823,7 @@ free_buffers:
                        break;
        }
        /* free the SGT fragment */
-       skb_free_frag(vaddr);
+       free_pages((unsigned long)vaddr, 0);
 
        return NULL;
 }
@@ -1853,9 +1833,8 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
                            int *offset)
 {
        struct net_device *net_dev = priv->net_dev;
-       struct device *dev = net_dev->dev.parent;
        enum dma_data_direction dma_dir;
-       unsigned char *buffer_start;
+       unsigned char *buff_start;
        struct sk_buff **skbh;
        dma_addr_t addr;
        int err;
@@ -1864,10 +1843,10 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
         * available, so just use that for offset.
         */
        fd->bpid = FSL_DPAA_BPID_INV;
-       buffer_start = skb->data - priv->tx_headroom;
+       buff_start = skb->data - priv->tx_headroom;
        dma_dir = DMA_TO_DEVICE;
 
-       skbh = (struct sk_buff **)buffer_start;
+       skbh = (struct sk_buff **)buff_start;
        *skbh = skb;
 
        /* Enable L3/L4 hardware checksum computation.
@@ -1876,7 +1855,7 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
         * need to write into the skb.
         */
        err = dpaa_enable_tx_csum(priv, skb, fd,
-                                 ((char *)skbh) + DPAA_TX_PRIV_DATA_SIZE);
+                                 buff_start + DPAA_TX_PRIV_DATA_SIZE);
        if (unlikely(err < 0)) {
                if (net_ratelimit())
                        netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
@@ -1889,9 +1868,9 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
        fd->cmd |= cpu_to_be32(FM_FD_CMD_FCO);
 
        /* Map the entire buffer size that may be seen by FMan, but no more */
-       addr = dma_map_single(dev, skbh,
-                             skb_tail_pointer(skb) - buffer_start, dma_dir);
-       if (unlikely(dma_mapping_error(dev, addr))) {
+       addr = dma_map_single(priv->tx_dma_dev, buff_start,
+                             priv->tx_headroom + skb->len, dma_dir);
+       if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
                if (net_ratelimit())
                        netif_err(priv, tx_err, net_dev, "dma_map_single() failed\n");
                return -EINVAL;
@@ -1907,24 +1886,22 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
        const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
        const int nr_frags = skb_shinfo(skb)->nr_frags;
        struct net_device *net_dev = priv->net_dev;
-       struct device *dev = net_dev->dev.parent;
        struct qm_sg_entry *sgt;
        struct sk_buff **skbh;
-       int i, j, err, sz;
-       void *buffer_start;
+       void *buff_start;
        skb_frag_t *frag;
        dma_addr_t addr;
        size_t frag_len;
-       void *sgt_buf;
-
-       /* get a page frag to store the SGTable */
-       sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE);
-       sgt_buf = netdev_alloc_frag(sz);
-       if (unlikely(!sgt_buf)) {
-               netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
-                          sz);
+       struct page *p;
+       int i, j, err;
+
+       /* get a page to store the SGTable */
+       p = dev_alloc_pages(0);
+       if (unlikely(!p)) {
+               netdev_err(net_dev, "dev_alloc_pages() failed\n");
                return -ENOMEM;
        }
+       buff_start = page_address(p);
 
        /* Enable L3/L4 hardware checksum computation.
         *
@@ -1932,7 +1909,7 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
         * need to write into the skb.
         */
        err = dpaa_enable_tx_csum(priv, skb, fd,
-                                 sgt_buf + DPAA_TX_PRIV_DATA_SIZE);
+                                 buff_start + DPAA_TX_PRIV_DATA_SIZE);
        if (unlikely(err < 0)) {
                if (net_ratelimit())
                        netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
@@ -1941,15 +1918,15 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
        }
 
        /* SGT[0] is used by the linear part */
-       sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
+       sgt = (struct qm_sg_entry *)(buff_start + priv->tx_headroom);
        frag_len = skb_headlen(skb);
        qm_sg_entry_set_len(&sgt[0], frag_len);
        sgt[0].bpid = FSL_DPAA_BPID_INV;
        sgt[0].offset = 0;
-       addr = dma_map_single(dev, skb->data,
+       addr = dma_map_single(priv->tx_dma_dev, skb->data,
                              skb_headlen(skb), dma_dir);
-       if (unlikely(dma_mapping_error(dev, addr))) {
-               dev_err(dev, "DMA mapping failed");
+       if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
+               netdev_err(priv->net_dev, "DMA mapping failed\n");
                err = -EINVAL;
                goto sg0_map_failed;
        }
@@ -1960,10 +1937,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
                frag = &skb_shinfo(skb)->frags[i];
                frag_len = skb_frag_size(frag);
                WARN_ON(!skb_frag_page(frag));
-               addr = skb_frag_dma_map(dev, frag, 0,
+               addr = skb_frag_dma_map(priv->tx_dma_dev, frag, 0,
                                        frag_len, dma_dir);
-               if (unlikely(dma_mapping_error(dev, addr))) {
-                       dev_err(dev, "DMA mapping failed");
+               if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
+                       netdev_err(priv->net_dev, "DMA mapping failed\n");
                        err = -EINVAL;
                        goto sg_map_failed;
                }
@@ -1979,17 +1956,17 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
        /* Set the final bit in the last used entry of the SGT */
        qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
 
+       /* set fd offset to priv->tx_headroom */
        qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
 
        /* DMA map the SGT page */
-       buffer_start = (void *)sgt - priv->tx_headroom;
-       skbh = (struct sk_buff **)buffer_start;
+       skbh = (struct sk_buff **)buff_start;
        *skbh = skb;
 
-       addr = dma_map_single(dev, buffer_start,
-                             priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
-       if (unlikely(dma_mapping_error(dev, addr))) {
-               dev_err(dev, "DMA mapping failed");
+       addr = dma_map_page(priv->tx_dma_dev, p, 0,
+                           priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
+       if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
+               netdev_err(priv->net_dev, "DMA mapping failed\n");
                err = -EINVAL;
                goto sgt_map_failed;
        }
@@ -2003,11 +1980,11 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 sgt_map_failed:
 sg_map_failed:
        for (j = 0; j < i; j++)
-               dma_unmap_page(dev, qm_sg_addr(&sgt[j]),
+               dma_unmap_page(priv->tx_dma_dev, qm_sg_addr(&sgt[j]),
                               qm_sg_entry_get_len(&sgt[j]), dma_dir);
 sg0_map_failed:
 csum_failed:
-       skb_free_frag(sgt_buf);
+       free_pages((unsigned long)buff_start, 0);
 
        return err;
 }
@@ -2114,7 +2091,7 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
        if (likely(dpaa_xmit(priv, percpu_stats, queue_mapping, &fd) == 0))
                return NETDEV_TX_OK;
 
-       dpaa_cleanup_tx_fd(priv, &fd);
+       dpaa_cleanup_tx_fd(priv, &fd, false);
 skb_to_fd_failed:
 enomem:
        percpu_stats->tx_errors++;
@@ -2160,7 +2137,7 @@ static void dpaa_tx_error(struct net_device *net_dev,
 
        percpu_priv->stats.tx_errors++;
 
-       skb = dpaa_cleanup_tx_fd(priv, fd);
+       skb = dpaa_cleanup_tx_fd(priv, fd, false);
        dev_kfree_skb(skb);
 }
 
@@ -2200,7 +2177,7 @@ static void dpaa_tx_conf(struct net_device *net_dev,
 
        percpu_priv->tx_confirm++;
 
-       skb = dpaa_cleanup_tx_fd(priv, fd);
+       skb = dpaa_cleanup_tx_fd(priv, fd, true);
 
        consume_skb(skb);
 }
@@ -2304,11 +2281,8 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
                return qman_cb_dqrr_consume;
        }
 
-       dpaa_bp = dpaa_bpid2pool(fd->bpid);
-       if (!dpaa_bp)
-               return qman_cb_dqrr_consume;
-
-       dma_unmap_single(dpaa_bp->dev, addr, dpaa_bp->size, DMA_FROM_DEVICE);
+       dma_unmap_page(dpaa_bp->priv->rx_dma_dev, addr, DPAA_BP_RAW_SIZE,
+                      DMA_FROM_DEVICE);
 
        /* prefetch the first 64 bytes of the frame or the SGT start */
        vaddr = phys_to_virt(addr);
@@ -2430,7 +2404,7 @@ static void egress_ern(struct qman_portal *portal,
        percpu_priv->stats.tx_fifo_errors++;
        count_ern(percpu_priv, msg);
 
-       skb = dpaa_cleanup_tx_fd(priv, fd);
+       skb = dpaa_cleanup_tx_fd(priv, fd, false);
        dev_kfree_skb_any(skb);
 }
 
@@ -2663,7 +2637,8 @@ static inline void dpaa_bp_free_pf(const struct dpaa_bp *bp,
 {
        dma_addr_t addr = bm_buf_addr(bmb);
 
-       dma_unmap_single(bp->dev, addr, bp->size, DMA_FROM_DEVICE);
+       dma_unmap_page(bp->priv->rx_dma_dev, addr, DPAA_BP_RAW_SIZE,
+                      DMA_FROM_DEVICE);
 
        skb_free_frag(phys_to_virt(addr));
 }
@@ -2764,21 +2739,46 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
 
 static int dpaa_eth_probe(struct platform_device *pdev)
 {
-       struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL};
        struct net_device *net_dev = NULL;
+       struct dpaa_bp *dpaa_bp = NULL;
        struct dpaa_fq *dpaa_fq, *tmp;
        struct dpaa_priv *priv = NULL;
        struct fm_port_fqs port_fqs;
        struct mac_device *mac_dev;
-       int err = 0, i, channel;
+       int err = 0, channel;
        struct device *dev;
 
-       /* device used for DMA mapping */
-       dev = pdev->dev.parent;
-       err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
-       if (err) {
-               dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
-               return err;
+       dev = &pdev->dev;
+
+       err = bman_is_probed();
+       if (!err)
+               return -EPROBE_DEFER;
+       if (err < 0) {
+               dev_err(dev, "failing probe due to bman probe error\n");
+               return -ENODEV;
+       }
+       err = qman_is_probed();
+       if (!err)
+               return -EPROBE_DEFER;
+       if (err < 0) {
+               dev_err(dev, "failing probe due to qman probe error\n");
+               return -ENODEV;
+       }
+       err = bman_portals_probed();
+       if (!err)
+               return -EPROBE_DEFER;
+       if (err < 0) {
+               dev_err(dev,
+                       "failing probe due to bman portals probe error\n");
+               return -ENODEV;
+       }
+       err = qman_portals_probed();
+       if (!err)
+               return -EPROBE_DEFER;
+       if (err < 0) {
+               dev_err(dev,
+                       "failing probe due to qman portals probe error\n");
+               return -ENODEV;
        }
 
        /* Allocate this early, so we can store relevant information in
@@ -2801,11 +2801,23 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 
        mac_dev = dpaa_mac_dev_get(pdev);
        if (IS_ERR(mac_dev)) {
-               dev_err(dev, "dpaa_mac_dev_get() failed\n");
+               netdev_err(net_dev, "dpaa_mac_dev_get() failed\n");
                err = PTR_ERR(mac_dev);
                goto free_netdev;
        }
 
+       /* Devices used for DMA mapping */
+       priv->rx_dma_dev = fman_port_get_device(mac_dev->port[RX]);
+       priv->tx_dma_dev = fman_port_get_device(mac_dev->port[TX]);
+       err = dma_coerce_mask_and_coherent(priv->rx_dma_dev, DMA_BIT_MASK(40));
+       if (!err)
+               err = dma_coerce_mask_and_coherent(priv->tx_dma_dev,
+                                                  DMA_BIT_MASK(40));
+       if (err) {
+               netdev_err(net_dev, "dma_coerce_mask_and_coherent() failed\n");
+               return err;
+       }
+
        /* If fsl_fm_max_frm is set to a higher value than the all-common 1500,
         * we choose conservatively and let the user explicitly set a higher
         * MTU via ifconfig. Otherwise, the user may end up with different MTUs
@@ -2822,23 +2834,21 @@ static int dpaa_eth_probe(struct platform_device *pdev)
        priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
 
        /* bp init */
-       for (i = 0; i < DPAA_BPS_NUM; i++) {
-               dpaa_bps[i] = dpaa_bp_alloc(dev);
-               if (IS_ERR(dpaa_bps[i])) {
-                       err = PTR_ERR(dpaa_bps[i]);
-                       goto free_dpaa_bps;
-               }
-               /* the raw size of the buffers used for reception */
-               dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM);
-               /* avoid runtime computations by keeping the usable size here */
-               dpaa_bps[i]->size = dpaa_bp_size(dpaa_bps[i]->raw_size);
-               dpaa_bps[i]->dev = dev;
-
-               err = dpaa_bp_alloc_pool(dpaa_bps[i]);
-               if (err < 0)
-                       goto free_dpaa_bps;
-               priv->dpaa_bps[i] = dpaa_bps[i];
+       dpaa_bp = dpaa_bp_alloc(dev);
+       if (IS_ERR(dpaa_bp)) {
+               err = PTR_ERR(dpaa_bp);
+               goto free_dpaa_bps;
        }
+       /* the raw size of the buffers used for reception */
+       dpaa_bp->raw_size = DPAA_BP_RAW_SIZE;
+       /* avoid runtime computations by keeping the usable size here */
+       dpaa_bp->size = dpaa_bp_size(dpaa_bp->raw_size);
+       dpaa_bp->priv = priv;
+
+       err = dpaa_bp_alloc_pool(dpaa_bp);
+       if (err < 0)
+               goto free_dpaa_bps;
+       priv->dpaa_bp = dpaa_bp;
 
        INIT_LIST_HEAD(&priv->dpaa_fq_list);
 
@@ -2864,7 +2874,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
        /* Walk the CPUs with affine portals
         * and add this pool channel to each's dequeue mask.
         */
-       dpaa_eth_add_channel(priv->channel);
+       dpaa_eth_add_channel(priv->channel, &pdev->dev);
 
        dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]);
 
@@ -2896,7 +2906,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
        priv->rx_headroom = dpaa_get_headroom(&priv->buf_layout[RX]);
 
        /* All real interfaces need their ports initialized */
-       err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
+       err = dpaa_eth_init_ports(mac_dev, dpaa_bp, &port_fqs,
                                  &priv->buf_layout[0], dev);
        if (err)
                goto free_dpaa_fqs;
@@ -2955,7 +2965,7 @@ static int dpaa_remove(struct platform_device *pdev)
        struct device *dev;
        int err;
 
-       dev = pdev->dev.parent;
+       dev = &pdev->dev;
        net_dev = dev_get_drvdata(dev);
 
        priv = netdev_priv(net_dev);
index f7e59e8..fc2cc4c 100644 (file)
@@ -47,8 +47,6 @@
 /* Total number of Tx queues */
 #define DPAA_ETH_TXQ_NUM       (DPAA_TC_NUM * DPAA_TC_TXQ_NUM)
 
-#define DPAA_BPS_NUM 3 /* number of bpools per interface */
-
 /* More detailed FQ types - used for fine-grained WQ assignments */
 enum dpaa_fq_type {
        FQ_TYPE_RX_DEFAULT = 1, /* Rx Default FQs */
@@ -80,9 +78,11 @@ struct dpaa_fq_cbs {
        struct qman_fq egress_ern;
 };
 
+struct dpaa_priv;
+
 struct dpaa_bp {
-       /* device used in the DMA mapping operations */
-       struct device *dev;
+       /* used in the DMA mapping operations */
+       struct dpaa_priv *priv;
        /* current number of buffers in the buffer pool alloted to each CPU */
        int __percpu *percpu_count;
        /* all buffers allocated for this pool have this raw size */
@@ -146,13 +146,15 @@ struct dpaa_buffer_layout {
 
 struct dpaa_priv {
        struct dpaa_percpu_priv __percpu *percpu_priv;
-       struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM];
+       struct dpaa_bp *dpaa_bp;
        /* Store here the needed Tx headroom for convenience and speed
         * (even though it can be computed based on the fields of buf_layout)
         */
        u16 tx_headroom;
        struct net_device *net_dev;
        struct mac_device *mac_dev;
+       struct device *rx_dma_dev;
+       struct device *tx_dma_dev;
        struct qman_fq *egress_fqs[DPAA_ETH_TXQ_NUM];
        struct qman_fq *conf_fqs[DPAA_ETH_TXQ_NUM];
 
index 0d9b185..ee62d25 100644 (file)
@@ -131,11 +131,9 @@ static ssize_t dpaa_eth_show_bpids(struct device *dev,
 {
        struct dpaa_priv *priv = netdev_priv(to_net_dev(dev));
        ssize_t bytes = 0;
-       int i = 0;
 
-       for (i = 0; i < DPAA_BPS_NUM; i++)
-               bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, "%u\n",
-                                 priv->dpaa_bps[i]->bpid);
+       bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, "%u\n",
+                                 priv->dpaa_bp->bpid);
 
        return bytes;
 }
index 7ce2e99..66d1508 100644 (file)
@@ -47,6 +47,8 @@ static const char dpaa_stats_percpu[][ETH_GSTRING_LEN] = {
        "tx S/G",
        "tx error",
        "rx error",
+       "rx dropped",
+       "tx dropped",
 };
 
 static char dpaa_stats_global[][ETH_GSTRING_LEN] = {
@@ -78,10 +80,8 @@ static char dpaa_stats_global[][ETH_GSTRING_LEN] = {
 static int dpaa_get_link_ksettings(struct net_device *net_dev,
                                   struct ethtool_link_ksettings *cmd)
 {
-       if (!net_dev->phydev) {
-               netdev_dbg(net_dev, "phy device not initialized\n");
+       if (!net_dev->phydev)
                return 0;
-       }
 
        phy_ethtool_ksettings_get(net_dev->phydev, cmd);
 
@@ -93,10 +93,8 @@ static int dpaa_set_link_ksettings(struct net_device *net_dev,
 {
        int err;
 
-       if (!net_dev->phydev) {
-               netdev_err(net_dev, "phy device not initialized\n");
+       if (!net_dev->phydev)
                return -ENODEV;
-       }
 
        err = phy_ethtool_ksettings_set(net_dev->phydev, cmd);
        if (err < 0)
@@ -140,10 +138,8 @@ static int dpaa_nway_reset(struct net_device *net_dev)
 {
        int err;
 
-       if (!net_dev->phydev) {
-               netdev_err(net_dev, "phy device not initialized\n");
+       if (!net_dev->phydev)
                return -ENODEV;
-       }
 
        err = 0;
        if (net_dev->phydev->autoneg) {
@@ -165,10 +161,8 @@ static void dpaa_get_pauseparam(struct net_device *net_dev,
        priv = netdev_priv(net_dev);
        mac_dev = priv->mac_dev;
 
-       if (!net_dev->phydev) {
-               netdev_err(net_dev, "phy device not initialized\n");
+       if (!net_dev->phydev)
                return;
-       }
 
        epause->autoneg = mac_dev->autoneg_pause;
        epause->rx_pause = mac_dev->rx_pause_active;
@@ -223,7 +217,7 @@ static int dpaa_get_sset_count(struct net_device *net_dev, int type)
        unsigned int total_stats, num_stats;
 
        num_stats   = num_online_cpus() + 1;
-       total_stats = num_stats * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM) +
+       total_stats = num_stats * (DPAA_STATS_PERCPU_LEN + 1) +
                        DPAA_STATS_GLOBAL_LEN;
 
        switch (type) {
@@ -235,10 +229,10 @@ static int dpaa_get_sset_count(struct net_device *net_dev, int type)
 }
 
 static void copy_stats(struct dpaa_percpu_priv *percpu_priv, int num_cpus,
-                      int crr_cpu, u64 *bp_count, u64 *data)
+                      int crr_cpu, u64 bp_count, u64 *data)
 {
        int num_values = num_cpus + 1;
-       int crr = 0, j;
+       int crr = 0;
 
        /* update current CPU's stats and also add them to the total values */
        data[crr * num_values + crr_cpu] = percpu_priv->in_interrupt;
@@ -262,23 +256,27 @@ static void copy_stats(struct dpaa_percpu_priv *percpu_priv, int num_cpus,
        data[crr * num_values + crr_cpu] = percpu_priv->stats.rx_errors;
        data[crr++ * num_values + num_cpus] += percpu_priv->stats.rx_errors;
 
-       for (j = 0; j < DPAA_BPS_NUM; j++) {
-               data[crr * num_values + crr_cpu] = bp_count[j];
-               data[crr++ * num_values + num_cpus] += bp_count[j];
-       }
+       data[crr * num_values + crr_cpu] = percpu_priv->stats.rx_dropped;
+       data[crr++ * num_values + num_cpus] += percpu_priv->stats.rx_dropped;
+
+       data[crr * num_values + crr_cpu] = percpu_priv->stats.tx_dropped;
+       data[crr++ * num_values + num_cpus] += percpu_priv->stats.tx_dropped;
+
+       data[crr * num_values + crr_cpu] = bp_count;
+       data[crr++ * num_values + num_cpus] += bp_count;
 }
 
 static void dpaa_get_ethtool_stats(struct net_device *net_dev,
                                   struct ethtool_stats *stats, u64 *data)
 {
-       u64 bp_count[DPAA_BPS_NUM], cg_time, cg_num;
        struct dpaa_percpu_priv *percpu_priv;
        struct dpaa_rx_errors rx_errors;
        unsigned int num_cpus, offset;
+       u64 bp_count, cg_time, cg_num;
        struct dpaa_ern_cnt ern_cnt;
        struct dpaa_bp *dpaa_bp;
        struct dpaa_priv *priv;
-       int total_stats, i, j;
+       int total_stats, i;
        bool cg_status;
 
        total_stats = dpaa_get_sset_count(net_dev, ETH_SS_STATS);
@@ -292,12 +290,10 @@ static void dpaa_get_ethtool_stats(struct net_device *net_dev,
 
        for_each_online_cpu(i) {
                percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
-               for (j = 0; j < DPAA_BPS_NUM; j++) {
-                       dpaa_bp = priv->dpaa_bps[j];
-                       if (!dpaa_bp->percpu_count)
-                               continue;
-                       bp_count[j] = *(per_cpu_ptr(dpaa_bp->percpu_count, i));
-               }
+               dpaa_bp = priv->dpaa_bp;
+               if (!dpaa_bp->percpu_count)
+                       continue;
+               bp_count = *(per_cpu_ptr(dpaa_bp->percpu_count, i));
                rx_errors.dme += percpu_priv->rx_errors.dme;
                rx_errors.fpe += percpu_priv->rx_errors.fpe;
                rx_errors.fse += percpu_priv->rx_errors.fse;
@@ -315,7 +311,7 @@ static void dpaa_get_ethtool_stats(struct net_device *net_dev,
                copy_stats(percpu_priv, num_cpus, i, bp_count, data);
        }
 
-       offset = (num_cpus + 1) * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM);
+       offset = (num_cpus + 1) * (DPAA_STATS_PERCPU_LEN + 1);
        memcpy(data + offset, &rx_errors, sizeof(struct dpaa_rx_errors));
 
        offset += sizeof(struct dpaa_rx_errors) / sizeof(u64);
@@ -363,18 +359,16 @@ static void dpaa_get_strings(struct net_device *net_dev, u32 stringset,
                memcpy(strings, string_cpu, ETH_GSTRING_LEN);
                strings += ETH_GSTRING_LEN;
        }
-       for (i = 0; i < DPAA_BPS_NUM; i++) {
-               for (j = 0; j < num_cpus; j++) {
-                       snprintf(string_cpu, ETH_GSTRING_LEN,
-                                "bpool %c [CPU %d]", 'a' + i, j);
-                       memcpy(strings, string_cpu, ETH_GSTRING_LEN);
-                       strings += ETH_GSTRING_LEN;
-               }
-               snprintf(string_cpu, ETH_GSTRING_LEN, "bpool %c [TOTAL]",
-                        'a' + i);
+       for (j = 0; j < num_cpus; j++) {
+               snprintf(string_cpu, ETH_GSTRING_LEN,
+                        "bpool [CPU %d]", j);
                memcpy(strings, string_cpu, ETH_GSTRING_LEN);
                strings += ETH_GSTRING_LEN;
        }
+       snprintf(string_cpu, ETH_GSTRING_LEN, "bpool [TOTAL]");
+       memcpy(strings, string_cpu, ETH_GSTRING_LEN);
+       strings += ETH_GSTRING_LEN;
+
        memcpy(strings, dpaa_stats_global, size);
 }
 
index d1e78cd..69184ca 100644 (file)
@@ -6,7 +6,7 @@
 obj-$(CONFIG_FSL_DPAA2_ETH)            += fsl-dpaa2-eth.o
 obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK)      += fsl-dpaa2-ptp.o
 
-fsl-dpaa2-eth-objs     := dpaa2-eth.o dpaa2-ethtool.o dpni.o
+fsl-dpaa2-eth-objs     := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs     := dpaa2-ptp.o dprtc.o
 
index 19379ba..c26c0a7 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /* Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2019 NXP
  */
 #include <linux/init.h>
 #include <linux/module.h>
@@ -221,6 +221,7 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv,
                            struct dpaa2_eth_channel *ch,
                            dma_addr_t addr)
 {
+       int retries = 0;
        int err;
 
        ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr;
@@ -229,8 +230,11 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv,
 
        while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid,
                                               ch->xdp.drop_bufs,
-                                              ch->xdp.drop_cnt)) == -EBUSY)
+                                              ch->xdp.drop_cnt)) == -EBUSY) {
+               if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
+                       break;
                cpu_relax();
+       }
 
        if (err) {
                free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
@@ -458,7 +462,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
        struct dpaa2_eth_fq *fq = NULL;
        struct dpaa2_dq *dq;
        const struct dpaa2_fd *fd;
-       int cleaned = 0;
+       int cleaned = 0, retries = 0;
        int is_last;
 
        do {
@@ -469,6 +473,11 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
                         * the store until we get some sort of valid response
                         * token (either a valid frame or an "empty dequeue")
                         */
+                       if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) {
+                               netdev_err_once(priv->net_dev,
+                                               "Unable to read a valid dequeue response\n");
+                               return -ETIMEDOUT;
+                       }
                        continue;
                }
 
@@ -477,6 +486,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
 
                fq->consume(priv, ch, fd, fq);
                cleaned++;
+               retries = 0;
        } while (!is_last);
 
        if (!cleaned)
@@ -949,6 +959,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
        u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
        struct page *page;
        dma_addr_t addr;
+       int retries = 0;
        int i, err;
 
        for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) {
@@ -980,8 +991,11 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
 release_bufs:
        /* In case the portal is busy, retry until successful */
        while ((err = dpaa2_io_service_release(ch->dpio, bpid,
-                                              buf_array, i)) == -EBUSY)
+                                              buf_array, i)) == -EBUSY) {
+               if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
+                       break;
                cpu_relax();
+       }
 
        /* If release command failed, clean up and bail out;
         * not much else we can do about it
@@ -1032,16 +1046,21 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
 static void drain_bufs(struct dpaa2_eth_priv *priv, int count)
 {
        u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
+       int retries = 0;
        int ret;
 
        do {
                ret = dpaa2_io_service_acquire(NULL, priv->bpid,
                                               buf_array, count);
                if (ret < 0) {
+                       if (ret == -EBUSY &&
+                           retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
+                               continue;
                        netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n");
                        return;
                }
                free_bufs(priv, buf_array, ret);
+               retries = 0;
        } while (ret);
 }
 
@@ -1094,7 +1113,7 @@ static int pull_channel(struct dpaa2_eth_channel *ch)
                                                    ch->store);
                dequeues++;
                cpu_relax();
-       } while (err == -EBUSY);
+       } while (err == -EBUSY && dequeues < DPAA2_ETH_SWP_BUSY_RETRIES);
 
        ch->stats.dequeue_portal_busy += dequeues;
        if (unlikely(err))
@@ -1118,6 +1137,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
        struct netdev_queue *nq;
        int store_cleaned, work_done;
        struct list_head rx_list;
+       int retries = 0;
        int err;
 
        ch = container_of(napi, struct dpaa2_eth_channel, napi);
@@ -1136,7 +1156,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
                refill_pool(priv, ch, priv->bpid);
 
                store_cleaned = consume_frames(ch, &fq);
-               if (!store_cleaned)
+               if (store_cleaned <= 0)
                        break;
                if (fq->type == DPAA2_RX_FQ) {
                        rx_cleaned += store_cleaned;
@@ -1163,7 +1183,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
        do {
                err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx);
                cpu_relax();
-       } while (err == -EBUSY);
+       } while (err == -EBUSY && retries++ < DPAA2_ETH_SWP_BUSY_RETRIES);
        WARN_ONCE(err, "CDAN notifications rearm failed on core %d",
                  ch->nctx.desired_cpu);
 
@@ -1235,8 +1255,6 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
        priv->rx_td_enabled = enable;
 }
 
-static void update_tx_fqids(struct dpaa2_eth_priv *priv);
-
 static int link_state_update(struct dpaa2_eth_priv *priv)
 {
        struct dpni_link_state state = {0};
@@ -1258,12 +1276,17 @@ static int link_state_update(struct dpaa2_eth_priv *priv)
                   !!(state.options & DPNI_LINK_OPT_ASYM_PAUSE);
        dpaa2_eth_set_rx_taildrop(priv, !tx_pause);
 
+       /* When we manage the MAC/PHY using phylink there is no need
+        * to manually update the netif_carrier.
+        */
+       if (priv->mac)
+               goto out;
+
        /* Chech link state; speed / duplex changes are not treated yet */
        if (priv->link_state.up == state.up)
                goto out;
 
        if (state.up) {
-               update_tx_fqids(priv);
                netif_carrier_on(priv->net_dev);
                netif_tx_start_all_queues(priv->net_dev);
        } else {
@@ -1295,17 +1318,21 @@ static int dpaa2_eth_open(struct net_device *net_dev)
                           priv->dpbp_dev->obj_desc.id, priv->bpid);
        }
 
-       /* We'll only start the txqs when the link is actually ready; make sure
-        * we don't race against the link up notification, which may come
-        * immediately after dpni_enable();
-        */
-       netif_tx_stop_all_queues(net_dev);
+       if (!priv->mac) {
+               /* We'll only start the txqs when the link is actually ready;
+                * make sure we don't race against the link up notification,
+                * which may come immediately after dpni_enable();
+                */
+               netif_tx_stop_all_queues(net_dev);
+
+               /* Also, explicitly set carrier off, otherwise
+                * netif_carrier_ok() will return true and cause 'ip link show'
+                * to report the LOWER_UP flag, even though the link
+                * notification wasn't even received.
+                */
+               netif_carrier_off(net_dev);
+       }
        enable_ch_napi(priv);
-       /* Also, explicitly set carrier off, otherwise netif_carrier_ok() will
-        * return true and cause 'ip link show' to report the LOWER_UP flag,
-        * even though the link notification wasn't even received.
-        */
-       netif_carrier_off(net_dev);
 
        err = dpni_enable(priv->mc_io, 0, priv->mc_token);
        if (err < 0) {
@@ -1313,13 +1340,17 @@ static int dpaa2_eth_open(struct net_device *net_dev)
                goto enable_err;
        }
 
-       /* If the DPMAC object has already processed the link up interrupt,
-        * we have to learn the link state ourselves.
-        */
-       err = link_state_update(priv);
-       if (err < 0) {
-               netdev_err(net_dev, "Can't update link state\n");
-               goto link_state_err;
+       if (!priv->mac) {
+               /* If the DPMAC object has already processed the link up
+                * interrupt, we have to learn the link state ourselves.
+                */
+               err = link_state_update(priv);
+               if (err < 0) {
+                       netdev_err(net_dev, "Can't update link state\n");
+                       goto link_state_err;
+               }
+       } else {
+               phylink_start(priv->mac->phylink);
        }
 
        return 0;
@@ -1394,8 +1425,12 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
        int dpni_enabled = 0;
        int retries = 10;
 
-       netif_tx_stop_all_queues(net_dev);
-       netif_carrier_off(net_dev);
+       if (!priv->mac) {
+               netif_tx_stop_all_queues(net_dev);
+               netif_carrier_off(net_dev);
+       } else {
+               phylink_stop(priv->mac->phylink);
+       }
 
        /* On dpni_disable(), the MC firmware will:
         * - stop MAC Rx and wait for all Rx frames to be enqueued to software
@@ -2046,7 +2081,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv)
 {
        struct fsl_mc_device *dpcon;
        struct device *dev = priv->net_dev->dev.parent;
-       struct dpcon_attr attrs;
        int err;
 
        err = fsl_mc_object_allocate(to_fsl_mc_device(dev),
@@ -2071,12 +2105,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv)
                goto close;
        }
 
-       err = dpcon_get_attributes(priv->mc_io, 0, dpcon->mc_handle, &attrs);
-       if (err) {
-               dev_err(dev, "dpcon_get_attributes() failed\n");
-               goto close;
-       }
-
        err = dpcon_enable(priv->mc_io, 0, dpcon->mc_handle);
        if (err) {
                dev_err(dev, "dpcon_enable() failed\n");
@@ -3332,12 +3360,56 @@ static int poll_link_state(void *arg)
        return 0;
 }
 
+static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
+{
+       struct fsl_mc_device *dpni_dev, *dpmac_dev;
+       struct dpaa2_mac *mac;
+       int err;
+
+       dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
+       dpmac_dev = fsl_mc_get_endpoint(dpni_dev);
+       if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+               return 0;
+
+       if (dpaa2_mac_is_type_fixed(dpmac_dev, priv->mc_io))
+               return 0;
+
+       mac = kzalloc(sizeof(struct dpaa2_mac), GFP_KERNEL);
+       if (!mac)
+               return -ENOMEM;
+
+       mac->mc_dev = dpmac_dev;
+       mac->mc_io = priv->mc_io;
+       mac->net_dev = priv->net_dev;
+
+       err = dpaa2_mac_connect(mac);
+       if (err) {
+               netdev_err(priv->net_dev, "Error connecting to the MAC endpoint\n");
+               kfree(mac);
+               return err;
+       }
+       priv->mac = mac;
+
+       return 0;
+}
+
+static void dpaa2_eth_disconnect_mac(struct dpaa2_eth_priv *priv)
+{
+       if (!priv->mac)
+               return;
+
+       dpaa2_mac_disconnect(priv->mac);
+       kfree(priv->mac);
+       priv->mac = NULL;
+}
+
 static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
 {
        u32 status = ~0;
        struct device *dev = (struct device *)arg;
        struct fsl_mc_device *dpni_dev = to_fsl_mc_device(dev);
        struct net_device *net_dev = dev_get_drvdata(dev);
+       struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
        int err;
 
        err = dpni_get_irq_status(dpni_dev->mc_io, 0, dpni_dev->mc_handle,
@@ -3350,8 +3422,17 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
        if (status & DPNI_IRQ_EVENT_LINK_CHANGED)
                link_state_update(netdev_priv(net_dev));
 
-       if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED)
+       if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED) {
                set_mac_addr(netdev_priv(net_dev));
+               update_tx_fqids(priv);
+
+               rtnl_lock();
+               if (priv->mac)
+                       dpaa2_eth_disconnect_mac(priv);
+               else
+                       dpaa2_eth_connect_mac(priv);
+               rtnl_unlock();
+       }
 
        return IRQ_HANDLED;
 }
@@ -3527,6 +3608,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
                priv->do_link_poll = true;
        }
 
+       err = dpaa2_eth_connect_mac(priv);
+       if (err)
+               goto err_connect_mac;
+
        err = register_netdev(net_dev);
        if (err < 0) {
                dev_err(dev, "register_netdev() failed\n");
@@ -3541,6 +3626,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
        return 0;
 
 err_netdev_reg:
+       dpaa2_eth_disconnect_mac(priv);
+err_connect_mac:
        if (priv->do_link_poll)
                kthread_stop(priv->poll_thread);
        else
@@ -3583,6 +3670,10 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
 #ifdef CONFIG_DEBUG_FS
        dpaa2_dbg_remove(priv);
 #endif
+       rtnl_lock();
+       dpaa2_eth_disconnect_mac(priv);
+       rtnl_unlock();
+
        unregister_netdev(net_dev);
 
        if (priv->do_link_poll)
index 8a0e65b..7635db3 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "dpaa2-eth-trace.h"
 #include "dpaa2-eth-debugfs.h"
+#include "dpaa2-mac.h"
 
 #define DPAA2_WRIOP_VERSION(x, y, z) ((x) << 10 | (y) << 5 | (z) << 0)
 
@@ -245,6 +246,14 @@ static inline struct dpaa2_faead *dpaa2_get_faead(void *buf_addr, bool swa)
  */
 #define DPAA2_ETH_ENQUEUE_RETRIES      10
 
+/* Number of times to retry DPIO portal operations while waiting
+ * for portal to finish executing current command and become
+ * available. We want to avoid being stuck in a while loop in case
+ * hardware becomes unresponsive, but not give up too easily if
+ * the portal really is busy for valid reasons
+ */
+#define DPAA2_ETH_SWP_BUSY_RETRIES     1000
+
 /* Driver statistics, other than those in struct rtnl_link_stats64.
  * These are usually collected per-CPU and aggregated by ethtool.
  */
@@ -407,6 +416,8 @@ struct dpaa2_eth_priv {
 #ifdef CONFIG_DEBUG_FS
        struct dpaa2_debugfs dbg;
 #endif
+
+       struct dpaa2_mac *mac;
 };
 
 #define DPAA2_RXH_SUPPORTED    (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \
index 0aa1c34..0883620 100644 (file)
@@ -85,6 +85,10 @@ dpaa2_eth_get_link_ksettings(struct net_device *net_dev,
 {
        struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 
+       if (priv->mac)
+               return phylink_ethtool_ksettings_get(priv->mac->phylink,
+                                                    link_settings);
+
        link_settings->base.autoneg = AUTONEG_DISABLE;
        if (!(priv->link_state.options & DPNI_LINK_OPT_HALF_DUPLEX))
                link_settings->base.duplex = DUPLEX_FULL;
@@ -93,12 +97,29 @@ dpaa2_eth_get_link_ksettings(struct net_device *net_dev,
        return 0;
 }
 
+static int
+dpaa2_eth_set_link_ksettings(struct net_device *net_dev,
+                            const struct ethtool_link_ksettings *link_settings)
+{
+       struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+       if (!priv->mac)
+               return -ENOTSUPP;
+
+       return phylink_ethtool_ksettings_set(priv->mac->phylink, link_settings);
+}
+
 static void dpaa2_eth_get_pauseparam(struct net_device *net_dev,
                                     struct ethtool_pauseparam *pause)
 {
        struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
        u64 link_options = priv->link_state.options;
 
+       if (priv->mac) {
+               phylink_ethtool_get_pauseparam(priv->mac->phylink, pause);
+               return;
+       }
+
        pause->rx_pause = !!(link_options & DPNI_LINK_OPT_PAUSE);
        pause->tx_pause = pause->rx_pause ^
                          !!(link_options & DPNI_LINK_OPT_ASYM_PAUSE);
@@ -118,6 +139,9 @@ static int dpaa2_eth_set_pauseparam(struct net_device *net_dev,
                return -EOPNOTSUPP;
        }
 
+       if (priv->mac)
+               return phylink_ethtool_set_pauseparam(priv->mac->phylink,
+                                                     pause);
        if (pause->autoneg)
                return -EOPNOTSUPP;
 
@@ -216,7 +240,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
                if (err == -EINVAL)
                        /* Older firmware versions don't support all pages */
                        memset(&dpni_stats, 0, sizeof(dpni_stats));
-               else
+               else if (err)
                        netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
 
                num_cnt = dpni_stats_page_size[j] / sizeof(u64);
@@ -728,6 +752,7 @@ const struct ethtool_ops dpaa2_ethtool_ops = {
        .get_drvinfo = dpaa2_eth_get_drvinfo,
        .get_link = ethtool_op_get_link,
        .get_link_ksettings = dpaa2_eth_get_link_ksettings,
+       .set_link_ksettings = dpaa2_eth_set_link_ksettings,
        .get_pauseparam = dpaa2_eth_get_pauseparam,
        .set_pauseparam = dpaa2_eth_set_pauseparam,
        .get_sset_count = dpaa2_eth_get_sset_count,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
new file mode 100644 (file)
index 0000000..b713739
--- /dev/null
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+
+#include "dpaa2-eth.h"
+#include "dpaa2-mac.h"
+
+#define phylink_to_dpaa2_mac(config) \
+       container_of((config), struct dpaa2_mac, phylink_config)
+
+static phy_interface_t phy_mode(enum dpmac_eth_if eth_if)
+{
+       switch (eth_if) {
+       case DPMAC_ETH_IF_RGMII:
+               return PHY_INTERFACE_MODE_RGMII;
+       default:
+               return -EINVAL;
+       }
+}
+
+/* Caller must call of_node_put on the returned value */
+static struct device_node *dpaa2_mac_get_node(u16 dpmac_id)
+{
+       struct device_node *dpmacs, *dpmac = NULL;
+       u32 id;
+       int err;
+
+       dpmacs = of_find_node_by_name(NULL, "dpmacs");
+       if (!dpmacs)
+               return NULL;
+
+       while ((dpmac = of_get_next_child(dpmacs, dpmac)) != NULL) {
+               err = of_property_read_u32(dpmac, "reg", &id);
+               if (err)
+                       continue;
+               if (id == dpmac_id)
+                       break;
+       }
+
+       of_node_put(dpmacs);
+
+       return dpmac;
+}
+
+static int dpaa2_mac_get_if_mode(struct device_node *node,
+                                struct dpmac_attr attr)
+{
+       phy_interface_t if_mode;
+       int err;
+
+       err = of_get_phy_mode(node, &if_mode);
+       if (!err)
+               return if_mode;
+
+       if_mode = phy_mode(attr.eth_if);
+       if (if_mode >= 0)
+               return if_mode;
+
+       return -ENODEV;
+}
+
+static bool dpaa2_mac_phy_mode_mismatch(struct dpaa2_mac *mac,
+                                       phy_interface_t interface)
+{
+       switch (interface) {
+       case PHY_INTERFACE_MODE_RGMII:
+       case PHY_INTERFACE_MODE_RGMII_ID:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+               return (interface != mac->if_mode);
+       default:
+               return true;
+       }
+}
+
+static void dpaa2_mac_validate(struct phylink_config *config,
+                              unsigned long *supported,
+                              struct phylink_link_state *state)
+{
+       struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+       if (state->interface != PHY_INTERFACE_MODE_NA &&
+           dpaa2_mac_phy_mode_mismatch(mac, state->interface)) {
+               goto empty_set;
+       }
+
+       phylink_set_port_modes(mask);
+       phylink_set(mask, Autoneg);
+       phylink_set(mask, Pause);
+       phylink_set(mask, Asym_Pause);
+
+       switch (state->interface) {
+       case PHY_INTERFACE_MODE_RGMII:
+       case PHY_INTERFACE_MODE_RGMII_ID:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+               phylink_set(mask, 10baseT_Full);
+               phylink_set(mask, 100baseT_Full);
+               phylink_set(mask, 1000baseT_Full);
+               break;
+       default:
+               goto empty_set;
+       }
+
+       linkmode_and(supported, supported, mask);
+       linkmode_and(state->advertising, state->advertising, mask);
+
+       return;
+
+empty_set:
+       linkmode_zero(supported);
+}
+
+static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode,
+                            const struct phylink_link_state *state)
+{
+       struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+       struct dpmac_link_state *dpmac_state = &mac->state;
+       int err;
+
+       if (state->speed != SPEED_UNKNOWN)
+               dpmac_state->rate = state->speed;
+
+       if (state->duplex != DUPLEX_UNKNOWN) {
+               if (!state->duplex)
+                       dpmac_state->options |= DPMAC_LINK_OPT_HALF_DUPLEX;
+               else
+                       dpmac_state->options &= ~DPMAC_LINK_OPT_HALF_DUPLEX;
+       }
+
+       if (state->an_enabled)
+               dpmac_state->options |= DPMAC_LINK_OPT_AUTONEG;
+       else
+               dpmac_state->options &= ~DPMAC_LINK_OPT_AUTONEG;
+
+       if (state->pause & MLO_PAUSE_RX)
+               dpmac_state->options |= DPMAC_LINK_OPT_PAUSE;
+       else
+               dpmac_state->options &= ~DPMAC_LINK_OPT_PAUSE;
+
+       if (!!(state->pause & MLO_PAUSE_RX) ^ !!(state->pause & MLO_PAUSE_TX))
+               dpmac_state->options |= DPMAC_LINK_OPT_ASYM_PAUSE;
+       else
+               dpmac_state->options &= ~DPMAC_LINK_OPT_ASYM_PAUSE;
+
+       err = dpmac_set_link_state(mac->mc_io, 0,
+                                  mac->mc_dev->mc_handle, dpmac_state);
+       if (err)
+               netdev_err(mac->net_dev, "dpmac_set_link_state() = %d\n", err);
+}
+
+static void dpaa2_mac_link_up(struct phylink_config *config, unsigned int mode,
+                             phy_interface_t interface, struct phy_device *phy)
+{
+       struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+       struct dpmac_link_state *dpmac_state = &mac->state;
+       int err;
+
+       dpmac_state->up = 1;
+       err = dpmac_set_link_state(mac->mc_io, 0,
+                                  mac->mc_dev->mc_handle, dpmac_state);
+       if (err)
+               netdev_err(mac->net_dev, "dpmac_set_link_state() = %d\n", err);
+}
+
+static void dpaa2_mac_link_down(struct phylink_config *config,
+                               unsigned int mode,
+                               phy_interface_t interface)
+{
+       struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+       struct dpmac_link_state *dpmac_state = &mac->state;
+       int err;
+
+       dpmac_state->up = 0;
+       err = dpmac_set_link_state(mac->mc_io, 0,
+                                  mac->mc_dev->mc_handle, dpmac_state);
+       if (err)
+               netdev_err(mac->net_dev, "dpmac_set_link_state() = %d\n", err);
+}
+
+static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
+       .validate = dpaa2_mac_validate,
+       .mac_config = dpaa2_mac_config,
+       .mac_link_up = dpaa2_mac_link_up,
+       .mac_link_down = dpaa2_mac_link_down,
+};
+
+bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,
+                            struct fsl_mc_io *mc_io)
+{
+       struct dpmac_attr attr;
+       bool fixed = false;
+       u16 mc_handle = 0;
+       int err;
+
+       err = dpmac_open(mc_io, 0, dpmac_dev->obj_desc.id,
+                        &mc_handle);
+       if (err || !mc_handle)
+               return false;
+
+       err = dpmac_get_attributes(mc_io, 0, mc_handle, &attr);
+       if (err)
+               goto out;
+
+       if (attr.link_type == DPMAC_LINK_TYPE_FIXED)
+               fixed = true;
+
+out:
+       dpmac_close(mc_io, 0, mc_handle);
+
+       return fixed;
+}
+
+int dpaa2_mac_connect(struct dpaa2_mac *mac)
+{
+       struct fsl_mc_device *dpmac_dev = mac->mc_dev;
+       struct net_device *net_dev = mac->net_dev;
+       struct device_node *dpmac_node;
+       struct phylink *phylink;
+       struct dpmac_attr attr;
+       int err;
+
+       err = dpmac_open(mac->mc_io, 0, dpmac_dev->obj_desc.id,
+                        &dpmac_dev->mc_handle);
+       if (err || !dpmac_dev->mc_handle) {
+               netdev_err(net_dev, "dpmac_open() = %d\n", err);
+               return -ENODEV;
+       }
+
+       err = dpmac_get_attributes(mac->mc_io, 0, dpmac_dev->mc_handle, &attr);
+       if (err) {
+               netdev_err(net_dev, "dpmac_get_attributes() = %d\n", err);
+               goto err_close_dpmac;
+       }
+
+       dpmac_node = dpaa2_mac_get_node(attr.id);
+       if (!dpmac_node) {
+               netdev_err(net_dev, "No dpmac@%d node found.\n", attr.id);
+               err = -ENODEV;
+               goto err_close_dpmac;
+       }
+
+       err = dpaa2_mac_get_if_mode(dpmac_node, attr);
+       if (err < 0) {
+               err = -EINVAL;
+               goto err_put_node;
+       }
+       mac->if_mode = err;
+
+       /* The MAC does not have the capability to add RGMII delays so
+        * error out if the interface mode requests them and there is no PHY
+        * to act upon them
+        */
+       if (of_phy_is_fixed_link(dpmac_node) &&
+           (mac->if_mode == PHY_INTERFACE_MODE_RGMII_ID ||
+            mac->if_mode == PHY_INTERFACE_MODE_RGMII_RXID ||
+            mac->if_mode == PHY_INTERFACE_MODE_RGMII_TXID)) {
+               netdev_err(net_dev, "RGMII delay not supported\n");
+               err = -EINVAL;
+               goto err_put_node;
+       }
+
+       mac->phylink_config.dev = &net_dev->dev;
+       mac->phylink_config.type = PHYLINK_NETDEV;
+
+       phylink = phylink_create(&mac->phylink_config,
+                                of_fwnode_handle(dpmac_node), mac->if_mode,
+                                &dpaa2_mac_phylink_ops);
+       if (IS_ERR(phylink)) {
+               err = PTR_ERR(phylink);
+               goto err_put_node;
+       }
+       mac->phylink = phylink;
+
+       err = phylink_of_phy_connect(mac->phylink, dpmac_node, 0);
+       if (err) {
+               netdev_err(net_dev, "phylink_of_phy_connect() = %d\n", err);
+               goto err_phylink_destroy;
+       }
+
+       of_node_put(dpmac_node);
+
+       return 0;
+
+err_phylink_destroy:
+       phylink_destroy(mac->phylink);
+err_put_node:
+       of_node_put(dpmac_node);
+err_close_dpmac:
+       dpmac_close(mac->mc_io, 0, dpmac_dev->mc_handle);
+       return err;
+}
+
+void dpaa2_mac_disconnect(struct dpaa2_mac *mac)
+{
+       if (!mac->phylink)
+               return;
+
+       phylink_disconnect_phy(mac->phylink);
+       phylink_destroy(mac->phylink);
+       dpmac_close(mac->mc_io, 0, mac->mc_dev->mc_handle);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
new file mode 100644 (file)
index 0000000..8634d0d
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2019 NXP */
+#ifndef DPAA2_MAC_H
+#define DPAA2_MAC_H
+
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/phylink.h>
+
+#include "dpmac.h"
+#include "dpmac-cmd.h"
+
+struct dpaa2_mac {
+       struct fsl_mc_device *mc_dev;
+       struct dpmac_link_state state;
+       struct net_device *net_dev;
+       struct fsl_mc_io *mc_io;
+
+       struct phylink_config phylink_config;
+       struct phylink *phylink;
+       phy_interface_t if_mode;
+};
+
+bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,
+                            struct fsl_mc_io *mc_io);
+
+int dpaa2_mac_connect(struct dpaa2_mac *mac);
+
+void dpaa2_mac_disconnect(struct dpaa2_mac *mac);
+
+#endif /* DPAA2_MAC_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h
new file mode 100644 (file)
index 0000000..96a9b0d
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+#ifndef _FSL_DPMAC_CMD_H
+#define _FSL_DPMAC_CMD_H
+
+/* DPMAC Version */
+#define DPMAC_VER_MAJOR                                4
+#define DPMAC_VER_MINOR                                4
+#define DPMAC_CMD_BASE_VERSION                 1
+#define DPMAC_CMD_2ND_VERSION                  2
+#define DPMAC_CMD_ID_OFFSET                    4
+
+#define DPMAC_CMD(id)  (((id) << DPMAC_CMD_ID_OFFSET) | DPMAC_CMD_BASE_VERSION)
+#define DPMAC_CMD_V2(id) (((id) << DPMAC_CMD_ID_OFFSET) | DPMAC_CMD_2ND_VERSION)
+
+/* Command IDs */
+#define DPMAC_CMDID_CLOSE              DPMAC_CMD(0x800)
+#define DPMAC_CMDID_OPEN               DPMAC_CMD(0x80c)
+
+#define DPMAC_CMDID_GET_ATTR           DPMAC_CMD(0x004)
+#define DPMAC_CMDID_SET_LINK_STATE     DPMAC_CMD_V2(0x0c3)
+
+/* Macros for accessing command fields smaller than 1byte */
+#define DPMAC_MASK(field)        \
+       GENMASK(DPMAC_##field##_SHIFT + DPMAC_##field##_SIZE - 1, \
+               DPMAC_##field##_SHIFT)
+
+#define dpmac_set_field(var, field, val) \
+       ((var) |= (((val) << DPMAC_##field##_SHIFT) & DPMAC_MASK(field)))
+#define dpmac_get_field(var, field)      \
+       (((var) & DPMAC_MASK(field)) >> DPMAC_##field##_SHIFT)
+
+struct dpmac_cmd_open {
+       __le32 dpmac_id;
+};
+
+struct dpmac_rsp_get_attributes {
+       u8 eth_if;
+       u8 link_type;
+       __le16 id;
+       __le32 max_rate;
+};
+
+#define DPMAC_STATE_SIZE       1
+#define DPMAC_STATE_SHIFT      0
+#define DPMAC_STATE_VALID_SIZE 1
+#define DPMAC_STATE_VALID_SHIFT        1
+
+struct dpmac_cmd_set_link_state {
+       __le64 options;
+       __le32 rate;
+       __le32 pad0;
+       /* from lsb: up:1, state_valid:1 */
+       u8 state;
+       u8 pad1[7];
+       __le64 supported;
+       __le64 advertising;
+};
+
+#endif /* _FSL_DPMAC_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.c b/drivers/net/ethernet/freescale/dpaa2/dpmac.c
new file mode 100644 (file)
index 0000000..b75189d
--- /dev/null
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+#include <linux/fsl/mc.h>
+#include "dpmac.h"
+#include "dpmac-cmd.h"
+
+/**
+ * dpmac_open() - Open a control session for the specified object.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpmac_id:  DPMAC unique ID
+ * @token:     Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpmac_create function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpmac_open(struct fsl_mc_io *mc_io,
+              u32 cmd_flags,
+              int dpmac_id,
+              u16 *token)
+{
+       struct dpmac_cmd_open *cmd_params;
+       struct fsl_mc_command cmd = { 0 };
+       int err;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPMAC_CMDID_OPEN,
+                                         cmd_flags,
+                                         0);
+       cmd_params = (struct dpmac_cmd_open *)cmd.params;
+       cmd_params->dpmac_id = cpu_to_le32(dpmac_id);
+
+       /* send command to mc*/
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return err;
+
+       /* retrieve response parameters */
+       *token = mc_cmd_hdr_read_token(&cmd);
+
+       return err;
+}
+
+/**
+ * dpmac_close() - Close the control session of the object
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPMAC object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpmac_close(struct fsl_mc_io *mc_io,
+               u32 cmd_flags,
+               u16 token)
+{
+       struct fsl_mc_command cmd = { 0 };
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPMAC_CMDID_CLOSE, cmd_flags,
+                                         token);
+
+       /* send command to mc*/
+       return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpmac_get_attributes - Retrieve DPMAC attributes.
+ *
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPMAC object
+ * @attr:      Returned object's attributes
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpmac_get_attributes(struct fsl_mc_io *mc_io,
+                        u32 cmd_flags,
+                        u16 token,
+                        struct dpmac_attr *attr)
+{
+       struct dpmac_rsp_get_attributes *rsp_params;
+       struct fsl_mc_command cmd = { 0 };
+       int err;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_ATTR,
+                                         cmd_flags,
+                                         token);
+
+       /* send command to mc*/
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return err;
+
+       /* retrieve response parameters */
+       rsp_params = (struct dpmac_rsp_get_attributes *)cmd.params;
+       attr->eth_if = rsp_params->eth_if;
+       attr->link_type = rsp_params->link_type;
+       attr->id = le16_to_cpu(rsp_params->id);
+       attr->max_rate = le32_to_cpu(rsp_params->max_rate);
+
+       return 0;
+}
+
+/**
+ * dpmac_set_link_state() - Set the Ethernet link status
+ * @mc_io:      Pointer to opaque I/O object
+ * @cmd_flags:  Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:      Token of DPMAC object
+ * @link_state: Link state configuration
+ *
+ * Return:      '0' on Success; Error code otherwise.
+ */
+int dpmac_set_link_state(struct fsl_mc_io *mc_io,
+                        u32 cmd_flags,
+                        u16 token,
+                        struct dpmac_link_state *link_state)
+{
+       struct dpmac_cmd_set_link_state *cmd_params;
+       struct fsl_mc_command cmd = { 0 };
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPMAC_CMDID_SET_LINK_STATE,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpmac_cmd_set_link_state *)cmd.params;
+       cmd_params->options = cpu_to_le64(link_state->options);
+       cmd_params->rate = cpu_to_le32(link_state->rate);
+       dpmac_set_field(cmd_params->state, STATE, link_state->up);
+       dpmac_set_field(cmd_params->state, STATE_VALID,
+                       link_state->state_valid);
+       cmd_params->supported = cpu_to_le64(link_state->supported);
+       cmd_params->advertising = cpu_to_le64(link_state->advertising);
+
+       /* send command to mc*/
+       return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.h b/drivers/net/ethernet/freescale/dpaa2/dpmac.h
new file mode 100644 (file)
index 0000000..4efc410
--- /dev/null
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+#ifndef __FSL_DPMAC_H
+#define __FSL_DPMAC_H
+
+/* Data Path MAC API
+ * Contains initialization APIs and runtime control APIs for DPMAC
+ */
+
+struct fsl_mc_io;
+
+int dpmac_open(struct fsl_mc_io *mc_io,
+              u32 cmd_flags,
+              int dpmac_id,
+              u16 *token);
+
+int dpmac_close(struct fsl_mc_io *mc_io,
+               u32 cmd_flags,
+               u16 token);
+
+/**
+ * enum dpmac_link_type -  DPMAC link type
+ * @DPMAC_LINK_TYPE_NONE: No link
+ * @DPMAC_LINK_TYPE_FIXED: Link is fixed type
+ * @DPMAC_LINK_TYPE_PHY: Link by PHY ID
+ * @DPMAC_LINK_TYPE_BACKPLANE: Backplane link type
+ */
+enum dpmac_link_type {
+       DPMAC_LINK_TYPE_NONE,
+       DPMAC_LINK_TYPE_FIXED,
+       DPMAC_LINK_TYPE_PHY,
+       DPMAC_LINK_TYPE_BACKPLANE
+};
+
+/**
+ * enum dpmac_eth_if - DPMAC Ethrnet interface
+ * @DPMAC_ETH_IF_MII: MII interface
+ * @DPMAC_ETH_IF_RMII: RMII interface
+ * @DPMAC_ETH_IF_SMII: SMII interface
+ * @DPMAC_ETH_IF_GMII: GMII interface
+ * @DPMAC_ETH_IF_RGMII: RGMII interface
+ * @DPMAC_ETH_IF_SGMII: SGMII interface
+ * @DPMAC_ETH_IF_QSGMII: QSGMII interface
+ * @DPMAC_ETH_IF_XAUI: XAUI interface
+ * @DPMAC_ETH_IF_XFI: XFI interface
+ * @DPMAC_ETH_IF_CAUI: CAUI interface
+ * @DPMAC_ETH_IF_1000BASEX: 1000BASEX interface
+ * @DPMAC_ETH_IF_USXGMII: USXGMII interface
+ */
+enum dpmac_eth_if {
+       DPMAC_ETH_IF_MII,
+       DPMAC_ETH_IF_RMII,
+       DPMAC_ETH_IF_SMII,
+       DPMAC_ETH_IF_GMII,
+       DPMAC_ETH_IF_RGMII,
+       DPMAC_ETH_IF_SGMII,
+       DPMAC_ETH_IF_QSGMII,
+       DPMAC_ETH_IF_XAUI,
+       DPMAC_ETH_IF_XFI,
+       DPMAC_ETH_IF_CAUI,
+       DPMAC_ETH_IF_1000BASEX,
+       DPMAC_ETH_IF_USXGMII,
+};
+
+/**
+ * struct dpmac_attr - Structure representing DPMAC attributes
+ * @id:                DPMAC object ID
+ * @max_rate:  Maximum supported rate - in Mbps
+ * @eth_if:    Ethernet interface
+ * @link_type: link type
+ */
+struct dpmac_attr {
+       u16 id;
+       u32 max_rate;
+       enum dpmac_eth_if eth_if;
+       enum dpmac_link_type link_type;
+};
+
+int dpmac_get_attributes(struct fsl_mc_io *mc_io,
+                        u32 cmd_flags,
+                        u16 token,
+                        struct dpmac_attr *attr);
+
+/**
+ * DPMAC link configuration/state options
+ */
+
+/**
+ * Enable auto-negotiation
+ */
+#define DPMAC_LINK_OPT_AUTONEG                 BIT_ULL(0)
+/**
+ * Enable half-duplex mode
+ */
+#define DPMAC_LINK_OPT_HALF_DUPLEX             BIT_ULL(1)
+/**
+ * Enable pause frames
+ */
+#define DPMAC_LINK_OPT_PAUSE                   BIT_ULL(2)
+/**
+ * Enable a-symmetric pause frames
+ */
+#define DPMAC_LINK_OPT_ASYM_PAUSE              BIT_ULL(3)
+
+/**
+ * Advertised link speeds
+ */
+#define DPMAC_ADVERTISED_10BASET_FULL          BIT_ULL(0)
+#define DPMAC_ADVERTISED_100BASET_FULL         BIT_ULL(1)
+#define DPMAC_ADVERTISED_1000BASET_FULL                BIT_ULL(2)
+#define DPMAC_ADVERTISED_10000BASET_FULL       BIT_ULL(4)
+#define DPMAC_ADVERTISED_2500BASEX_FULL                BIT_ULL(5)
+
+/**
+ * Advertise auto-negotiation enable
+ */
+#define DPMAC_ADVERTISED_AUTONEG               BIT_ULL(3)
+
+/**
+ * struct dpmac_link_state - DPMAC link configuration request
+ * @rate: Rate in Mbps
+ * @options: Enable/Disable DPMAC link cfg features (bitmap)
+ * @up: Link state
+ * @state_valid: Ignore/Update the state of the link
+ * @supported: Speeds capability of the phy (bitmap)
+ * @advertising: Speeds that are advertised for autoneg (bitmap)
+ */
+struct dpmac_link_state {
+       u32 rate;
+       u64 options;
+       int up;
+       int state_valid;
+       u64 supported;
+       u64 advertising;
+};
+
+int dpmac_set_link_state(struct fsl_mc_io *mc_io,
+                        u32 cmd_flags,
+                        u16 token,
+                        struct dpmac_link_state *link_state);
+
+#endif /* __FSL_DPMAC_H */
index b73421c..7da79b8 100644 (file)
@@ -784,8 +784,8 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
                }
        }
 
-       priv->if_mode = of_get_phy_mode(np);
-       if ((int)priv->if_mode < 0) {
+       err = of_get_phy_mode(np, &priv->if_mode);
+       if (err) {
                dev_err(priv->dev, "missing phy type\n");
                of_node_put(priv->phy_node);
                if (of_phy_is_fixed_link(np))
index 22c01b2..d4d6c2e 100644 (file)
@@ -2706,7 +2706,6 @@ static void fec_enet_free_buffers(struct net_device *ndev)
 
        for (q = 0; q < fep->num_tx_queues; q++) {
                txq = fep->tx_queue[q];
-               bdp = txq->bd.base;
                for (i = 0; i < txq->bd.ring_size; i++) {
                        kfree(txq->tx_bounce[i]);
                        txq->tx_bounce[i] = NULL;
@@ -3394,6 +3393,7 @@ fec_probe(struct platform_device *pdev)
 {
        struct fec_enet_private *fep;
        struct fec_platform_data *pdata;
+       phy_interface_t interface;
        struct net_device *ndev;
        int i, irq, ret = 0;
        const struct of_device_id *of_id;
@@ -3466,15 +3466,15 @@ fec_probe(struct platform_device *pdev)
        }
        fep->phy_node = phy_node;
 
-       ret = of_get_phy_mode(pdev->dev.of_node);
-       if (ret < 0) {
+       ret = of_get_phy_mode(pdev->dev.of_node, &interface);
+       if (ret) {
                pdata = dev_get_platdata(&pdev->dev);
                if (pdata)
                        fep->phy_interface = pdata->phy;
                else
                        fep->phy_interface = PHY_INTERFACE_MODE_MII;
        } else {
-               fep->phy_interface = ret;
+               fep->phy_interface = interface;
        }
 
        fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
index 210749b..934111d 100644 (file)
@@ -634,6 +634,9 @@ static void set_port_liodn(struct fman *fman, u8 port_id,
 {
        u32 tmp;
 
+       iowrite32be(liodn_ofst, &fman->bmi_regs->fmbm_spliodn[port_id - 1]);
+       if (!IS_ENABLED(CONFIG_FSL_PAMU))
+               return;
        /* set LIODN base for this port */
        tmp = ioread32be(&fman->dma_regs->fmdmplr[port_id / 2]);
        if (port_id % 2) {
@@ -644,7 +647,6 @@ static void set_port_liodn(struct fman *fman, u8 port_id,
                tmp |= liodn_base << DMA_LIODN_SHIFT;
        }
        iowrite32be(tmp, &fman->dma_regs->fmdmplr[port_id / 2]);
-       iowrite32be(liodn_ofst, &fman->bmi_regs->fmbm_spliodn[port_id - 1]);
 }
 
 static void enable_rams_ecc(struct fman_fpm_regs __iomem *fpm_rg)
@@ -1942,6 +1944,8 @@ static int fman_init(struct fman *fman)
 
                fman->liodn_offset[i] =
                        ioread32be(&fman->bmi_regs->fmbm_spliodn[i - 1]);
+               if (!IS_ENABLED(CONFIG_FSL_PAMU))
+                       continue;
                liodn_base = ioread32be(&fman->dma_regs->fmdmplr[i / 2]);
                if (i % 2) {
                        /* FMDM_PLR LSB holds LIODN base for odd ports */
index ee82ee1..87b26f0 100644 (file)
@@ -435,7 +435,6 @@ struct fman_port_cfg {
 
 struct fman_port_rx_pools_params {
        u8 num_of_pools;
-       u16 second_largest_buf_size;
        u16 largest_buf_size;
 };
 
@@ -946,8 +945,6 @@ static int set_ext_buffer_pools(struct fman_port *port)
        port->rx_pools_params.num_of_pools = ext_buf_pools->num_of_pools_used;
        port->rx_pools_params.largest_buf_size =
            sizes_array[ordered_array[ext_buf_pools->num_of_pools_used - 1]];
-       port->rx_pools_params.second_largest_buf_size =
-           sizes_array[ordered_array[ext_buf_pools->num_of_pools_used - 2]];
 
        /* FMBM_RMPD reg. - pool depletion */
        if (buf_pool_depletion->pools_grp_mode_enable) {
@@ -1728,6 +1725,20 @@ u32 fman_port_get_qman_channel_id(struct fman_port *port)
 }
 EXPORT_SYMBOL(fman_port_get_qman_channel_id);
 
+/**
+ * fman_port_get_device
+ * port:       Pointer to the FMan port device
+ *
+ * Get the 'struct device' associated to the specified FMan port device
+ *
+ * Return: pointer to associated 'struct device'
+ */
+struct device *fman_port_get_device(struct fman_port *port)
+{
+       return port->dev;
+}
+EXPORT_SYMBOL(fman_port_get_device);
+
 int fman_port_get_hash_result_offset(struct fman_port *port, u32 *offset)
 {
        if (port->buffer_offsets.hash_result_offset == ILLEGAL_BASE)
index 9dbb69f..82f1266 100644 (file)
@@ -157,4 +157,6 @@ int fman_port_get_tstamp(struct fman_port *port, const void *data, u64 *tstamp);
 
 struct fman_port *fman_port_bind(struct device *dev);
 
+struct device *fman_port_get_device(struct fman_port *port);
+
 #endif /* __FMAN_PORT_H */
index 7ab8095..f0806ac 100644 (file)
@@ -608,7 +608,7 @@ static int mac_probe(struct platform_device *_of_dev)
        const u8                *mac_addr;
        u32                      val;
        u8                      fman_id;
-       int                     phy_if;
+       phy_interface_t          phy_if;
 
        dev = &_of_dev->dev;
        mac_node = dev->of_node;
@@ -776,8 +776,8 @@ static int mac_probe(struct platform_device *_of_dev)
        }
 
        /* Get the PHY connection type */
-       phy_if = of_get_phy_mode(mac_node);
-       if (phy_if < 0) {
+       err = of_get_phy_mode(mac_node, &phy_if);
+       if (err) {
                dev_warn(dev,
                         "of_get_phy_mode() for %pOF failed. Defaulting to SGMII\n",
                         mac_node);
index 51ad864..72868a2 100644 (file)
@@ -641,6 +641,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
        const char *model;
        const void *mac_addr;
        int err = 0, i;
+       phy_interface_t interface;
        struct net_device *dev = NULL;
        struct gfar_private *priv = NULL;
        struct device_node *np = ofdev->dev.of_node;
@@ -805,9 +806,9 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
         * rgmii-id really needs to be specified. Other types can be
         * detected by hardware
         */
-       err = of_get_phy_mode(np);
-       if (err >= 0)
-               priv->interface = err;
+       err = of_get_phy_mode(np, &interface);
+       if (!err)
+               priv->interface = interface;
        else
                priv->interface = gfar_get_interface(dev);
 
index 4606a7e..3e9b6d5 100644 (file)
@@ -211,7 +211,7 @@ struct hip04_priv {
 #if defined(CONFIG_HI13X1_GMAC)
        void __iomem *sysctrl_base;
 #endif
-       int phy_mode;
+       phy_interface_t phy_mode;
        int chan;
        unsigned int port;
        unsigned int group;
@@ -961,10 +961,9 @@ static int hip04_mac_probe(struct platform_device *pdev)
                goto init_fail;
        }
 
-       priv->phy_mode = of_get_phy_mode(node);
-       if (priv->phy_mode < 0) {
+       ret = of_get_phy_mode(node, &priv->phy_mode);
+       if (ret) {
                dev_warn(d, "not find phy-mode\n");
-               ret = -EINVAL;
                goto init_fail;
        }
 
index c41b19c..247de91 100644 (file)
@@ -1193,10 +1193,9 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
        if (ret)
                goto err_free_mdio;
 
-       priv->phy_mode = of_get_phy_mode(node);
-       if ((int)priv->phy_mode < 0) {
+       ret = of_get_phy_mode(node, &priv->phy_mode);
+       if (ret) {
                netdev_err(ndev, "not find phy-mode\n");
-               ret = -EINVAL;
                goto err_mdiobus;
        }
 
index 3a14bbc..1c5243c 100644 (file)
@@ -3049,7 +3049,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset)
        u32 sl;
        u32 credit;
        int i;
-       const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+       static const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
                {DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
                {DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
                {DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0},
@@ -3059,7 +3059,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset)
                {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
                {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
        };
-       const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+       static const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
                {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0},
                {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1},
                {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2},
index f8a87f8..1b03139 100644 (file)
@@ -45,8 +45,9 @@ enum HCLGE_MBX_OPCODE {
        HCLGE_MBX_GET_LINK_MODE,        /* (VF -> PF) get the link mode of pf */
        HCLGE_MBX_PUSH_VLAN_INFO,       /* (PF -> VF) push port base vlan */
        HCLGE_MBX_GET_MEDIA_TYPE,       /* (VF -> PF) get media type */
+       HCLGE_MBX_PUSH_PROMISC_INFO,    /* (PF -> VF) push vf promisc info */
 
-       HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
+       HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */
        HCLGE_MBX_PUSH_LINK_STATUS,     /* (M7 -> PF) get port link status */
        HCLGE_MBX_NCSI_ERROR,           /* (M7 -> PF) receive a NCSI error */
 };
@@ -71,7 +72,7 @@ enum hclge_mbx_vlan_cfg_subcode {
 };
 
 #define HCLGE_MBX_MAX_MSG_SIZE 16
-#define HCLGE_MBX_MAX_RESP_DATA_SIZE   8
+#define HCLGE_MBX_MAX_RESP_DATA_SIZE   8U
 #define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM       3
 #define HCLGE_MBX_RING_NODE_VARIABLE_NUM       3
 
index 03ca7d9..eef1b27 100644 (file)
@@ -146,7 +146,7 @@ void hnae3_unregister_client(struct hnae3_client *client)
                return;
 
        mutex_lock(&hnae3_common_lock);
-
+       /* one system should only have one client for every type */
        list_for_each_entry(client_tmp, &hnae3_client_list, node) {
                if (client_tmp->type == client->type) {
                        existed = true;
index 75ccc1e..45f5916 100644 (file)
@@ -130,7 +130,6 @@ enum hnae3_module_type {
        HNAE3_MODULE_TYPE_CR            = 0x04,
        HNAE3_MODULE_TYPE_KR            = 0x05,
        HNAE3_MODULE_TYPE_TP            = 0x06,
-
 };
 
 enum hnae3_fec_mode {
@@ -366,6 +365,19 @@ struct hnae3_ae_dev {
  *   Enable/disable HW GRO
  * add_arfs_entry
  *   Check the 5-tuples of flow, and create flow director rule
+ * get_vf_config
+ *   Get the VF configuration setting by the host
+ * set_vf_link_state
+ *   Set VF link status
+ * set_vf_spoofchk
+ *   Enable/disable spoof check for specified vf
+ * set_vf_trust
+ *   Enable/disable trust for specified vf, if the vf being trusted, then
+ *   it can enable promisc mode
+ * set_vf_rate
+ *   Set the max tx rate of specified vf.
+ * set_vf_mac
+ *   Configure the default MAC for specified VF
  */
 struct hnae3_ae_ops {
        int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
@@ -531,6 +543,16 @@ struct hnae3_ae_ops {
        int (*mac_connect_phy)(struct hnae3_handle *handle);
        void (*mac_disconnect_phy)(struct hnae3_handle *handle);
        void (*restore_vlan_table)(struct hnae3_handle *handle);
+       int (*get_vf_config)(struct hnae3_handle *handle, int vf,
+                            struct ifla_vf_info *ivf);
+       int (*set_vf_link_state)(struct hnae3_handle *handle, int vf,
+                                int link_state);
+       int (*set_vf_spoofchk)(struct hnae3_handle *handle, int vf,
+                              bool enable);
+       int (*set_vf_trust)(struct hnae3_handle *handle, int vf, bool enable);
+       int (*set_vf_rate)(struct hnae3_handle *handle, int vf,
+                          int min_tx_rate, int max_tx_rate, bool force);
+       int (*set_vf_mac)(struct hnae3_handle *handle, int vf, u8 *p);
 };
 
 struct hnae3_dcb_ops {
@@ -553,7 +575,8 @@ struct hnae3_ae_algo {
        const struct pci_device_id *pdev_id_table;
 };
 
-#define HNAE3_INT_NAME_LEN        (IFNAMSIZ + 16)
+#define HNAE3_INT_NAME_EXT_LEN    32    /* Max extra information length */
+#define HNAE3_INT_NAME_LEN        (IFNAMSIZ + HNAE3_INT_NAME_EXT_LEN)
 #define HNAE3_ITR_COUNTDOWN_START 100
 
 struct hnae3_tc_info {
index 28961a6..6b328a2 100644 (file)
@@ -16,15 +16,14 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
                               const char *cmd_buf)
 {
        struct hns3_nic_priv *priv = h->priv;
-       struct hns3_nic_ring_data *ring_data;
        struct hns3_enet_ring *ring;
        u32 base_add_l, base_add_h;
        u32 queue_num, queue_max;
        u32 value, i = 0;
        int cnt;
 
-       if (!priv->ring_data) {
-               dev_err(&h->pdev->dev, "ring_data is NULL\n");
+       if (!priv->ring) {
+               dev_err(&h->pdev->dev, "priv->ring is NULL\n");
                return -EFAULT;
        }
 
@@ -44,7 +43,6 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
                return -EINVAL;
        }
 
-       ring_data = priv->ring_data;
        for (i = queue_num; i < queue_max; i++) {
                /* Each cycle needs to determine whether the instance is reset,
                 * to prevent reference to invalid memory. And need to ensure
@@ -54,73 +52,73 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
                    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
                        return -EPERM;
 
-               ring = ring_data[(u32)(i + h->kinfo.num_tqps)].ring;
+               ring = &priv->ring[(u32)(i + h->kinfo.num_tqps)];
                base_add_h = readl_relaxed(ring->tqp->io_base +
                                           HNS3_RING_RX_RING_BASEADDR_H_REG);
                base_add_l = readl_relaxed(ring->tqp->io_base +
                                           HNS3_RING_RX_RING_BASEADDR_L_REG);
-               dev_info(&h->pdev->dev, "RX(%d) BASE ADD: 0x%08x%08x\n", i,
+               dev_info(&h->pdev->dev, "RX(%u) BASE ADD: 0x%08x%08x\n", i,
                         base_add_h, base_add_l);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_RX_RING_BD_NUM_REG);
-               dev_info(&h->pdev->dev, "RX(%d) RING BD NUM: %u\n", i, value);
+               dev_info(&h->pdev->dev, "RX(%u) RING BD NUM: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_RX_RING_BD_LEN_REG);
-               dev_info(&h->pdev->dev, "RX(%d) RING BD LEN: %u\n", i, value);
+               dev_info(&h->pdev->dev, "RX(%u) RING BD LEN: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_RX_RING_TAIL_REG);
-               dev_info(&h->pdev->dev, "RX(%d) RING TAIL: %u\n", i, value);
+               dev_info(&h->pdev->dev, "RX(%u) RING TAIL: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_RX_RING_HEAD_REG);
-               dev_info(&h->pdev->dev, "RX(%d) RING HEAD: %u\n", i, value);
+               dev_info(&h->pdev->dev, "RX(%u) RING HEAD: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_RX_RING_FBDNUM_REG);
-               dev_info(&h->pdev->dev, "RX(%d) RING FBDNUM: %u\n", i, value);
+               dev_info(&h->pdev->dev, "RX(%u) RING FBDNUM: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_RX_RING_PKTNUM_RECORD_REG);
-               dev_info(&h->pdev->dev, "RX(%d) RING PKTNUM: %u\n", i, value);
+               dev_info(&h->pdev->dev, "RX(%u) RING PKTNUM: %u\n", i, value);
 
-               ring = ring_data[i].ring;
+               ring = &priv->ring[i];
                base_add_h = readl_relaxed(ring->tqp->io_base +
                                           HNS3_RING_TX_RING_BASEADDR_H_REG);
                base_add_l = readl_relaxed(ring->tqp->io_base +
                                           HNS3_RING_TX_RING_BASEADDR_L_REG);
-               dev_info(&h->pdev->dev, "TX(%d) BASE ADD: 0x%08x%08x\n", i,
+               dev_info(&h->pdev->dev, "TX(%u) BASE ADD: 0x%08x%08x\n", i,
                         base_add_h, base_add_l);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_TX_RING_BD_NUM_REG);
-               dev_info(&h->pdev->dev, "TX(%d) RING BD NUM: %u\n", i, value);
+               dev_info(&h->pdev->dev, "TX(%u) RING BD NUM: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_TX_RING_TC_REG);
-               dev_info(&h->pdev->dev, "TX(%d) RING TC: %u\n", i, value);
+               dev_info(&h->pdev->dev, "TX(%u) RING TC: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_TX_RING_TAIL_REG);
-               dev_info(&h->pdev->dev, "TX(%d) RING TAIL: %u\n", i, value);
+               dev_info(&h->pdev->dev, "TX(%u) RING TAIL: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_TX_RING_HEAD_REG);
-               dev_info(&h->pdev->dev, "TX(%d) RING HEAD: %u\n", i, value);
+               dev_info(&h->pdev->dev, "TX(%u) RING HEAD: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_TX_RING_FBDNUM_REG);
-               dev_info(&h->pdev->dev, "TX(%d) RING FBDNUM: %u\n", i, value);
+               dev_info(&h->pdev->dev, "TX(%u) RING FBDNUM: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_TX_RING_OFFSET_REG);
-               dev_info(&h->pdev->dev, "TX(%d) RING OFFSET: %u\n", i, value);
+               dev_info(&h->pdev->dev, "TX(%u) RING OFFSET: %u\n", i, value);
 
                value = readl_relaxed(ring->tqp->io_base +
                                      HNS3_RING_TX_RING_PKTNUM_RECORD_REG);
-               dev_info(&h->pdev->dev, "TX(%d) RING PKTNUM: %u\n\n", i,
+               dev_info(&h->pdev->dev, "TX(%u) RING PKTNUM: %u\n\n", i,
                         value);
        }
 
@@ -130,7 +128,6 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
 static int hns3_dbg_queue_map(struct hnae3_handle *h)
 {
        struct hns3_nic_priv *priv = h->priv;
-       struct hns3_nic_ring_data *ring_data;
        int i;
 
        if (!h->ae_algo->ops->get_global_queue_id)
@@ -143,15 +140,12 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h)
                u16 global_qid;
 
                global_qid = h->ae_algo->ops->get_global_queue_id(h, i);
-               ring_data = &priv->ring_data[i];
-               if (!ring_data || !ring_data->ring ||
-                   !ring_data->ring->tqp_vector)
+               if (!priv->ring || !priv->ring[i].tqp_vector)
                        continue;
 
                dev_info(&h->pdev->dev,
                         "      %4d            %4d            %4d\n",
-                        i, global_qid,
-                        ring_data->ring->tqp_vector->vector_irq);
+                        i, global_qid, priv->ring[i].tqp_vector->vector_irq);
        }
 
        return 0;
@@ -160,7 +154,6 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h)
 static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
 {
        struct hns3_nic_priv *priv = h->priv;
-       struct hns3_nic_ring_data *ring_data;
        struct hns3_desc *rx_desc, *tx_desc;
        struct device *dev = &h->pdev->dev;
        struct hns3_enet_ring *ring;
@@ -183,8 +176,7 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
                return -EINVAL;
        }
 
-       ring_data = priv->ring_data;
-       ring  = ring_data[q_num].ring;
+       ring  = &priv->ring[q_num];
        value = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
        tx_index = (cnt == 1) ? value : tx_index;
 
@@ -198,23 +190,26 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
        addr = le64_to_cpu(tx_desc->addr);
        dev_info(dev, "TX Queue Num: %u, BD Index: %u\n", q_num, tx_index);
        dev_info(dev, "(TX)addr: %pad\n", &addr);
-       dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.vlan_tag);
-       dev_info(dev, "(TX)send_size: %u\n", tx_desc->tx.send_size);
+       dev_info(dev, "(TX)vlan_tag: %u\n", le16_to_cpu(tx_desc->tx.vlan_tag));
+       dev_info(dev, "(TX)send_size: %u\n",
+                le16_to_cpu(tx_desc->tx.send_size));
        dev_info(dev, "(TX)vlan_tso: %u\n", tx_desc->tx.type_cs_vlan_tso);
        dev_info(dev, "(TX)l2_len: %u\n", tx_desc->tx.l2_len);
        dev_info(dev, "(TX)l3_len: %u\n", tx_desc->tx.l3_len);
        dev_info(dev, "(TX)l4_len: %u\n", tx_desc->tx.l4_len);
-       dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.outer_vlan_tag);
-       dev_info(dev, "(TX)tv: %u\n", tx_desc->tx.tv);
+       dev_info(dev, "(TX)vlan_tag: %u\n",
+                le16_to_cpu(tx_desc->tx.outer_vlan_tag));
+       dev_info(dev, "(TX)tv: %u\n", le16_to_cpu(tx_desc->tx.tv));
        dev_info(dev, "(TX)vlan_msec: %u\n", tx_desc->tx.ol_type_vlan_msec);
        dev_info(dev, "(TX)ol2_len: %u\n", tx_desc->tx.ol2_len);
        dev_info(dev, "(TX)ol3_len: %u\n", tx_desc->tx.ol3_len);
        dev_info(dev, "(TX)ol4_len: %u\n", tx_desc->tx.ol4_len);
-       dev_info(dev, "(TX)paylen: %u\n", tx_desc->tx.paylen);
-       dev_info(dev, "(TX)vld_ra_ri: %u\n", tx_desc->tx.bdtp_fe_sc_vld_ra_ri);
-       dev_info(dev, "(TX)mss: %u\n", tx_desc->tx.mss);
+       dev_info(dev, "(TX)paylen: %u\n", le32_to_cpu(tx_desc->tx.paylen));
+       dev_info(dev, "(TX)vld_ra_ri: %u\n",
+                le16_to_cpu(tx_desc->tx.bdtp_fe_sc_vld_ra_ri));
+       dev_info(dev, "(TX)mss: %u\n", le16_to_cpu(tx_desc->tx.mss));
 
-       ring  = ring_data[q_num + h->kinfo.num_tqps].ring;
+       ring  = &priv->ring[q_num + h->kinfo.num_tqps];
        value = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG);
        rx_index = (cnt == 1) ? value : tx_index;
        rx_desc  = &ring->desc[rx_index];
@@ -222,15 +217,19 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
        addr = le64_to_cpu(rx_desc->addr);
        dev_info(dev, "RX Queue Num: %u, BD Index: %u\n", q_num, rx_index);
        dev_info(dev, "(RX)addr: %pad\n", &addr);
-       dev_info(dev, "(RX)l234_info: %u\n", rx_desc->rx.l234_info);
-       dev_info(dev, "(RX)pkt_len: %u\n", rx_desc->rx.pkt_len);
-       dev_info(dev, "(RX)size: %u\n", rx_desc->rx.size);
-       dev_info(dev, "(RX)rss_hash: %u\n", rx_desc->rx.rss_hash);
-       dev_info(dev, "(RX)fd_id: %u\n", rx_desc->rx.fd_id);
-       dev_info(dev, "(RX)vlan_tag: %u\n", rx_desc->rx.vlan_tag);
-       dev_info(dev, "(RX)o_dm_vlan_id_fb: %u\n", rx_desc->rx.o_dm_vlan_id_fb);
-       dev_info(dev, "(RX)ot_vlan_tag: %u\n", rx_desc->rx.ot_vlan_tag);
-       dev_info(dev, "(RX)bd_base_info: %u\n", rx_desc->rx.bd_base_info);
+       dev_info(dev, "(RX)l234_info: %u\n",
+                le32_to_cpu(rx_desc->rx.l234_info));
+       dev_info(dev, "(RX)pkt_len: %u\n", le16_to_cpu(rx_desc->rx.pkt_len));
+       dev_info(dev, "(RX)size: %u\n", le16_to_cpu(rx_desc->rx.size));
+       dev_info(dev, "(RX)rss_hash: %u\n", le32_to_cpu(rx_desc->rx.rss_hash));
+       dev_info(dev, "(RX)fd_id: %u\n", le16_to_cpu(rx_desc->rx.fd_id));
+       dev_info(dev, "(RX)vlan_tag: %u\n", le16_to_cpu(rx_desc->rx.vlan_tag));
+       dev_info(dev, "(RX)o_dm_vlan_id_fb: %u\n",
+                le16_to_cpu(rx_desc->rx.o_dm_vlan_id_fb));
+       dev_info(dev, "(RX)ot_vlan_tag: %u\n",
+                le16_to_cpu(rx_desc->rx.ot_vlan_tag));
+       dev_info(dev, "(RX)bd_base_info: %u\n",
+                le32_to_cpu(rx_desc->rx.bd_base_info));
 
        return 0;
 }
index 616cad0..ba05368 100644 (file)
@@ -483,7 +483,7 @@ static void hns3_reset_tx_queue(struct hnae3_handle *h)
 
        for (i = 0; i < h->kinfo.num_tqps; i++) {
                dev_queue = netdev_get_tx_queue(ndev,
-                                               priv->ring_data[i].queue_index);
+                                               priv->ring[i].queue_index);
                netdev_tx_reset_queue(dev_queue);
        }
 }
@@ -681,7 +681,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
                return 0;
 
        ret = skb_cow_head(skb, 0);
-       if (unlikely(ret))
+       if (unlikely(ret < 0))
                return ret;
 
        l3.hdr = skb_network_header(skb);
@@ -962,14 +962,6 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
        return 0;
 }
 
-static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
-{
-       /* Config bd buffer end */
-       if (!!frag_end)
-               hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U);
-       hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U);
-}
-
 static int hns3_handle_vtags(struct hns3_enet_ring *tx_ring,
                             struct sk_buff *skb)
 {
@@ -1062,7 +1054,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
                skb_reset_mac_len(skb);
 
                ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
-               if (unlikely(ret)) {
+               if (unlikely(ret < 0)) {
                        u64_stats_update_begin(&ring->syncp);
                        ring->stats.tx_l4_proto_err++;
                        u64_stats_update_end(&ring->syncp);
@@ -1072,7 +1064,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
                ret = hns3_set_l2l3l4(skb, ol4_proto, il4_proto,
                                      &type_cs_vlan_tso,
                                      &ol_type_vlan_len_msec);
-               if (unlikely(ret)) {
+               if (unlikely(ret < 0)) {
                        u64_stats_update_begin(&ring->syncp);
                        ring->stats.tx_l2l3l4_err++;
                        u64_stats_update_end(&ring->syncp);
@@ -1081,7 +1073,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 
                ret = hns3_set_tso(skb, &paylen, &mss,
                                   &type_cs_vlan_tso);
-               if (unlikely(ret)) {
+               if (unlikely(ret < 0)) {
                        u64_stats_update_begin(&ring->syncp);
                        ring->stats.tx_tso_err++;
                        u64_stats_update_end(&ring->syncp);
@@ -1102,9 +1094,10 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 }
 
 static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
-                         unsigned int size, int frag_end,
-                         enum hns_desc_type type)
+                         unsigned int size, enum hns_desc_type type)
 {
+#define HNS3_LIKELY_BD_NUM     1
+
        struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
        struct hns3_desc *desc = &ring->desc[ring->next_to_use];
        struct device *dev = ring_to_dev(ring);
@@ -1118,7 +1111,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
                int ret;
 
                ret = hns3_fill_skb_desc(ring, skb, desc);
-               if (unlikely(ret))
+               if (unlikely(ret < 0))
                        return ret;
 
                dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
@@ -1137,19 +1130,16 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
        desc_cb->length = size;
 
        if (likely(size <= HNS3_MAX_BD_SIZE)) {
-               u16 bdtp_fe_sc_vld_ra_ri = 0;
-
                desc_cb->priv = priv;
                desc_cb->dma = dma;
                desc_cb->type = type;
                desc->addr = cpu_to_le64(dma);
                desc->tx.send_size = cpu_to_le16(size);
-               hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end);
                desc->tx.bdtp_fe_sc_vld_ra_ri =
-                       cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+                       cpu_to_le16(BIT(HNS3_TXD_VLD_B));
 
                ring_ptr_move_fw(ring, next_to_use);
-               return 0;
+               return HNS3_LIKELY_BD_NUM;
        }
 
        frag_buf_num = hns3_tx_bd_count(size);
@@ -1158,8 +1148,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 
        /* When frag size is bigger than hardware limit, split this frag */
        for (k = 0; k < frag_buf_num; k++) {
-               u16 bdtp_fe_sc_vld_ra_ri = 0;
-
                /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */
                desc_cb->priv = priv;
                desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k;
@@ -1170,11 +1158,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
                desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k);
                desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ?
                                     (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE);
-               hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri,
-                                      frag_end && (k == frag_buf_num - 1) ?
-                                               1 : 0);
                desc->tx.bdtp_fe_sc_vld_ra_ri =
-                               cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+                               cpu_to_le16(BIT(HNS3_TXD_VLD_B));
 
                /* move ring pointer to next */
                ring_ptr_move_fw(ring, next_to_use);
@@ -1183,23 +1168,78 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
                desc = &ring->desc[ring->next_to_use];
        }
 
-       return 0;
+       return frag_buf_num;
 }
 
-static unsigned int hns3_nic_bd_num(struct sk_buff *skb)
+static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size,
+                                   unsigned int bd_num)
 {
-       unsigned int bd_num;
+       unsigned int size;
        int i;
 
-       /* if the total len is within the max bd limit */
-       if (likely(skb->len <= HNS3_MAX_BD_SIZE))
-               return skb_shinfo(skb)->nr_frags + 1;
+       size = skb_headlen(skb);
+       while (size > HNS3_MAX_BD_SIZE) {
+               bd_size[bd_num++] = HNS3_MAX_BD_SIZE;
+               size -= HNS3_MAX_BD_SIZE;
 
-       bd_num = hns3_tx_bd_count(skb_headlen(skb));
+               if (bd_num > HNS3_MAX_TSO_BD_NUM)
+                       return bd_num;
+       }
+
+       if (size) {
+               bd_size[bd_num++] = size;
+               if (bd_num > HNS3_MAX_TSO_BD_NUM)
+                       return bd_num;
+       }
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-               bd_num += hns3_tx_bd_count(skb_frag_size(frag));
+               size = skb_frag_size(frag);
+               if (!size)
+                       continue;
+
+               while (size > HNS3_MAX_BD_SIZE) {
+                       bd_size[bd_num++] = HNS3_MAX_BD_SIZE;
+                       size -= HNS3_MAX_BD_SIZE;
+
+                       if (bd_num > HNS3_MAX_TSO_BD_NUM)
+                               return bd_num;
+               }
+
+               bd_size[bd_num++] = size;
+               if (bd_num > HNS3_MAX_TSO_BD_NUM)
+                       return bd_num;
+       }
+
+       return bd_num;
+}
+
+static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size)
+{
+       struct sk_buff *frag_skb;
+       unsigned int bd_num = 0;
+
+       /* If the total len is within the max bd limit */
+       if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) &&
+                  skb_shinfo(skb)->nr_frags < HNS3_MAX_NON_TSO_BD_NUM))
+               return skb_shinfo(skb)->nr_frags + 1U;
+
+       /* The below case will always be linearized, return
+        * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized.
+        */
+       if (unlikely(skb->len > HNS3_MAX_TSO_SIZE ||
+                    (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)))
+               return HNS3_MAX_TSO_BD_NUM + 1U;
+
+       bd_num = hns3_skb_bd_num(skb, bd_size, bd_num);
+
+       if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM)
+               return bd_num;
+
+       skb_walk_frags(skb, frag_skb) {
+               bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num);
+               if (bd_num > HNS3_MAX_TSO_BD_NUM)
+                       return bd_num;
        }
 
        return bd_num;
@@ -1218,26 +1258,26 @@ static unsigned int hns3_gso_hdr_len(struct sk_buff *skb)
  * 7 frags to to be larger than gso header len + mss, and the remaining
  * continuous 7 frags to be larger than MSS except the last 7 frags.
  */
-static bool hns3_skb_need_linearized(struct sk_buff *skb)
+static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size,
+                                    unsigned int bd_num)
 {
-       int bd_limit = HNS3_MAX_BD_NUM_NORMAL - 1;
        unsigned int tot_len = 0;
        int i;
 
-       for (i = 0; i < bd_limit; i++)
-               tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
+       for (i = 0; i < HNS3_MAX_NON_TSO_BD_NUM - 1U; i++)
+               tot_len += bd_size[i];
 
-       /* ensure headlen + the first 7 frags is greater than mss + header
-        * and the first 7 frags is greater than mss.
-        */
-       if (((tot_len + skb_headlen(skb)) < (skb_shinfo(skb)->gso_size +
-           hns3_gso_hdr_len(skb))) || (tot_len < skb_shinfo(skb)->gso_size))
+       /* ensure the first 8 frags is greater than mss + header */
+       if (tot_len + bd_size[HNS3_MAX_NON_TSO_BD_NUM - 1U] <
+           skb_shinfo(skb)->gso_size + hns3_gso_hdr_len(skb))
                return true;
 
-       /* ensure the remaining continuous 7 buffer is greater than mss */
-       for (i = 0; i < (skb_shinfo(skb)->nr_frags - bd_limit - 1); i++) {
-               tot_len -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
-               tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i + bd_limit]);
+       /* ensure every continuous 7 buffer is greater than mss
+        * except the last one.
+        */
+       for (i = 0; i < bd_num - HNS3_MAX_NON_TSO_BD_NUM; i++) {
+               tot_len -= bd_size[i];
+               tot_len += bd_size[i + HNS3_MAX_NON_TSO_BD_NUM - 1U];
 
                if (tot_len < skb_shinfo(skb)->gso_size)
                        return true;
@@ -1249,15 +1289,16 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb)
 static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
                                  struct sk_buff **out_skb)
 {
+       unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U];
        struct sk_buff *skb = *out_skb;
        unsigned int bd_num;
 
-       bd_num = hns3_nic_bd_num(skb);
-       if (unlikely(bd_num > HNS3_MAX_BD_NUM_NORMAL)) {
+       bd_num = hns3_tx_bd_num(skb, bd_size);
+       if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) {
                struct sk_buff *new_skb;
 
-               if (skb_is_gso(skb) && bd_num <= HNS3_MAX_BD_NUM_TSO &&
-                   !hns3_skb_need_linearized(skb))
+               if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) &&
+                   !hns3_skb_need_linearized(skb, bd_size, bd_num))
                        goto out;
 
                /* manual split the send packet */
@@ -1267,9 +1308,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
                dev_kfree_skb_any(skb);
                *out_skb = new_skb;
 
-               bd_num = hns3_nic_bd_num(new_skb);
-               if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_TSO) ||
-                   (!skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_NORMAL))
+               bd_num = hns3_tx_bd_count(new_skb->len);
+               if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_TSO_BD_NUM) ||
+                   (!skb_is_gso(new_skb) &&
+                    bd_num > HNS3_MAX_NON_TSO_BD_NUM))
                        return -ENOMEM;
 
                u64_stats_update_begin(&ring->syncp);
@@ -1314,73 +1356,98 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
        }
 }
 
+static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
+                                struct sk_buff *skb, enum hns_desc_type type)
+{
+       unsigned int size = skb_headlen(skb);
+       int i, ret, bd_num = 0;
+
+       if (size) {
+               ret = hns3_fill_desc(ring, skb, size, type);
+               if (unlikely(ret < 0))
+                       return ret;
+
+               bd_num += ret;
+       }
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               size = skb_frag_size(frag);
+               if (!size)
+                       continue;
+
+               ret = hns3_fill_desc(ring, frag, size, DESC_TYPE_PAGE);
+               if (unlikely(ret < 0))
+                       return ret;
+
+               bd_num += ret;
+       }
+
+       return bd_num;
+}
+
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
-       struct hns3_nic_ring_data *ring_data =
-               &tx_ring_data(priv, skb->queue_mapping);
-       struct hns3_enet_ring *ring = ring_data->ring;
+       struct hns3_enet_ring *ring = &priv->ring[skb->queue_mapping];
        struct netdev_queue *dev_queue;
-       skb_frag_t *frag;
-       int next_to_use_head;
-       int buf_num;
-       int seg_num;
-       int size;
+       int pre_ntu, next_to_use_head;
+       struct sk_buff *frag_skb;
+       int bd_num = 0;
        int ret;
-       int i;
 
        /* Prefetch the data used later */
        prefetch(skb->data);
 
-       buf_num = hns3_nic_maybe_stop_tx(ring, &skb);
-       if (unlikely(buf_num <= 0)) {
-               if (buf_num == -EBUSY) {
+       ret = hns3_nic_maybe_stop_tx(ring, &skb);
+       if (unlikely(ret <= 0)) {
+               if (ret == -EBUSY) {
                        u64_stats_update_begin(&ring->syncp);
                        ring->stats.tx_busy++;
                        u64_stats_update_end(&ring->syncp);
                        goto out_net_tx_busy;
-               } else if (buf_num == -ENOMEM) {
+               } else if (ret == -ENOMEM) {
                        u64_stats_update_begin(&ring->syncp);
                        ring->stats.sw_err_cnt++;
                        u64_stats_update_end(&ring->syncp);
                }
 
-               hns3_rl_err(netdev, "xmit error: %d!\n", buf_num);
+               hns3_rl_err(netdev, "xmit error: %d!\n", ret);
                goto out_err_tx_ok;
        }
 
-       /* No. of segments (plus a header) */
-       seg_num = skb_shinfo(skb)->nr_frags + 1;
-       /* Fill the first part */
-       size = skb_headlen(skb);
-
        next_to_use_head = ring->next_to_use;
 
-       ret = hns3_fill_desc(ring, skb, size, seg_num == 1 ? 1 : 0,
-                            DESC_TYPE_SKB);
-       if (unlikely(ret))
+       ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
+       if (unlikely(ret < 0))
                goto fill_err;
 
-       /* Fill the fragments */
-       for (i = 1; i < seg_num; i++) {
-               frag = &skb_shinfo(skb)->frags[i - 1];
-               size = skb_frag_size(frag);
+       bd_num += ret;
 
-               ret = hns3_fill_desc(ring, frag, size,
-                                    seg_num - 1 == i ? 1 : 0,
-                                    DESC_TYPE_PAGE);
+       if (!skb_has_frag_list(skb))
+               goto out;
 
-               if (unlikely(ret))
+       skb_walk_frags(skb, frag_skb) {
+               ret = hns3_fill_skb_to_desc(ring, frag_skb, DESC_TYPE_PAGE);
+               if (unlikely(ret < 0))
                        goto fill_err;
+
+               bd_num += ret;
        }
+out:
+       pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) :
+                                       (ring->desc_num - 1);
+       ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
+                               cpu_to_le16(BIT(HNS3_TXD_FE_B));
 
        /* Complete translate all packets */
-       dev_queue = netdev_get_tx_queue(netdev, ring_data->queue_index);
+       dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
        netdev_tx_sent_queue(dev_queue, skb->len);
 
        wmb(); /* Commit all data before submit */
 
-       hnae3_queue_xmit(ring->tqp, buf_num);
+       hnae3_queue_xmit(ring->tqp, bd_num);
 
        return NETDEV_TX_OK;
 
@@ -1392,7 +1459,7 @@ out_err_tx_ok:
        return NETDEV_TX_OK;
 
 out_net_tx_busy:
-       netif_stop_subqueue(netdev, ring_data->queue_index);
+       netif_stop_subqueue(netdev, ring->queue_index);
        smp_mb(); /* Commit all data before submit */
 
        return NETDEV_TX_BUSY;
@@ -1413,6 +1480,16 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
                return 0;
        }
 
+       /* For VF device, if there is a perm_addr, then the user will not
+        * be allowed to change the address.
+        */
+       if (!hns3_is_phys_func(h->pdev) &&
+           !is_zero_ether_addr(netdev->perm_addr)) {
+               netdev_err(netdev, "has permanent MAC %pM, user MAC %pM not allow\n",
+                          netdev->perm_addr, mac_addr->sa_data);
+               return -EPERM;
+       }
+
        ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false);
        if (ret) {
                netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
@@ -1505,7 +1582,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 
        for (idx = 0; idx < queue_num; idx++) {
                /* fetch the tx stats */
-               ring = priv->ring_data[idx].ring;
+               ring = &priv->ring[idx];
                do {
                        start = u64_stats_fetch_begin_irq(&ring->syncp);
                        tx_bytes += ring->stats.tx_bytes;
@@ -1523,7 +1600,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
                } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
                /* fetch the rx stats */
-               ring = priv->ring_data[idx + queue_num].ring;
+               ring = &priv->ring[idx + queue_num];
                do {
                        start = u64_stats_fetch_begin_irq(&ring->syncp);
                        rx_bytes += ring->stats.rx_bytes;
@@ -1633,8 +1710,8 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
        int ret = -EIO;
 
        netif_dbg(h, drv, netdev,
-                 "set vf vlan: vf=%d, vlan=%u, qos=%u, vlan_proto=%u\n",
-                 vf, vlan, qos, vlan_proto);
+                 "set vf vlan: vf=%d, vlan=%u, qos=%u, vlan_proto=0x%x\n",
+                 vf, vlan, qos, ntohs(vlan_proto));
 
        if (h->ae_algo->ops->set_vf_vlan_filter)
                ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan,
@@ -1643,6 +1720,29 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
        return ret;
 }
 
+static int hns3_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
+{
+       struct hnae3_handle *handle = hns3_get_handle(netdev);
+
+       if (hns3_nic_resetting(netdev))
+               return -EBUSY;
+
+       if (!handle->ae_algo->ops->set_vf_spoofchk)
+               return -EOPNOTSUPP;
+
+       return handle->ae_algo->ops->set_vf_spoofchk(handle, vf, enable);
+}
+
+static int hns3_set_vf_trust(struct net_device *netdev, int vf, bool enable)
+{
+       struct hnae3_handle *handle = hns3_get_handle(netdev);
+
+       if (!handle->ae_algo->ops->set_vf_trust)
+               return -EOPNOTSUPP;
+
+       return handle->ae_algo->ops->set_vf_trust(handle, vf, enable);
+}
+
 static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -1671,7 +1771,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 {
        struct hns3_nic_priv *priv = netdev_priv(ndev);
        struct hnae3_handle *h = hns3_get_handle(ndev);
-       struct hns3_enet_ring *tx_ring = NULL;
+       struct hns3_enet_ring *tx_ring;
        struct napi_struct *napi;
        int timeout_queue = 0;
        int hw_head, hw_tail;
@@ -1692,6 +1792,9 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
                    time_after(jiffies,
                               (trans_start + ndev->watchdog_timeo))) {
                        timeout_queue = i;
+                       netdev_info(ndev, "queue state: 0x%lx, delta msecs: %u\n",
+                                   q->state,
+                                   jiffies_to_msecs(jiffies - trans_start));
                        break;
                }
        }
@@ -1705,7 +1808,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 
        priv->tx_timeout_count++;
 
-       tx_ring = priv->ring_data[timeout_queue].ring;
+       tx_ring = &priv->ring[timeout_queue];
        napi = &tx_ring->tqp_vector->napi;
 
        netdev_info(ndev,
@@ -1805,6 +1908,57 @@ static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 }
 #endif
 
+static int hns3_nic_get_vf_config(struct net_device *ndev, int vf,
+                                 struct ifla_vf_info *ivf)
+{
+       struct hnae3_handle *h = hns3_get_handle(ndev);
+
+       if (!h->ae_algo->ops->get_vf_config)
+               return -EOPNOTSUPP;
+
+       return h->ae_algo->ops->get_vf_config(h, vf, ivf);
+}
+
+static int hns3_nic_set_vf_link_state(struct net_device *ndev, int vf,
+                                     int link_state)
+{
+       struct hnae3_handle *h = hns3_get_handle(ndev);
+
+       if (!h->ae_algo->ops->set_vf_link_state)
+               return -EOPNOTSUPP;
+
+       return h->ae_algo->ops->set_vf_link_state(h, vf, link_state);
+}
+
+static int hns3_nic_set_vf_rate(struct net_device *ndev, int vf,
+                               int min_tx_rate, int max_tx_rate)
+{
+       struct hnae3_handle *h = hns3_get_handle(ndev);
+
+       if (!h->ae_algo->ops->set_vf_rate)
+               return -EOPNOTSUPP;
+
+       return h->ae_algo->ops->set_vf_rate(h, vf, min_tx_rate, max_tx_rate,
+                                           false);
+}
+
+static int hns3_nic_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+       struct hnae3_handle *h = hns3_get_handle(netdev);
+
+       if (!h->ae_algo->ops->set_vf_mac)
+               return -EOPNOTSUPP;
+
+       if (is_multicast_ether_addr(mac)) {
+               netdev_err(netdev,
+                          "Invalid MAC:%pM specified. Could not set MAC\n",
+                          mac);
+               return -EINVAL;
+       }
+
+       return h->ae_algo->ops->set_vf_mac(h, vf_id, mac);
+}
+
 static const struct net_device_ops hns3_nic_netdev_ops = {
        .ndo_open               = hns3_nic_net_open,
        .ndo_stop               = hns3_nic_net_stop,
@@ -1820,10 +1974,15 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
        .ndo_vlan_rx_add_vid    = hns3_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = hns3_vlan_rx_kill_vid,
        .ndo_set_vf_vlan        = hns3_ndo_set_vf_vlan,
+       .ndo_set_vf_spoofchk    = hns3_set_vf_spoofchk,
+       .ndo_set_vf_trust       = hns3_set_vf_trust,
 #ifdef CONFIG_RFS_ACCEL
        .ndo_rx_flow_steer      = hns3_rx_flow_steer,
 #endif
-
+       .ndo_get_vf_config      = hns3_nic_get_vf_config,
+       .ndo_set_vf_link_state  = hns3_nic_set_vf_link_state,
+       .ndo_set_vf_rate        = hns3_nic_set_vf_rate,
+       .ndo_set_vf_mac         = hns3_nic_set_vf_mac,
 };
 
 bool hns3_is_phys_func(struct pci_dev *pdev)
@@ -1843,7 +2002,7 @@ bool hns3_is_phys_func(struct pci_dev *pdev)
        case HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF:
                return false;
        default:
-               dev_warn(&pdev->dev, "un-recognized pci device-id %d",
+               dev_warn(&pdev->dev, "un-recognized pci device-id %u",
                         dev_id);
        }
 
@@ -2069,9 +2228,8 @@ static void hns3_set_default_feature(struct net_device *netdev)
                NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
                NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
                NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
-
-       netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+               NETIF_F_TSO_MANGLEID | NETIF_F_FRAGLIST;
 
        netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
 
@@ -2081,21 +2239,24 @@ static void hns3_set_default_feature(struct net_device *netdev)
                NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
                NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
                NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
+               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+               NETIF_F_FRAGLIST;
 
        netdev->vlan_features |=
                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
                NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO |
                NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
                NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
+               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+               NETIF_F_FRAGLIST;
 
        netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
                NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
                NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
                NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
+               NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+               NETIF_F_FRAGLIST;
 
        if (pdev->revision >= 0x21) {
                netdev->hw_features |= NETIF_F_GRO_HW;
@@ -2320,18 +2481,19 @@ static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
 
 void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
 {
-       struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+       struct net_device *netdev = ring_to_netdev(ring);
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct netdev_queue *dev_queue;
        int bytes, pkts;
        int head;
 
        head = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG);
-       rmb(); /* Make sure head is ready before touch any data */
 
        if (is_ring_empty(ring) || head == ring->next_to_clean)
                return; /* no data to poll */
 
+       rmb(); /* Make sure head is ready before touch any data */
+
        if (unlikely(!is_valid_clean_head(ring, head))) {
                netdev_err(netdev, "wrong head (%d, %d-%d)\n", head,
                           ring->next_to_use, ring->next_to_clean);
@@ -2358,7 +2520,7 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
        netdev_tx_completed_queue(dev_queue, pkts, bytes);
 
        if (unlikely(pkts && netif_carrier_ok(netdev) &&
-                    (ring_space(ring) > HNS3_MAX_BD_PER_PKT))) {
+                    ring_space(ring) > HNS3_MAX_TSO_BD_NUM)) {
                /* Make sure that anybody stopping the queue after this
                 * sees the new next_to_clean.
                 */
@@ -2401,7 +2563,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
                                ring->stats.sw_err_cnt++;
                                u64_stats_update_end(&ring->syncp);
 
-                               hns3_rl_err(ring->tqp_vector->napi.dev,
+                               hns3_rl_err(ring_to_netdev(ring),
                                            "alloc rx buffer failed: %d\n",
                                            ret);
                                break;
@@ -2510,7 +2672,7 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
 static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
                             u32 l234info, u32 bd_base_info, u32 ol_info)
 {
-       struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+       struct net_device *netdev = ring_to_netdev(ring);
        int l3_type, l4_type;
        int ol4_type;
 
@@ -2626,7 +2788,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 {
 #define HNS3_NEED_ADD_FRAG     1
        struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
-       struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+       struct net_device *netdev = ring_to_netdev(ring);
        struct sk_buff *skb;
 
        ring->skb = napi_alloc_skb(&ring->tqp_vector->napi, HNS3_RX_HEAD_SIZE);
@@ -2672,10 +2834,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 }
 
 static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
-                        struct sk_buff **out_skb, bool pending)
+                        bool pending)
 {
-       struct sk_buff *skb = *out_skb;
-       struct sk_buff *head_skb = *out_skb;
+       struct sk_buff *skb = ring->skb;
+       struct sk_buff *head_skb = skb;
        struct sk_buff *new_skb;
        struct hns3_desc_cb *desc_cb;
        struct hns3_desc *pre_desc;
@@ -2704,10 +2866,9 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
                        return -ENXIO;
 
                if (unlikely(ring->frag_num >= MAX_SKB_FRAGS)) {
-                       new_skb = napi_alloc_skb(&ring->tqp_vector->napi,
-                                                HNS3_RX_HEAD_SIZE);
+                       new_skb = napi_alloc_skb(&ring->tqp_vector->napi, 0);
                        if (unlikely(!new_skb)) {
-                               hns3_rl_err(ring->tqp_vector->napi.dev,
+                               hns3_rl_err(ring_to_netdev(ring),
                                            "alloc rx fraglist skb fail\n");
                                return -ENXIO;
                        }
@@ -2783,7 +2944,7 @@ static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring,
 
 static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
 {
-       struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+       struct net_device *netdev = ring_to_netdev(ring);
        enum hns3_pkt_l2t_type l2_frame_type;
        u32 bd_base_info, l234info, ol_info;
        struct hns3_desc *desc;
@@ -2858,8 +3019,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
        return 0;
 }
 
-static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
-                            struct sk_buff **out_skb)
+static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
 {
        struct sk_buff *skb = ring->skb;
        struct hns3_desc_cb *desc_cb;
@@ -2897,12 +3057,12 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 
        if (!skb) {
                ret = hns3_alloc_skb(ring, length, ring->va);
-               *out_skb = skb = ring->skb;
+               skb = ring->skb;
 
                if (ret < 0) /* alloc buffer fail */
                        return ret;
                if (ret > 0) { /* need add frag */
-                       ret = hns3_add_frag(ring, desc, &skb, false);
+                       ret = hns3_add_frag(ring, desc, false);
                        if (ret)
                                return ret;
 
@@ -2913,7 +3073,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
                               ALIGN(ring->pull_len, sizeof(long)));
                }
        } else {
-               ret = hns3_add_frag(ring, desc, &skb, true);
+               ret = hns3_add_frag(ring, desc, true);
                if (ret)
                        return ret;
 
@@ -2931,8 +3091,6 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
        }
 
        skb_record_rx_queue(skb, ring->tqp->tqp_index);
-       *out_skb = skb;
-
        return 0;
 }
 
@@ -2941,17 +3099,19 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
 {
 #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
        int unused_count = hns3_desc_unused(ring);
-       struct sk_buff *skb = ring->skb;
        int recv_pkts = 0;
        int recv_bds = 0;
        int err, num;
 
        num = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG);
-       rmb(); /* Make sure num taken effect before the other data is touched */
-
        num -= unused_count;
        unused_count -= ring->pending_buf;
 
+       if (num <= 0)
+               goto out;
+
+       rmb(); /* Make sure num taken effect before the other data is touched */
+
        while (recv_pkts < budget && recv_bds < num) {
                /* Reuse or realloc buffers */
                if (unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
@@ -2961,27 +3121,19 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
                }
 
                /* Poll one pkt */
-               err = hns3_handle_rx_bd(ring, &skb);
-               if (unlikely(!skb)) /* This fault cannot be repaired */
-                       goto out;
-
-               if (err == -ENXIO) { /* Do not get FE for the packet */
+               err = hns3_handle_rx_bd(ring);
+               /* Do not get FE for the packet or failed to alloc skb */
+               if (unlikely(!ring->skb || err == -ENXIO)) {
                        goto out;
-               } else if (unlikely(err)) {  /* Do jump the err */
-                       recv_bds += ring->pending_buf;
-                       unused_count += ring->pending_buf;
-                       ring->skb = NULL;
-                       ring->pending_buf = 0;
-                       continue;
+               } else if (likely(!err)) {
+                       rx_fn(ring, ring->skb);
+                       recv_pkts++;
                }
 
-               rx_fn(ring, skb);
                recv_bds += ring->pending_buf;
                unused_count += ring->pending_buf;
                ring->skb = NULL;
                ring->pending_buf = 0;
-
-               recv_pkts++;
        }
 
 out:
@@ -3324,13 +3476,13 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
                tqp_vector = &priv->tqp_vector[vector_i];
 
                hns3_add_ring_to_group(&tqp_vector->tx_group,
-                                      priv->ring_data[i].ring);
+                                      &priv->ring[i]);
 
                hns3_add_ring_to_group(&tqp_vector->rx_group,
-                                      priv->ring_data[i + tqp_num].ring);
+                                      &priv->ring[i + tqp_num]);
 
-               priv->ring_data[i].ring->tqp_vector = tqp_vector;
-               priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+               priv->ring[i].tqp_vector = tqp_vector;
+               priv->ring[i + tqp_num].tqp_vector = tqp_vector;
                tqp_vector->num_tqps++;
        }
 
@@ -3474,28 +3626,22 @@ static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
        return 0;
 }
 
-static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
-                            unsigned int ring_type)
+static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
+                             unsigned int ring_type)
 {
-       struct hns3_nic_ring_data *ring_data = priv->ring_data;
        int queue_num = priv->ae_handle->kinfo.num_tqps;
-       struct pci_dev *pdev = priv->ae_handle->pdev;
        struct hns3_enet_ring *ring;
        int desc_num;
 
-       ring = devm_kzalloc(&pdev->dev, sizeof(*ring), GFP_KERNEL);
-       if (!ring)
-               return -ENOMEM;
-
        if (ring_type == HNAE3_RING_TYPE_TX) {
+               ring = &priv->ring[q->tqp_index];
                desc_num = priv->ae_handle->kinfo.num_tx_desc;
-               ring_data[q->tqp_index].ring = ring;
-               ring_data[q->tqp_index].queue_index = q->tqp_index;
+               ring->queue_index = q->tqp_index;
                ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET;
        } else {
+               ring = &priv->ring[q->tqp_index + queue_num];
                desc_num = priv->ae_handle->kinfo.num_rx_desc;
-               ring_data[q->tqp_index + queue_num].ring = ring;
-               ring_data[q->tqp_index + queue_num].queue_index = q->tqp_index;
+               ring->queue_index = q->tqp_index;
                ring->io_base = q->io_base;
        }
 
@@ -3510,76 +3656,41 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
        ring->desc_num = desc_num;
        ring->next_to_use = 0;
        ring->next_to_clean = 0;
-
-       return 0;
 }
 
-static int hns3_queue_to_ring(struct hnae3_queue *tqp,
-                             struct hns3_nic_priv *priv)
+static void hns3_queue_to_ring(struct hnae3_queue *tqp,
+                              struct hns3_nic_priv *priv)
 {
-       int ret;
-
-       ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_TX);
-       if (ret)
-               return ret;
-
-       ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
-       if (ret) {
-               devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring);
-               return ret;
-       }
-
-       return 0;
+       hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_TX);
+       hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
 }
 
 static int hns3_get_ring_config(struct hns3_nic_priv *priv)
 {
        struct hnae3_handle *h = priv->ae_handle;
        struct pci_dev *pdev = h->pdev;
-       int i, ret;
+       int i;
 
-       priv->ring_data =  devm_kzalloc(&pdev->dev,
-                                       array3_size(h->kinfo.num_tqps,
-                                                   sizeof(*priv->ring_data),
-                                                   2),
-                                       GFP_KERNEL);
-       if (!priv->ring_data)
+       priv->ring = devm_kzalloc(&pdev->dev,
+                                 array3_size(h->kinfo.num_tqps,
+                                             sizeof(*priv->ring), 2),
+                                 GFP_KERNEL);
+       if (!priv->ring)
                return -ENOMEM;
 
-       for (i = 0; i < h->kinfo.num_tqps; i++) {
-               ret = hns3_queue_to_ring(h->kinfo.tqp[i], priv);
-               if (ret)
-                       goto err;
-       }
+       for (i = 0; i < h->kinfo.num_tqps; i++)
+               hns3_queue_to_ring(h->kinfo.tqp[i], priv);
 
        return 0;
-err:
-       while (i--) {
-               devm_kfree(priv->dev, priv->ring_data[i].ring);
-               devm_kfree(priv->dev,
-                          priv->ring_data[i + h->kinfo.num_tqps].ring);
-       }
-
-       devm_kfree(&pdev->dev, priv->ring_data);
-       priv->ring_data = NULL;
-       return ret;
 }
 
 static void hns3_put_ring_config(struct hns3_nic_priv *priv)
 {
-       struct hnae3_handle *h = priv->ae_handle;
-       int i;
-
-       if (!priv->ring_data)
+       if (!priv->ring)
                return;
 
-       for (i = 0; i < h->kinfo.num_tqps; i++) {
-               devm_kfree(priv->dev, priv->ring_data[i].ring);
-               devm_kfree(priv->dev,
-                          priv->ring_data[i + h->kinfo.num_tqps].ring);
-       }
-       devm_kfree(priv->dev, priv->ring_data);
-       priv->ring_data = NULL;
+       devm_kfree(priv->dev, priv->ring);
+       priv->ring = NULL;
 }
 
 static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
@@ -3696,7 +3807,7 @@ static void hns3_init_tx_ring_tc(struct hns3_nic_priv *priv)
                for (j = 0; j < tc_info->tqp_count; j++) {
                        struct hnae3_queue *q;
 
-                       q = priv->ring_data[tc_info->tqp_offset + j].ring->tqp;
+                       q = priv->ring[tc_info->tqp_offset + j].tqp;
                        hns3_write_dev(q, HNS3_RING_TX_RING_TC_REG,
                                       tc_info->tc);
                }
@@ -3711,21 +3822,21 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv)
        int ret;
 
        for (i = 0; i < ring_num; i++) {
-               ret = hns3_alloc_ring_memory(priv->ring_data[i].ring);
+               ret = hns3_alloc_ring_memory(&priv->ring[i]);
                if (ret) {
                        dev_err(priv->dev,
                                "Alloc ring memory fail! ret=%d\n", ret);
                        goto out_when_alloc_ring_memory;
                }
 
-               u64_stats_init(&priv->ring_data[i].ring->syncp);
+               u64_stats_init(&priv->ring[i].syncp);
        }
 
        return 0;
 
 out_when_alloc_ring_memory:
        for (j = i - 1; j >= 0; j--)
-               hns3_fini_ring(priv->ring_data[j].ring);
+               hns3_fini_ring(&priv->ring[j]);
 
        return -ENOMEM;
 }
@@ -3736,30 +3847,31 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
        int i;
 
        for (i = 0; i < h->kinfo.num_tqps; i++) {
-               hns3_fini_ring(priv->ring_data[i].ring);
-               hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
+               hns3_fini_ring(&priv->ring[i]);
+               hns3_fini_ring(&priv->ring[i + h->kinfo.num_tqps]);
        }
        return 0;
 }
 
 /* Set mac addr if it is configured. or leave it to the AE driver */
-static int hns3_init_mac_addr(struct net_device *netdev, bool init)
+static int hns3_init_mac_addr(struct net_device *netdev)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hnae3_handle *h = priv->ae_handle;
        u8 mac_addr_temp[ETH_ALEN];
        int ret = 0;
 
-       if (h->ae_algo->ops->get_mac_addr && init) {
+       if (h->ae_algo->ops->get_mac_addr)
                h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
-               ether_addr_copy(netdev->dev_addr, mac_addr_temp);
-       }
 
        /* Check if the MAC address is valid, if not get a random one */
-       if (!is_valid_ether_addr(netdev->dev_addr)) {
+       if (!is_valid_ether_addr(mac_addr_temp)) {
                eth_hw_addr_random(netdev);
                dev_warn(priv->dev, "using random MAC address %pM\n",
                         netdev->dev_addr);
+       } else {
+               ether_addr_copy(netdev->dev_addr, mac_addr_temp);
+               ether_addr_copy(netdev->perm_addr, mac_addr_temp);
        }
 
        if (h->ae_algo->ops->set_mac_addr)
@@ -3827,14 +3939,14 @@ static void hns3_info_show(struct hns3_nic_priv *priv)
        struct hnae3_knic_private_info *kinfo = &priv->ae_handle->kinfo;
 
        dev_info(priv->dev, "MAC address: %pM\n", priv->netdev->dev_addr);
-       dev_info(priv->dev, "Task queue pairs numbers: %d\n", kinfo->num_tqps);
-       dev_info(priv->dev, "RSS size: %d\n", kinfo->rss_size);
-       dev_info(priv->dev, "Allocated RSS size: %d\n", kinfo->req_rss_size);
-       dev_info(priv->dev, "RX buffer length: %d\n", kinfo->rx_buf_len);
-       dev_info(priv->dev, "Desc num per TX queue: %d\n", kinfo->num_tx_desc);
-       dev_info(priv->dev, "Desc num per RX queue: %d\n", kinfo->num_rx_desc);
-       dev_info(priv->dev, "Total number of enabled TCs: %d\n", kinfo->num_tc);
-       dev_info(priv->dev, "Max mtu size: %d\n", priv->netdev->max_mtu);
+       dev_info(priv->dev, "Task queue pairs numbers: %u\n", kinfo->num_tqps);
+       dev_info(priv->dev, "RSS size: %u\n", kinfo->rss_size);
+       dev_info(priv->dev, "Allocated RSS size: %u\n", kinfo->req_rss_size);
+       dev_info(priv->dev, "RX buffer length: %u\n", kinfo->rx_buf_len);
+       dev_info(priv->dev, "Desc num per TX queue: %u\n", kinfo->num_tx_desc);
+       dev_info(priv->dev, "Desc num per RX queue: %u\n", kinfo->num_rx_desc);
+       dev_info(priv->dev, "Total number of enabled TCs: %u\n", kinfo->num_tc);
+       dev_info(priv->dev, "Max mtu size: %u\n", priv->netdev->max_mtu);
 }
 
 static int hns3_client_init(struct hnae3_handle *handle)
@@ -3863,7 +3975,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
        handle->kinfo.netdev = netdev;
        handle->priv = (void *)priv;
 
-       hns3_init_mac_addr(netdev, true);
+       hns3_init_mac_addr(netdev);
 
        hns3_set_default_feature(netdev);
 
@@ -3897,7 +4009,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
        ret = hns3_init_all_ring(priv);
        if (ret) {
                ret = -ENOMEM;
-               goto out_init_ring_data;
+               goto out_init_ring;
        }
 
        ret = hns3_init_phy(netdev);
@@ -3936,12 +4048,12 @@ out_reg_netdev_fail:
        hns3_uninit_phy(netdev);
 out_init_phy:
        hns3_uninit_all_ring(priv);
-out_init_ring_data:
+out_init_ring:
        hns3_nic_uninit_vector_data(priv);
 out_init_vector_data:
        hns3_nic_dealloc_vector_data(priv);
 out_alloc_vector_data:
-       priv->ring_data = NULL;
+       priv->ring = NULL;
 out_get_ring_cfg:
        priv->ae_handle = NULL;
        free_netdev(netdev);
@@ -4102,7 +4214,7 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring)
                                /* if alloc new buffer fail, exit directly
                                 * and reclear in up flow.
                                 */
-                               netdev_warn(ring->tqp->handle->kinfo.netdev,
+                               netdev_warn(ring_to_netdev(ring),
                                            "reserve buffer map failed, ret = %d\n",
                                            ret);
                                return ret;
@@ -4148,10 +4260,10 @@ static void hns3_clear_all_ring(struct hnae3_handle *h, bool force)
        for (i = 0; i < h->kinfo.num_tqps; i++) {
                struct hns3_enet_ring *ring;
 
-               ring = priv->ring_data[i].ring;
+               ring = &priv->ring[i];
                hns3_clear_tx_ring(ring);
 
-               ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
+               ring = &priv->ring[i + h->kinfo.num_tqps];
                /* Continue to clear other rings even if clearing some
                 * rings failed.
                 */
@@ -4175,16 +4287,16 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
                if (ret)
                        return ret;
 
-               hns3_init_ring_hw(priv->ring_data[i].ring);
+               hns3_init_ring_hw(&priv->ring[i]);
 
                /* We need to clear tx ring here because self test will
                 * use the ring and will not run down before up
                 */
-               hns3_clear_tx_ring(priv->ring_data[i].ring);
-               priv->ring_data[i].ring->next_to_clean = 0;
-               priv->ring_data[i].ring->next_to_use = 0;
+               hns3_clear_tx_ring(&priv->ring[i]);
+               priv->ring[i].next_to_clean = 0;
+               priv->ring[i].next_to_use = 0;
 
-               rx_ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
+               rx_ring = &priv->ring[i + h->kinfo.num_tqps];
                hns3_init_ring_hw(rx_ring);
                ret = hns3_clear_rx_ring(rx_ring);
                if (ret)
@@ -4331,7 +4443,7 @@ static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle)
        bool vlan_filter_enable;
        int ret;
 
-       ret = hns3_init_mac_addr(netdev, false);
+       ret = hns3_init_mac_addr(netdev);
        if (ret)
                return ret;
 
@@ -4454,7 +4566,7 @@ int hns3_set_channels(struct net_device *netdev,
        if (new_tqp_num > hns3_get_max_available_channels(h) ||
            new_tqp_num < 1) {
                dev_err(&netdev->dev,
-                       "Change tqps fail, the tqp range is from 1 to %d",
+                       "Change tqps fail, the tqp range is from 1 to %u",
                        hns3_get_max_available_channels(h));
                return -EINVAL;
        }
index 2110fa3..345633f 100644 (file)
@@ -76,7 +76,7 @@ enum hns3_nic_state {
 #define HNS3_RING_NAME_LEN                     16
 #define HNS3_BUFFER_SIZE_2048                  2048
 #define HNS3_RING_MAX_PENDING                  32760
-#define HNS3_RING_MIN_PENDING                  24
+#define HNS3_RING_MIN_PENDING                  72
 #define HNS3_RING_BD_MULTIPLE                  8
 /* max frame size of mac */
 #define HNS3_MAC_MAX_FRAME                     9728
@@ -186,7 +186,7 @@ enum hns3_nic_state {
 #define HNS3_TXD_MSS_S                         0
 #define HNS3_TXD_MSS_M                         (0x3fff << HNS3_TXD_MSS_S)
 
-#define HNS3_TX_LAST_SIZE_M                    0xffff
+#define HNS3_TX_LAST_SIZE_M                    0xffff
 
 #define HNS3_VECTOR_TX_IRQ                     BIT_ULL(0)
 #define HNS3_VECTOR_RX_IRQ                     BIT_ULL(1)
@@ -195,9 +195,13 @@ enum hns3_nic_state {
 #define HNS3_VECTOR_INITED                     1
 
 #define HNS3_MAX_BD_SIZE                       65535
-#define HNS3_MAX_BD_NUM_NORMAL                 8
-#define HNS3_MAX_BD_NUM_TSO                    63
-#define HNS3_MAX_BD_PER_PKT                    MAX_SKB_FRAGS
+#define HNS3_MAX_NON_TSO_BD_NUM                        8U
+#define HNS3_MAX_TSO_BD_NUM                    63U
+#define HNS3_MAX_TSO_SIZE \
+       (HNS3_MAX_BD_SIZE * HNS3_MAX_TSO_BD_NUM)
+
+#define HNS3_MAX_NON_TSO_SIZE \
+       (HNS3_MAX_BD_SIZE * HNS3_MAX_NON_TSO_BD_NUM)
 
 #define HNS3_VECTOR_GL0_OFFSET                 0x100
 #define HNS3_VECTOR_GL1_OFFSET                 0x200
@@ -309,7 +313,7 @@ struct hns3_desc_cb {
 
        u16 reuse_flag;
 
-       /* desc type, used by the ring user to mark the type of the priv data */
+       /* desc type, used by the ring user to mark the type of the priv data */
        u16 type;
 };
 
@@ -405,6 +409,7 @@ struct hns3_enet_ring {
        struct hns3_enet_ring *next;
        struct hns3_enet_tqp_vector *tqp_vector;
        struct hnae3_queue *tqp;
+       int queue_index;
        struct device *dev; /* will be used for DMA mapping of descriptors */
 
        /* statistic */
@@ -430,18 +435,7 @@ struct hns3_enet_ring {
        int pending_buf;
        struct sk_buff *skb;
        struct sk_buff *tail_skb;
-};
-
-struct hns_queue;
-
-struct hns3_nic_ring_data {
-       struct hns3_enet_ring *ring;
-       struct napi_struct napi;
-       int queue_index;
-       int (*poll_one)(struct hns3_nic_ring_data *, int, void *);
-       void (*ex_process)(struct hns3_nic_ring_data *, struct sk_buff *);
-       void (*fini_process)(struct hns3_nic_ring_data *);
-};
+} ____cacheline_internodealigned_in_smp;
 
 enum hns3_flow_level_range {
        HNS3_FLOW_LOW = 0,
@@ -518,7 +512,7 @@ struct hns3_nic_priv {
         * the cb for nic to manage the ring buffer, the first half of the
         * array is for tx_ring and vice versa for the second half
         */
-       struct hns3_nic_ring_data *ring_data;
+       struct hns3_enet_ring *ring;
        struct hns3_enet_tqp_vector *tqp_vector;
        u16 vector_num;
 
@@ -613,11 +607,11 @@ static inline bool hns3_nic_resetting(struct net_device *netdev)
 
 #define ring_to_dev(ring) ((ring)->dev)
 
+#define ring_to_netdev(ring)   ((ring)->tqp_vector->napi.dev)
+
 #define ring_to_dma_dir(ring) (HNAE3_IS_TX_RING(ring) ? \
        DMA_TO_DEVICE : DMA_FROM_DEVICE)
 
-#define tx_ring_data(priv, idx) ((priv)->ring_data[idx])
-
 #define hns3_buf_size(_ring) ((_ring)->buf_size)
 
 static inline unsigned int hns3_page_order(struct hns3_enet_ring *ring)
index 680c350..b104d3c 100644 (file)
@@ -203,7 +203,7 @@ static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget)
 
        kinfo = &h->kinfo;
        for (i = kinfo->num_tqps; i < kinfo->num_tqps * 2; i++) {
-               struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+               struct hns3_enet_ring *ring = &priv->ring[i];
                struct hns3_enet_ring_group *rx_group;
                u64 pre_rx_pkt;
 
@@ -226,7 +226,7 @@ static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid,
        u32 i;
 
        for (i = start_ringid; i <= end_ringid; i++) {
-               struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+               struct hns3_enet_ring *ring = &priv->ring[i];
 
                hns3_clean_tx_ring(ring);
        }
@@ -491,7 +491,7 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
 
        /* get stats for Tx */
        for (i = 0; i < kinfo->num_tqps; i++) {
-               ring = nic_priv->ring_data[i].ring;
+               ring = &nic_priv->ring[i];
                for (j = 0; j < HNS3_TXQ_STATS_COUNT; j++) {
                        stat = (u8 *)ring + hns3_txq_stats[j].stats_offset;
                        *data++ = *(u64 *)stat;
@@ -500,7 +500,7 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
 
        /* get stats for Rx */
        for (i = 0; i < kinfo->num_tqps; i++) {
-               ring = nic_priv->ring_data[i + kinfo->num_tqps].ring;
+               ring = &nic_priv->ring[i + kinfo->num_tqps];
                for (j = 0; j < HNS3_RXQ_STATS_COUNT; j++) {
                        stat = (u8 *)ring + hns3_rxq_stats[j].stats_offset;
                        *data++ = *(u64 *)stat;
@@ -603,8 +603,8 @@ static void hns3_get_ringparam(struct net_device *netdev,
        param->tx_max_pending = HNS3_RING_MAX_PENDING;
        param->rx_max_pending = HNS3_RING_MAX_PENDING;
 
-       param->tx_pending = priv->ring_data[0].ring->desc_num;
-       param->rx_pending = priv->ring_data[queue_num].ring->desc_num;
+       param->tx_pending = priv->ring[0].desc_num;
+       param->rx_pending = priv->ring[queue_num].desc_num;
 }
 
 static void hns3_get_pauseparam(struct net_device *netdev,
@@ -906,9 +906,8 @@ static void hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
        h->kinfo.num_rx_desc = rx_desc_num;
 
        for (i = 0; i < h->kinfo.num_tqps; i++) {
-               priv->ring_data[i].ring->desc_num = tx_desc_num;
-               priv->ring_data[i + h->kinfo.num_tqps].ring->desc_num =
-                       rx_desc_num;
+               priv->ring[i].desc_num = tx_desc_num;
+               priv->ring[i + h->kinfo.num_tqps].desc_num = rx_desc_num;
        }
 }
 
@@ -924,7 +923,7 @@ static struct hns3_enet_ring *hns3_backup_ringparam(struct hns3_nic_priv *priv)
                return NULL;
 
        for (i = 0; i < handle->kinfo.num_tqps * 2; i++) {
-               memcpy(&tmp_rings[i], priv->ring_data[i].ring,
+               memcpy(&tmp_rings[i], &priv->ring[i],
                       sizeof(struct hns3_enet_ring));
                tmp_rings[i].skb = NULL;
        }
@@ -972,8 +971,8 @@ static int hns3_set_ringparam(struct net_device *ndev,
        /* Hardware requires that its descriptors must be multiple of eight */
        new_tx_desc_num = ALIGN(param->tx_pending, HNS3_RING_BD_MULTIPLE);
        new_rx_desc_num = ALIGN(param->rx_pending, HNS3_RING_BD_MULTIPLE);
-       old_tx_desc_num = priv->ring_data[0].ring->desc_num;
-       old_rx_desc_num = priv->ring_data[queue_num].ring->desc_num;
+       old_tx_desc_num = priv->ring[0].desc_num;
+       old_rx_desc_num = priv->ring[queue_num].desc_num;
        if (old_tx_desc_num == new_tx_desc_num &&
            old_rx_desc_num == new_rx_desc_num)
                return 0;
@@ -986,7 +985,7 @@ static int hns3_set_ringparam(struct net_device *ndev,
        }
 
        netdev_info(ndev,
-                   "Changing Tx/Rx ring depth from %d/%d to %d/%d\n",
+                   "Changing Tx/Rx ring depth from %u/%u to %u/%u\n",
                    old_tx_desc_num, old_rx_desc_num,
                    new_tx_desc_num, new_rx_desc_num);
 
@@ -1002,7 +1001,7 @@ static int hns3_set_ringparam(struct net_device *ndev,
                hns3_change_all_ring_bd_num(priv, old_tx_desc_num,
                                            old_rx_desc_num);
                for (i = 0; i < h->kinfo.num_tqps * 2; i++)
-                       memcpy(priv->ring_data[i].ring, &tmp_rings[i],
+                       memcpy(&priv->ring[i], &tmp_rings[i],
                               sizeof(struct hns3_enet_ring));
        } else {
                for (i = 0; i < h->kinfo.num_tqps * 2; i++)
@@ -1098,13 +1097,13 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
 
        if (queue >= queue_num) {
                netdev_err(netdev,
-                          "Invalid queue value %d! Queue max id=%d\n",
+                          "Invalid queue value %u! Queue max id=%u\n",
                           queue, queue_num - 1);
                return -EINVAL;
        }
 
-       tx_vector = priv->ring_data[queue].ring->tqp_vector;
-       rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
+       tx_vector = priv->ring[queue].tqp_vector;
+       rx_vector = priv->ring[queue_num + queue].tqp_vector;
 
        cmd->use_adaptive_tx_coalesce =
                        tx_vector->tx_group.coal.gl_adapt_enable;
@@ -1148,14 +1147,14 @@ static int hns3_check_gl_coalesce_para(struct net_device *netdev,
        rx_gl = hns3_gl_round_down(cmd->rx_coalesce_usecs);
        if (rx_gl != cmd->rx_coalesce_usecs) {
                netdev_info(netdev,
-                           "rx_usecs(%d) rounded down to %d, because it must be multiple of 2.\n",
+                           "rx_usecs(%u) rounded down to %u, because it must be multiple of 2.\n",
                            cmd->rx_coalesce_usecs, rx_gl);
        }
 
        tx_gl = hns3_gl_round_down(cmd->tx_coalesce_usecs);
        if (tx_gl != cmd->tx_coalesce_usecs) {
                netdev_info(netdev,
-                           "tx_usecs(%d) rounded down to %d, because it must be multiple of 2.\n",
+                           "tx_usecs(%u) rounded down to %u, because it must be multiple of 2.\n",
                            cmd->tx_coalesce_usecs, tx_gl);
        }
 
@@ -1183,7 +1182,7 @@ static int hns3_check_rl_coalesce_para(struct net_device *netdev,
        rl = hns3_rl_round_down(cmd->rx_coalesce_usecs_high);
        if (rl != cmd->rx_coalesce_usecs_high) {
                netdev_info(netdev,
-                           "usecs_high(%d) rounded down to %d, because it must be multiple of 4.\n",
+                           "usecs_high(%u) rounded down to %u, because it must be multiple of 4.\n",
                            cmd->rx_coalesce_usecs_high, rl);
        }
 
@@ -1212,7 +1211,7 @@ static int hns3_check_coalesce_para(struct net_device *netdev,
        if (cmd->use_adaptive_tx_coalesce == 1 ||
            cmd->use_adaptive_rx_coalesce == 1) {
                netdev_info(netdev,
-                           "adaptive-tx=%d and adaptive-rx=%d, tx_usecs or rx_usecs will changed dynamically.\n",
+                           "adaptive-tx=%u and adaptive-rx=%u, tx_usecs or rx_usecs will changed dynamically.\n",
                            cmd->use_adaptive_tx_coalesce,
                            cmd->use_adaptive_rx_coalesce);
        }
@@ -1229,8 +1228,8 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
        struct hnae3_handle *h = priv->ae_handle;
        int queue_num = h->kinfo.num_tqps;
 
-       tx_vector = priv->ring_data[queue].ring->tqp_vector;
-       rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
+       tx_vector = priv->ring[queue].tqp_vector;
+       rx_vector = priv->ring[queue_num + queue].tqp_vector;
 
        tx_vector->tx_group.coal.gl_adapt_enable =
                                cmd->use_adaptive_tx_coalesce;
index ecf58cf..940ead3 100644 (file)
@@ -145,7 +145,7 @@ static int hclge_cmd_csq_clean(struct hclge_hw *hw)
        rmb(); /* Make sure head is ready before touch any data */
 
        if (!is_valid_csq_clean_head(csq, head)) {
-               dev_warn(&hdev->pdev->dev, "wrong cmd head (%d, %d-%d)\n", head,
+               dev_warn(&hdev->pdev->dev, "wrong cmd head (%u, %d-%d)\n", head,
                         csq->next_to_use, csq->next_to_clean);
                dev_warn(&hdev->pdev->dev,
                         "Disabling any further commands to IMP firmware\n");
@@ -314,11 +314,10 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
                } while (timeout < hw->cmq.tx_timeout);
        }
 
-       if (!complete) {
+       if (!complete)
                retval = -EBADE;
-       } else {
+       else
                retval = hclge_cmd_check_retval(hw, desc, num, ntc);
-       }
 
        /* Clean the command send queue */
        handle = hclge_cmd_csq_clean(hw);
index 4821fe0..af96e79 100644 (file)
@@ -5,8 +5,10 @@
 #define __HCLGE_CMD_H
 #include <linux/types.h>
 #include <linux/io.h>
+#include <linux/etherdevice.h>
 
 #define HCLGE_CMDQ_TX_TIMEOUT          30000
+#define HCLGE_DESC_DATA_LEN            6
 
 struct hclge_dev;
 struct hclge_desc {
@@ -18,7 +20,7 @@ struct hclge_desc {
        __le16 flag;
        __le16 retval;
        __le16 rsv;
-       __le32 data[6];
+       __le32 data[HCLGE_DESC_DATA_LEN];
 };
 
 struct hclge_cmq_ring {
@@ -244,7 +246,7 @@ enum hclge_opcode_type {
        /* QCN commands */
        HCLGE_OPC_QCN_MOD_CFG           = 0x1A01,
        HCLGE_OPC_QCN_GRP_TMPLT_CFG     = 0x1A02,
-       HCLGE_OPC_QCN_SHAPPING_IR_CFG   = 0x1A03,
+       HCLGE_OPC_QCN_SHAPPING_CFG      = 0x1A03,
        HCLGE_OPC_QCN_SHAPPING_BS_CFG   = 0x1A04,
        HCLGE_OPC_QCN_QSET_LINK_CFG     = 0x1A05,
        HCLGE_OPC_QCN_RP_STATUS_GET     = 0x1A06,
@@ -259,6 +261,7 @@ enum hclge_opcode_type {
 
        /* NCL config command */
        HCLGE_OPC_QUERY_NCL_CONFIG      = 0x7011,
+
        /* M7 stats command */
        HCLGE_OPC_M7_STATS_BD           = 0x7012,
        HCLGE_OPC_M7_STATS_INFO         = 0x7013,
@@ -428,8 +431,10 @@ struct hclge_rx_pkt_buf_cmd {
 #define HCLGE_PF_MAC_NUM_MASK  0x3
 #define HCLGE_PF_STATE_MAIN    BIT(HCLGE_PF_STATE_MAIN_B)
 #define HCLGE_PF_STATE_DONE    BIT(HCLGE_PF_STATE_DONE_B)
+#define HCLGE_VF_RST_STATUS_CMD        4
+
 struct hclge_func_status_cmd {
-       __le32  vf_rst_state[4];
+       __le32  vf_rst_state[HCLGE_VF_RST_STATUS_CMD];
        u8 pf_state;
        u8 mac_id;
        u8 rsv1;
@@ -485,10 +490,12 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_UMV_TBL_SPACE_S      16
 #define HCLGE_CFG_UMV_TBL_SPACE_M      GENMASK(31, 16)
 
+#define HCLGE_CFG_CMD_CNT              4
+
 struct hclge_cfg_param_cmd {
        __le32 offset;
        __le32 rsv;
-       __le32 param[4];
+       __le32 param[HCLGE_CFG_CMD_CNT];
 };
 
 #define HCLGE_MAC_MODE         0x0
@@ -712,8 +719,7 @@ struct hclge_mac_mgr_tbl_entry_cmd {
        u8      flags;
        u8      resp_code;
        __le16  vlan_tag;
-       __le32  mac_addr_hi32;
-       __le16  mac_addr_lo16;
+       u8      mac_addr[ETH_ALEN];
        __le16  rsv1;
        __le16  ethter_type;
        __le16  egress_port;
@@ -758,20 +764,27 @@ struct hclge_vlan_filter_ctrl_cmd {
        u8 rsv2[19];
 };
 
+#define HCLGE_VLAN_ID_OFFSET_STEP      160
+#define HCLGE_VLAN_BYTE_SIZE           8
+#define        HCLGE_VLAN_OFFSET_BITMAP \
+       (HCLGE_VLAN_ID_OFFSET_STEP / HCLGE_VLAN_BYTE_SIZE)
+
 struct hclge_vlan_filter_pf_cfg_cmd {
        u8 vlan_offset;
        u8 vlan_cfg;
        u8 rsv[2];
-       u8 vlan_offset_bitmap[20];
+       u8 vlan_offset_bitmap[HCLGE_VLAN_OFFSET_BITMAP];
 };
 
+#define HCLGE_MAX_VF_BYTES  16
+
 struct hclge_vlan_filter_vf_cfg_cmd {
        __le16 vlan_id;
        u8  resp_code;
        u8  rsv;
        u8  vlan_cfg;
        u8  rsv1[3];
-       u8  vf_bitmap[16];
+       u8  vf_bitmap[HCLGE_MAX_VF_BYTES];
 };
 
 #define HCLGE_SWITCH_ANTI_SPOOF_B      0U
@@ -806,6 +819,7 @@ enum hclge_mac_vlan_cfg_sel {
 #define HCLGE_CFG_NIC_ROCE_SEL_B       4
 #define HCLGE_ACCEPT_TAG2_B            5
 #define HCLGE_ACCEPT_UNTAG2_B          6
+#define HCLGE_VF_NUM_PER_BYTE          8
 
 struct hclge_vport_vtag_tx_cfg_cmd {
        u8 vport_vlan_cfg;
@@ -813,7 +827,7 @@ struct hclge_vport_vtag_tx_cfg_cmd {
        u8 rsv1[2];
        __le16 def_vlan_tag1;
        __le16 def_vlan_tag2;
-       u8 vf_bitmap[8];
+       u8 vf_bitmap[HCLGE_VF_NUM_PER_BYTE];
        u8 rsv2[8];
 };
 
@@ -825,7 +839,7 @@ struct hclge_vport_vtag_rx_cfg_cmd {
        u8 vport_vlan_cfg;
        u8 vf_offset;
        u8 rsv1[6];
-       u8 vf_bitmap[8];
+       u8 vf_bitmap[HCLGE_VF_NUM_PER_BYTE];
        u8 rsv2[8];
 };
 
@@ -864,7 +878,7 @@ struct hclge_mac_ethertype_idx_rd_cmd {
        u8      flags;
        u8      resp_code;
        __le16  vlan_tag;
-       u8      mac_addr[6];
+       u8      mac_addr[ETH_ALEN];
        __le16  index;
        __le16  ethter_type;
        __le16  egress_port;
@@ -1090,9 +1104,6 @@ void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
                                enum hclge_opcode_type opcode, bool is_read);
 void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read);
 
-int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
-                              struct hclge_promisc_param *param);
-
 enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw,
                                           struct hclge_desc *desc);
 enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw,
index c063301..49ad848 100644 (file)
@@ -87,7 +87,7 @@ static int hclge_dcb_common_validate(struct hclge_dev *hdev, u8 num_tc,
        for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
                if (prio_tc[i] >= num_tc) {
                        dev_err(&hdev->pdev->dev,
-                               "prio_tc[%u] checking failed, %u >= num_tc(%u)\n",
+                               "prio_tc[%d] checking failed, %u >= num_tc(%u)\n",
                                i, prio_tc[i], num_tc);
                        return -EINVAL;
                }
index d0128d7..112df34 100644 (file)
@@ -145,7 +145,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
                return;
        }
 
-       buf_len  = sizeof(struct hclge_desc) * bd_num;
+       buf_len = sizeof(struct hclge_desc) * bd_num;
        desc_src = kzalloc(buf_len, GFP_KERNEL);
        if (!desc_src) {
                dev_err(&hdev->pdev->dev, "call kzalloc failed\n");
@@ -153,7 +153,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
        }
 
        desc = desc_src;
-       ret  = hclge_dbg_cmd_send(hdev, desc, index, bd_num, reg_msg->cmd);
+       ret = hclge_dbg_cmd_send(hdev, desc, index, bd_num, reg_msg->cmd);
        if (ret) {
                kfree(desc_src);
                return;
@@ -169,7 +169,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
                if (dfx_message->flag)
                        dev_info(&hdev->pdev->dev, "%s: 0x%x\n",
                                 dfx_message->message,
-                                desc->data[i % entries_per_desc]);
+                                le32_to_cpu(desc->data[i % entries_per_desc]));
 
                dfx_message++;
        }
@@ -237,44 +237,48 @@ static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf)
        if (ret)
                return;
 
-       dev_info(dev, "sch_nq_cnt: 0x%x\n", desc[0].data[1]);
+       dev_info(dev, "sch_nq_cnt: 0x%x\n", le32_to_cpu(desc[0].data[1]));
 
        ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, HCLGE_OPC_SCH_RQ_CNT);
        if (ret)
                return;
 
-       dev_info(dev, "sch_rq_cnt: 0x%x\n", desc[0].data[1]);
+       dev_info(dev, "sch_rq_cnt: 0x%x\n", le32_to_cpu(desc[0].data[1]));
 
        ret = hclge_dbg_cmd_send(hdev, desc, 0, 2, HCLGE_OPC_TM_INTERNAL_STS);
        if (ret)
                return;
 
-       dev_info(dev, "pri_bp: 0x%x\n", desc[0].data[1]);
-       dev_info(dev, "fifo_dfx_info: 0x%x\n", desc[0].data[2]);
-       dev_info(dev, "sch_roce_fifo_afull_gap: 0x%x\n", desc[0].data[3]);
-       dev_info(dev, "tx_private_waterline: 0x%x\n", desc[0].data[4]);
-       dev_info(dev, "tm_bypass_en: 0x%x\n", desc[0].data[5]);
-       dev_info(dev, "SSU_TM_BYPASS_EN: 0x%x\n", desc[1].data[0]);
-       dev_info(dev, "SSU_RESERVE_CFG: 0x%x\n", desc[1].data[1]);
+       dev_info(dev, "pri_bp: 0x%x\n", le32_to_cpu(desc[0].data[1]));
+       dev_info(dev, "fifo_dfx_info: 0x%x\n", le32_to_cpu(desc[0].data[2]));
+       dev_info(dev, "sch_roce_fifo_afull_gap: 0x%x\n",
+                le32_to_cpu(desc[0].data[3]));
+       dev_info(dev, "tx_private_waterline: 0x%x\n",
+                le32_to_cpu(desc[0].data[4]));
+       dev_info(dev, "tm_bypass_en: 0x%x\n", le32_to_cpu(desc[0].data[5]));
+       dev_info(dev, "SSU_TM_BYPASS_EN: 0x%x\n", le32_to_cpu(desc[1].data[0]));
+       dev_info(dev, "SSU_RESERVE_CFG: 0x%x\n", le32_to_cpu(desc[1].data[1]));
 
        ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
                                 HCLGE_OPC_TM_INTERNAL_CNT);
        if (ret)
                return;
 
-       dev_info(dev, "SCH_NIC_NUM: 0x%x\n", desc[0].data[1]);
-       dev_info(dev, "SCH_ROCE_NUM: 0x%x\n", desc[0].data[2]);
+       dev_info(dev, "SCH_NIC_NUM: 0x%x\n", le32_to_cpu(desc[0].data[1]));
+       dev_info(dev, "SCH_ROCE_NUM: 0x%x\n", le32_to_cpu(desc[0].data[2]));
 
        ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
                                 HCLGE_OPC_TM_INTERNAL_STS_1);
        if (ret)
                return;
 
-       dev_info(dev, "TC_MAP_SEL: 0x%x\n", desc[0].data[1]);
-       dev_info(dev, "IGU_PFC_PRI_EN: 0x%x\n", desc[0].data[2]);
-       dev_info(dev, "MAC_PFC_PRI_EN: 0x%x\n", desc[0].data[3]);
-       dev_info(dev, "IGU_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[4]);
-       dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[5]);
+       dev_info(dev, "TC_MAP_SEL: 0x%x\n", le32_to_cpu(desc[0].data[1]));
+       dev_info(dev, "IGU_PFC_PRI_EN: 0x%x\n", le32_to_cpu(desc[0].data[2]));
+       dev_info(dev, "MAC_PFC_PRI_EN: 0x%x\n", le32_to_cpu(desc[0].data[3]));
+       dev_info(dev, "IGU_PRI_MAP_TC_CFG: 0x%x\n",
+                le32_to_cpu(desc[0].data[4]));
+       dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n",
+                le32_to_cpu(desc[0].data[5]));
 }
 
 static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf)
@@ -364,7 +368,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
        pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
        dev_info(&hdev->pdev->dev, "PG_C pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
        dev_info(&hdev->pdev->dev, "PG_C pg_shapping: 0x%x\n",
-                pg_shap_cfg_cmd->pg_shapping_para);
+                le32_to_cpu(pg_shap_cfg_cmd->pg_shapping_para));
 
        cmd = HCLGE_OPC_TM_PG_P_SHAPPING;
        hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -375,7 +379,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
        pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
        dev_info(&hdev->pdev->dev, "PG_P pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
        dev_info(&hdev->pdev->dev, "PG_P pg_shapping: 0x%x\n",
-                pg_shap_cfg_cmd->pg_shapping_para);
+                le32_to_cpu(pg_shap_cfg_cmd->pg_shapping_para));
 
        cmd = HCLGE_OPC_TM_PORT_SHAPPING;
        hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -385,7 +389,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 
        port_shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
        dev_info(&hdev->pdev->dev, "PORT port_shapping: 0x%x\n",
-                port_shap_cfg_cmd->port_shapping_para);
+                le32_to_cpu(port_shap_cfg_cmd->port_shapping_para));
 
        cmd = HCLGE_OPC_TM_PG_SCH_MODE_CFG;
        hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -393,7 +397,8 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
        if (ret)
                goto err_tm_pg_cmd_send;
 
-       dev_info(&hdev->pdev->dev, "PG_SCH pg_id: %u\n", desc.data[0]);
+       dev_info(&hdev->pdev->dev, "PG_SCH pg_id: %u\n",
+                le32_to_cpu(desc.data[0]));
 
        cmd = HCLGE_OPC_TM_PRI_SCH_MODE_CFG;
        hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -401,7 +406,8 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
        if (ret)
                goto err_tm_pg_cmd_send;
 
-       dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n", desc.data[0]);
+       dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n",
+                le32_to_cpu(desc.data[0]));
 
        cmd = HCLGE_OPC_TM_QS_SCH_MODE_CFG;
        hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -409,7 +415,8 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
        if (ret)
                goto err_tm_pg_cmd_send;
 
-       dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n", desc.data[0]);
+       dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n",
+                le32_to_cpu(desc.data[0]));
 
        if (!hnae3_dev_dcb_supported(hdev)) {
                dev_info(&hdev->pdev->dev,
@@ -429,7 +436,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
        dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_group_id: 0x%x\n",
                 bp_to_qs_map_cmd->qs_group_id);
        dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_bit_map: 0x%x\n",
-                bp_to_qs_map_cmd->qs_bit_map);
+                le32_to_cpu(bp_to_qs_map_cmd->qs_bit_map));
        return;
 
 err_tm_pg_cmd_send:
@@ -471,7 +478,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
 
        qs_to_pri_map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
        dev_info(&hdev->pdev->dev, "QS_TO_PRI qs_id: %u\n",
-                qs_to_pri_map->qs_id);
+                le16_to_cpu(qs_to_pri_map->qs_id));
        dev_info(&hdev->pdev->dev, "QS_TO_PRI priority: %u\n",
                 qs_to_pri_map->priority);
        dev_info(&hdev->pdev->dev, "QS_TO_PRI link_vld: %u\n",
@@ -484,9 +491,10 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
                goto err_tm_cmd_send;
 
        nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
-       dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n", nq_to_qs_map->nq_id);
+       dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n",
+                le16_to_cpu(nq_to_qs_map->nq_id));
        dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: 0x%x\n",
-                nq_to_qs_map->qset_id);
+                le16_to_cpu(nq_to_qs_map->qset_id));
 
        cmd = HCLGE_OPC_TM_PG_WEIGHT;
        hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -505,7 +513,8 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
                goto err_tm_cmd_send;
 
        qs_weight = (struct hclge_qs_weight_cmd *)desc.data;
-       dev_info(&hdev->pdev->dev, "QS qs_id: %u\n", qs_weight->qs_id);
+       dev_info(&hdev->pdev->dev, "QS qs_id: %u\n",
+                le16_to_cpu(qs_weight->qs_id));
        dev_info(&hdev->pdev->dev, "QS dwrr: %u\n", qs_weight->dwrr);
 
        cmd = HCLGE_OPC_TM_PRI_WEIGHT;
@@ -527,7 +536,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
        shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
        dev_info(&hdev->pdev->dev, "PRI_C pri_id: %u\n", shap_cfg_cmd->pri_id);
        dev_info(&hdev->pdev->dev, "PRI_C pri_shapping: 0x%x\n",
-                shap_cfg_cmd->pri_shapping_para);
+                le32_to_cpu(shap_cfg_cmd->pri_shapping_para));
 
        cmd = HCLGE_OPC_TM_PRI_P_SHAPPING;
        hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -538,7 +547,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
        shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
        dev_info(&hdev->pdev->dev, "PRI_P pri_id: %u\n", shap_cfg_cmd->pri_id);
        dev_info(&hdev->pdev->dev, "PRI_P pri_shapping: 0x%x\n",
-                shap_cfg_cmd->pri_shapping_para);
+                le32_to_cpu(shap_cfg_cmd->pri_shapping_para));
 
        hclge_dbg_dump_tm_pg(hdev);
 
@@ -658,7 +667,7 @@ static void hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev)
        dev_info(&hdev->pdev->dev, "pause_trans_gap: 0x%x\n",
                 pause_param->pause_trans_gap);
        dev_info(&hdev->pdev->dev, "pause_trans_time: 0x%x\n",
-                pause_param->pause_trans_time);
+                le16_to_cpu(pause_param->pause_trans_time));
 }
 
 static void hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev)
@@ -712,7 +721,7 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
        tx_buf_cmd = (struct hclge_tx_buff_alloc_cmd *)desc[0].data;
        for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
                dev_info(&hdev->pdev->dev, "tx_packet_buf_tc_%d: 0x%x\n", i,
-                        tx_buf_cmd->tx_pkt_buff[i]);
+                        le16_to_cpu(tx_buf_cmd->tx_pkt_buff[i]));
 
        cmd = HCLGE_OPC_RX_PRIV_BUFF_ALLOC;
        hclge_cmd_setup_basic_desc(desc, cmd, true);
@@ -724,10 +733,10 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
        rx_buf_cmd = (struct hclge_rx_priv_buff_cmd *)desc[0].data;
        for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
                dev_info(&hdev->pdev->dev, "rx_packet_buf_tc_%d: 0x%x\n", i,
-                        rx_buf_cmd->buf_num[i]);
+                        le16_to_cpu(rx_buf_cmd->buf_num[i]));
 
        dev_info(&hdev->pdev->dev, "rx_share_buf: 0x%x\n",
-                rx_buf_cmd->shared_buf);
+                le16_to_cpu(rx_buf_cmd->shared_buf));
 
        cmd = HCLGE_OPC_RX_COM_WL_ALLOC;
        hclge_cmd_setup_basic_desc(desc, cmd, true);
@@ -738,7 +747,8 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
        rx_com_wl = (struct hclge_rx_com_wl *)desc[0].data;
        dev_info(&hdev->pdev->dev, "\n");
        dev_info(&hdev->pdev->dev, "rx_com_wl: high: 0x%x, low: 0x%x\n",
-                rx_com_wl->com_wl.high, rx_com_wl->com_wl.low);
+                le16_to_cpu(rx_com_wl->com_wl.high),
+                le16_to_cpu(rx_com_wl->com_wl.low));
 
        cmd = HCLGE_OPC_RX_GBL_PKT_CNT;
        hclge_cmd_setup_basic_desc(desc, cmd, true);
@@ -749,7 +759,8 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
        rx_packet_cnt = (struct hclge_rx_com_wl *)desc[0].data;
        dev_info(&hdev->pdev->dev,
                 "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n",
-                rx_packet_cnt->com_wl.high, rx_packet_cnt->com_wl.low);
+                le16_to_cpu(rx_packet_cnt->com_wl.high),
+                le16_to_cpu(rx_packet_cnt->com_wl.low));
        dev_info(&hdev->pdev->dev, "\n");
 
        if (!hnae3_dev_dcb_supported(hdev)) {
@@ -769,14 +780,16 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
        for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
                dev_info(&hdev->pdev->dev,
                         "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n", i,
-                        rx_priv_wl->tc_wl[i].high, rx_priv_wl->tc_wl[i].low);
+                        le16_to_cpu(rx_priv_wl->tc_wl[i].high),
+                        le16_to_cpu(rx_priv_wl->tc_wl[i].low));
 
        rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[1].data;
        for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
                dev_info(&hdev->pdev->dev,
                         "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n",
                         i + HCLGE_TC_NUM_ONE_DESC,
-                        rx_priv_wl->tc_wl[i].high, rx_priv_wl->tc_wl[i].low);
+                        le16_to_cpu(rx_priv_wl->tc_wl[i].high),
+                        le16_to_cpu(rx_priv_wl->tc_wl[i].low));
 
        cmd = HCLGE_OPC_RX_COM_THRD_ALLOC;
        hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
@@ -791,16 +804,16 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
        for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
                dev_info(&hdev->pdev->dev,
                         "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n", i,
-                        rx_com_thrd->com_thrd[i].high,
-                        rx_com_thrd->com_thrd[i].low);
+                        le16_to_cpu(rx_com_thrd->com_thrd[i].high),
+                        le16_to_cpu(rx_com_thrd->com_thrd[i].low));
 
        rx_com_thrd = (struct hclge_rx_com_thrd *)desc[1].data;
        for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
                dev_info(&hdev->pdev->dev,
                         "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n",
                         i + HCLGE_TC_NUM_ONE_DESC,
-                        rx_com_thrd->com_thrd[i].high,
-                        rx_com_thrd->com_thrd[i].low);
+                        le16_to_cpu(rx_com_thrd->com_thrd[i].high),
+                        le16_to_cpu(rx_com_thrd->com_thrd[i].low));
        return;
 
 err_qos_cmd_send:
@@ -845,7 +858,8 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev)
                memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
                snprintf(printf_buf, HCLGE_DBG_BUF_LEN,
                         "%02u   |%02x:%02x:%02x:%02x:%02x:%02x|",
-                        req0->index, req0->mac_addr[0], req0->mac_addr[1],
+                        le16_to_cpu(req0->index),
+                        req0->mac_addr[0], req0->mac_addr[1],
                         req0->mac_addr[2], req0->mac_addr[3],
                         req0->mac_addr[4], req0->mac_addr[5]);
 
@@ -929,7 +943,7 @@ static void hclge_dbg_fd_tcam(struct hclge_dev *hdev)
        }
 }
 
-static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
+void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
 {
        dev_info(&hdev->pdev->dev, "PF reset count: %u\n",
                 hdev->rst_stats.pf_rst_cnt);
@@ -945,8 +959,6 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
                 hdev->rst_stats.hw_reset_done_cnt);
        dev_info(&hdev->pdev->dev, "reset count: %u\n",
                 hdev->rst_stats.reset_cnt);
-       dev_info(&hdev->pdev->dev, "reset count: %u\n",
-                hdev->rst_stats.reset_cnt);
        dev_info(&hdev->pdev->dev, "reset fail count: %u\n",
                 hdev->rst_stats.reset_fail_cnt);
        dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n",
@@ -961,6 +973,7 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
                 hclge_read_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG));
        dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
                 hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING));
+       dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
 }
 
 static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
@@ -1110,6 +1123,82 @@ static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev)
        }
 }
 
+static void hclge_dbg_dump_qs_shaper_single(struct hclge_dev *hdev, u16 qsid)
+{
+       struct hclge_qs_shapping_cmd *shap_cfg_cmd;
+       u8 ir_u, ir_b, ir_s, bs_b, bs_s;
+       struct hclge_desc desc;
+       u32 shapping_para;
+       int ret;
+
+       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, true);
+
+       shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data;
+       shap_cfg_cmd->qs_id = cpu_to_le16(qsid);
+
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "qs%u failed to get tx_rate, ret=%d\n",
+                       qsid, ret);
+               return;
+       }
+
+       shapping_para = le32_to_cpu(shap_cfg_cmd->qs_shapping_para);
+       ir_b = hclge_tm_get_field(shapping_para, IR_B);
+       ir_u = hclge_tm_get_field(shapping_para, IR_U);
+       ir_s = hclge_tm_get_field(shapping_para, IR_S);
+       bs_b = hclge_tm_get_field(shapping_para, BS_B);
+       bs_s = hclge_tm_get_field(shapping_para, BS_S);
+
+       dev_info(&hdev->pdev->dev,
+                "qs%u ir_b:%u, ir_u:%u, ir_s:%u, bs_b:%u, bs_s:%u\n",
+                qsid, ir_b, ir_u, ir_s, bs_b, bs_s);
+}
+
+static void hclge_dbg_dump_qs_shaper_all(struct hclge_dev *hdev)
+{
+       struct hnae3_knic_private_info *kinfo;
+       struct hclge_vport *vport;
+       int vport_id, i;
+
+       for (vport_id = 0; vport_id <= pci_num_vf(hdev->pdev); vport_id++) {
+               vport = &hdev->vport[vport_id];
+               kinfo = &vport->nic.kinfo;
+
+               dev_info(&hdev->pdev->dev, "qs cfg of vport%d:\n", vport_id);
+
+               for (i = 0; i < kinfo->num_tc; i++) {
+                       u16 qsid = vport->qs_offset + i;
+
+                       hclge_dbg_dump_qs_shaper_single(hdev, qsid);
+               }
+       }
+}
+
+static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev,
+                                    const char *cmd_buf)
+{
+#define HCLGE_MAX_QSET_NUM 1024
+
+       u16 qsid;
+       int ret;
+
+       ret = kstrtou16(cmd_buf, 0, &qsid);
+       if (ret) {
+               hclge_dbg_dump_qs_shaper_all(hdev);
+               return;
+       }
+
+       if (qsid >= HCLGE_MAX_QSET_NUM) {
+               dev_err(&hdev->pdev->dev, "qsid(%u) out of range[0-1023]\n",
+                       qsid);
+               return;
+       }
+
+       hclge_dbg_dump_qs_shaper_single(hdev, qsid);
+}
+
 int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
 #define DUMP_REG       "dump reg"
@@ -1145,6 +1234,9 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
                                          &cmd_buf[sizeof("dump ncl_config")]);
        } else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
                hclge_dbg_dump_mac_tnl_status(hdev);
+       } else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
+               hclge_dbg_dump_qs_shaper(hdev,
+                                        &cmd_buf[sizeof("dump qs shaper")]);
        } else {
                dev_info(&hdev->pdev->dev, "unknown command\n");
                return -EINVAL;
index 87dece0..dc66b4e 100644 (file)
@@ -1747,7 +1747,7 @@ static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
 
        if (vf_id) {
                if (vf_id >= hdev->num_alloc_vport) {
-                       dev_err(dev, "invalid vf id(%d)\n", vf_id);
+                       dev_err(dev, "invalid vf id(%u)\n", vf_id);
                        return;
                }
 
index e02e01b..4f8f068 100644 (file)
@@ -55,6 +55,8 @@
 
 #define HCLGE_LINK_STATUS_MS   10
 
+#define HCLGE_VF_VPORT_START_NUM       1
+
 static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
 static int hclge_init_vlan_config(struct hclge_dev *hdev);
 static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
@@ -323,8 +325,7 @@ static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = {
        {
                .flags = HCLGE_MAC_MGR_MASK_VLAN_B,
                .ethter_type = cpu_to_le16(ETH_P_LLDP),
-               .mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)),
-               .mac_addr_lo16 = cpu_to_le16(htons(0x000E)),
+               .mac_addr = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e},
                .i_port_bitmap = 0x1,
        },
 };
@@ -1194,6 +1195,35 @@ static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
                hclge_parse_backplane_link_mode(hdev, speed_ability);
 }
 
+static u32 hclge_get_max_speed(u8 speed_ability)
+{
+       if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+               return HCLGE_MAC_SPEED_100G;
+
+       if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+               return HCLGE_MAC_SPEED_50G;
+
+       if (speed_ability & HCLGE_SUPPORT_40G_BIT)
+               return HCLGE_MAC_SPEED_40G;
+
+       if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+               return HCLGE_MAC_SPEED_25G;
+
+       if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+               return HCLGE_MAC_SPEED_10G;
+
+       if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+               return HCLGE_MAC_SPEED_1G;
+
+       if (speed_ability & HCLGE_SUPPORT_100M_BIT)
+               return HCLGE_MAC_SPEED_100M;
+
+       if (speed_ability & HCLGE_SUPPORT_10M_BIT)
+               return HCLGE_MAC_SPEED_10M;
+
+       return HCLGE_MAC_SPEED_1G;
+}
+
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 {
        struct hclge_cfg_param_cmd *req;
@@ -1364,9 +1394,11 @@ static int hclge_configure(struct hclge_dev *hdev)
 
        hclge_parse_link_mode(hdev, cfg.speed_ability);
 
+       hdev->hw.mac.max_speed = hclge_get_max_speed(cfg.speed_ability);
+
        if ((hdev->tc_max > HNAE3_MAX_TC) ||
            (hdev->tc_max < 1)) {
-               dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
+               dev_warn(&hdev->pdev->dev, "TC num = %u.\n",
                         hdev->tc_max);
                hdev->tc_max = 1;
        }
@@ -1626,7 +1658,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
        num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
 
        if (hdev->num_tqps < num_vport) {
-               dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)",
+               dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)",
                        hdev->num_tqps, num_vport);
                return -EINVAL;
        }
@@ -1649,6 +1681,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
        for (i = 0; i < num_vport; i++) {
                vport->back = hdev;
                vport->vport_id = i;
+               vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO;
                vport->mps = HCLGE_MAC_DEFAULT_FRAME;
                vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
                vport->rxvlan_cfg.rx_vlan_offload_en = true;
@@ -2312,7 +2345,7 @@ static int hclge_init_msi(struct hclge_dev *hdev)
        }
        if (vectors < hdev->num_msi)
                dev_warn(&hdev->pdev->dev,
-                        "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n",
+                        "requested %u MSI/MSI-X, but allocated %d MSI/MSI-X\n",
                         hdev->num_msi, vectors);
 
        hdev->num_msi = vectors;
@@ -2744,7 +2777,7 @@ static void hclge_update_port_capability(struct hclge_mac *mac)
        else if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
                mac->module_type = HNAE3_MODULE_TYPE_TP;
 
-       if (mac->support_autoneg == true) {
+       if (mac->support_autoneg) {
                linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mac->supported);
                linkmode_copy(mac->advertising, mac->supported);
        } else {
@@ -2871,6 +2904,62 @@ static int hclge_get_status(struct hnae3_handle *handle)
        return hdev->hw.mac.link;
 }
 
+static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf)
+{
+       if (pci_num_vf(hdev->pdev) == 0) {
+               dev_err(&hdev->pdev->dev,
+                       "SRIOV is disabled, can not get vport(%d) info.\n", vf);
+               return NULL;
+       }
+
+       if (vf < 0 || vf >= pci_num_vf(hdev->pdev)) {
+               dev_err(&hdev->pdev->dev,
+                       "vf id(%d) is out of range(0 <= vfid < %d)\n",
+                       vf, pci_num_vf(hdev->pdev));
+               return NULL;
+       }
+
+       /* VF start from 1 in vport */
+       vf += HCLGE_VF_VPORT_START_NUM;
+       return &hdev->vport[vf];
+}
+
+static int hclge_get_vf_config(struct hnae3_handle *handle, int vf,
+                              struct ifla_vf_info *ivf)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+
+       vport = hclge_get_vf_vport(hdev, vf);
+       if (!vport)
+               return -EINVAL;
+
+       ivf->vf = vf;
+       ivf->linkstate = vport->vf_info.link_state;
+       ivf->spoofchk = vport->vf_info.spoofchk;
+       ivf->trusted = vport->vf_info.trusted;
+       ivf->min_tx_rate = 0;
+       ivf->max_tx_rate = vport->vf_info.max_tx_rate;
+       ether_addr_copy(ivf->mac, vport->vf_info.mac);
+
+       return 0;
+}
+
+static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
+                                  int link_state)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+
+       vport = hclge_get_vf_vport(hdev, vf);
+       if (!vport)
+               return -EINVAL;
+
+       vport->vf_info.link_state = link_state;
+
+       return 0;
+}
+
 static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 {
        u32 rst_src_reg, cmdq_src_reg, msix_src_reg;
@@ -3191,7 +3280,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
 
                if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) {
                        dev_err(&hdev->pdev->dev,
-                               "flr wait timeout: %d\n", cnt);
+                               "flr wait timeout: %u\n", cnt);
                        return -EBUSY;
                }
 
@@ -3241,7 +3330,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
                ret = hclge_set_vf_rst(hdev, vport->vport_id, reset);
                if (ret) {
                        dev_err(&hdev->pdev->dev,
-                               "set vf(%d) rst failed %d!\n",
+                               "set vf(%u) rst failed %d!\n",
                                vport->vport_id, ret);
                        return ret;
                }
@@ -3256,7 +3345,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
                ret = hclge_inform_reset_assert_to_vf(vport);
                if (ret)
                        dev_warn(&hdev->pdev->dev,
-                                "inform reset to vf(%d) failed %d!\n",
+                                "inform reset to vf(%u) failed %d!\n",
                                 vport->vport_id, ret);
        }
 
@@ -3569,7 +3658,7 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
                hdev->rst_stats.reset_fail_cnt++;
                set_bit(hdev->reset_type, &hdev->reset_pending);
                dev_info(&hdev->pdev->dev,
-                        "re-schedule reset task(%d)\n",
+                        "re-schedule reset task(%u)\n",
                         hdev->rst_stats.reset_fail_cnt);
                return true;
        }
@@ -3580,6 +3669,9 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
        hclge_reset_handshake(hdev, true);
 
        dev_err(&hdev->pdev->dev, "Reset fail!\n");
+
+       hclge_dbg_dump_rst_info(hdev);
+
        return false;
 }
 
@@ -3763,12 +3855,13 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
                                  HCLGE_RESET_INTERVAL))) {
                mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL);
                return;
-       } else if (hdev->default_reset_request)
+       } else if (hdev->default_reset_request) {
                hdev->reset_level =
                        hclge_get_reset_level(ae_dev,
                                              &hdev->default_reset_request);
-       else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ)))
+       } else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) {
                hdev->reset_level = HNAE3_FUNC_RESET;
+       }
 
        dev_info(&hdev->pdev->dev, "received reset event, reset type is %d\n",
                 hdev->reset_level);
@@ -3893,6 +3986,7 @@ static void hclge_service_task(struct work_struct *work)
        hclge_update_link_status(hdev);
        hclge_update_vport_alive(hdev);
        hclge_sync_vlan_filter(hdev);
+
        if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) {
                hclge_rfs_filter_expire(hdev);
                hdev->fd_arfs_expire_timer = 0;
@@ -4399,7 +4493,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
         */
        if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) {
                dev_err(&hdev->pdev->dev,
-                       "Configure rss tc size failed, invalid TC_SIZE = %d\n",
+                       "Configure rss tc size failed, invalid TC_SIZE = %u\n",
                        rss_size);
                return -EINVAL;
        }
@@ -4577,8 +4671,8 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, int vector,
        return ret;
 }
 
-int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
-                              struct hclge_promisc_param *param)
+static int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
+                                     struct hclge_promisc_param *param)
 {
        struct hclge_promisc_cfg_cmd *req;
        struct hclge_desc desc;
@@ -4605,8 +4699,9 @@ int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
        return ret;
 }
 
-void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
-                             bool en_mc, bool en_bc, int vport_id)
+static void hclge_promisc_param_init(struct hclge_promisc_param *param,
+                                    bool en_uc, bool en_mc, bool en_bc,
+                                    int vport_id)
 {
        if (!param)
                return;
@@ -4621,12 +4716,21 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
        param->vf_id = vport_id;
 }
 
+int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc,
+                                bool en_mc_pmc, bool en_bc_pmc)
+{
+       struct hclge_dev *hdev = vport->back;
+       struct hclge_promisc_param param;
+
+       hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, en_bc_pmc,
+                                vport->vport_id);
+       return hclge_cmd_set_promisc_mode(hdev, &param);
+}
+
 static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
                                  bool en_mc_pmc)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
-       struct hclge_dev *hdev = vport->back;
-       struct hclge_promisc_param param;
        bool en_bc_pmc = true;
 
        /* For revision 0x20, if broadcast promisc enabled, vlan filter is
@@ -4636,9 +4740,8 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
        if (handle->pdev->revision == 0x20)
                en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false;
 
-       hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, en_bc_pmc,
-                                vport->vport_id);
-       return hclge_cmd_set_promisc_mode(hdev, &param);
+       return hclge_set_vport_promisc_mode(vport, en_uc_pmc, en_mc_pmc,
+                                           en_bc_pmc);
 }
 
 static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
@@ -4740,7 +4843,7 @@ static int hclge_init_fd_config(struct hclge_dev *hdev)
                break;
        default:
                dev_err(&hdev->pdev->dev,
-                       "Unsupported flow director mode %d\n",
+                       "Unsupported flow director mode %u\n",
                        hdev->fd_cfg.fd_mode);
                return -EOPNOTSUPP;
        }
@@ -5070,7 +5173,7 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
                                   true);
        if (ret) {
                dev_err(&hdev->pdev->dev,
-                       "fd key_y config fail, loc=%d, ret=%d\n",
+                       "fd key_y config fail, loc=%u, ret=%d\n",
                        rule->queue_id, ret);
                return ret;
        }
@@ -5079,7 +5182,7 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
                                   true);
        if (ret)
                dev_err(&hdev->pdev->dev,
-                       "fd key_x config fail, loc=%d, ret=%d\n",
+                       "fd key_x config fail, loc=%u, ret=%d\n",
                        rule->queue_id, ret);
        return ret;
 }
@@ -5328,7 +5431,7 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
                }
        } else if (!is_add) {
                dev_err(&hdev->pdev->dev,
-                       "delete fail, rule %d is inexistent\n",
+                       "delete fail, rule %u is inexistent\n",
                        location);
                return -EINVAL;
        }
@@ -5568,7 +5671,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 
                if (vf > hdev->num_req_vfs) {
                        dev_err(&hdev->pdev->dev,
-                               "Error: vf id (%d) > max vf num (%d)\n",
+                               "Error: vf id (%u) > max vf num (%u)\n",
                                vf, hdev->num_req_vfs);
                        return -EINVAL;
                }
@@ -5578,7 +5681,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 
                if (ring >= tqps) {
                        dev_err(&hdev->pdev->dev,
-                               "Error: queue id (%d) > max tqp num (%d)\n",
+                               "Error: queue id (%u) > max tqp num (%u)\n",
                                ring, tqps - 1);
                        return -EINVAL;
                }
@@ -5637,7 +5740,7 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle,
 
        if (!hclge_fd_rule_exist(hdev, fs->location)) {
                dev_err(&hdev->pdev->dev,
-                       "Delete fail, rule %d is inexistent\n", fs->location);
+                       "Delete fail, rule %u is inexistent\n", fs->location);
                return -ENOENT;
        }
 
@@ -5714,7 +5817,7 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 
                if (ret) {
                        dev_warn(&hdev->pdev->dev,
-                                "Restore rule %d failed, remove it\n",
+                                "Restore rule %u failed, remove it\n",
                                 rule->location);
                        clear_bit(rule->location, hdev->fd_bmap);
                        hlist_del(&rule->rule_node);
@@ -6707,7 +6810,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 
        if (cmdq_resp) {
                dev_err(&hdev->pdev->dev,
-                       "cmdq execute failed for get_mac_vlan_cmd_status,status=%d.\n",
+                       "cmdq execute failed for get_mac_vlan_cmd_status,status=%u.\n",
                        cmdq_resp);
                return -EIO;
        }
@@ -6959,7 +7062,7 @@ static int hclge_init_umv_space(struct hclge_dev *hdev)
 
        if (allocated_size < hdev->wanted_umv_size)
                dev_warn(&hdev->pdev->dev,
-                        "Alloc umv space failed, want %d, get %d\n",
+                        "Alloc umv space failed, want %u, get %u\n",
                         hdev->wanted_umv_size, allocated_size);
 
        mutex_init(&hdev->umv_mutex);
@@ -7127,7 +7230,7 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 
        /* check if we just hit the duplicate */
        if (!ret) {
-               dev_warn(&hdev->pdev->dev, "VF %d mac(%pM) exists\n",
+               dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n",
                         vport->vport_id, addr);
                return 0;
        }
@@ -7308,7 +7411,7 @@ void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
        mc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_MC;
 
        list_for_each_entry_safe(mac_cfg, tmp, list, node) {
-               if (strncmp(mac_cfg->mac_addr, mac_addr, ETH_ALEN) == 0) {
+               if (ether_addr_equal(mac_cfg->mac_addr, mac_addr)) {
                        if (uc_flag && mac_cfg->hd_tbl_status)
                                hclge_rm_uc_addr_common(vport, mac_addr);
 
@@ -7380,7 +7483,7 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
 
        if (cmdq_resp) {
                dev_err(&hdev->pdev->dev,
-                       "cmdq execute failed for get_mac_ethertype_cmd_status, status=%d.\n",
+                       "cmdq execute failed for get_mac_ethertype_cmd_status, status=%u.\n",
                        cmdq_resp);
                return -EIO;
        }
@@ -7402,7 +7505,7 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
                break;
        default:
                dev_err(&hdev->pdev->dev,
-                       "add mac ethertype failed for undefined, code=%d.\n",
+                       "add mac ethertype failed for undefined, code=%u.\n",
                        resp_code);
                return_status = -EIO;
        }
@@ -7410,6 +7513,67 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
        return return_status;
 }
 
+static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx,
+                                    u8 *mac_addr)
+{
+       struct hclge_mac_vlan_tbl_entry_cmd req;
+       struct hclge_dev *hdev = vport->back;
+       struct hclge_desc desc;
+       u16 egress_port = 0;
+       int i;
+
+       if (is_zero_ether_addr(mac_addr))
+               return false;
+
+       memset(&req, 0, sizeof(req));
+       hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
+                       HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
+       req.egress_port = cpu_to_le16(egress_port);
+       hclge_prepare_mac_addr(&req, mac_addr, false);
+
+       if (hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false) != -ENOENT)
+               return true;
+
+       vf_idx += HCLGE_VF_VPORT_START_NUM;
+       for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++)
+               if (i != vf_idx &&
+                   ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac))
+                       return true;
+
+       return false;
+}
+
+static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf,
+                           u8 *mac_addr)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+
+       vport = hclge_get_vf_vport(hdev, vf);
+       if (!vport)
+               return -EINVAL;
+
+       if (ether_addr_equal(mac_addr, vport->vf_info.mac)) {
+               dev_info(&hdev->pdev->dev,
+                        "Specified MAC(=%pM) is same as before, no change committed!\n",
+                        mac_addr);
+               return 0;
+       }
+
+       if (hclge_check_vf_mac_exist(vport, vf, mac_addr)) {
+               dev_err(&hdev->pdev->dev, "Specified MAC(=%pM) exists!\n",
+                       mac_addr);
+               return -EEXIST;
+       }
+
+       ether_addr_copy(vport->vf_info.mac, mac_addr);
+       dev_info(&hdev->pdev->dev,
+                "MAC of VF %d has been set to %pM, and it will be reinitialized!\n",
+                vf, mac_addr);
+
+       return hclge_inform_reset_assert_to_vf(vport);
+}
+
 static int hclge_add_mgr_tbl(struct hclge_dev *hdev,
                             const struct hclge_mac_mgr_tbl_entry_cmd *req)
 {
@@ -7582,7 +7746,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
                                    bool is_kill, u16 vlan,
                                    __be16 proto)
 {
-#define HCLGE_MAX_VF_BYTES  16
+       struct hclge_vport *vport = &hdev->vport[vfid];
        struct hclge_vlan_filter_vf_cfg_cmd *req0;
        struct hclge_vlan_filter_vf_cfg_cmd *req1;
        struct hclge_desc desc[2];
@@ -7591,10 +7755,18 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
        int ret;
 
        /* if vf vlan table is full, firmware will close vf vlan filter, it
-        * is unable and unnecessary to add new vlan id to vf vlan filter
+        * is unable and unnecessary to add new vlan id to vf vlan filter.
+        * If spoof check is enabled, and vf vlan is full, it shouldn't add
+        * new vlan, because tx packets with these vlan id will be dropped.
         */
-       if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill)
+       if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) {
+               if (vport->vf_info.spoofchk && vlan) {
+                       dev_err(&hdev->pdev->dev,
+                               "Can't add vlan due to spoof check is on and vf vlan table is full\n");
+                       return -EPERM;
+               }
                return 0;
+       }
 
        hclge_cmd_setup_basic_desc(&desc[0],
                                   HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
@@ -7638,7 +7810,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
                }
 
                dev_err(&hdev->pdev->dev,
-                       "Add vf vlan filter fail, ret =%d.\n",
+                       "Add vf vlan filter fail, ret =%u.\n",
                        req0->resp_code);
        } else {
 #define HCLGE_VF_VLAN_DEL_NO_FOUND     1
@@ -7654,7 +7826,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
                        return 0;
 
                dev_err(&hdev->pdev->dev,
-                       "Kill vf vlan filter fail, ret =%d.\n",
+                       "Kill vf vlan filter fail, ret =%u.\n",
                        req0->resp_code);
        }
 
@@ -7673,9 +7845,10 @@ static int hclge_set_port_vlan_filter(struct hclge_dev *hdev, __be16 proto,
 
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_PF_CFG, false);
 
-       vlan_offset_160 = vlan_id / 160;
-       vlan_offset_byte = (vlan_id % 160) / 8;
-       vlan_offset_byte_val = 1 << (vlan_id % 8);
+       vlan_offset_160 = vlan_id / HCLGE_VLAN_ID_OFFSET_STEP;
+       vlan_offset_byte = (vlan_id % HCLGE_VLAN_ID_OFFSET_STEP) /
+                          HCLGE_VLAN_BYTE_SIZE;
+       vlan_offset_byte_val = 1 << (vlan_id % HCLGE_VLAN_BYTE_SIZE);
 
        req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data;
        req->vlan_offset = vlan_offset_160;
@@ -7703,7 +7876,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
                                       proto);
        if (ret) {
                dev_err(&hdev->pdev->dev,
-                       "Set %d vport vlan filter config fail, ret =%d.\n",
+                       "Set %u vport vlan filter config fail, ret =%d.\n",
                        vport_id, ret);
                return ret;
        }
@@ -7715,7 +7888,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
 
        if (!is_kill && test_and_set_bit(vport_id, hdev->vlan_table[vlan_id])) {
                dev_err(&hdev->pdev->dev,
-                       "Add port vlan failed, vport %d is already in vlan %d\n",
+                       "Add port vlan failed, vport %u is already in vlan %u\n",
                        vport_id, vlan_id);
                return -EINVAL;
        }
@@ -7723,7 +7896,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
        if (is_kill &&
            !test_and_clear_bit(vport_id, hdev->vlan_table[vlan_id])) {
                dev_err(&hdev->pdev->dev,
-                       "Delete port vlan failed, vport %d is not in vlan %d\n",
+                       "Delete port vlan failed, vport %u is not in vlan %u\n",
                        vport_id, vlan_id);
                return -EINVAL;
        }
@@ -8091,12 +8264,15 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle)
                }
 
                list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
-                       if (vlan->hd_tbl_status)
-                               hclge_set_vlan_filter_hw(hdev,
-                                                        htons(ETH_P_8021Q),
-                                                        vport->vport_id,
-                                                        vlan->vlan_id,
-                                                        false);
+                       int ret;
+
+                       if (!vlan->hd_tbl_status)
+                               continue;
+                       ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+                                                      vport->vport_id,
+                                                      vlan->vlan_id, false);
+                       if (ret)
+                               break;
                }
        }
 
@@ -8376,6 +8552,7 @@ int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu)
        struct hclge_dev *hdev = vport->back;
        int i, max_frm_size, ret;
 
+       /* HW supports 2 layers of vlan */
        max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
        if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
            max_frm_size > HCLGE_MAC_MAX_FRAME)
@@ -8791,16 +8968,16 @@ static void hclge_info_show(struct hclge_dev *hdev)
 
        dev_info(dev, "PF info begin:\n");
 
-       dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
-       dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
-       dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
-       dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
-       dev_info(dev, "Numbers of vmdp vports: %d\n", hdev->num_vmdq_vport);
-       dev_info(dev, "Numbers of VF for this PF: %d\n", hdev->num_req_vfs);
-       dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
-       dev_info(dev, "Total buffer size for TX/RX: %d\n", hdev->pkt_buf_size);
-       dev_info(dev, "TX buffer size for each TC: %d\n", hdev->tx_buf_size);
-       dev_info(dev, "DV buffer size for each TC: %d\n", hdev->dv_buf_size);
+       dev_info(dev, "Task queue pairs numbers: %u\n", hdev->num_tqps);
+       dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc);
+       dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc);
+       dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport);
+       dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport);
+       dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs);
+       dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map);
+       dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size);
+       dev_info(dev, "TX buffer size for each TC: %u\n", hdev->tx_buf_size);
+       dev_info(dev, "DV buffer size for each TC: %u\n", hdev->dv_buf_size);
        dev_info(dev, "This is %s PF\n",
                 hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
        dev_info(dev, "DCB %s\n",
@@ -8816,10 +8993,9 @@ static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
 {
        struct hnae3_client *client = vport->nic.client;
        struct hclge_dev *hdev = ae_dev->priv;
-       int rst_cnt;
+       int rst_cnt = hdev->rst_stats.reset_cnt;
        int ret;
 
-       rst_cnt = hdev->rst_stats.reset_cnt;
        ret = client->ops->init_instance(&vport->nic);
        if (ret)
                return ret;
@@ -8919,7 +9095,6 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 
                switch (client->type) {
                case HNAE3_CLIENT_KNIC:
-
                        hdev->nic_client = client;
                        vport->nic.client = client;
                        ret = hclge_init_nic_client_instance(ae_dev, vport);
@@ -9118,7 +9293,7 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev)
                ret = hclge_set_vf_rst(hdev, vport->vport_id, false);
                if (ret)
                        dev_warn(&hdev->pdev->dev,
-                                "clear vf(%d) rst failed %d!\n",
+                                "clear vf(%u) rst failed %d!\n",
                                 vport->vport_id, ret);
        }
 }
@@ -9140,6 +9315,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        hdev->reset_type = HNAE3_NONE_RESET;
        hdev->reset_level = HNAE3_FUNC_RESET;
        ae_dev->priv = hdev;
+
+       /* HW supports 2 layers of vlan */
        hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
 
        mutex_init(&hdev->vport_lock);
@@ -9338,6 +9515,219 @@ static void hclge_stats_clear(struct hclge_dev *hdev)
        memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats));
 }
 
+static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable)
+{
+       return hclge_config_switch_param(hdev, vf, enable,
+                                        HCLGE_SWITCH_ANTI_SPOOF_MASK);
+}
+
+static int hclge_set_vlan_spoofchk(struct hclge_dev *hdev, int vf, bool enable)
+{
+       return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+                                         HCLGE_FILTER_FE_NIC_INGRESS_B,
+                                         enable, vf);
+}
+
+static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable)
+{
+       int ret;
+
+       ret = hclge_set_mac_spoofchk(hdev, vf, enable);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "Set vf %d mac spoof check %s failed, ret=%d\n",
+                       vf, enable ? "on" : "off", ret);
+               return ret;
+       }
+
+       ret = hclge_set_vlan_spoofchk(hdev, vf, enable);
+       if (ret)
+               dev_err(&hdev->pdev->dev,
+                       "Set vf %d vlan spoof check %s failed, ret=%d\n",
+                       vf, enable ? "on" : "off", ret);
+
+       return ret;
+}
+
+static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf,
+                                bool enable)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       u32 new_spoofchk = enable ? 1 : 0;
+       int ret;
+
+       if (hdev->pdev->revision == 0x20)
+               return -EOPNOTSUPP;
+
+       vport = hclge_get_vf_vport(hdev, vf);
+       if (!vport)
+               return -EINVAL;
+
+       if (vport->vf_info.spoofchk == new_spoofchk)
+               return 0;
+
+       if (enable && test_bit(vport->vport_id, hdev->vf_vlan_full))
+               dev_warn(&hdev->pdev->dev,
+                        "vf %d vlan table is full, enable spoof check may cause its packet send fail\n",
+                        vf);
+       else if (enable && hclge_is_umv_space_full(vport))
+               dev_warn(&hdev->pdev->dev,
+                        "vf %d mac table is full, enable spoof check may cause its packet send fail\n",
+                        vf);
+
+       ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, enable);
+       if (ret)
+               return ret;
+
+       vport->vf_info.spoofchk = new_spoofchk;
+       return 0;
+}
+
+static int hclge_reset_vport_spoofchk(struct hclge_dev *hdev)
+{
+       struct hclge_vport *vport = hdev->vport;
+       int ret;
+       int i;
+
+       if (hdev->pdev->revision == 0x20)
+               return 0;
+
+       /* resume the vf spoof check state after reset */
+       for (i = 0; i < hdev->num_alloc_vport; i++) {
+               ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id,
+                                              vport->vf_info.spoofchk);
+               if (ret)
+                       return ret;
+
+               vport++;
+       }
+
+       return 0;
+}
+
+static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       u32 new_trusted = enable ? 1 : 0;
+       bool en_bc_pmc;
+       int ret;
+
+       vport = hclge_get_vf_vport(hdev, vf);
+       if (!vport)
+               return -EINVAL;
+
+       if (vport->vf_info.trusted == new_trusted)
+               return 0;
+
+       /* Disable promisc mode for VF if it is not trusted any more. */
+       if (!enable && vport->vf_info.promisc_enable) {
+               en_bc_pmc = hdev->pdev->revision != 0x20;
+               ret = hclge_set_vport_promisc_mode(vport, false, false,
+                                                  en_bc_pmc);
+               if (ret)
+                       return ret;
+               vport->vf_info.promisc_enable = 0;
+               hclge_inform_vf_promisc_info(vport);
+       }
+
+       vport->vf_info.trusted = new_trusted;
+
+       return 0;
+}
+
+static void hclge_reset_vf_rate(struct hclge_dev *hdev)
+{
+       int ret;
+       int vf;
+
+       /* reset vf rate to default value */
+       for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) {
+               struct hclge_vport *vport = &hdev->vport[vf];
+
+               vport->vf_info.max_tx_rate = 0;
+               ret = hclge_tm_qs_shaper_cfg(vport, vport->vf_info.max_tx_rate);
+               if (ret)
+                       dev_err(&hdev->pdev->dev,
+                               "vf%d failed to reset to default, ret=%d\n",
+                               vf - HCLGE_VF_VPORT_START_NUM, ret);
+       }
+}
+
+static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf,
+                                    int min_tx_rate, int max_tx_rate)
+{
+       if (min_tx_rate != 0 ||
+           max_tx_rate < 0 || max_tx_rate > hdev->hw.mac.max_speed) {
+               dev_err(&hdev->pdev->dev,
+                       "min_tx_rate:%d [0], max_tx_rate:%d [0, %u]\n",
+                       min_tx_rate, max_tx_rate, hdev->hw.mac.max_speed);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf,
+                            int min_tx_rate, int max_tx_rate, bool force)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       int ret;
+
+       ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate);
+       if (ret)
+               return ret;
+
+       vport = hclge_get_vf_vport(hdev, vf);
+       if (!vport)
+               return -EINVAL;
+
+       if (!force && max_tx_rate == vport->vf_info.max_tx_rate)
+               return 0;
+
+       ret = hclge_tm_qs_shaper_cfg(vport, max_tx_rate);
+       if (ret)
+               return ret;
+
+       vport->vf_info.max_tx_rate = max_tx_rate;
+
+       return 0;
+}
+
+static int hclge_resume_vf_rate(struct hclge_dev *hdev)
+{
+       struct hnae3_handle *handle = &hdev->vport->nic;
+       struct hclge_vport *vport;
+       int ret;
+       int vf;
+
+       /* resume the vf max_tx_rate after reset */
+       for (vf = 0; vf < pci_num_vf(hdev->pdev); vf++) {
+               vport = hclge_get_vf_vport(hdev, vf);
+               if (!vport)
+                       return -EINVAL;
+
+               /* zero means max rate, after reset, firmware already set it to
+                * max rate, so just continue.
+                */
+               if (!vport->vf_info.max_tx_rate)
+                       continue;
+
+               ret = hclge_set_vf_rate(handle, vf, 0,
+                                       vport->vf_info.max_tx_rate, true);
+               if (ret) {
+                       dev_err(&hdev->pdev->dev,
+                               "vf%d failed to resume tx_rate:%u, ret=%d\n",
+                               vf, vport->vf_info.max_tx_rate, ret);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 static void hclge_reset_vport_state(struct hclge_dev *hdev)
 {
        struct hclge_vport *vport = hdev->vport;
@@ -9415,6 +9805,9 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
                return ret;
        }
 
+       /* Log and clear the hw errors that have already occurred */
+       hclge_handle_all_hns_hw_errors(ae_dev);
+
        /* Re-enable the hw error interrupts because
         * the interrupts get disabled on global reset.
         */
@@ -9437,6 +9830,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
        }
 
        hclge_reset_vport_state(hdev);
+       ret = hclge_reset_vport_spoofchk(hdev);
+       if (ret)
+               return ret;
+
+       ret = hclge_resume_vf_rate(hdev);
+       if (ret)
+               return ret;
 
        dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n",
                 HCLGE_DRIVER_NAME);
@@ -9449,6 +9849,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
        struct hclge_dev *hdev = ae_dev->priv;
        struct hclge_mac *mac = &hdev->hw.mac;
 
+       hclge_reset_vf_rate(hdev);
        hclge_misc_affinity_teardown(hdev);
        hclge_state_uninit(hdev);
 
@@ -9513,8 +9914,8 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
        u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
        struct hclge_dev *hdev = vport->back;
        u16 tc_size[HCLGE_MAX_TC_NUM] = {0};
-       int cur_rss_size = kinfo->rss_size;
-       int cur_tqps = kinfo->num_tqps;
+       u16 cur_rss_size = kinfo->rss_size;
+       u16 cur_tqps = kinfo->num_tqps;
        u16 tc_valid[HCLGE_MAX_TC_NUM];
        u16 roundup_size;
        u32 *rss_indir;
@@ -9568,7 +9969,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
 out:
        if (!ret)
                dev_info(&hdev->pdev->dev,
-                        "Channels changed, rss_size from %d to %d, tqps from %d to %d",
+                        "Channels changed, rss_size from %u to %u, tqps from %u to %u",
                         cur_rss_size, kinfo->rss_size,
                         cur_tqps, kinfo->rss_size * kinfo->num_tc);
 
@@ -10171,6 +10572,12 @@ static const struct hnae3_ae_ops hclge_ops = {
        .mac_connect_phy = hclge_mac_connect_phy,
        .mac_disconnect_phy = hclge_mac_disconnect_phy,
        .restore_vlan_table = hclge_restore_vlan_table,
+       .get_vf_config = hclge_get_vf_config,
+       .set_vf_link_state = hclge_set_vf_link_state,
+       .set_vf_spoofchk = hclge_set_vf_spoofchk,
+       .set_vf_trust = hclge_set_vf_trust,
+       .set_vf_rate = hclge_set_vf_rate,
+       .set_vf_mac = hclge_set_vf_mac,
 };
 
 static struct hnae3_ae_algo ae_algo = {
index c3d56b8..1c0f6df 100644 (file)
 
 /* Factor used to calculate offset and bitmap of VF num */
 #define HCLGE_VF_NUM_PER_CMD           64
-#define HCLGE_VF_NUM_PER_BYTE          8
 
 enum HLCGE_PORT_TYPE {
        HOST_PORT,
@@ -226,8 +225,6 @@ enum hclge_evt_cause {
        HCLGE_VECTOR0_EVENT_OTHER,
 };
 
-#define HCLGE_MPF_ENBALE 1
-
 enum HCLGE_MAC_SPEED {
        HCLGE_MAC_SPEED_UNKNOWN = 0,            /* unknown */
        HCLGE_MAC_SPEED_10M     = 10,           /* 10 Mbps */
@@ -258,6 +255,7 @@ struct hclge_mac {
        u8 support_autoneg;
        u8 speed_type;  /* 0: sfp speed, 1: active speed */
        u32 speed;
+       u32 max_speed;
        u32 speed_ability; /* speed ability supported by current media */
        u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */
        u32 fec_mode; /* active fec mode */
@@ -655,7 +653,6 @@ struct hclge_rst_stats {
        u32 hw_reset_done_cnt;  /* the number of HW reset has completed */
        u32 pf_rst_cnt;         /* the number of PF reset */
        u32 flr_rst_cnt;        /* the number of FLR */
-       u32 core_rst_cnt;       /* the number of CORE reset */
        u32 global_rst_cnt;     /* the number of GLOBAL */
        u32 imp_rst_cnt;        /* the number of IMP reset */
        u32 reset_cnt;          /* the number of reset */
@@ -886,6 +883,15 @@ struct hclge_port_base_vlan_config {
        struct hclge_vlan_info vlan_info;
 };
 
+struct hclge_vf_info {
+       int link_state;
+       u8 mac[ETH_ALEN];
+       u32 spoofchk;
+       u32 max_tx_rate;
+       u32 trusted;
+       u16 promisc_enable;
+};
+
 struct hclge_vport {
        u16 alloc_tqps; /* Allocated Tx/Rx queues */
 
@@ -917,15 +923,15 @@ struct hclge_vport {
        unsigned long state;
        unsigned long last_active_jiffies;
        u32 mps; /* Max packet size */
+       struct hclge_vf_info vf_info;
 
        struct list_head uc_mac_list;   /* Store VF unicast table */
        struct list_head mc_mac_list;   /* Store VF multicast table */
        struct list_head vlan_list;     /* Store VF vlan table */
 };
 
-void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
-                             bool en_mc, bool en_bc, int vport_id);
-
+int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc,
+                                bool en_mc_pmc, bool en_bc_pmc);
 int hclge_add_uc_addr_common(struct hclge_vport *vport,
                             const unsigned char *addr);
 int hclge_rm_uc_addr_common(struct hclge_vport *vport,
@@ -994,4 +1000,6 @@ int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev,
                                struct hclge_desc *desc);
 void hclge_report_hw_error(struct hclge_dev *hdev,
                           enum hnae3_hw_error_type type);
+void hclge_inform_vf_promisc_info(struct hclge_vport *vport);
+void hclge_dbg_dump_rst_info(struct hclge_dev *hdev);
 #endif
index f5da28a..0b433eb 100644 (file)
@@ -26,7 +26,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
 
        if (resp_data_len > HCLGE_MBX_MAX_RESP_DATA_SIZE) {
                dev_err(&hdev->pdev->dev,
-                       "PF fail to gen resp to VF len %d exceeds max len %d\n",
+                       "PF fail to gen resp to VF len %u exceeds max len %u\n",
                        resp_data_len,
                        HCLGE_MBX_MAX_RESP_DATA_SIZE);
                /* If resp_data_len is too long, set the value to max length
@@ -205,12 +205,38 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en,
 static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
                                     struct hclge_mbx_vf_to_pf_cmd *req)
 {
-       bool en_bc = req->msg[1] ? true : false;
-       struct hclge_promisc_param param;
+#define HCLGE_MBX_BC_INDEX     1
+#define HCLGE_MBX_UC_INDEX     2
+#define HCLGE_MBX_MC_INDEX     3
 
-       /* vf is not allowed to enable unicast/multicast broadcast */
-       hclge_promisc_param_init(&param, false, false, en_bc, vport->vport_id);
-       return hclge_cmd_set_promisc_mode(vport->back, &param);
+       bool en_bc = req->msg[HCLGE_MBX_BC_INDEX] ? true : false;
+       bool en_uc = req->msg[HCLGE_MBX_UC_INDEX] ? true : false;
+       bool en_mc = req->msg[HCLGE_MBX_MC_INDEX] ? true : false;
+       int ret;
+
+       if (!vport->vf_info.trusted) {
+               en_uc = false;
+               en_mc = false;
+       }
+
+       ret = hclge_set_vport_promisc_mode(vport, en_uc, en_mc, en_bc);
+       if (req->mbx_need_resp)
+               hclge_gen_resp_to_vf(vport, req, ret, NULL, 0);
+
+       vport->vf_info.promisc_enable = (en_uc || en_mc) ? 1 : 0;
+
+       return ret;
+}
+
+void hclge_inform_vf_promisc_info(struct hclge_vport *vport)
+{
+       u8 dest_vfid = (u8)vport->vport_id;
+       u8 msg_data[2];
+
+       memcpy(&msg_data[0], &vport->vf_info.promisc_enable, sizeof(u16));
+
+       hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
+                          HCLGE_MBX_PUSH_PROMISC_INFO, dest_vfid);
 }
 
 static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
@@ -223,6 +249,20 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
        if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_MODIFY) {
                const u8 *old_addr = (const u8 *)(&mbx_req->msg[8]);
 
+               /* If VF MAC has been configured by the host then it
+                * cannot be overridden by the MAC specified by the VM.
+                */
+               if (!is_zero_ether_addr(vport->vf_info.mac) &&
+                   !ether_addr_equal(mac_addr, vport->vf_info.mac)) {
+                       status = -EPERM;
+                       goto out;
+               }
+
+               if (!is_valid_ether_addr(mac_addr)) {
+                       status = -EINVAL;
+                       goto out;
+               }
+
                hclge_rm_uc_addr_common(vport, old_addr);
                status = hclge_add_uc_addr_common(vport, mac_addr);
                if (status) {
@@ -245,11 +285,12 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
                                                 false, HCLGE_MAC_ADDR_UC);
        } else {
                dev_err(&hdev->pdev->dev,
-                       "failed to set unicast mac addr, unknown subcode %d\n",
+                       "failed to set unicast mac addr, unknown subcode %u\n",
                        mbx_req->msg[1]);
                return -EIO;
        }
 
+out:
        if (mbx_req->mbx_need_resp & HCLGE_MBX_NEED_RESP_BIT)
                hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
 
@@ -278,7 +319,7 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
                                                 false, HCLGE_MAC_ADDR_MC);
        } else {
                dev_err(&hdev->pdev->dev,
-                       "failed to set mcast mac addr, unknown subcode %d\n",
+                       "failed to set mcast mac addr, unknown subcode %u\n",
                        mbx_req->msg[1]);
                return -EIO;
        }
@@ -324,6 +365,9 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
                proto =  msg_cmd->proto;
                status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
                                               vlan, is_kill);
+               if (mbx_req->mbx_need_resp)
+                       return hclge_gen_resp_to_vf(vport, mbx_req, status,
+                                                   NULL, 0);
        } else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) {
                struct hnae3_handle *handle = &vport->nic;
                bool en = msg_cmd->is_kill ? true : false;
@@ -398,6 +442,13 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport,
                                    HCLGE_TQPS_RSS_INFO_LEN);
 }
 
+static int hclge_get_vf_mac_addr(struct hclge_vport *vport,
+                                struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+       return hclge_gen_resp_to_vf(vport, mbx_req, 0, vport->vf_info.mac,
+                                   ETH_ALEN);
+}
+
 static int hclge_get_vf_queue_depth(struct hclge_vport *vport,
                                    struct hclge_mbx_vf_to_pf_cmd *mbx_req,
                                    bool gen_resp)
@@ -428,6 +479,9 @@ static int hclge_get_vf_media_type(struct hclge_vport *vport,
 static int hclge_get_link_info(struct hclge_vport *vport,
                               struct hclge_mbx_vf_to_pf_cmd *mbx_req)
 {
+#define HCLGE_VF_LINK_STATE_UP         1U
+#define HCLGE_VF_LINK_STATE_DOWN       0U
+
        struct hclge_dev *hdev = vport->back;
        u16 link_status;
        u8 msg_data[8];
@@ -435,7 +489,19 @@ static int hclge_get_link_info(struct hclge_vport *vport,
        u16 duplex;
 
        /* mac.link can only be 0 or 1 */
-       link_status = (u16)hdev->hw.mac.link;
+       switch (vport->vf_info.link_state) {
+       case IFLA_VF_LINK_STATE_ENABLE:
+               link_status = HCLGE_VF_LINK_STATE_UP;
+               break;
+       case IFLA_VF_LINK_STATE_DISABLE:
+               link_status = HCLGE_VF_LINK_STATE_DOWN;
+               break;
+       case IFLA_VF_LINK_STATE_AUTO:
+       default:
+               link_status = (u16)hdev->hw.mac.link;
+               break;
+       }
+
        duplex = hdev->hw.mac.duplex;
        memcpy(&msg_data[0], &link_status, sizeof(u16));
        memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32));
@@ -489,7 +555,7 @@ static void hclge_reset_vf(struct hclge_vport *vport,
        struct hclge_dev *hdev = vport->back;
        int ret;
 
-       dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+       dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %u!",
                 vport->vport_id);
 
        ret = hclge_func_reset_cmd(hdev, vport->vport_id);
@@ -524,7 +590,8 @@ static int hclge_get_queue_id_in_pf(struct hclge_vport *vport,
        qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
        memcpy(resp_data, &qid_in_pf, sizeof(qid_in_pf));
 
-       return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, 2);
+       return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data,
+                                   sizeof(resp_data));
 }
 
 static int hclge_get_rss_key(struct hclge_vport *vport,
@@ -614,7 +681,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
                flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
                if (unlikely(!hnae3_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B))) {
                        dev_warn(&hdev->pdev->dev,
-                                "dropped invalid mailbox message, code = %d\n",
+                                "dropped invalid mailbox message, code = %u\n",
                                 req->msg[0]);
 
                        /* dropping/not processing this invalid message */
@@ -749,12 +816,19 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
                case HCLGE_MBX_PUSH_LINK_STATUS:
                        hclge_handle_link_change_event(hdev, req);
                        break;
+               case HCLGE_MBX_GET_MAC_ADDR:
+                       ret = hclge_get_vf_mac_addr(vport, req);
+                       if (ret)
+                               dev_err(&hdev->pdev->dev,
+                                       "PF failed(%d) to get MAC for VF\n",
+                                       ret);
+                       break;
                case HCLGE_MBX_NCSI_ERROR:
                        hclge_handle_ncsi_error(hdev);
                        break;
                default:
                        dev_err(&hdev->pdev->dev,
-                               "un-supported mailbox message, code = %d\n",
+                               "un-supported mailbox message, code = %u\n",
                                req->msg[0]);
                        break;
                }
index dc4dfd4..696c5ae 100644 (file)
@@ -134,7 +134,7 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev)
                         "no phy device is connected to mdio bus\n");
                return 0;
        } else if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
-               dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n",
+               dev_err(&hdev->pdev->dev, "phy_addr(%u) is too large.\n",
                        hdev->hw.mac.phy_addr);
                return -EINVAL;
        }
index 62399cc..fbc39a2 100644 (file)
@@ -46,7 +46,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
 #define DIVISOR_CLK            (1000 * 8)
 #define DIVISOR_IR_B_126       (126 * DIVISOR_CLK)
 
-       const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
+       static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
                6 * 256,        /* Prioriy level */
                6 * 32,         /* Prioriy group level */
                6 * 8,          /* Port level */
@@ -511,6 +511,49 @@ static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc, u8 grp_id,
        return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
+int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate)
+{
+       struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+       struct hclge_qs_shapping_cmd *shap_cfg_cmd;
+       struct hclge_dev *hdev = vport->back;
+       struct hclge_desc desc;
+       u8 ir_b, ir_u, ir_s;
+       u32 shaper_para;
+       int ret, i;
+
+       if (!max_tx_rate)
+               max_tx_rate = HCLGE_ETHER_MAX_RATE;
+
+       ret = hclge_shaper_para_calc(max_tx_rate, HCLGE_SHAPER_LVL_QSET,
+                                    &ir_b, &ir_u, &ir_s);
+       if (ret)
+               return ret;
+
+       shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s,
+                                                HCLGE_SHAPER_BS_U_DEF,
+                                                HCLGE_SHAPER_BS_S_DEF);
+
+       for (i = 0; i < kinfo->num_tc; i++) {
+               hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG,
+                                          false);
+
+               shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data;
+               shap_cfg_cmd->qs_id = cpu_to_le16(vport->qs_offset + i);
+               shap_cfg_cmd->qs_shapping_para = cpu_to_le32(shaper_para);
+
+               ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+               if (ret) {
+                       dev_err(&hdev->pdev->dev,
+                               "vf%u, qs%u failed to set tx_rate:%d, ret=%d\n",
+                               vport->vport_id, shap_cfg_cmd->qs_id,
+                               max_tx_rate, ret);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 {
        struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
@@ -532,7 +575,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
        /* Set to user value, no larger than max_rss_size. */
        if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size &&
            kinfo->req_rss_size <= max_rss_size) {
-               dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
+               dev_info(&hdev->pdev->dev, "rss changes from %u to %u\n",
                         kinfo->rss_size, kinfo->req_rss_size);
                kinfo->rss_size = kinfo->req_rss_size;
        } else if (kinfo->rss_size > max_rss_size ||
index 8186109..95ef6e1 100644 (file)
@@ -96,6 +96,12 @@ struct hclge_pg_shapping_cmd {
        __le32 pg_shapping_para;
 };
 
+struct hclge_qs_shapping_cmd {
+       __le16 qs_id;
+       u8 rsvd[2];
+       __le32 qs_shapping_para;
+};
+
 #define HCLGE_BP_GRP_NUM               32
 #define HCLGE_BP_SUB_GRP_ID_S          0
 #define HCLGE_BP_SUB_GRP_ID_M          GENMASK(4, 0)
@@ -154,4 +160,6 @@ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
 int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
 int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
+int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
+
 #endif
index d5d1cc5..af2245e 100644 (file)
@@ -50,7 +50,7 @@ static int hclgevf_cmd_csq_clean(struct hclgevf_hw *hw)
        rmb(); /* Make sure head is ready before touch any data */
 
        if (!hclgevf_is_valid_csq_clean_head(csq, head)) {
-               dev_warn(&hdev->pdev->dev, "wrong cmd head (%d, %d-%d)\n", head,
+               dev_warn(&hdev->pdev->dev, "wrong cmd head (%u, %d-%d)\n", head,
                         csq->next_to_use, csq->next_to_clean);
                dev_warn(&hdev->pdev->dev,
                         "Disabling any further commands to IMP firmware\n");
@@ -92,9 +92,9 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
        u32 reg_val;
 
        if (ring->flag == HCLGEVF_TYPE_CSQ) {
-               reg_val = (u32)ring->desc_dma_addr;
+               reg_val = lower_32_bits(ring->desc_dma_addr);
                hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_L_REG, reg_val);
-               reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+               reg_val = upper_32_bits(ring->desc_dma_addr);
                hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val);
 
                reg_val = hclgevf_read_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG);
@@ -105,9 +105,9 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
                hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
                hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 0);
        } else {
-               reg_val = (u32)ring->desc_dma_addr;
+               reg_val = lower_32_bits(ring->desc_dma_addr);
                hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_L_REG, reg_val);
-               reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+               reg_val = upper_32_bits(ring->desc_dma_addr);
                hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val);
 
                reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
index 7d7e712..25d78a5 100644 (file)
@@ -1113,6 +1113,7 @@ static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
 }
 
 static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
+                                       bool en_uc_pmc, bool en_mc_pmc,
                                        bool en_bc_pmc)
 {
        struct hclge_mbx_vf_to_pf_cmd *req;
@@ -1120,10 +1121,11 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
        int ret;
 
        req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
-
        hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
        req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE;
        req->msg[1] = en_bc_pmc ? 1 : 0;
+       req->msg[2] = en_uc_pmc ? 1 : 0;
+       req->msg[3] = en_mc_pmc ? 1 : 0;
 
        ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
        if (ret)
@@ -1133,9 +1135,17 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
        return ret;
 }
 
-static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc)
+static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
+                                   bool en_mc_pmc)
 {
-       return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc);
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+       struct pci_dev *pdev = hdev->pdev;
+       bool en_bc_pmc;
+
+       en_bc_pmc = pdev->revision != 0x20;
+
+       return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc,
+                                           en_bc_pmc);
 }
 
 static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
@@ -1174,11 +1184,37 @@ static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle)
        }
 }
 
+static int hclgevf_get_host_mac_addr(struct hclgevf_dev *hdev, u8 *p)
+{
+       u8 host_mac[ETH_ALEN];
+       int status;
+
+       status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_MAC_ADDR, 0, NULL, 0,
+                                     true, host_mac, ETH_ALEN);
+       if (status) {
+               dev_err(&hdev->pdev->dev,
+                       "fail to get VF MAC from host %d", status);
+               return status;
+       }
+
+       ether_addr_copy(p, host_mac);
+
+       return 0;
+}
+
 static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+       u8 host_mac_addr[ETH_ALEN];
 
-       ether_addr_copy(p, hdev->hw.mac.mac_addr);
+       if (hclgevf_get_host_mac_addr(hdev, host_mac_addr))
+               return;
+
+       hdev->has_pf_mac = !is_zero_ether_addr(host_mac_addr);
+       if (hdev->has_pf_mac)
+               ether_addr_copy(p, host_mac_addr);
+       else
+               ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
 static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
@@ -1275,7 +1311,7 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
        memcpy(&msg_data[3], &proto, sizeof(proto));
        ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
                                   HCLGE_MBX_VLAN_FILTER, msg_data,
-                                  HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+                                  HCLGEVF_VLAN_MBX_MSG_LEN, true, NULL, 0);
 
        /* when remove hw vlan filter failed, record the vlan id,
         * and try to remove it from hw later, to be consistence
@@ -1513,12 +1549,39 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
        return ret;
 }
 
+static void hclgevf_dump_rst_info(struct hclgevf_dev *hdev)
+{
+       dev_info(&hdev->pdev->dev, "VF function reset count: %u\n",
+                hdev->rst_stats.vf_func_rst_cnt);
+       dev_info(&hdev->pdev->dev, "FLR reset count: %u\n",
+                hdev->rst_stats.flr_rst_cnt);
+       dev_info(&hdev->pdev->dev, "VF reset count: %u\n",
+                hdev->rst_stats.vf_rst_cnt);
+       dev_info(&hdev->pdev->dev, "reset done count: %u\n",
+                hdev->rst_stats.rst_done_cnt);
+       dev_info(&hdev->pdev->dev, "HW reset done count: %u\n",
+                hdev->rst_stats.hw_rst_done_cnt);
+       dev_info(&hdev->pdev->dev, "reset count: %u\n",
+                hdev->rst_stats.rst_cnt);
+       dev_info(&hdev->pdev->dev, "reset fail count: %u\n",
+                hdev->rst_stats.rst_fail_cnt);
+       dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n",
+                hclgevf_read_dev(&hdev->hw, HCLGEVF_MISC_VECTOR_REG_BASE));
+       dev_info(&hdev->pdev->dev, "vector0 interrupt status: 0x%x\n",
+                hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STAT_REG));
+       dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n",
+                hclgevf_read_dev(&hdev->hw, HCLGEVF_CMDQ_TX_DEPTH_REG));
+       dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
+                hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING));
+       dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
+}
+
 static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
 {
        /* recover handshake status with IMP when reset fail */
        hclgevf_reset_handshake(hdev, true);
        hdev->rst_stats.rst_fail_cnt++;
-       dev_err(&hdev->pdev->dev, "failed to reset VF(%d)\n",
+       dev_err(&hdev->pdev->dev, "failed to reset VF(%u)\n",
                hdev->rst_stats.rst_fail_cnt);
 
        if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
@@ -1527,6 +1590,8 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
        if (hclgevf_is_reset_pending(hdev)) {
                set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
                hclgevf_reset_task_schedule(hdev);
+       } else {
+               hclgevf_dump_rst_info(hdev);
        }
 }
 
@@ -1748,6 +1813,8 @@ static void hclgevf_service_timer(struct timer_list *t)
 
 static void hclgevf_reset_service_task(struct work_struct *work)
 {
+#define        HCLGEVF_MAX_RESET_ATTEMPTS_CNT  3
+
        struct hclgevf_dev *hdev =
                container_of(work, struct hclgevf_dev, rst_service_task);
        int ret;
@@ -1800,7 +1867,7 @@ static void hclgevf_reset_service_task(struct work_struct *work)
                 * We cannot do much for 2. but to check first we can try reset
                 * our PCIe + stack and see if it alleviates the problem.
                 */
-               if (hdev->reset_attempts > 3) {
+               if (hdev->reset_attempts > HCLGEVF_MAX_RESET_ATTEMPTS_CNT) {
                        /* prepare for full reset of stack + pcie interface */
                        set_bit(HNAE3_VF_FULL_RESET, &hdev->reset_pending);
 
@@ -2103,7 +2170,6 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev)
                ret = hclgevf_set_rss_input_tuple(hdev, rss_cfg);
                if (ret)
                        return ret;
-
        }
 
        /* Initialize RSS indirect table */
@@ -2272,7 +2338,7 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
        }
        if (vectors < hdev->num_msi)
                dev_warn(&hdev->pdev->dev,
-                        "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n",
+                        "requested %u MSI/MSI-X, but allocated %d MSI/MSI-X\n",
                         hdev->num_msi, vectors);
 
        hdev->num_msi = vectors;
@@ -2348,12 +2414,12 @@ static void hclgevf_info_show(struct hclgevf_dev *hdev)
 
        dev_info(dev, "VF info begin:\n");
 
-       dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
-       dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
-       dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
-       dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
-       dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
-       dev_info(dev, "PF media type of this VF: %d\n",
+       dev_info(dev, "Task queue pairs numbers: %u\n", hdev->num_tqps);
+       dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc);
+       dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc);
+       dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport);
+       dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map);
+       dev_info(dev, "PF media type of this VF: %u\n",
                 hdev->hw.mac.media_type);
 
        dev_info(dev, "VF info end.\n");
@@ -2648,12 +2714,6 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
                return ret;
        }
 
-       if (pdev->revision >= 0x21) {
-               ret = hclgevf_set_promisc_mode(hdev, true);
-               if (ret)
-                       return ret;
-       }
-
        dev_info(&hdev->pdev->dev, "Reset done\n");
 
        return 0;
@@ -2728,17 +2788,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
        if (ret)
                goto err_config;
 
-       /* vf is not allowed to enable unicast/multicast promisc mode.
-        * For revision 0x20, default to disable broadcast promisc mode,
-        * firmware makes sure broadcast packets can be accepted.
-        * For revision 0x21, default to enable broadcast promisc mode.
-        */
-       if (pdev->revision >= 0x21) {
-               ret = hclgevf_set_promisc_mode(hdev, true);
-               if (ret)
-                       goto err_config;
-       }
-
        /* Initialize RSS for this VF */
        ret = hclgevf_rss_init_hw(hdev);
        if (ret) {
@@ -3152,6 +3201,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
        .get_global_queue_id = hclgevf_get_qid_global,
        .set_timer_task = hclgevf_set_timer_task,
        .get_link_mode = hclgevf_get_link_mode,
+       .set_promisc_mode = hclgevf_set_promisc_mode,
 };
 
 static struct hnae3_ae_algo ae_algovf = {
index 2b8d6bc..2f4c81b 100644 (file)
@@ -150,8 +150,6 @@ enum hclgevf_states {
        HCLGEVF_STATE_CMD_DISABLE,
 };
 
-#define HCLGEVF_MPF_ENBALE 1
-
 struct hclgevf_mac {
        u8 media_type;
        u8 module_type;
@@ -266,6 +264,7 @@ struct hclgevf_dev {
        u16 num_tx_desc;        /* desc num of per tx queue */
        u16 num_rx_desc;        /* desc num of per rx queue */
        u8 hw_tc_map;
+       u8 has_pf_mac;
 
        u16 num_msi;
        u16 num_msi_left;
index a108191..7cbd715 100644 (file)
@@ -33,7 +33,7 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
 
        if (resp_len > HCLGE_MBX_MAX_RESP_DATA_SIZE) {
                dev_err(&hdev->pdev->dev,
-                       "VF mbx response len(=%d) exceeds maximum(=%d)\n",
+                       "VF mbx response len(=%u) exceeds maximum(=%u)\n",
                        resp_len,
                        HCLGE_MBX_MAX_RESP_DATA_SIZE);
                return -EINVAL;
@@ -49,7 +49,7 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
 
        if (i >= HCLGEVF_MAX_TRY_TIMES) {
                dev_err(&hdev->pdev->dev,
-                       "VF could not get mbx(%d,%d) resp(=%d) from PF in %d tries\n",
+                       "VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n",
                        code0, code1, hdev->mbx_resp.received_resp, i);
                return -EIO;
        }
@@ -68,10 +68,10 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
 
        if (!(r_code0 == code0 && r_code1 == code1 && !mbx_resp->resp_status)) {
                dev_err(&hdev->pdev->dev,
-                       "VF could not match resp code(code0=%d,code1=%d), %d\n",
+                       "VF could not match resp code(code0=%u,code1=%u), %d\n",
                        code0, code1, mbx_resp->resp_status);
                dev_err(&hdev->pdev->dev,
-                       "VF could not match resp r_code(r_code0=%d,r_code1=%d)\n",
+                       "VF could not match resp r_code(r_code0=%u,r_code1=%u)\n",
                        r_code0, r_code1);
                return -EIO;
        }
@@ -168,7 +168,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
                if (unlikely(!hnae3_get_bit(flag, HCLGEVF_CMDQ_RX_OUTVLD_B))) {
                        dev_warn(&hdev->pdev->dev,
-                                "dropped invalid mailbox message, code = %d\n",
+                                "dropped invalid mailbox message, code = %u\n",
                                 req->msg[0]);
 
                        /* dropping/not processing this invalid message */
@@ -187,7 +187,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                case HCLGE_MBX_PF_VF_RESP:
                        if (resp->received_resp)
                                dev_warn(&hdev->pdev->dev,
-                                        "VF mbx resp flag not clear(%d)\n",
+                                        "VF mbx resp flag not clear(%u)\n",
                                         req->msg[1]);
                        resp->received_resp = true;
 
@@ -205,6 +205,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                case HCLGE_MBX_ASSERTING_RESET:
                case HCLGE_MBX_LINK_STAT_MODE:
                case HCLGE_MBX_PUSH_VLAN_INFO:
+               case HCLGE_MBX_PUSH_PROMISC_INFO:
                        /* set this mbx event as pending. This is required as we
                         * might loose interrupt event when mbx task is busy
                         * handling. This shall be cleared when mbx task just
@@ -218,7 +219,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                        if (atomic_read(&hdev->arq.count) >=
                            HCLGE_MBX_MAX_ARQ_MSG_NUM) {
                                dev_warn(&hdev->pdev->dev,
-                                        "Async Q full, dropping msg(%d)\n",
+                                        "Async Q full, dropping msg(%u)\n",
                                         req->msg[1]);
                                break;
                        }
@@ -235,7 +236,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                        break;
                default:
                        dev_err(&hdev->pdev->dev,
-                               "VF received unsupported(%d) mbx msg from PF\n",
+                               "VF received unsupported(%u) mbx msg from PF\n",
                                req->msg[0]);
                        break;
                }
@@ -248,6 +249,14 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                          crq->next_to_use);
 }
 
+static void hclgevf_parse_promisc_info(struct hclgevf_dev *hdev,
+                                      u16 promisc_info)
+{
+       if (!promisc_info)
+               dev_info(&hdev->pdev->dev,
+                        "Promisc mode is closed by host for being untrusted.\n");
+}
+
 void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 {
        enum hnae3_reset_type reset_type;
@@ -313,9 +322,12 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
                        hclgevf_update_port_base_vlan_info(hdev, state,
                                                           (u8 *)vlan_info, 8);
                        break;
+               case HCLGE_MBX_PUSH_PROMISC_INFO:
+                       hclgevf_parse_promisc_info(hdev, msg_q[1]);
+                       break;
                default:
                        dev_err(&hdev->pdev->dev,
-                               "fetched unsupported(%d) message from arq\n",
+                               "fetched unsupported(%u) message from arq\n",
                                msg_q[0]);
                        break;
                }
diff --git a/drivers/net/ethernet/hp/Kconfig b/drivers/net/ethernet/hp/Kconfig
deleted file mode 100644 (file)
index fb395cf..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# HP network device configuration
-#
-
-config NET_VENDOR_HP
-       bool "HP devices"
-       default y
-       depends on ISA || EISA || PCI
-       ---help---
-         If you have a network (Ethernet) card belonging to this class, say Y.
-
-         Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about HP cards. If you say Y, you will be asked for
-         your specific card in the following questions.
-
-if NET_VENDOR_HP
-
-config HP100
-       tristate "HP 10/100VG PCLAN (ISA, EISA, PCI) support"
-       depends on (ISA || EISA || PCI)
-       ---help---
-         If you have a network (Ethernet) card of this type, say Y here.
-
-         To compile this driver as a module, choose M here. The module
-         will be called hp100.
-
-endif # NET_VENDOR_HP
diff --git a/drivers/net/ethernet/hp/Makefile b/drivers/net/ethernet/hp/Makefile
deleted file mode 100644 (file)
index 5ed723b..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the HP network device drivers.
-#
-
-obj-$(CONFIG_HP100) += hp100.o
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
deleted file mode 100644 (file)
index 6ec78f5..0000000
+++ /dev/null
@@ -1,3037 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-** hp100.c
-** HP CASCADE Architecture Driver for 100VG-AnyLan Network Adapters
-**
-** $Id: hp100.c,v 1.58 2001/09/24 18:03:01 perex Exp perex $
-**
-** Based on the HP100 driver written by Jaroslav Kysela <perex@jcu.cz>
-** Extended for new busmaster capable chipsets by
-** Siegfried "Frieder" Loeffler (dg1sek) <floeff@mathematik.uni-stuttgart.de>
-**
-** Maintained by: Jaroslav Kysela <perex@perex.cz>
-**
-** This driver has only been tested with
-** -- HP J2585B 10/100 Mbit/s PCI Busmaster
-** -- HP J2585A 10/100 Mbit/s PCI
-** -- HP J2970A 10 Mbit/s PCI Combo 10base-T/BNC
-** -- HP J2973A 10 Mbit/s PCI 10base-T
-** -- HP J2573  10/100 ISA
-** -- Compex ReadyLink ENET100-VG4  10/100 Mbit/s PCI / EISA
-** -- Compex FreedomLine 100/VG  10/100 Mbit/s ISA / EISA / PCI
-**
-** but it should also work with the other CASCADE based adapters.
-**
-** TODO:
-**       -  J2573 seems to hang sometimes when in shared memory mode.
-**       -  Mode for Priority TX
-**       -  Check PCI registers, performance might be improved?
-**       -  To reduce interrupt load in busmaster, one could switch off
-**          the interrupts that are used to refill the queues whenever the
-**          queues are filled up to more than a certain threshold.
-**       -  some updates for EISA version of card
-**
-**
-**
-** 1.57c -> 1.58
-**   - used indent to change coding-style
-**   - added KTI DP-200 EISA ID
-**   - ioremap is also used for low (<1MB) memory (multi-architecture support)
-**
-** 1.57b -> 1.57c - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-**   - release resources on failure in init_module
-**
-** 1.57 -> 1.57b - Jean II
-**   - fix spinlocks, SMP is now working !
-**
-** 1.56 -> 1.57
-**   - updates for new PCI interface for 2.1 kernels
-**
-** 1.55 -> 1.56
-**   - removed printk in misc. interrupt and update statistics to allow
-**     monitoring of card status
-**   - timing changes in xmit routines, relogin to 100VG hub added when
-**     driver does reset
-**   - included fix for Compex FreedomLine PCI adapter
-**
-** 1.54 -> 1.55
-**   - fixed bad initialization in init_module
-**   - added Compex FreedomLine adapter
-**   - some fixes in card initialization
-**
-** 1.53 -> 1.54
-**   - added hardware multicast filter support (doesn't work)
-**   - little changes in hp100_sense_lan routine
-**     - added support for Coax and AUI (J2970)
-**   - fix for multiple cards and hp100_mode parameter (insmod)
-**   - fix for shared IRQ
-**
-** 1.52 -> 1.53
-**   - fixed bug in multicast support
-**
-*/
-
-#define HP100_DEFAULT_PRIORITY_TX 0
-
-#undef HP100_DEBUG
-#undef HP100_DEBUG_B           /* Trace  */
-#undef HP100_DEBUG_BM          /* Debug busmaster code (PDL stuff) */
-
-#undef HP100_DEBUG_TRAINING    /* Debug login-to-hub procedure */
-#undef HP100_DEBUG_TX
-#undef HP100_DEBUG_IRQ
-#undef HP100_DEBUG_RX
-
-#undef HP100_MULTICAST_FILTER  /* Need to be debugged... */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/eisa.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/spinlock.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/jiffies.h>
-
-#include <asm/io.h>
-
-#include "hp100.h"
-
-/*
- *  defines
- */
-
-#define HP100_BUS_ISA     0
-#define HP100_BUS_EISA    1
-#define HP100_BUS_PCI     2
-
-#define HP100_REGION_SIZE      0x20    /* for ioports */
-#define HP100_SIG_LEN          8       /* same as EISA_SIG_LEN */
-
-#define HP100_MAX_PACKET_SIZE  (1536+4)
-#define HP100_MIN_PACKET_SIZE  60
-
-#ifndef HP100_DEFAULT_RX_RATIO
-/* default - 75% onboard memory on the card are used for RX packets */
-#define HP100_DEFAULT_RX_RATIO 75
-#endif
-
-#ifndef HP100_DEFAULT_PRIORITY_TX
-/* default - don't enable transmit outgoing packets as priority */
-#define HP100_DEFAULT_PRIORITY_TX 0
-#endif
-
-/*
- *  structures
- */
-
-struct hp100_private {
-       spinlock_t lock;
-       char id[HP100_SIG_LEN];
-       u_short chip;
-       u_short soft_model;
-       u_int memory_size;
-       u_int virt_memory_size;
-       u_short rx_ratio;       /* 1 - 99 */
-       u_short priority_tx;    /* != 0 - priority tx */
-       u_short mode;           /* PIO, Shared Mem or Busmaster */
-       u_char bus;
-       struct pci_dev *pci_dev;
-       short mem_mapped;       /* memory mapped access */
-       void __iomem *mem_ptr_virt;     /* virtual memory mapped area, maybe NULL */
-       unsigned long mem_ptr_phys;     /* physical memory mapped area */
-       short lan_type;         /* 10Mb/s, 100Mb/s or -1 (error) */
-       int hub_status;         /* was login to hub successful? */
-       u_char mac1_mode;
-       u_char mac2_mode;
-       u_char hash_bytes[8];
-
-       /* Rings for busmaster mode: */
-       hp100_ring_t *rxrhead;  /* Head (oldest) index into rxring */
-       hp100_ring_t *rxrtail;  /* Tail (newest) index into rxring */
-       hp100_ring_t *txrhead;  /* Head (oldest) index into txring */
-       hp100_ring_t *txrtail;  /* Tail (newest) index into txring */
-
-       hp100_ring_t rxring[MAX_RX_PDL];
-       hp100_ring_t txring[MAX_TX_PDL];
-
-       u_int *page_vaddr_algn; /* Aligned virtual address of allocated page */
-       u_long whatever_offset; /* Offset to bus/phys/dma address */
-       int rxrcommit;          /* # Rx PDLs committed to adapter */
-       int txrcommit;          /* # Tx PDLs committed to adapter */
-};
-
-/*
- *  variables
- */
-#ifdef CONFIG_ISA
-static const char *hp100_isa_tbl[] = {
-       "HWPF150", /* HP J2573 rev A */
-       "HWP1950", /* HP J2573 */
-};
-#endif
-
-static const struct eisa_device_id hp100_eisa_tbl[] = {
-       { "HWPF180" }, /* HP J2577 rev A */
-       { "HWP1920" }, /* HP 27248B */
-       { "HWP1940" }, /* HP J2577 */
-       { "HWP1990" }, /* HP J2577 */
-       { "CPX0301" }, /* ReadyLink ENET100-VG4 */
-       { "CPX0401" }, /* FreedomLine 100/VG */
-       { "" }         /* Mandatory final entry ! */
-};
-MODULE_DEVICE_TABLE(eisa, hp100_eisa_tbl);
-
-static const struct pci_device_id hp100_pci_tbl[] = {
-       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585A, PCI_ANY_ID, PCI_ANY_ID,},
-       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585B, PCI_ANY_ID, PCI_ANY_ID,},
-       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2970A, PCI_ANY_ID, PCI_ANY_ID,},
-       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2973A, PCI_ANY_ID, PCI_ANY_ID,},
-       {PCI_VENDOR_ID_COMPEX, PCI_DEVICE_ID_COMPEX_ENET100VG4, PCI_ANY_ID, PCI_ANY_ID,},
-       {PCI_VENDOR_ID_COMPEX2, PCI_DEVICE_ID_COMPEX2_100VG, PCI_ANY_ID, PCI_ANY_ID,},
-/*     {PCI_VENDOR_ID_KTI, PCI_DEVICE_ID_KTI_DP200, PCI_ANY_ID, PCI_ANY_ID }, */
-       {}                      /* Terminating entry */
-};
-MODULE_DEVICE_TABLE(pci, hp100_pci_tbl);
-
-static int hp100_rx_ratio = HP100_DEFAULT_RX_RATIO;
-static int hp100_priority_tx = HP100_DEFAULT_PRIORITY_TX;
-static int hp100_mode = 1;
-
-module_param(hp100_rx_ratio, int, 0);
-module_param(hp100_priority_tx, int, 0);
-module_param(hp100_mode, int, 0);
-
-/*
- *  prototypes
- */
-
-static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus,
-                       struct pci_dev *pci_dev);
-
-
-static int hp100_open(struct net_device *dev);
-static int hp100_close(struct net_device *dev);
-static netdev_tx_t hp100_start_xmit(struct sk_buff *skb,
-                                   struct net_device *dev);
-static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb,
-                                      struct net_device *dev);
-static void hp100_rx(struct net_device *dev);
-static struct net_device_stats *hp100_get_stats(struct net_device *dev);
-static void hp100_misc_interrupt(struct net_device *dev);
-static void hp100_update_stats(struct net_device *dev);
-static void hp100_clear_stats(struct hp100_private *lp, int ioaddr);
-static void hp100_set_multicast_list(struct net_device *dev);
-static irqreturn_t hp100_interrupt(int irq, void *dev_id);
-static void hp100_start_interface(struct net_device *dev);
-static void hp100_stop_interface(struct net_device *dev);
-static void hp100_load_eeprom(struct net_device *dev, u_short ioaddr);
-static int hp100_sense_lan(struct net_device *dev);
-static int hp100_login_to_vg_hub(struct net_device *dev,
-                                u_short force_relogin);
-static int hp100_down_vg_link(struct net_device *dev);
-static void hp100_cascade_reset(struct net_device *dev, u_short enable);
-static void hp100_BM_shutdown(struct net_device *dev);
-static void hp100_mmuinit(struct net_device *dev);
-static void hp100_init_pdls(struct net_device *dev);
-static int hp100_init_rxpdl(struct net_device *dev,
-                           register hp100_ring_t * ringptr,
-                           register u_int * pdlptr);
-static int hp100_init_txpdl(struct net_device *dev,
-                           register hp100_ring_t * ringptr,
-                           register u_int * pdlptr);
-static void hp100_rxfill(struct net_device *dev);
-static void hp100_hwinit(struct net_device *dev);
-static void hp100_clean_txring(struct net_device *dev);
-#ifdef HP100_DEBUG
-static void hp100_RegisterDump(struct net_device *dev);
-#endif
-
-/* Conversion to new PCI API :
- * Convert an address in a kernel buffer to a bus/phys/dma address.
- * This work *only* for memory fragments part of lp->page_vaddr,
- * because it was properly DMA allocated via pci_alloc_consistent(),
- * so we just need to "retrieve" the original mapping to bus/phys/dma
- * address - Jean II */
-static inline dma_addr_t virt_to_whatever(struct net_device *dev, u32 * ptr)
-{
-       struct hp100_private *lp = netdev_priv(dev);
-       return ((u_long) ptr) + lp->whatever_offset;
-}
-
-static inline u_int pdl_map_data(struct hp100_private *lp, void *data)
-{
-       return pci_map_single(lp->pci_dev, data,
-                             MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE);
-}
-
-/* TODO: This function should not really be needed in a good design... */
-static void wait(void)
-{
-       mdelay(1);
-}
-
-/*
- *  probe functions
- *  These functions should - if possible - avoid doing write operations
- *  since this could cause problems when the card is not installed.
- */
-
-/*
- * Read board id and convert to string.
- * Effectively same code as decode_eisa_sig
- */
-static const char *hp100_read_id(int ioaddr)
-{
-       int i;
-       static char str[HP100_SIG_LEN];
-       unsigned char sig[4], sum;
-        unsigned short rev;
-
-       hp100_page(ID_MAC_ADDR);
-       sum = 0;
-       for (i = 0; i < 4; i++) {
-               sig[i] = hp100_inb(BOARD_ID + i);
-               sum += sig[i];
-       }
-
-       sum += hp100_inb(BOARD_ID + i);
-       if (sum != 0xff)
-               return NULL;    /* bad checksum */
-
-        str[0] = ((sig[0] >> 2) & 0x1f) + ('A' - 1);
-        str[1] = (((sig[0] & 3) << 3) | (sig[1] >> 5)) + ('A' - 1);
-        str[2] = (sig[1] & 0x1f) + ('A' - 1);
-        rev = (sig[2] << 8) | sig[3];
-        sprintf(str + 3, "%04X", rev);
-
-       return str;
-}
-
-#ifdef CONFIG_ISA
-static __init int hp100_isa_probe1(struct net_device *dev, int ioaddr)
-{
-       const char *sig;
-       int i;
-
-       if (!request_region(ioaddr, HP100_REGION_SIZE, "hp100"))
-               goto err;
-
-       if (hp100_inw(HW_ID) != HP100_HW_ID_CASCADE) {
-               release_region(ioaddr, HP100_REGION_SIZE);
-               goto err;
-       }
-
-       sig = hp100_read_id(ioaddr);
-       release_region(ioaddr, HP100_REGION_SIZE);
-
-       if (sig == NULL)
-               goto err;
-
-       for (i = 0; i < ARRAY_SIZE(hp100_isa_tbl); i++) {
-               if (!strcmp(hp100_isa_tbl[i], sig))
-                       break;
-
-       }
-
-       if (i < ARRAY_SIZE(hp100_isa_tbl))
-               return hp100_probe1(dev, ioaddr, HP100_BUS_ISA, NULL);
- err:
-       return -ENODEV;
-
-}
-/*
- * Probe for ISA board.
- * EISA and PCI are handled by device infrastructure.
- */
-
-static int  __init hp100_isa_probe(struct net_device *dev, int addr)
-{
-       int err = -ENODEV;
-
-       /* Probe for a specific ISA address */
-       if (addr > 0xff && addr < 0x400)
-               err = hp100_isa_probe1(dev, addr);
-
-       else if (addr != 0)
-               err = -ENXIO;
-
-       else {
-               /* Probe all ISA possible port regions */
-               for (addr = 0x100; addr < 0x400; addr += 0x20) {
-                       err = hp100_isa_probe1(dev, addr);
-                       if (!err)
-                               break;
-               }
-       }
-       return err;
-}
-#endif /* CONFIG_ISA */
-
-#if !defined(MODULE) && defined(CONFIG_ISA)
-struct net_device * __init hp100_probe(int unit)
-{
-       struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
-       int err;
-
-       if (!dev)
-               return ERR_PTR(-ENODEV);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4200, TRACE);
-       printk("hp100: %s: probe\n", dev->name);
-#endif
-
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
-       err = hp100_isa_probe(dev, dev->base_addr);
-       if (err)
-               goto out;
-
-       return dev;
- out:
-       free_netdev(dev);
-       return ERR_PTR(err);
-}
-#endif /* !MODULE && CONFIG_ISA */
-
-static const struct net_device_ops hp100_bm_netdev_ops = {
-       .ndo_open               = hp100_open,
-       .ndo_stop               = hp100_close,
-       .ndo_start_xmit         = hp100_start_xmit_bm,
-       .ndo_get_stats          = hp100_get_stats,
-       .ndo_set_rx_mode        = hp100_set_multicast_list,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static const struct net_device_ops hp100_netdev_ops = {
-       .ndo_open               = hp100_open,
-       .ndo_stop               = hp100_close,
-       .ndo_start_xmit         = hp100_start_xmit,
-       .ndo_get_stats          = hp100_get_stats,
-       .ndo_set_rx_mode        = hp100_set_multicast_list,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus,
-                       struct pci_dev *pci_dev)
-{
-       int i;
-       int err = -ENODEV;
-       const char *eid;
-       u_int chip;
-       u_char uc;
-       u_int memory_size = 0, virt_memory_size = 0;
-       u_short local_mode, lsw;
-       short mem_mapped;
-       unsigned long mem_ptr_phys;
-       void __iomem *mem_ptr_virt;
-       struct hp100_private *lp;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4201, TRACE);
-       printk("hp100: %s: probe1\n", dev->name);
-#endif
-
-       /* memory region for programmed i/o */
-       if (!request_region(ioaddr, HP100_REGION_SIZE, "hp100"))
-               goto out1;
-
-       if (hp100_inw(HW_ID) != HP100_HW_ID_CASCADE)
-               goto out2;
-
-       chip = hp100_inw(PAGING) & HP100_CHIPID_MASK;
-#ifdef HP100_DEBUG
-       if (chip == HP100_CHIPID_SHASTA)
-               printk("hp100: %s: Shasta Chip detected. (This is a pre 802.12 chip)\n", dev->name);
-       else if (chip == HP100_CHIPID_RAINIER)
-               printk("hp100: %s: Rainier Chip detected. (This is a pre 802.12 chip)\n", dev->name);
-       else if (chip == HP100_CHIPID_LASSEN)
-               printk("hp100: %s: Lassen Chip detected.\n", dev->name);
-       else
-               printk("hp100: %s: Warning: Unknown CASCADE chip (id=0x%.4x).\n", dev->name, chip);
-#endif
-
-       dev->base_addr = ioaddr;
-
-       eid = hp100_read_id(ioaddr);
-       if (eid == NULL) {      /* bad checksum? */
-               printk(KERN_WARNING "%s: bad ID checksum at base port 0x%x\n",
-                      __func__, ioaddr);
-               goto out2;
-       }
-
-       hp100_page(ID_MAC_ADDR);
-       for (i = uc = 0; i < 7; i++)
-               uc += hp100_inb(LAN_ADDR + i);
-       if (uc != 0xff) {
-               printk(KERN_WARNING
-                      "%s: bad lan address checksum at port 0x%x)\n",
-                      __func__, ioaddr);
-               err = -EIO;
-               goto out2;
-       }
-
-       /* Make sure, that all registers are correctly updated... */
-
-       hp100_load_eeprom(dev, ioaddr);
-       wait();
-
-       /*
-        * Determine driver operation mode
-        *
-        * Use the variable "hp100_mode" upon insmod or as kernel parameter to
-        * force driver modes:
-        * hp100_mode=1 -> default, use busmaster mode if configured.
-        * hp100_mode=2 -> enable shared memory mode
-        * hp100_mode=3 -> force use of i/o mapped mode.
-        * hp100_mode=4 -> same as 1, but re-set the enable bit on the card.
-        */
-
-       /*
-        * LSW values:
-        *   0x2278 -> J2585B, PnP shared memory mode
-        *   0x2270 -> J2585B, shared memory mode, 0xdc000
-        *   0xa23c -> J2585B, I/O mapped mode
-        *   0x2240 -> EISA COMPEX, BusMaster (Shasta Chip)
-        *   0x2220 -> EISA HP, I/O (Shasta Chip)
-        *   0x2260 -> EISA HP, BusMaster (Shasta Chip)
-        */
-
-#if 0
-       local_mode = 0x2270;
-       hp100_outw(0xfefe, OPTION_LSW);
-       hp100_outw(local_mode | HP100_SET_LB | HP100_SET_HB, OPTION_LSW);
-#endif
-
-       /* hp100_mode value maybe used in future by another card */
-       local_mode = hp100_mode;
-       if (local_mode < 1 || local_mode > 4)
-               local_mode = 1; /* default */
-#ifdef HP100_DEBUG
-       printk("hp100: %s: original LSW = 0x%x\n", dev->name,
-              hp100_inw(OPTION_LSW));
-#endif
-
-       if (local_mode == 3) {
-               hp100_outw(HP100_MEM_EN | HP100_RESET_LB, OPTION_LSW);
-               hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-               hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
-               printk("hp100: IO mapped mode forced.\n");
-       } else if (local_mode == 2) {
-               hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
-               hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-               hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
-               printk("hp100: Shared memory mode requested.\n");
-       } else if (local_mode == 4) {
-               if (chip == HP100_CHIPID_LASSEN) {
-                       hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_SET_HB, OPTION_LSW);
-                       hp100_outw(HP100_IO_EN | HP100_MEM_EN | HP100_RESET_LB, OPTION_LSW);
-                       printk("hp100: Busmaster mode requested.\n");
-               }
-               local_mode = 1;
-       }
-
-       if (local_mode == 1) {  /* default behaviour */
-               lsw = hp100_inw(OPTION_LSW);
-
-               if ((lsw & HP100_IO_EN) && (~lsw & HP100_MEM_EN) &&
-                   (~lsw & (HP100_BM_WRITE | HP100_BM_READ))) {
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: IO_EN bit is set on card.\n", dev->name);
-#endif
-                       local_mode = 3;
-               } else if (chip == HP100_CHIPID_LASSEN &&
-                          (lsw & (HP100_BM_WRITE | HP100_BM_READ)) == (HP100_BM_WRITE | HP100_BM_READ)) {
-                       /* Conversion to new PCI API :
-                        * I don't have the doc, but I assume that the card
-                        * can map the full 32bit address space.
-                        * Also, we can have EISA Busmaster cards (not tested),
-                        * so beware !!! - Jean II */
-                       if((bus == HP100_BUS_PCI) &&
-                          (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)))) {
-                               /* Gracefully fallback to shared memory */
-                               goto busmasterfail;
-                       }
-                       printk("hp100: Busmaster mode enabled.\n");
-                       hp100_outw(HP100_MEM_EN | HP100_IO_EN | HP100_RESET_LB, OPTION_LSW);
-               } else {
-               busmasterfail:
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: Card not configured for BM or BM not supported with this card.\n", dev->name);
-                       printk("hp100: %s: Trying shared memory mode.\n", dev->name);
-#endif
-                       /* In this case, try shared memory mode */
-                       local_mode = 2;
-                       hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
-                       /* hp100_outw(HP100_IO_EN|HP100_RESET_LB, OPTION_LSW); */
-               }
-       }
-#ifdef HP100_DEBUG
-       printk("hp100: %s: new LSW = 0x%x\n", dev->name, hp100_inw(OPTION_LSW));
-#endif
-
-       /* Check for shared memory on the card, eventually remap it */
-       hp100_page(HW_MAP);
-       mem_mapped = ((hp100_inw(OPTION_LSW) & (HP100_MEM_EN)) != 0);
-       mem_ptr_phys = 0UL;
-       mem_ptr_virt = NULL;
-       memory_size = (8192 << ((hp100_inb(SRAM) >> 5) & 0x07));
-       virt_memory_size = 0;
-
-       /* For memory mapped or busmaster mode, we want the memory address */
-       if (mem_mapped || (local_mode == 1)) {
-               mem_ptr_phys = (hp100_inw(MEM_MAP_LSW) | (hp100_inw(MEM_MAP_MSW) << 16));
-               mem_ptr_phys &= ~0x1fff;        /* 8k alignment */
-
-               if (bus == HP100_BUS_ISA && (mem_ptr_phys & ~0xfffff) != 0) {
-                       printk("hp100: Can only use programmed i/o mode.\n");
-                       mem_ptr_phys = 0;
-                       mem_mapped = 0;
-                       local_mode = 3; /* Use programmed i/o */
-               }
-
-               /* We do not need access to shared memory in busmaster mode */
-               /* However in slave mode we need to remap high (>1GB) card memory  */
-               if (local_mode != 1) {  /* = not busmaster */
-                       /* We try with smaller memory sizes, if ioremap fails */
-                       for (virt_memory_size = memory_size; virt_memory_size > 16383; virt_memory_size >>= 1) {
-                               if ((mem_ptr_virt = ioremap((u_long) mem_ptr_phys, virt_memory_size)) == NULL) {
-#ifdef HP100_DEBUG
-                                       printk("hp100: %s: ioremap for 0x%x bytes high PCI memory at 0x%lx failed\n", dev->name, virt_memory_size, mem_ptr_phys);
-#endif
-                               } else {
-#ifdef HP100_DEBUG
-                                       printk("hp100: %s: remapped 0x%x bytes high PCI memory at 0x%lx to %p.\n", dev->name, virt_memory_size, mem_ptr_phys, mem_ptr_virt);
-#endif
-                                       break;
-                               }
-                       }
-
-                       if (mem_ptr_virt == NULL) {     /* all ioremap tries failed */
-                               printk("hp100: Failed to ioremap the PCI card memory. Will have to use i/o mapped mode.\n");
-                               local_mode = 3;
-                               virt_memory_size = 0;
-                       }
-               }
-       }
-
-       if (local_mode == 3) {  /* io mapped forced */
-               mem_mapped = 0;
-               mem_ptr_phys = 0;
-               mem_ptr_virt = NULL;
-               printk("hp100: Using (slow) programmed i/o mode.\n");
-       }
-
-       /* Initialise the "private" data structure for this card. */
-       lp = netdev_priv(dev);
-
-       spin_lock_init(&lp->lock);
-       strlcpy(lp->id, eid, HP100_SIG_LEN);
-       lp->chip = chip;
-       lp->mode = local_mode;
-       lp->bus = bus;
-       lp->pci_dev = pci_dev;
-       lp->priority_tx = hp100_priority_tx;
-       lp->rx_ratio = hp100_rx_ratio;
-       lp->mem_ptr_phys = mem_ptr_phys;
-       lp->mem_ptr_virt = mem_ptr_virt;
-       hp100_page(ID_MAC_ADDR);
-       lp->soft_model = hp100_inb(SOFT_MODEL);
-       lp->mac1_mode = HP100_MAC1MODE3;
-       lp->mac2_mode = HP100_MAC2MODE3;
-       memset(&lp->hash_bytes, 0x00, 8);
-
-       dev->base_addr = ioaddr;
-
-       lp->memory_size = memory_size;
-       lp->virt_memory_size = virt_memory_size;
-       lp->rx_ratio = hp100_rx_ratio;  /* can be conf'd with insmod */
-
-       if (lp->mode == 1)      /* busmaster */
-               dev->netdev_ops = &hp100_bm_netdev_ops;
-       else
-               dev->netdev_ops = &hp100_netdev_ops;
-
-       /* Ask the card for which IRQ line it is configured */
-       if (bus == HP100_BUS_PCI) {
-               dev->irq = pci_dev->irq;
-       } else {
-               hp100_page(HW_MAP);
-               dev->irq = hp100_inb(IRQ_CHANNEL) & HP100_IRQMASK;
-               if (dev->irq == 2)
-                       dev->irq = 9;
-       }
-
-       if (lp->mode == 1)      /* busmaster */
-               dev->dma = 4;
-
-       /* Ask the card for its MAC address and store it for later use. */
-       hp100_page(ID_MAC_ADDR);
-       for (i = uc = 0; i < 6; i++)
-               dev->dev_addr[i] = hp100_inb(LAN_ADDR + i);
-
-       /* Reset statistics (counters) */
-       hp100_clear_stats(lp, ioaddr);
-
-       /* If busmaster mode is wanted, a dma-capable memory area is needed for
-        * the rx and tx PDLs
-        * PCI cards can access the whole PC memory. Therefore GFP_DMA is not
-        * needed for the allocation of the memory area.
-        */
-
-       /* TODO: We do not need this with old cards, where PDLs are stored
-        * in the cards shared memory area. But currently, busmaster has been
-        * implemented/tested only with the lassen chip anyway... */
-       if (lp->mode == 1) {    /* busmaster */
-               dma_addr_t page_baddr;
-               /* Get physically continuous memory for TX & RX PDLs    */
-               /* Conversion to new PCI API :
-                * Pages are always aligned and zeroed, no need to it ourself.
-                * Doc says should be OK for EISA bus as well - Jean II */
-               lp->page_vaddr_algn = pci_alloc_consistent(lp->pci_dev, MAX_RINGSIZE, &page_baddr);
-               if (!lp->page_vaddr_algn) {
-                       err = -ENOMEM;
-                       goto out_mem_ptr;
-               }
-               lp->whatever_offset = ((u_long) page_baddr) - ((u_long) lp->page_vaddr_algn);
-
-#ifdef HP100_DEBUG_BM
-               printk("hp100: %s: Reserved DMA memory from 0x%x to 0x%x\n", dev->name, (u_int) lp->page_vaddr_algn, (u_int) lp->page_vaddr_algn + MAX_RINGSIZE);
-#endif
-               lp->rxrcommit = lp->txrcommit = 0;
-               lp->rxrhead = lp->rxrtail = &(lp->rxring[0]);
-               lp->txrhead = lp->txrtail = &(lp->txring[0]);
-       }
-
-       /* Initialise the card. */
-       /* (I'm not really sure if it's a good idea to do this during probing, but
-        * like this it's assured that the lan connection type can be sensed
-        * correctly)
-        */
-       hp100_hwinit(dev);
-
-       /* Try to find out which kind of LAN the card is connected to. */
-       lp->lan_type = hp100_sense_lan(dev);
-
-       /* Print out a message what about what we think we have probed. */
-       printk("hp100: at 0x%x, IRQ %d, ", ioaddr, dev->irq);
-       switch (bus) {
-       case HP100_BUS_EISA:
-               printk("EISA");
-               break;
-       case HP100_BUS_PCI:
-               printk("PCI");
-               break;
-       default:
-               printk("ISA");
-               break;
-       }
-       printk(" bus, %dk SRAM (rx/tx %d%%).\n", lp->memory_size >> 10, lp->rx_ratio);
-
-       if (lp->mode == 2) {    /* memory mapped */
-               printk("hp100: Memory area at 0x%lx-0x%lx", mem_ptr_phys,
-                               (mem_ptr_phys + (mem_ptr_phys > 0x100000 ? (u_long) lp->memory_size : 16 * 1024)) - 1);
-               if (mem_ptr_virt)
-                       printk(" (virtual base %p)", mem_ptr_virt);
-               printk(".\n");
-
-               /* Set for info when doing ifconfig */
-               dev->mem_start = mem_ptr_phys;
-               dev->mem_end = mem_ptr_phys + lp->memory_size;
-       }
-
-       printk("hp100: ");
-       if (lp->lan_type != HP100_LAN_ERR)
-               printk("Adapter is attached to ");
-       switch (lp->lan_type) {
-       case HP100_LAN_100:
-               printk("100Mb/s Voice Grade AnyLAN network.\n");
-               break;
-       case HP100_LAN_10:
-               printk("10Mb/s network (10baseT).\n");
-               break;
-       case HP100_LAN_COAX:
-               printk("10Mb/s network (coax).\n");
-               break;
-       default:
-               printk("Warning! Link down.\n");
-       }
-
-       err = register_netdev(dev);
-       if (err)
-               goto out3;
-
-       return 0;
-out3:
-       if (local_mode == 1)
-               pci_free_consistent(lp->pci_dev, MAX_RINGSIZE + 0x0f,
-                                   lp->page_vaddr_algn,
-                                   virt_to_whatever(dev, lp->page_vaddr_algn));
-out_mem_ptr:
-       if (mem_ptr_virt)
-               iounmap(mem_ptr_virt);
-out2:
-       release_region(ioaddr, HP100_REGION_SIZE);
-out1:
-       return err;
-}
-
-/* This procedure puts the card into a stable init state */
-static void hp100_hwinit(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4202, TRACE);
-       printk("hp100: %s: hwinit\n", dev->name);
-#endif
-
-       /* Initialise the card. -------------------------------------------- */
-
-       /* Clear all pending Ints and disable Ints */
-       hp100_page(PERFORMANCE);
-       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
-       hp100_outw(0xffff, IRQ_STATUS); /* clear all pending ints */
-
-       hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-       hp100_outw(HP100_TRI_INT | HP100_SET_HB, OPTION_LSW);
-
-       if (lp->mode == 1) {
-               hp100_BM_shutdown(dev); /* disables BM, puts cascade in reset */
-               wait();
-       } else {
-               hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-               hp100_cascade_reset(dev, 1);
-               hp100_page(MAC_CTRL);
-               hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);
-       }
-
-       /* Initiate EEPROM reload */
-       hp100_load_eeprom(dev, 0);
-
-       wait();
-
-       /* Go into reset again. */
-       hp100_cascade_reset(dev, 1);
-
-       /* Set Option Registers to a safe state  */
-       hp100_outw(HP100_DEBUG_EN |
-                  HP100_RX_HDR |
-                  HP100_EE_EN |
-                  HP100_BM_WRITE |
-                  HP100_BM_READ | HP100_RESET_HB |
-                  HP100_FAKE_INT |
-                  HP100_INT_EN |
-                  HP100_MEM_EN |
-                  HP100_IO_EN | HP100_RESET_LB, OPTION_LSW);
-
-       hp100_outw(HP100_TRI_INT |
-                  HP100_MMAP_DIS | HP100_SET_HB, OPTION_LSW);
-
-       hp100_outb(HP100_PRIORITY_TX |
-                  HP100_ADV_NXT_PKT |
-                  HP100_TX_CMD | HP100_RESET_LB, OPTION_MSW);
-
-       /* TODO: Configure MMU for Ram Test. */
-       /* TODO: Ram Test. */
-
-       /* Re-check if adapter is still at same i/o location      */
-       /* (If the base i/o in eeprom has been changed but the    */
-       /* registers had not been changed, a reload of the eeprom */
-       /* would move the adapter to the address stored in eeprom */
-
-       /* TODO: Code to implement. */
-
-       /* Until here it was code from HWdiscover procedure. */
-       /* Next comes code from mmuinit procedure of SCO BM driver which is
-        * called from HWconfigure in the SCO driver.  */
-
-       /* Initialise MMU, eventually switch on Busmaster Mode, initialise
-        * multicast filter...
-        */
-       hp100_mmuinit(dev);
-
-       /* We don't turn the interrupts on here - this is done by start_interface. */
-       wait();                 /* TODO: Do we really need this? */
-
-       /* Enable Hardware (e.g. unreset) */
-       hp100_cascade_reset(dev, 0);
-
-       /* ------- initialisation complete ----------- */
-
-       /* Finally try to log in the Hub if there may be a VG connection. */
-       if ((lp->lan_type == HP100_LAN_100) || (lp->lan_type == HP100_LAN_ERR))
-               hp100_login_to_vg_hub(dev, 0);  /* relogin */
-
-}
-
-
-/*
- * mmuinit - Reinitialise Cascade MMU and MAC settings.
- * Note: Must already be in reset and leaves card in reset.
- */
-static void hp100_mmuinit(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-       int i;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4203, TRACE);
-       printk("hp100: %s: mmuinit\n", dev->name);
-#endif
-
-#ifdef HP100_DEBUG
-       if (0 != (hp100_inw(OPTION_LSW) & HP100_HW_RST)) {
-               printk("hp100: %s: Not in reset when entering mmuinit. Fix me.\n", dev->name);
-               return;
-       }
-#endif
-
-       /* Make sure IRQs are masked off and ack'ed. */
-       hp100_page(PERFORMANCE);
-       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
-       hp100_outw(0xffff, IRQ_STATUS); /* ack IRQ */
-
-       /*
-        * Enable Hardware
-        * - Clear Debug En, Rx Hdr Pipe, EE En, I/O En, Fake Int and Intr En
-        * - Set Tri-State Int, Bus Master Rd/Wr, and Mem Map Disable
-        * - Clear Priority, Advance Pkt and Xmit Cmd
-        */
-
-       hp100_outw(HP100_DEBUG_EN |
-                  HP100_RX_HDR |
-                  HP100_EE_EN | HP100_RESET_HB |
-                  HP100_IO_EN |
-                  HP100_FAKE_INT |
-                  HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-
-       hp100_outw(HP100_TRI_INT | HP100_SET_HB, OPTION_LSW);
-
-       if (lp->mode == 1) {    /* busmaster */
-               hp100_outw(HP100_BM_WRITE |
-                          HP100_BM_READ |
-                          HP100_MMAP_DIS | HP100_SET_HB, OPTION_LSW);
-       } else if (lp->mode == 2) {     /* memory mapped */
-               hp100_outw(HP100_BM_WRITE |
-                          HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
-               hp100_outw(HP100_MMAP_DIS | HP100_RESET_HB, OPTION_LSW);
-               hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
-               hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-       } else if (lp->mode == 3) {     /* i/o mapped mode */
-               hp100_outw(HP100_MMAP_DIS | HP100_SET_HB |
-                          HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-       }
-
-       hp100_page(HW_MAP);
-       hp100_outb(0, EARLYRXCFG);
-       hp100_outw(0, EARLYTXCFG);
-
-       /*
-        * Enable Bus Master mode
-        */
-       if (lp->mode == 1) {    /* busmaster */
-               /* Experimental: Set some PCI configuration bits */
-               hp100_page(HW_MAP);
-               hp100_andb(~HP100_PDL_USE3, MODECTRL1); /* BM engine read maximum */
-               hp100_andb(~HP100_TX_DUALQ, MODECTRL1); /* No Queue for Priority TX */
-
-               /* PCI Bus failures should result in a Misc. Interrupt */
-               hp100_orb(HP100_EN_BUS_FAIL, MODECTRL2);
-
-               hp100_outw(HP100_BM_READ | HP100_BM_WRITE | HP100_SET_HB, OPTION_LSW);
-               hp100_page(HW_MAP);
-               /* Use Burst Mode and switch on PAGE_CK */
-               hp100_orb(HP100_BM_BURST_RD | HP100_BM_BURST_WR, BM);
-               if ((lp->chip == HP100_CHIPID_RAINIER) || (lp->chip == HP100_CHIPID_SHASTA))
-                       hp100_orb(HP100_BM_PAGE_CK, BM);
-               hp100_orb(HP100_BM_MASTER, BM);
-       } else {                /* not busmaster */
-
-               hp100_page(HW_MAP);
-               hp100_andb(~HP100_BM_MASTER, BM);
-       }
-
-       /*
-        * Divide card memory into regions for Rx, Tx and, if non-ETR chip, PDLs
-        */
-       hp100_page(MMU_CFG);
-       if (lp->mode == 1) {    /* only needed for Busmaster */
-               int xmit_stop, recv_stop;
-
-               if ((lp->chip == HP100_CHIPID_RAINIER) ||
-                   (lp->chip == HP100_CHIPID_SHASTA)) {
-                       int pdl_stop;
-
-                       /*
-                        * Each pdl is 508 bytes long. (63 frags * 4 bytes for address and
-                        * 4 bytes for header). We will leave NUM_RXPDLS * 508 (rounded
-                        * to the next higher 1k boundary) bytes for the rx-pdl's
-                        * Note: For non-etr chips the transmit stop register must be
-                        * programmed on a 1k boundary, i.e. bits 9:0 must be zero.
-                        */
-                       pdl_stop = lp->memory_size;
-                       xmit_stop = (pdl_stop - 508 * (MAX_RX_PDL) - 16) & ~(0x03ff);
-                       recv_stop = (xmit_stop * (lp->rx_ratio) / 100) & ~(0x03ff);
-                       hp100_outw((pdl_stop >> 4) - 1, PDL_MEM_STOP);
-#ifdef HP100_DEBUG_BM
-                       printk("hp100: %s: PDL_STOP = 0x%x\n", dev->name, pdl_stop);
-#endif
-               } else {
-                       /* ETR chip (Lassen) in busmaster mode */
-                       xmit_stop = (lp->memory_size) - 1;
-                       recv_stop = ((lp->memory_size * lp->rx_ratio) / 100) & ~(0x03ff);
-               }
-
-               hp100_outw(xmit_stop >> 4, TX_MEM_STOP);
-               hp100_outw(recv_stop >> 4, RX_MEM_STOP);
-#ifdef HP100_DEBUG_BM
-               printk("hp100: %s: TX_STOP  = 0x%x\n", dev->name, xmit_stop >> 4);
-               printk("hp100: %s: RX_STOP  = 0x%x\n", dev->name, recv_stop >> 4);
-#endif
-       } else {
-               /* Slave modes (memory mapped and programmed io)  */
-               hp100_outw((((lp->memory_size * lp->rx_ratio) / 100) >> 4), RX_MEM_STOP);
-               hp100_outw(((lp->memory_size - 1) >> 4), TX_MEM_STOP);
-#ifdef HP100_DEBUG
-               printk("hp100: %s: TX_MEM_STOP: 0x%x\n", dev->name, hp100_inw(TX_MEM_STOP));
-               printk("hp100: %s: RX_MEM_STOP: 0x%x\n", dev->name, hp100_inw(RX_MEM_STOP));
-#endif
-       }
-
-       /* Write MAC address into page 1 */
-       hp100_page(MAC_ADDRESS);
-       for (i = 0; i < 6; i++)
-               hp100_outb(dev->dev_addr[i], MAC_ADDR + i);
-
-       /* Zero the multicast hash registers */
-       for (i = 0; i < 8; i++)
-               hp100_outb(0x0, HASH_BYTE0 + i);
-
-       /* Set up MAC defaults */
-       hp100_page(MAC_CTRL);
-
-       /* Go to LAN Page and zero all filter bits */
-       /* Zero accept error, accept multicast, accept broadcast and accept */
-       /* all directed packet bits */
-       hp100_andb(~(HP100_RX_EN |
-                    HP100_TX_EN |
-                    HP100_ACC_ERRORED |
-                    HP100_ACC_MC |
-                    HP100_ACC_BC | HP100_ACC_PHY), MAC_CFG_1);
-
-       hp100_outb(0x00, MAC_CFG_2);
-
-       /* Zero the frame format bit. This works around a training bug in the */
-       /* new hubs. */
-       hp100_outb(0x00, VG_LAN_CFG_2); /* (use 802.3) */
-
-       if (lp->priority_tx)
-               hp100_outb(HP100_PRIORITY_TX | HP100_SET_LB, OPTION_MSW);
-       else
-               hp100_outb(HP100_PRIORITY_TX | HP100_RESET_LB, OPTION_MSW);
-
-       hp100_outb(HP100_ADV_NXT_PKT |
-                  HP100_TX_CMD | HP100_RESET_LB, OPTION_MSW);
-
-       /* If busmaster, initialize the PDLs */
-       if (lp->mode == 1)
-               hp100_init_pdls(dev);
-
-       /* Go to performance page and initialize isr and imr registers */
-       hp100_page(PERFORMANCE);
-       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
-       hp100_outw(0xffff, IRQ_STATUS); /* ack IRQ */
-}
-
-/*
- *  open/close functions
- */
-
-static int hp100_open(struct net_device *dev)
-{
-       struct hp100_private *lp = netdev_priv(dev);
-#ifdef HP100_DEBUG_B
-       int ioaddr = dev->base_addr;
-#endif
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4204, TRACE);
-       printk("hp100: %s: open\n", dev->name);
-#endif
-
-       /* New: if bus is PCI or EISA, interrupts might be shared interrupts */
-       if (request_irq(dev->irq, hp100_interrupt,
-                       lp->bus == HP100_BUS_PCI || lp->bus ==
-                       HP100_BUS_EISA ? IRQF_SHARED : 0,
-                       dev->name, dev)) {
-               printk("hp100: %s: unable to get IRQ %d\n", dev->name, dev->irq);
-               return -EAGAIN;
-       }
-
-       netif_trans_update(dev); /* prevent tx timeout */
-       netif_start_queue(dev);
-
-       lp->lan_type = hp100_sense_lan(dev);
-       lp->mac1_mode = HP100_MAC1MODE3;
-       lp->mac2_mode = HP100_MAC2MODE3;
-       memset(&lp->hash_bytes, 0x00, 8);
-
-       hp100_stop_interface(dev);
-
-       hp100_hwinit(dev);
-
-       hp100_start_interface(dev);     /* sets mac modes, enables interrupts */
-
-       return 0;
-}
-
-/* The close function is called when the interface is to be brought down */
-static int hp100_close(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4205, TRACE);
-       printk("hp100: %s: close\n", dev->name);
-#endif
-
-       hp100_page(PERFORMANCE);
-       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all IRQs */
-
-       hp100_stop_interface(dev);
-
-       if (lp->lan_type == HP100_LAN_100)
-               lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-
-       netif_stop_queue(dev);
-
-       free_irq(dev->irq, dev);
-
-#ifdef HP100_DEBUG
-       printk("hp100: %s: close LSW = 0x%x\n", dev->name,
-              hp100_inw(OPTION_LSW));
-#endif
-
-       return 0;
-}
-
-
-/*
- * Configure the PDL Rx rings and LAN
- */
-static void hp100_init_pdls(struct net_device *dev)
-{
-       struct hp100_private *lp = netdev_priv(dev);
-       hp100_ring_t *ringptr;
-       u_int *pageptr;         /* Warning : increment by 4 - Jean II */
-       int i;
-
-#ifdef HP100_DEBUG_B
-       int ioaddr = dev->base_addr;
-#endif
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4206, TRACE);
-       printk("hp100: %s: init pdls\n", dev->name);
-#endif
-
-       if (!lp->page_vaddr_algn)
-               printk("hp100: %s: Warning: lp->page_vaddr_algn not initialised!\n", dev->name);
-       else {
-               /* pageptr shall point into the DMA accessible memory region  */
-               /* we use this pointer to status the upper limit of allocated */
-               /* memory in the allocated page. */
-               /* note: align the pointers to the pci cache line size */
-               memset(lp->page_vaddr_algn, 0, MAX_RINGSIZE);   /* Zero  Rx/Tx ring page */
-               pageptr = lp->page_vaddr_algn;
-
-               lp->rxrcommit = 0;
-               ringptr = lp->rxrhead = lp->rxrtail = &(lp->rxring[0]);
-
-               /* Initialise Rx Ring */
-               for (i = MAX_RX_PDL - 1; i >= 0; i--) {
-                       lp->rxring[i].next = ringptr;
-                       ringptr = &(lp->rxring[i]);
-                       pageptr += hp100_init_rxpdl(dev, ringptr, pageptr);
-               }
-
-               /* Initialise Tx Ring */
-               lp->txrcommit = 0;
-               ringptr = lp->txrhead = lp->txrtail = &(lp->txring[0]);
-               for (i = MAX_TX_PDL - 1; i >= 0; i--) {
-                       lp->txring[i].next = ringptr;
-                       ringptr = &(lp->txring[i]);
-                       pageptr += hp100_init_txpdl(dev, ringptr, pageptr);
-               }
-       }
-}
-
-
-/* These functions "format" the entries in the pdl structure   */
-/* They return how much memory the fragments need.            */
-static int hp100_init_rxpdl(struct net_device *dev,
-                           register hp100_ring_t * ringptr,
-                           register u32 * pdlptr)
-{
-       /* pdlptr is starting address for this pdl */
-
-       if (0 != (((unsigned long) pdlptr) & 0xf))
-               printk("hp100: %s: Init rxpdl: Unaligned pdlptr 0x%lx.\n",
-                      dev->name, (unsigned long) pdlptr);
-
-       ringptr->pdl = pdlptr + 1;
-       ringptr->pdl_paddr = virt_to_whatever(dev, pdlptr + 1);
-       ringptr->skb = NULL;
-
-       /*
-        * Write address and length of first PDL Fragment (which is used for
-        * storing the RX-Header
-        * We use the 4 bytes _before_ the PDH in the pdl memory area to
-        * store this information. (PDH is at offset 0x04)
-        */
-       /* Note that pdlptr+1 and not pdlptr is the pointer to the PDH */
-
-       *(pdlptr + 2) = (u_int) virt_to_whatever(dev, pdlptr);  /* Address Frag 1 */
-       *(pdlptr + 3) = 4;      /* Length  Frag 1 */
-
-       return roundup(MAX_RX_FRAG * 2 + 2, 4);
-}
-
-
-static int hp100_init_txpdl(struct net_device *dev,
-                           register hp100_ring_t * ringptr,
-                           register u32 * pdlptr)
-{
-       if (0 != (((unsigned long) pdlptr) & 0xf))
-               printk("hp100: %s: Init txpdl: Unaligned pdlptr 0x%lx.\n", dev->name, (unsigned long) pdlptr);
-
-       ringptr->pdl = pdlptr;  /* +1; */
-       ringptr->pdl_paddr = virt_to_whatever(dev, pdlptr);     /* +1 */
-       ringptr->skb = NULL;
-
-       return roundup(MAX_TX_FRAG * 2 + 2, 4);
-}
-
-/*
- * hp100_build_rx_pdl allocates an skb_buff of maximum size plus two bytes
- * for possible odd word alignment rounding up to next dword and set PDL
- * address for fragment#2
- * Returns: 0 if unable to allocate skb_buff
- *          1 if successful
- */
-static int hp100_build_rx_pdl(hp100_ring_t * ringptr,
-                             struct net_device *dev)
-{
-#ifdef HP100_DEBUG_B
-       int ioaddr = dev->base_addr;
-#endif
-#ifdef HP100_DEBUG_BM
-       u_int *p;
-#endif
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4207, TRACE);
-       printk("hp100: %s: build rx pdl\n", dev->name);
-#endif
-
-       /* Allocate skb buffer of maximum size */
-       /* Note: This depends on the alloc_skb functions allocating more
-        * space than requested, i.e. aligning to 16bytes */
-
-       ringptr->skb = netdev_alloc_skb(dev, roundup(MAX_ETHER_SIZE + 2, 4));
-
-       if (NULL != ringptr->skb) {
-               /*
-                * Reserve 2 bytes at the head of the buffer to land the IP header
-                * on a long word boundary (According to the Network Driver section
-                * in the Linux KHG, this should help to increase performance.)
-                */
-               skb_reserve(ringptr->skb, 2);
-
-               ringptr->skb->data = skb_put(ringptr->skb, MAX_ETHER_SIZE);
-
-               /* ringptr->pdl points to the beginning of the PDL, i.e. the PDH */
-               /* Note: 1st Fragment is used for the 4 byte packet status
-                * (receive header). Its PDL entries are set up by init_rxpdl. So
-                * here we only have to set up the PDL fragment entries for the data
-                * part. Those 4 bytes will be stored in the DMA memory region
-                * directly before the PDL.
-                */
-#ifdef HP100_DEBUG_BM
-               printk("hp100: %s: build_rx_pdl: PDH@0x%x, skb->data (len %d) at 0x%x\n",
-                                    dev->name, (u_int) ringptr->pdl,
-                                    roundup(MAX_ETHER_SIZE + 2, 4),
-                                    (unsigned int) ringptr->skb->data);
-#endif
-
-               /* Conversion to new PCI API : map skbuf data to PCI bus.
-                * Doc says it's OK for EISA as well - Jean II */
-               ringptr->pdl[0] = 0x00020000;   /* Write PDH */
-               ringptr->pdl[3] = pdl_map_data(netdev_priv(dev),
-                                              ringptr->skb->data);
-               ringptr->pdl[4] = MAX_ETHER_SIZE;       /* Length of Data */
-
-#ifdef HP100_DEBUG_BM
-               for (p = (ringptr->pdl); p < (ringptr->pdl + 5); p++)
-                       printk("hp100: %s: Adr 0x%.8x = 0x%.8x\n", dev->name, (u_int) p, (u_int) * p);
-#endif
-               return 1;
-       }
-       /* else: */
-       /* alloc_skb failed (no memory) -> still can receive the header
-        * fragment into PDL memory. make PDL safe by clearing msgptr and
-        * making the PDL only 1 fragment (i.e. the 4 byte packet status)
-        */
-#ifdef HP100_DEBUG_BM
-       printk("hp100: %s: build_rx_pdl: PDH@0x%x, No space for skb.\n", dev->name, (u_int) ringptr->pdl);
-#endif
-
-       ringptr->pdl[0] = 0x00010000;   /* PDH: Count=1 Fragment */
-
-       return 0;
-}
-
-/*
- *  hp100_rxfill - attempt to fill the Rx Ring will empty skb's
- *
- * Makes assumption that skb's are always contiguous memory areas and
- * therefore PDLs contain only 2 physical fragments.
- * -  While the number of Rx PDLs with buffers is less than maximum
- *      a.  Get a maximum packet size skb
- *      b.  Put the physical address of the buffer into the PDL.
- *      c.  Output physical address of PDL to adapter.
- */
-static void hp100_rxfill(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-
-       struct hp100_private *lp = netdev_priv(dev);
-       hp100_ring_t *ringptr;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4208, TRACE);
-       printk("hp100: %s: rxfill\n", dev->name);
-#endif
-
-       hp100_page(PERFORMANCE);
-
-       while (lp->rxrcommit < MAX_RX_PDL) {
-               /*
-                  ** Attempt to get a buffer and build a Rx PDL.
-                */
-               ringptr = lp->rxrtail;
-               if (0 == hp100_build_rx_pdl(ringptr, dev)) {
-                       return; /* None available, return */
-               }
-
-               /* Hand this PDL over to the card */
-               /* Note: This needs performance page selected! */
-#ifdef HP100_DEBUG_BM
-               printk("hp100: %s: rxfill: Hand to card: pdl #%d @0x%x phys:0x%x, buffer: 0x%x\n",
-                                    dev->name, lp->rxrcommit, (u_int) ringptr->pdl,
-                                    (u_int) ringptr->pdl_paddr, (u_int) ringptr->pdl[3]);
-#endif
-
-               hp100_outl((u32) ringptr->pdl_paddr, RX_PDA);
-
-               lp->rxrcommit += 1;
-               lp->rxrtail = ringptr->next;
-       }
-}
-
-/*
- * BM_shutdown - shutdown bus mastering and leave chip in reset state
- */
-
-static void hp100_BM_shutdown(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-       unsigned long time;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4209, TRACE);
-       printk("hp100: %s: bm shutdown\n", dev->name);
-#endif
-
-       hp100_page(PERFORMANCE);
-       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
-       hp100_outw(0xffff, IRQ_STATUS); /* Ack all ints */
-
-       /* Ensure Interrupts are off */
-       hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-
-       /* Disable all MAC activity */
-       hp100_page(MAC_CTRL);
-       hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);    /* stop rx/tx */
-
-       /* If cascade MMU is not already in reset */
-       if (0 != (hp100_inw(OPTION_LSW) & HP100_HW_RST)) {
-               /* Wait 1.3ms (10Mb max packet time) to ensure MAC is idle so
-                * MMU pointers will not be reset out from underneath
-                */
-               hp100_page(MAC_CTRL);
-               for (time = 0; time < 5000; time++) {
-                       if ((hp100_inb(MAC_CFG_1) & (HP100_TX_IDLE | HP100_RX_IDLE)) == (HP100_TX_IDLE | HP100_RX_IDLE))
-                               break;
-               }
-
-               /* Shutdown algorithm depends on the generation of Cascade */
-               if (lp->chip == HP100_CHIPID_LASSEN) {  /* ETR shutdown/reset */
-                       /* Disable Busmaster mode and wait for bit to go to zero. */
-                       hp100_page(HW_MAP);
-                       hp100_andb(~HP100_BM_MASTER, BM);
-                       /* 100 ms timeout */
-                       for (time = 0; time < 32000; time++) {
-                               if (0 == (hp100_inb(BM) & HP100_BM_MASTER))
-                                       break;
-                       }
-               } else {        /* Shasta or Rainier Shutdown/Reset */
-                       /* To ensure all bus master inloading activity has ceased,
-                        * wait for no Rx PDAs or no Rx packets on card.
-                        */
-                       hp100_page(PERFORMANCE);
-                       /* 100 ms timeout */
-                       for (time = 0; time < 10000; time++) {
-                               /* RX_PDL: PDLs not executed. */
-                               /* RX_PKT_CNT: RX'd packets on card. */
-                               if ((hp100_inb(RX_PDL) == 0) && (hp100_inb(RX_PKT_CNT) == 0))
-                                       break;
-                       }
-
-                       if (time >= 10000)
-                               printk("hp100: %s: BM shutdown error.\n", dev->name);
-
-                       /* To ensure all bus master outloading activity has ceased,
-                        * wait until the Tx PDA count goes to zero or no more Tx space
-                        * available in the Tx region of the card.
-                        */
-                       /* 100 ms timeout */
-                       for (time = 0; time < 10000; time++) {
-                               if ((0 == hp100_inb(TX_PKT_CNT)) &&
-                                   (0 != (hp100_inb(TX_MEM_FREE) & HP100_AUTO_COMPARE)))
-                                       break;
-                       }
-
-                       /* Disable Busmaster mode */
-                       hp100_page(HW_MAP);
-                       hp100_andb(~HP100_BM_MASTER, BM);
-               }       /* end of shutdown procedure for non-etr parts */
-
-               hp100_cascade_reset(dev, 1);
-       }
-       hp100_page(PERFORMANCE);
-       /* hp100_outw( HP100_BM_READ | HP100_BM_WRITE | HP100_RESET_HB, OPTION_LSW ); */
-       /* Busmaster mode should be shut down now. */
-}
-
-static int hp100_check_lan(struct net_device *dev)
-{
-       struct hp100_private *lp = netdev_priv(dev);
-
-       if (lp->lan_type < 0) { /* no LAN type detected yet? */
-               hp100_stop_interface(dev);
-               if ((lp->lan_type = hp100_sense_lan(dev)) < 0) {
-                       printk("hp100: %s: no connection found - check wire\n", dev->name);
-                       hp100_start_interface(dev);     /* 10Mb/s RX packets maybe handled */
-                       return -EIO;
-               }
-               if (lp->lan_type == HP100_LAN_100)
-                       lp->hub_status = hp100_login_to_vg_hub(dev, 0); /* relogin */
-               hp100_start_interface(dev);
-       }
-       return 0;
-}
-
-/*
- *  transmit functions
- */
-
-/* tx function for busmaster mode */
-static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb,
-                                      struct net_device *dev)
-{
-       unsigned long flags;
-       int i, ok_flag;
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-       hp100_ring_t *ringptr;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4210, TRACE);
-       printk("hp100: %s: start_xmit_bm\n", dev->name);
-#endif
-       if (skb->len <= 0)
-               goto drop;
-
-       if (lp->chip == HP100_CHIPID_SHASTA && skb_padto(skb, ETH_ZLEN))
-               return NETDEV_TX_OK;
-
-       /* Get Tx ring tail pointer */
-       if (lp->txrtail->next == lp->txrhead) {
-               /* No memory. */
-#ifdef HP100_DEBUG
-               printk("hp100: %s: start_xmit_bm: No TX PDL available.\n", dev->name);
-#endif
-               /* not waited long enough since last tx? */
-               if (time_before(jiffies, dev_trans_start(dev) + HZ))
-                       goto drop;
-
-               if (hp100_check_lan(dev))
-                       goto drop;
-
-               if (lp->lan_type == HP100_LAN_100 && lp->hub_status < 0) {
-                       /* we have a 100Mb/s adapter but it isn't connected to hub */
-                       printk("hp100: %s: login to 100Mb/s hub retry\n", dev->name);
-                       hp100_stop_interface(dev);
-                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-                       hp100_start_interface(dev);
-               } else {
-                       spin_lock_irqsave(&lp->lock, flags);
-                       hp100_ints_off();       /* Useful ? Jean II */
-                       i = hp100_sense_lan(dev);
-                       hp100_ints_on();
-                       spin_unlock_irqrestore(&lp->lock, flags);
-                       if (i == HP100_LAN_ERR)
-                               printk("hp100: %s: link down detected\n", dev->name);
-                       else if (lp->lan_type != i) {   /* cable change! */
-                               /* it's very hard - all network settings must be changed!!! */
-                               printk("hp100: %s: cable change 10Mb/s <-> 100Mb/s detected\n", dev->name);
-                               lp->lan_type = i;
-                               hp100_stop_interface(dev);
-                               if (lp->lan_type == HP100_LAN_100)
-                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-                               hp100_start_interface(dev);
-                       } else {
-                               printk("hp100: %s: interface reset\n", dev->name);
-                               hp100_stop_interface(dev);
-                               if (lp->lan_type == HP100_LAN_100)
-                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-                               hp100_start_interface(dev);
-                       }
-               }
-
-               goto drop;
-       }
-
-       /*
-        * we have to turn int's off before modifying this, otherwise
-        * a tx_pdl_cleanup could occur at the same time
-        */
-       spin_lock_irqsave(&lp->lock, flags);
-       ringptr = lp->txrtail;
-       lp->txrtail = ringptr->next;
-
-       /* Check whether packet has minimal packet size */
-       ok_flag = skb->len >= HP100_MIN_PACKET_SIZE;
-       i = ok_flag ? skb->len : HP100_MIN_PACKET_SIZE;
-
-       ringptr->skb = skb;
-       ringptr->pdl[0] = ((1 << 16) | i);      /* PDH: 1 Fragment & length */
-       if (lp->chip == HP100_CHIPID_SHASTA) {
-               /* TODO:Could someone who has the EISA card please check if this works? */
-               ringptr->pdl[2] = i;
-       } else {                /* Lassen */
-               /* In the PDL, don't use the padded size but the real packet size: */
-               ringptr->pdl[2] = skb->len;     /* 1st Frag: Length of frag */
-       }
-       /* Conversion to new PCI API : map skbuf data to PCI bus.
-        * Doc says it's OK for EISA as well - Jean II */
-       ringptr->pdl[1] = ((u32) pci_map_single(lp->pci_dev, skb->data, ringptr->pdl[2], PCI_DMA_TODEVICE));    /* 1st Frag: Adr. of data */
-
-       /* Hand this PDL to the card. */
-       hp100_outl(ringptr->pdl_paddr, TX_PDA_L);       /* Low Prio. Queue */
-
-       lp->txrcommit++;
-
-       dev->stats.tx_packets++;
-       dev->stats.tx_bytes += skb->len;
-
-       spin_unlock_irqrestore(&lp->lock, flags);
-
-       return NETDEV_TX_OK;
-
-drop:
-       dev_kfree_skb(skb);
-       return NETDEV_TX_OK;
-}
-
-
-/* clean_txring checks if packets have been sent by the card by reading
- * the TX_PDL register from the performance page and comparing it to the
- * number of committed packets. It then frees the skb's of the packets that
- * obviously have been sent to the network.
- *
- * Needs the PERFORMANCE page selected.
- */
-static void hp100_clean_txring(struct net_device *dev)
-{
-       struct hp100_private *lp = netdev_priv(dev);
-       int ioaddr = dev->base_addr;
-       int donecount;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4211, TRACE);
-       printk("hp100: %s: clean txring\n", dev->name);
-#endif
-
-       /* How many PDLs have been transmitted? */
-       donecount = (lp->txrcommit) - hp100_inb(TX_PDL);
-
-#ifdef HP100_DEBUG
-       if (donecount > MAX_TX_PDL)
-               printk("hp100: %s: Warning: More PDLs transmitted than committed to card???\n", dev->name);
-#endif
-
-       for (; 0 != donecount; donecount--) {
-#ifdef HP100_DEBUG_BM
-               printk("hp100: %s: Free skb: data @0x%.8x txrcommit=0x%x TXPDL=0x%x, done=0x%x\n",
-                               dev->name, (u_int) lp->txrhead->skb->data,
-                               lp->txrcommit, hp100_inb(TX_PDL), donecount);
-#endif
-               /* Conversion to new PCI API : NOP */
-               pci_unmap_single(lp->pci_dev, (dma_addr_t) lp->txrhead->pdl[1], lp->txrhead->pdl[2], PCI_DMA_TODEVICE);
-               dev_consume_skb_any(lp->txrhead->skb);
-               lp->txrhead->skb = NULL;
-               lp->txrhead = lp->txrhead->next;
-               lp->txrcommit--;
-       }
-}
-
-/* tx function for slave modes */
-static netdev_tx_t hp100_start_xmit(struct sk_buff *skb,
-                                   struct net_device *dev)
-{
-       unsigned long flags;
-       int i, ok_flag;
-       int ioaddr = dev->base_addr;
-       u_short val;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4212, TRACE);
-       printk("hp100: %s: start_xmit\n", dev->name);
-#endif
-       if (skb->len <= 0)
-               goto drop;
-
-       if (hp100_check_lan(dev))
-               goto drop;
-
-       /* If there is not enough free memory on the card... */
-       i = hp100_inl(TX_MEM_FREE) & 0x7fffffff;
-       if (!(((i / 2) - 539) > (skb->len + 16) && (hp100_inb(TX_PKT_CNT) < 255))) {
-#ifdef HP100_DEBUG
-               printk("hp100: %s: start_xmit: tx free mem = 0x%x\n", dev->name, i);
-#endif
-               /* not waited long enough since last failed tx try? */
-               if (time_before(jiffies, dev_trans_start(dev) + HZ)) {
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: trans_start timing problem\n",
-                              dev->name);
-#endif
-                       goto drop;
-               }
-               if (lp->lan_type == HP100_LAN_100 && lp->hub_status < 0) {
-                       /* we have a 100Mb/s adapter but it isn't connected to hub */
-                       printk("hp100: %s: login to 100Mb/s hub retry\n", dev->name);
-                       hp100_stop_interface(dev);
-                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-                       hp100_start_interface(dev);
-               } else {
-                       spin_lock_irqsave(&lp->lock, flags);
-                       hp100_ints_off();       /* Useful ? Jean II */
-                       i = hp100_sense_lan(dev);
-                       hp100_ints_on();
-                       spin_unlock_irqrestore(&lp->lock, flags);
-                       if (i == HP100_LAN_ERR)
-                               printk("hp100: %s: link down detected\n", dev->name);
-                       else if (lp->lan_type != i) {   /* cable change! */
-                               /* it's very hard - all network setting must be changed!!! */
-                               printk("hp100: %s: cable change 10Mb/s <-> 100Mb/s detected\n", dev->name);
-                               lp->lan_type = i;
-                               hp100_stop_interface(dev);
-                               if (lp->lan_type == HP100_LAN_100)
-                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-                               hp100_start_interface(dev);
-                       } else {
-                               printk("hp100: %s: interface reset\n", dev->name);
-                               hp100_stop_interface(dev);
-                               if (lp->lan_type == HP100_LAN_100)
-                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-                               hp100_start_interface(dev);
-                               mdelay(1);
-                       }
-               }
-               goto drop;
-       }
-
-       for (i = 0; i < 6000 && (hp100_inb(OPTION_MSW) & HP100_TX_CMD); i++) {
-#ifdef HP100_DEBUG_TX
-               printk("hp100: %s: start_xmit: busy\n", dev->name);
-#endif
-       }
-
-       spin_lock_irqsave(&lp->lock, flags);
-       hp100_ints_off();
-       val = hp100_inw(IRQ_STATUS);
-       /* Ack / clear the interrupt TX_COMPLETE interrupt - this interrupt is set
-        * when the current packet being transmitted on the wire is completed. */
-       hp100_outw(HP100_TX_COMPLETE, IRQ_STATUS);
-#ifdef HP100_DEBUG_TX
-       printk("hp100: %s: start_xmit: irq_status=0x%.4x, irqmask=0x%.4x, len=%d\n",
-                       dev->name, val, hp100_inw(IRQ_MASK), (int) skb->len);
-#endif
-
-       ok_flag = skb->len >= HP100_MIN_PACKET_SIZE;
-       i = ok_flag ? skb->len : HP100_MIN_PACKET_SIZE;
-
-       hp100_outw(i, DATA32);  /* tell card the total packet length */
-       hp100_outw(i, FRAGMENT_LEN);    /* and first/only fragment length    */
-
-       if (lp->mode == 2) {    /* memory mapped */
-               /* Note: The J2585B needs alignment to 32bits here!  */
-               memcpy_toio(lp->mem_ptr_virt, skb->data, (skb->len + 3) & ~3);
-               if (!ok_flag)
-                       memset_io(lp->mem_ptr_virt, 0, HP100_MIN_PACKET_SIZE - skb->len);
-       } else {                /* programmed i/o */
-               outsl(ioaddr + HP100_REG_DATA32, skb->data,
-                     (skb->len + 3) >> 2);
-               if (!ok_flag)
-                       for (i = (skb->len + 3) & ~3; i < HP100_MIN_PACKET_SIZE; i += 4)
-                               hp100_outl(0, DATA32);
-       }
-
-       hp100_outb(HP100_TX_CMD | HP100_SET_LB, OPTION_MSW);    /* send packet */
-
-       dev->stats.tx_packets++;
-       dev->stats.tx_bytes += skb->len;
-       hp100_ints_on();
-       spin_unlock_irqrestore(&lp->lock, flags);
-
-       dev_consume_skb_any(skb);
-
-#ifdef HP100_DEBUG_TX
-       printk("hp100: %s: start_xmit: end\n", dev->name);
-#endif
-
-       return NETDEV_TX_OK;
-
-drop:
-       dev_kfree_skb(skb);
-       return NETDEV_TX_OK;
-
-}
-
-
-/*
- * Receive Function (Non-Busmaster mode)
- * Called when an "Receive Packet" interrupt occurs, i.e. the receive
- * packet counter is non-zero.
- * For non-busmaster, this function does the whole work of transferring
- * the packet to the host memory and then up to higher layers via skb
- * and netif_rx.
- */
-
-static void hp100_rx(struct net_device *dev)
-{
-       int packets, pkt_len;
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-       u_int header;
-       struct sk_buff *skb;
-
-#ifdef DEBUG_B
-       hp100_outw(0x4213, TRACE);
-       printk("hp100: %s: rx\n", dev->name);
-#endif
-
-       /* First get indication of received lan packet */
-       /* RX_PKT_CND indicates the number of packets which have been fully */
-       /* received onto the card but have not been fully transferred of the card */
-       packets = hp100_inb(RX_PKT_CNT);
-#ifdef HP100_DEBUG_RX
-       if (packets > 1)
-               printk("hp100: %s: rx: waiting packets = %d\n", dev->name, packets);
-#endif
-
-       while (packets-- > 0) {
-               /* If ADV_NXT_PKT is still set, we have to wait until the card has */
-               /* really advanced to the next packet. */
-               for (pkt_len = 0; pkt_len < 6000 && (hp100_inb(OPTION_MSW) & HP100_ADV_NXT_PKT); pkt_len++) {
-#ifdef HP100_DEBUG_RX
-                       printk ("hp100: %s: rx: busy, remaining packets = %d\n", dev->name, packets);
-#endif
-               }
-
-               /* First we get the header, which contains information about the */
-               /* actual length of the received packet. */
-               if (lp->mode == 2) {    /* memory mapped mode */
-                       header = readl(lp->mem_ptr_virt);
-               } else          /* programmed i/o */
-                       header = hp100_inl(DATA32);
-
-               pkt_len = ((header & HP100_PKT_LEN_MASK) + 3) & ~3;
-
-#ifdef HP100_DEBUG_RX
-               printk("hp100: %s: rx: new packet - length=%d, errors=0x%x, dest=0x%x\n",
-                                    dev->name, header & HP100_PKT_LEN_MASK,
-                                    (header >> 16) & 0xfff8, (header >> 16) & 7);
-#endif
-
-               /* Now we allocate the skb and transfer the data into it. */
-               skb = netdev_alloc_skb(dev, pkt_len + 2);
-               if (skb == NULL) {      /* Not enough memory->drop packet */
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: rx: couldn't allocate a sk_buff of size %d\n",
-                                            dev->name, pkt_len);
-#endif
-                       dev->stats.rx_dropped++;
-               } else {        /* skb successfully allocated */
-
-                       u_char *ptr;
-
-                       skb_reserve(skb,2);
-
-                       /* ptr to start of the sk_buff data area */
-                       skb_put(skb, pkt_len);
-                       ptr = skb->data;
-
-                       /* Now transfer the data from the card into that area */
-                       if (lp->mode == 2)
-                               memcpy_fromio(ptr, lp->mem_ptr_virt,pkt_len);
-                       else    /* io mapped */
-                               insl(ioaddr + HP100_REG_DATA32, ptr, pkt_len >> 2);
-
-                       skb->protocol = eth_type_trans(skb, dev);
-
-#ifdef HP100_DEBUG_RX
-                       printk("hp100: %s: rx: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
-                                       dev->name, ptr[0], ptr[1], ptr[2], ptr[3],
-                                       ptr[4], ptr[5], ptr[6], ptr[7], ptr[8],
-                                       ptr[9], ptr[10], ptr[11]);
-#endif
-                       netif_rx(skb);
-                       dev->stats.rx_packets++;
-                       dev->stats.rx_bytes += pkt_len;
-               }
-
-               /* Indicate the card that we have got the packet */
-               hp100_outb(HP100_ADV_NXT_PKT | HP100_SET_LB, OPTION_MSW);
-
-               switch (header & 0x00070000) {
-               case (HP100_MULTI_ADDR_HASH << 16):
-               case (HP100_MULTI_ADDR_NO_HASH << 16):
-                       dev->stats.multicast++;
-                       break;
-               }
-       }                       /* end of while(there are packets) loop */
-#ifdef HP100_DEBUG_RX
-       printk("hp100_rx: %s: end\n", dev->name);
-#endif
-}
-
-/*
- * Receive Function for Busmaster Mode
- */
-static void hp100_rx_bm(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-       hp100_ring_t *ptr;
-       u_int header;
-       int pkt_len;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4214, TRACE);
-       printk("hp100: %s: rx_bm\n", dev->name);
-#endif
-
-#ifdef HP100_DEBUG
-       if (0 == lp->rxrcommit) {
-               printk("hp100: %s: rx_bm called although no PDLs were committed to adapter?\n", dev->name);
-               return;
-       } else
-               /* RX_PKT_CNT states how many PDLs are currently formatted and available to
-                * the cards BM engine */
-       if ((hp100_inw(RX_PKT_CNT) & 0x00ff) >= lp->rxrcommit) {
-               printk("hp100: %s: More packets received than committed? RX_PKT_CNT=0x%x, commit=0x%x\n",
-                                    dev->name, hp100_inw(RX_PKT_CNT) & 0x00ff,
-                                    lp->rxrcommit);
-               return;
-       }
-#endif
-
-       while ((lp->rxrcommit > hp100_inb(RX_PDL))) {
-               /*
-                * The packet was received into the pdl pointed to by lp->rxrhead (
-                * the oldest pdl in the ring
-                */
-
-               /* First we get the header, which contains information about the */
-               /* actual length of the received packet. */
-
-               ptr = lp->rxrhead;
-
-               header = *(ptr->pdl - 1);
-               pkt_len = (header & HP100_PKT_LEN_MASK);
-
-               /* Conversion to new PCI API : NOP */
-               pci_unmap_single(lp->pci_dev, (dma_addr_t) ptr->pdl[3], MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE);
-
-#ifdef HP100_DEBUG_BM
-               printk("hp100: %s: rx_bm: header@0x%x=0x%x length=%d, errors=0x%x, dest=0x%x\n",
-                               dev->name, (u_int) (ptr->pdl - 1), (u_int) header,
-                               pkt_len, (header >> 16) & 0xfff8, (header >> 16) & 7);
-               printk("hp100: %s: RX_PDL_COUNT:0x%x TX_PDL_COUNT:0x%x, RX_PKT_CNT=0x%x PDH=0x%x, Data@0x%x len=0x%x\n",
-                               dev->name, hp100_inb(RX_PDL), hp100_inb(TX_PDL),
-                               hp100_inb(RX_PKT_CNT), (u_int) * (ptr->pdl),
-                               (u_int) * (ptr->pdl + 3), (u_int) * (ptr->pdl + 4));
-#endif
-
-               if ((pkt_len >= MIN_ETHER_SIZE) &&
-                   (pkt_len <= MAX_ETHER_SIZE)) {
-                       if (ptr->skb == NULL) {
-                               printk("hp100: %s: rx_bm: skb null\n", dev->name);
-                               /* can happen if we only allocated room for the pdh due to memory shortage. */
-                               dev->stats.rx_dropped++;
-                       } else {
-                               skb_trim(ptr->skb, pkt_len);    /* Shorten it */
-                               ptr->skb->protocol =
-                                   eth_type_trans(ptr->skb, dev);
-
-                               netif_rx(ptr->skb);     /* Up and away... */
-
-                               dev->stats.rx_packets++;
-                               dev->stats.rx_bytes += pkt_len;
-                       }
-
-                       switch (header & 0x00070000) {
-                       case (HP100_MULTI_ADDR_HASH << 16):
-                       case (HP100_MULTI_ADDR_NO_HASH << 16):
-                               dev->stats.multicast++;
-                               break;
-                       }
-               } else {
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: rx_bm: Received bad packet (length=%d)\n", dev->name, pkt_len);
-#endif
-                       if (ptr->skb != NULL)
-                               dev_kfree_skb_any(ptr->skb);
-                       dev->stats.rx_errors++;
-               }
-
-               lp->rxrhead = lp->rxrhead->next;
-
-               /* Allocate a new rx PDL (so lp->rxrcommit stays the same) */
-               if (0 == hp100_build_rx_pdl(lp->rxrtail, dev)) {
-                       /* No space for skb, header can still be received. */
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: rx_bm: No space for new PDL.\n", dev->name);
-#endif
-                       return;
-               } else {        /* successfully allocated new PDL - put it in ringlist at tail. */
-                       hp100_outl((u32) lp->rxrtail->pdl_paddr, RX_PDA);
-                       lp->rxrtail = lp->rxrtail->next;
-               }
-
-       }
-}
-
-/*
- *  statistics
- */
-static struct net_device_stats *hp100_get_stats(struct net_device *dev)
-{
-       unsigned long flags;
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4215, TRACE);
-#endif
-
-       spin_lock_irqsave(&lp->lock, flags);
-       hp100_ints_off();       /* Useful ? Jean II */
-       hp100_update_stats(dev);
-       hp100_ints_on();
-       spin_unlock_irqrestore(&lp->lock, flags);
-       return &(dev->stats);
-}
-
-static void hp100_update_stats(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       u_short val;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4216, TRACE);
-       printk("hp100: %s: update-stats\n", dev->name);
-#endif
-
-       /* Note: Statistics counters clear when read. */
-       hp100_page(MAC_CTRL);
-       val = hp100_inw(DROPPED) & 0x0fff;
-       dev->stats.rx_errors += val;
-       dev->stats.rx_over_errors += val;
-       val = hp100_inb(CRC);
-       dev->stats.rx_errors += val;
-       dev->stats.rx_crc_errors += val;
-       val = hp100_inb(ABORT);
-       dev->stats.tx_errors += val;
-       dev->stats.tx_aborted_errors += val;
-       hp100_page(PERFORMANCE);
-}
-
-static void hp100_misc_interrupt(struct net_device *dev)
-{
-#ifdef HP100_DEBUG_B
-       int ioaddr = dev->base_addr;
-#endif
-
-#ifdef HP100_DEBUG_B
-       int ioaddr = dev->base_addr;
-       hp100_outw(0x4216, TRACE);
-       printk("hp100: %s: misc_interrupt\n", dev->name);
-#endif
-
-       /* Note: Statistics counters clear when read. */
-       dev->stats.rx_errors++;
-       dev->stats.tx_errors++;
-}
-
-static void hp100_clear_stats(struct hp100_private *lp, int ioaddr)
-{
-       unsigned long flags;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4217, TRACE);
-       printk("hp100: %s: clear_stats\n", dev->name);
-#endif
-
-       spin_lock_irqsave(&lp->lock, flags);
-       hp100_page(MAC_CTRL);   /* get all statistics bytes */
-       hp100_inw(DROPPED);
-       hp100_inb(CRC);
-       hp100_inb(ABORT);
-       hp100_page(PERFORMANCE);
-       spin_unlock_irqrestore(&lp->lock, flags);
-}
-
-
-/*
- *  multicast setup
- */
-
-/*
- *  Set or clear the multicast filter for this adapter.
- */
-
-static void hp100_set_multicast_list(struct net_device *dev)
-{
-       unsigned long flags;
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4218, TRACE);
-       printk("hp100: %s: set_mc_list\n", dev->name);
-#endif
-
-       spin_lock_irqsave(&lp->lock, flags);
-       hp100_ints_off();
-       hp100_page(MAC_CTRL);
-       hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);    /* stop rx/tx */
-
-       if (dev->flags & IFF_PROMISC) {
-               lp->mac2_mode = HP100_MAC2MODE6;        /* promiscuous mode = get all good */
-               lp->mac1_mode = HP100_MAC1MODE6;        /* packets on the net */
-               memset(&lp->hash_bytes, 0xff, 8);
-       } else if (!netdev_mc_empty(dev) || (dev->flags & IFF_ALLMULTI)) {
-               lp->mac2_mode = HP100_MAC2MODE5;        /* multicast mode = get packets for */
-               lp->mac1_mode = HP100_MAC1MODE5;        /* me, broadcasts and all multicasts */
-#ifdef HP100_MULTICAST_FILTER  /* doesn't work!!! */
-               if (dev->flags & IFF_ALLMULTI) {
-                       /* set hash filter to receive all multicast packets */
-                       memset(&lp->hash_bytes, 0xff, 8);
-               } else {
-                       int i, idx;
-                       u_char *addrs;
-                       struct netdev_hw_addr *ha;
-
-                       memset(&lp->hash_bytes, 0x00, 8);
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: computing hash filter - mc_count = %i\n",
-                              dev->name, netdev_mc_count(dev));
-#endif
-                       netdev_for_each_mc_addr(ha, dev) {
-                               addrs = ha->addr;
-#ifdef HP100_DEBUG
-                               printk("hp100: %s: multicast = %pM, ",
-                                            dev->name, addrs);
-#endif
-                               for (i = idx = 0; i < 6; i++) {
-                                       idx ^= *addrs++ & 0x3f;
-                                       printk(":%02x:", idx);
-                               }
-#ifdef HP100_DEBUG
-                               printk("idx = %i\n", idx);
-#endif
-                               lp->hash_bytes[idx >> 3] |= (1 << (idx & 7));
-                       }
-               }
-#else
-               memset(&lp->hash_bytes, 0xff, 8);
-#endif
-       } else {
-               lp->mac2_mode = HP100_MAC2MODE3;        /* normal mode = get packets for me */
-               lp->mac1_mode = HP100_MAC1MODE3;        /* and broadcasts */
-               memset(&lp->hash_bytes, 0x00, 8);
-       }
-
-       if (((hp100_inb(MAC_CFG_1) & 0x0f) != lp->mac1_mode) ||
-           (hp100_inb(MAC_CFG_2) != lp->mac2_mode)) {
-               int i;
-
-               hp100_outb(lp->mac2_mode, MAC_CFG_2);
-               hp100_andb(HP100_MAC1MODEMASK, MAC_CFG_1);      /* clear mac1 mode bits */
-               hp100_orb(lp->mac1_mode, MAC_CFG_1);    /* and set the new mode */
-
-               hp100_page(MAC_ADDRESS);
-               for (i = 0; i < 8; i++)
-                       hp100_outb(lp->hash_bytes[i], HASH_BYTE0 + i);
-#ifdef HP100_DEBUG
-               printk("hp100: %s: mac1 = 0x%x, mac2 = 0x%x, multicast hash = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-                                    dev->name, lp->mac1_mode, lp->mac2_mode,
-                                    lp->hash_bytes[0], lp->hash_bytes[1],
-                                    lp->hash_bytes[2], lp->hash_bytes[3],
-                                    lp->hash_bytes[4], lp->hash_bytes[5],
-                                    lp->hash_bytes[6], lp->hash_bytes[7]);
-#endif
-
-               if (lp->lan_type == HP100_LAN_100) {
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: 100VG MAC settings have changed - relogin.\n", dev->name);
-#endif
-                       lp->hub_status = hp100_login_to_vg_hub(dev, 1); /* force a relogin to the hub */
-               }
-       } else {
-               int i;
-               u_char old_hash_bytes[8];
-
-               hp100_page(MAC_ADDRESS);
-               for (i = 0; i < 8; i++)
-                       old_hash_bytes[i] = hp100_inb(HASH_BYTE0 + i);
-               if (memcmp(old_hash_bytes, &lp->hash_bytes, 8)) {
-                       for (i = 0; i < 8; i++)
-                               hp100_outb(lp->hash_bytes[i], HASH_BYTE0 + i);
-#ifdef HP100_DEBUG
-                       printk("hp100: %s: multicast hash = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-                                       dev->name, lp->hash_bytes[0],
-                                       lp->hash_bytes[1], lp->hash_bytes[2],
-                                       lp->hash_bytes[3], lp->hash_bytes[4],
-                                       lp->hash_bytes[5], lp->hash_bytes[6],
-                                       lp->hash_bytes[7]);
-#endif
-
-                       if (lp->lan_type == HP100_LAN_100) {
-#ifdef HP100_DEBUG
-                               printk("hp100: %s: 100VG MAC settings have changed - relogin.\n", dev->name);
-#endif
-                               lp->hub_status = hp100_login_to_vg_hub(dev, 1); /* force a relogin to the hub */
-                       }
-               }
-       }
-
-       hp100_page(MAC_CTRL);
-       hp100_orb(HP100_RX_EN | HP100_RX_IDLE | /* enable rx */
-                 HP100_TX_EN | HP100_TX_IDLE, MAC_CFG_1);      /* enable tx */
-
-       hp100_page(PERFORMANCE);
-       hp100_ints_on();
-       spin_unlock_irqrestore(&lp->lock, flags);
-}
-
-/*
- *  hardware interrupt handling
- */
-
-static irqreturn_t hp100_interrupt(int irq, void *dev_id)
-{
-       struct net_device *dev = (struct net_device *) dev_id;
-       struct hp100_private *lp = netdev_priv(dev);
-
-       int ioaddr;
-       u_int val;
-
-       if (dev == NULL)
-               return IRQ_NONE;
-       ioaddr = dev->base_addr;
-
-       spin_lock(&lp->lock);
-
-       hp100_ints_off();
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4219, TRACE);
-#endif
-
-       /*  hp100_page( PERFORMANCE ); */
-       val = hp100_inw(IRQ_STATUS);
-#ifdef HP100_DEBUG_IRQ
-       printk("hp100: %s: mode=%x,IRQ_STAT=0x%.4x,RXPKTCNT=0x%.2x RXPDL=0x%.2x TXPKTCNT=0x%.2x TXPDL=0x%.2x\n",
-                            dev->name, lp->mode, (u_int) val, hp100_inb(RX_PKT_CNT),
-                            hp100_inb(RX_PDL), hp100_inb(TX_PKT_CNT), hp100_inb(TX_PDL));
-#endif
-
-       if (val == 0) {         /* might be a shared interrupt */
-               spin_unlock(&lp->lock);
-               hp100_ints_on();
-               return IRQ_NONE;
-       }
-       /* We're only interested in those interrupts we really enabled. */
-       /* val &= hp100_inw( IRQ_MASK ); */
-
-       /*
-        * RX_PDL_FILL_COMPL is set whenever a RX_PDL has been executed. A RX_PDL
-        * is considered executed whenever the RX_PDL data structure is no longer
-        * needed.
-        */
-       if (val & HP100_RX_PDL_FILL_COMPL) {
-               if (lp->mode == 1)
-                       hp100_rx_bm(dev);
-               else {
-                       printk("hp100: %s: rx_pdl_fill_compl interrupt although not busmaster?\n", dev->name);
-               }
-       }
-
-       /*
-        * The RX_PACKET interrupt is set, when the receive packet counter is
-        * non zero. We use this interrupt for receiving in slave mode. In
-        * busmaster mode, we use it to make sure we did not miss any rx_pdl_fill
-        * interrupts. If rx_pdl_fill_compl is not set and rx_packet is set, then
-        * we somehow have missed a rx_pdl_fill_compl interrupt.
-        */
-
-       if (val & HP100_RX_PACKET) {    /* Receive Packet Counter is non zero */
-               if (lp->mode != 1)      /* non busmaster */
-                       hp100_rx(dev);
-               else if (!(val & HP100_RX_PDL_FILL_COMPL)) {
-                       /* Shouldn't happen - maybe we missed a RX_PDL_FILL Interrupt?  */
-                       hp100_rx_bm(dev);
-               }
-       }
-
-       /*
-        * Ack. that we have noticed the interrupt and thereby allow next one.
-        * Note that this is now done after the slave rx function, since first
-        * acknowledging and then setting ADV_NXT_PKT caused an extra interrupt
-        * on the J2573.
-        */
-       hp100_outw(val, IRQ_STATUS);
-
-       /*
-        * RX_ERROR is set when a packet is dropped due to no memory resources on
-        * the card or when a RCV_ERR occurs.
-        * TX_ERROR is set when a TX_ABORT condition occurs in the MAC->exists
-        * only in the 802.3 MAC and happens when 16 collisions occur during a TX
-        */
-       if (val & (HP100_TX_ERROR | HP100_RX_ERROR)) {
-#ifdef HP100_DEBUG_IRQ
-               printk("hp100: %s: TX/RX Error IRQ\n", dev->name);
-#endif
-               hp100_update_stats(dev);
-               if (lp->mode == 1) {
-                       hp100_rxfill(dev);
-                       hp100_clean_txring(dev);
-               }
-       }
-
-       /*
-        * RX_PDA_ZERO is set when the PDA count goes from non-zero to zero.
-        */
-       if ((lp->mode == 1) && (val & (HP100_RX_PDA_ZERO)))
-               hp100_rxfill(dev);
-
-       /*
-        * HP100_TX_COMPLETE interrupt occurs when packet transmitted on wire
-        * is completed
-        */
-       if ((lp->mode == 1) && (val & (HP100_TX_COMPLETE)))
-               hp100_clean_txring(dev);
-
-       /*
-        * MISC_ERROR is set when either the LAN link goes down or a detected
-        * bus error occurs.
-        */
-       if (val & HP100_MISC_ERROR) {   /* New for J2585B */
-#ifdef HP100_DEBUG_IRQ
-               printk
-                   ("hp100: %s: Misc. Error Interrupt - Check cabling.\n",
-                    dev->name);
-#endif
-               if (lp->mode == 1) {
-                       hp100_clean_txring(dev);
-                       hp100_rxfill(dev);
-               }
-               hp100_misc_interrupt(dev);
-       }
-
-       spin_unlock(&lp->lock);
-       hp100_ints_on();
-       return IRQ_HANDLED;
-}
-
-/*
- *  some misc functions
- */
-
-static void hp100_start_interface(struct net_device *dev)
-{
-       unsigned long flags;
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4220, TRACE);
-       printk("hp100: %s: hp100_start_interface\n", dev->name);
-#endif
-
-       spin_lock_irqsave(&lp->lock, flags);
-
-       /* Ensure the adapter does not want to request an interrupt when */
-       /* enabling the IRQ line to be active on the bus (i.e. not tri-stated) */
-       hp100_page(PERFORMANCE);
-       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
-       hp100_outw(0xffff, IRQ_STATUS); /* ack all IRQs */
-       hp100_outw(HP100_FAKE_INT | HP100_INT_EN | HP100_RESET_LB,
-                  OPTION_LSW);
-       /* Un Tri-state int. TODO: Check if shared interrupts can be realised? */
-       hp100_outw(HP100_TRI_INT | HP100_RESET_HB, OPTION_LSW);
-
-       if (lp->mode == 1) {
-               /* Make sure BM bit is set... */
-               hp100_page(HW_MAP);
-               hp100_orb(HP100_BM_MASTER, BM);
-               hp100_rxfill(dev);
-       } else if (lp->mode == 2) {
-               /* Enable memory mapping. Note: Don't do this when busmaster. */
-               hp100_outw(HP100_MMAP_DIS | HP100_RESET_HB, OPTION_LSW);
-       }
-
-       hp100_page(PERFORMANCE);
-       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
-       hp100_outw(0xffff, IRQ_STATUS); /* ack IRQ */
-
-       /* enable a few interrupts: */
-       if (lp->mode == 1) {    /* busmaster mode */
-               hp100_outw(HP100_RX_PDL_FILL_COMPL |
-                          HP100_RX_PDA_ZERO | HP100_RX_ERROR |
-                          /* HP100_RX_PACKET    | */
-                          /* HP100_RX_EARLY_INT |  */ HP100_SET_HB |
-                          /* HP100_TX_PDA_ZERO  |  */
-                          HP100_TX_COMPLETE |
-                          /* HP100_MISC_ERROR   |  */
-                          HP100_TX_ERROR | HP100_SET_LB, IRQ_MASK);
-       } else {
-               hp100_outw(HP100_RX_PACKET |
-                          HP100_RX_ERROR | HP100_SET_HB |
-                          HP100_TX_ERROR | HP100_SET_LB, IRQ_MASK);
-       }
-
-       /* Note : before hp100_set_multicast_list(), because it will play with
-        * spinlock itself... Jean II */
-       spin_unlock_irqrestore(&lp->lock, flags);
-
-       /* Enable MAC Tx and RX, set MAC modes, ... */
-       hp100_set_multicast_list(dev);
-}
-
-static void hp100_stop_interface(struct net_device *dev)
-{
-       struct hp100_private *lp = netdev_priv(dev);
-       int ioaddr = dev->base_addr;
-       u_int val;
-
-#ifdef HP100_DEBUG_B
-       printk("hp100: %s: hp100_stop_interface\n", dev->name);
-       hp100_outw(0x4221, TRACE);
-#endif
-
-       if (lp->mode == 1)
-               hp100_BM_shutdown(dev);
-       else {
-               /* Note: MMAP_DIS will be reenabled by start_interface */
-               hp100_outw(HP100_INT_EN | HP100_RESET_LB |
-                          HP100_TRI_INT | HP100_MMAP_DIS | HP100_SET_HB,
-                          OPTION_LSW);
-               val = hp100_inw(OPTION_LSW);
-
-               hp100_page(MAC_CTRL);
-               hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);
-
-               if (!(val & HP100_HW_RST))
-                       return; /* If reset, imm. return ... */
-               /* ... else: busy wait until idle */
-               for (val = 0; val < 6000; val++)
-                       if ((hp100_inb(MAC_CFG_1) & (HP100_TX_IDLE | HP100_RX_IDLE)) == (HP100_TX_IDLE | HP100_RX_IDLE)) {
-                               hp100_page(PERFORMANCE);
-                               return;
-                       }
-               printk("hp100: %s: hp100_stop_interface - timeout\n", dev->name);
-               hp100_page(PERFORMANCE);
-       }
-}
-
-static void hp100_load_eeprom(struct net_device *dev, u_short probe_ioaddr)
-{
-       int i;
-       int ioaddr = probe_ioaddr > 0 ? probe_ioaddr : dev->base_addr;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4222, TRACE);
-#endif
-
-       hp100_page(EEPROM_CTRL);
-       hp100_andw(~HP100_EEPROM_LOAD, EEPROM_CTRL);
-       hp100_orw(HP100_EEPROM_LOAD, EEPROM_CTRL);
-       for (i = 0; i < 10000; i++)
-               if (!(hp100_inb(OPTION_MSW) & HP100_EE_LOAD))
-                       return;
-       printk("hp100: %s: hp100_load_eeprom - timeout\n", dev->name);
-}
-
-/*  Sense connection status.
- *  return values: LAN_10  - Connected to 10Mbit/s network
- *                 LAN_100 - Connected to 100Mbit/s network
- *                 LAN_ERR - not connected or 100Mbit/s Hub down
- */
-static int hp100_sense_lan(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       u_short val_VG, val_10;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4223, TRACE);
-#endif
-
-       hp100_page(MAC_CTRL);
-       val_10 = hp100_inb(10_LAN_CFG_1);
-       val_VG = hp100_inb(VG_LAN_CFG_1);
-       hp100_page(PERFORMANCE);
-#ifdef HP100_DEBUG
-       printk("hp100: %s: sense_lan: val_VG = 0x%04x, val_10 = 0x%04x\n",
-              dev->name, val_VG, val_10);
-#endif
-
-       if (val_10 & HP100_LINK_BEAT_ST)        /* 10Mb connection is active */
-               return HP100_LAN_10;
-
-       if (val_10 & HP100_AUI_ST) {    /* have we BNC or AUI onboard? */
-               /*
-                * This can be overriden by dos utility, so if this has no effect,
-                * perhaps you need to download that utility from HP and set card
-                * back to "auto detect".
-                */
-               val_10 |= HP100_AUI_SEL | HP100_LOW_TH;
-               hp100_page(MAC_CTRL);
-               hp100_outb(val_10, 10_LAN_CFG_1);
-               hp100_page(PERFORMANCE);
-               return HP100_LAN_COAX;
-       }
-
-       /* Those cards don't have a 100 Mbit connector */
-       if ( !strcmp(lp->id, "HWP1920")  ||
-            (lp->pci_dev &&
-             lp->pci_dev->vendor == PCI_VENDOR_ID &&
-             (lp->pci_dev->device == PCI_DEVICE_ID_HP_J2970A ||
-              lp->pci_dev->device == PCI_DEVICE_ID_HP_J2973A)))
-               return HP100_LAN_ERR;
-
-       if (val_VG & HP100_LINK_CABLE_ST)       /* Can hear the HUBs tone. */
-               return HP100_LAN_100;
-       return HP100_LAN_ERR;
-}
-
-static int hp100_down_vg_link(struct net_device *dev)
-{
-       struct hp100_private *lp = netdev_priv(dev);
-       int ioaddr = dev->base_addr;
-       unsigned long time;
-       long savelan, newlan;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4224, TRACE);
-       printk("hp100: %s: down_vg_link\n", dev->name);
-#endif
-
-       hp100_page(MAC_CTRL);
-       time = jiffies + (HZ / 4);
-       do {
-               if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST)
-                       break;
-               if (!in_interrupt())
-                       schedule_timeout_interruptible(1);
-       } while (time_after(time, jiffies));
-
-       if (time_after_eq(jiffies, time))       /* no signal->no logout */
-               return 0;
-
-       /* Drop the VG Link by clearing the link up cmd and load addr. */
-
-       hp100_andb(~(HP100_LOAD_ADDR | HP100_LINK_CMD), VG_LAN_CFG_1);
-       hp100_orb(HP100_VG_SEL, VG_LAN_CFG_1);
-
-       /* Conditionally stall for >250ms on Link-Up Status (to go down) */
-       time = jiffies + (HZ / 2);
-       do {
-               if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
-                       break;
-               if (!in_interrupt())
-                       schedule_timeout_interruptible(1);
-       } while (time_after(time, jiffies));
-
-#ifdef HP100_DEBUG
-       if (time_after_eq(jiffies, time))
-               printk("hp100: %s: down_vg_link: Link does not go down?\n", dev->name);
-#endif
-
-       /* To prevent condition where Rev 1 VG MAC and old hubs do not complete */
-       /* logout under traffic (even though all the status bits are cleared),  */
-       /* do this workaround to get the Rev 1 MAC in its idle state */
-       if (lp->chip == HP100_CHIPID_LASSEN) {
-               /* Reset VG MAC to insure it leaves the logoff state even if */
-               /* the Hub is still emitting tones */
-               hp100_andb(~HP100_VG_RESET, VG_LAN_CFG_1);
-               udelay(1500);   /* wait for >1ms */
-               hp100_orb(HP100_VG_RESET, VG_LAN_CFG_1);        /* Release Reset */
-               udelay(1500);
-       }
-
-       /* New: For lassen, switch to 10 Mbps mac briefly to clear training ACK */
-       /* to get the VG mac to full reset. This is not req.d with later chips */
-       /* Note: It will take the between 1 and 2 seconds for the VG mac to be */
-       /* selected again! This will be left to the connect hub function to */
-       /* perform if desired.  */
-       if (lp->chip == HP100_CHIPID_LASSEN) {
-               /* Have to write to 10 and 100VG control registers simultaneously */
-               savelan = newlan = hp100_inl(10_LAN_CFG_1);     /* read 10+100 LAN_CFG regs */
-               newlan &= ~(HP100_VG_SEL << 16);
-               newlan |= (HP100_DOT3_MAC) << 8;
-               hp100_andb(~HP100_AUTO_MODE, MAC_CFG_3);        /* Autosel off */
-               hp100_outl(newlan, 10_LAN_CFG_1);
-
-               /* Conditionally stall for 5sec on VG selected. */
-               time = jiffies + (HZ * 5);
-               do {
-                       if (!(hp100_inb(MAC_CFG_4) & HP100_MAC_SEL_ST))
-                               break;
-                       if (!in_interrupt())
-                               schedule_timeout_interruptible(1);
-               } while (time_after(time, jiffies));
-
-               hp100_orb(HP100_AUTO_MODE, MAC_CFG_3);  /* Autosel back on */
-               hp100_outl(savelan, 10_LAN_CFG_1);
-       }
-
-       time = jiffies + (3 * HZ);      /* Timeout 3s */
-       do {
-               if ((hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST) == 0)
-                       break;
-               if (!in_interrupt())
-                       schedule_timeout_interruptible(1);
-       } while (time_after(time, jiffies));
-
-       if (time_before_eq(time, jiffies)) {
-#ifdef HP100_DEBUG
-               printk("hp100: %s: down_vg_link: timeout\n", dev->name);
-#endif
-               return -EIO;
-       }
-
-       time = jiffies + (2 * HZ);      /* This seems to take a while.... */
-       do {
-               if (!in_interrupt())
-                       schedule_timeout_interruptible(1);
-       } while (time_after(time, jiffies));
-
-       return 0;
-}
-
-static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin)
-{
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-       u_short val = 0;
-       unsigned long time;
-       int startst;
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4225, TRACE);
-       printk("hp100: %s: login_to_vg_hub\n", dev->name);
-#endif
-
-       /* Initiate a login sequence iff VG MAC is enabled and either Load Address
-        * bit is zero or the force relogin flag is set (e.g. due to MAC address or
-        * promiscuous mode change)
-        */
-       hp100_page(MAC_CTRL);
-       startst = hp100_inb(VG_LAN_CFG_1);
-       if ((force_relogin == 1) || (hp100_inb(MAC_CFG_4) & HP100_MAC_SEL_ST)) {
-#ifdef HP100_DEBUG_TRAINING
-               printk("hp100: %s: Start training\n", dev->name);
-#endif
-
-               /* Ensure VG Reset bit is 1 (i.e., do not reset) */
-               hp100_orb(HP100_VG_RESET, VG_LAN_CFG_1);
-
-               /* If Lassen AND auto-select-mode AND VG tones were sensed on */
-               /* entry then temporarily put them into force 100Mbit mode */
-               if ((lp->chip == HP100_CHIPID_LASSEN) && (startst & HP100_LINK_CABLE_ST))
-                       hp100_andb(~HP100_DOT3_MAC, 10_LAN_CFG_2);
-
-               /* Drop the VG link by zeroing Link Up Command and Load Address  */
-               hp100_andb(~(HP100_LINK_CMD /* |HP100_LOAD_ADDR */ ), VG_LAN_CFG_1);
-
-#ifdef HP100_DEBUG_TRAINING
-               printk("hp100: %s: Bring down the link\n", dev->name);
-#endif
-
-               /* Wait for link to drop */
-               time = jiffies + (HZ / 10);
-               do {
-                       if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
-                               break;
-                       if (!in_interrupt())
-                               schedule_timeout_interruptible(1);
-               } while (time_after(time, jiffies));
-
-               /* Start an addressed training and optionally request promiscuous port */
-               if ((dev->flags) & IFF_PROMISC) {
-                       hp100_orb(HP100_PROM_MODE, VG_LAN_CFG_2);
-                       if (lp->chip == HP100_CHIPID_LASSEN)
-                               hp100_orw(HP100_MACRQ_PROMSC, TRAIN_REQUEST);
-               } else {
-                       hp100_andb(~HP100_PROM_MODE, VG_LAN_CFG_2);
-                       /* For ETR parts we need to reset the prom. bit in the training
-                        * register, otherwise promiscious mode won't be disabled.
-                        */
-                       if (lp->chip == HP100_CHIPID_LASSEN) {
-                               hp100_andw(~HP100_MACRQ_PROMSC, TRAIN_REQUEST);
-                       }
-               }
-
-               /* With ETR parts, frame format request bits can be set. */
-               if (lp->chip == HP100_CHIPID_LASSEN)
-                       hp100_orb(HP100_MACRQ_FRAMEFMT_EITHER, TRAIN_REQUEST);
-
-               hp100_orb(HP100_LINK_CMD | HP100_LOAD_ADDR | HP100_VG_RESET, VG_LAN_CFG_1);
-
-               /* Note: Next wait could be omitted for Hood and earlier chips under */
-               /* certain circumstances */
-               /* TODO: check if hood/earlier and skip wait. */
-
-               /* Wait for either short timeout for VG tones or long for login    */
-               /* Wait for the card hardware to signalise link cable status ok... */
-               hp100_page(MAC_CTRL);
-               time = jiffies + (1 * HZ);      /* 1 sec timeout for cable st */
-               do {
-                       if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST)
-                               break;
-                       if (!in_interrupt())
-                               schedule_timeout_interruptible(1);
-               } while (time_before(jiffies, time));
-
-               if (time_after_eq(jiffies, time)) {
-#ifdef HP100_DEBUG_TRAINING
-                       printk("hp100: %s: Link cable status not ok? Training aborted.\n", dev->name);
-#endif
-               } else {
-#ifdef HP100_DEBUG_TRAINING
-                       printk
-                           ("hp100: %s: HUB tones detected. Trying to train.\n",
-                            dev->name);
-#endif
-
-                       time = jiffies + (2 * HZ);      /* again a timeout */
-                       do {
-                               val = hp100_inb(VG_LAN_CFG_1);
-                               if ((val & (HP100_LINK_UP_ST))) {
-#ifdef HP100_DEBUG_TRAINING
-                                       printk("hp100: %s: Passed training.\n", dev->name);
-#endif
-                                       break;
-                               }
-                               if (!in_interrupt())
-                                       schedule_timeout_interruptible(1);
-                       } while (time_after(time, jiffies));
-               }
-
-               /* If LINK_UP_ST is set, then we are logged into the hub. */
-               if (time_before_eq(jiffies, time) && (val & HP100_LINK_UP_ST)) {
-#ifdef HP100_DEBUG_TRAINING
-                       printk("hp100: %s: Successfully logged into the HUB.\n", dev->name);
-                       if (lp->chip == HP100_CHIPID_LASSEN) {
-                               val = hp100_inw(TRAIN_ALLOW);
-                               printk("hp100: %s: Card supports 100VG MAC Version \"%s\" ",
-                                            dev->name, (hp100_inw(TRAIN_REQUEST) & HP100_CARD_MACVER) ? "802.12" : "Pre");
-                               printk("Driver will use MAC Version \"%s\"\n", (val & HP100_HUB_MACVER) ? "802.12" : "Pre");
-                               printk("hp100: %s: Frame format is %s.\n", dev->name, (val & HP100_MALLOW_FRAMEFMT) ? "802.5" : "802.3");
-                       }
-#endif
-               } else {
-                       /* If LINK_UP_ST is not set, login was not successful */
-                       printk("hp100: %s: Problem logging into the HUB.\n", dev->name);
-                       if (lp->chip == HP100_CHIPID_LASSEN) {
-                               /* Check allowed Register to find out why there is a problem. */
-                               val = hp100_inw(TRAIN_ALLOW);   /* won't work on non-ETR card */
-#ifdef HP100_DEBUG_TRAINING
-                               printk("hp100: %s: MAC Configuration requested: 0x%04x, HUB allowed: 0x%04x\n", dev->name, hp100_inw(TRAIN_REQUEST), val);
-#endif
-                               if (val & HP100_MALLOW_ACCDENIED)
-                                       printk("hp100: %s: HUB access denied.\n", dev->name);
-                               if (val & HP100_MALLOW_CONFIGURE)
-                                       printk("hp100: %s: MAC Configuration is incompatible with the Network.\n", dev->name);
-                               if (val & HP100_MALLOW_DUPADDR)
-                                       printk("hp100: %s: Duplicate MAC Address on the Network.\n", dev->name);
-                       }
-               }
-
-               /* If we have put the chip into forced 100 Mbit mode earlier, go back */
-               /* to auto-select mode */
-
-               if ((lp->chip == HP100_CHIPID_LASSEN) && (startst & HP100_LINK_CABLE_ST)) {
-                       hp100_page(MAC_CTRL);
-                       hp100_orb(HP100_DOT3_MAC, 10_LAN_CFG_2);
-               }
-
-               val = hp100_inb(VG_LAN_CFG_1);
-
-               /* Clear the MISC_ERROR Interrupt, which might be generated when doing the relogin */
-               hp100_page(PERFORMANCE);
-               hp100_outw(HP100_MISC_ERROR, IRQ_STATUS);
-
-               if (val & HP100_LINK_UP_ST)
-                       return 0;       /* login was ok */
-               else {
-                       printk("hp100: %s: Training failed.\n", dev->name);
-                       hp100_down_vg_link(dev);
-                       return -EIO;
-               }
-       }
-       /* no forced relogin & already link there->no training. */
-       return -EIO;
-}
-
-static void hp100_cascade_reset(struct net_device *dev, u_short enable)
-{
-       int ioaddr = dev->base_addr;
-       struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-       hp100_outw(0x4226, TRACE);
-       printk("hp100: %s: cascade_reset\n", dev->name);
-#endif
-
-       if (enable) {
-               hp100_outw(HP100_HW_RST | HP100_RESET_LB, OPTION_LSW);
-               if (lp->chip == HP100_CHIPID_LASSEN) {
-                       /* Lassen requires a PCI transmit fifo reset */
-                       hp100_page(HW_MAP);
-                       hp100_andb(~HP100_PCI_RESET, PCICTRL2);
-                       hp100_orb(HP100_PCI_RESET, PCICTRL2);
-                       /* Wait for min. 300 ns */
-                       /* we can't use jiffies here, because it may be */
-                       /* that we have disabled the timer... */
-                       udelay(400);
-                       hp100_andb(~HP100_PCI_RESET, PCICTRL2);
-                       hp100_page(PERFORMANCE);
-               }
-       } else {                /* bring out of reset */
-               hp100_outw(HP100_HW_RST | HP100_SET_LB, OPTION_LSW);
-               udelay(400);
-               hp100_page(PERFORMANCE);
-       }
-}
-
-#ifdef HP100_DEBUG
-void hp100_RegisterDump(struct net_device *dev)
-{
-       int ioaddr = dev->base_addr;
-       int Page;
-       int Register;
-
-       /* Dump common registers */
-       printk("hp100: %s: Cascade Register Dump\n", dev->name);
-       printk("hardware id #1: 0x%.2x\n", hp100_inb(HW_ID));
-       printk("hardware id #2/paging: 0x%.2x\n", hp100_inb(PAGING));
-       printk("option #1: 0x%.4x\n", hp100_inw(OPTION_LSW));
-       printk("option #2: 0x%.4x\n", hp100_inw(OPTION_MSW));
-
-       /* Dump paged registers */
-       for (Page = 0; Page < 8; Page++) {
-               /* Dump registers */
-               printk("page: 0x%.2x\n", Page);
-               outw(Page, ioaddr + 0x02);
-               for (Register = 0x8; Register < 0x22; Register += 2) {
-                       /* Display Register contents except data port */
-                       if (((Register != 0x10) && (Register != 0x12)) || (Page > 0)) {
-                               printk("0x%.2x = 0x%.4x\n", Register, inw(ioaddr + Register));
-                       }
-               }
-       }
-       hp100_page(PERFORMANCE);
-}
-#endif
-
-
-static void cleanup_dev(struct net_device *d)
-{
-       struct hp100_private *p = netdev_priv(d);
-
-       unregister_netdev(d);
-       release_region(d->base_addr, HP100_REGION_SIZE);
-
-       if (p->mode == 1)       /* busmaster */
-               pci_free_consistent(p->pci_dev, MAX_RINGSIZE + 0x0f,
-                                   p->page_vaddr_algn,
-                                   virt_to_whatever(d, p->page_vaddr_algn));
-       if (p->mem_ptr_virt)
-               iounmap(p->mem_ptr_virt);
-
-       free_netdev(d);
-}
-
-static int hp100_eisa_probe(struct device *gendev)
-{
-       struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
-       struct eisa_device *edev = to_eisa_device(gendev);
-       int err;
-
-       if (!dev)
-               return -ENOMEM;
-
-       SET_NETDEV_DEV(dev, &edev->dev);
-
-       err = hp100_probe1(dev, edev->base_addr + 0xC38, HP100_BUS_EISA, NULL);
-       if (err)
-               goto out1;
-
-#ifdef HP100_DEBUG
-       printk("hp100: %s: EISA adapter found at 0x%x\n", dev->name,
-              dev->base_addr);
-#endif
-       dev_set_drvdata(gendev, dev);
-       return 0;
- out1:
-       free_netdev(dev);
-       return err;
-}
-
-static int hp100_eisa_remove(struct device *gendev)
-{
-       struct net_device *dev = dev_get_drvdata(gendev);
-       cleanup_dev(dev);
-       return 0;
-}
-
-static struct eisa_driver hp100_eisa_driver = {
-        .id_table = hp100_eisa_tbl,
-        .driver   = {
-                .name    = "hp100",
-                .probe   = hp100_eisa_probe,
-               .remove  = hp100_eisa_remove,
-        }
-};
-
-static int hp100_pci_probe(struct pci_dev *pdev,
-                          const struct pci_device_id *ent)
-{
-       struct net_device *dev;
-       int ioaddr;
-       u_short pci_command;
-       int err;
-
-       if (pci_enable_device(pdev))
-               return -ENODEV;
-
-       dev = alloc_etherdev(sizeof(struct hp100_private));
-       if (!dev) {
-               err = -ENOMEM;
-               goto out0;
-       }
-
-       SET_NETDEV_DEV(dev, &pdev->dev);
-
-       pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
-       if (!(pci_command & PCI_COMMAND_IO)) {
-#ifdef HP100_DEBUG
-               printk("hp100: %s: PCI I/O Bit has not been set. Setting...\n", dev->name);
-#endif
-               pci_command |= PCI_COMMAND_IO;
-               pci_write_config_word(pdev, PCI_COMMAND, pci_command);
-       }
-
-       if (!(pci_command & PCI_COMMAND_MASTER)) {
-#ifdef HP100_DEBUG
-               printk("hp100: %s: PCI Master Bit has not been set. Setting...\n", dev->name);
-#endif
-               pci_command |= PCI_COMMAND_MASTER;
-               pci_write_config_word(pdev, PCI_COMMAND, pci_command);
-       }
-
-       ioaddr = pci_resource_start(pdev, 0);
-       err = hp100_probe1(dev, ioaddr, HP100_BUS_PCI, pdev);
-       if (err)
-               goto out1;
-
-#ifdef HP100_DEBUG
-       printk("hp100: %s: PCI adapter found at 0x%x\n", dev->name, ioaddr);
-#endif
-       pci_set_drvdata(pdev, dev);
-       return 0;
- out1:
-       free_netdev(dev);
- out0:
-       pci_disable_device(pdev);
-       return err;
-}
-
-static void hp100_pci_remove(struct pci_dev *pdev)
-{
-       struct net_device *dev = pci_get_drvdata(pdev);
-
-       cleanup_dev(dev);
-       pci_disable_device(pdev);
-}
-
-
-static struct pci_driver hp100_pci_driver = {
-       .name           = "hp100",
-       .id_table       = hp100_pci_tbl,
-       .probe          = hp100_pci_probe,
-       .remove         = hp100_pci_remove,
-};
-
-/*
- *  module section
- */
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, "
-              "Siegfried \"Frieder\" Loeffler (dg1sek) <floeff@mathematik.uni-stuttgart.de>");
-MODULE_DESCRIPTION("HP CASCADE Architecture Driver for 100VG-AnyLan Network Adapters");
-
-/*
- * Note: to register three isa devices, use:
- * option hp100 hp100_port=0,0,0
- *        to register one card at io 0x280 as eth239, use:
- * option hp100 hp100_port=0x280
- */
-#if defined(MODULE) && defined(CONFIG_ISA)
-#define HP100_DEVICES 5
-/* Parameters set by insmod */
-static int hp100_port[HP100_DEVICES] = { 0, [1 ... (HP100_DEVICES-1)] = -1 };
-module_param_hw_array(hp100_port, int, ioport, NULL, 0);
-
-/* List of devices */
-static struct net_device *hp100_devlist[HP100_DEVICES];
-
-static int __init hp100_isa_init(void)
-{
-       struct net_device *dev;
-       int i, err, cards = 0;
-
-       /* Don't autoprobe ISA bus */
-       if (hp100_port[0] == 0)
-               return -ENODEV;
-
-       /* Loop on all possible base addresses */
-       for (i = 0; i < HP100_DEVICES && hp100_port[i] != -1; ++i) {
-               dev = alloc_etherdev(sizeof(struct hp100_private));
-               if (!dev) {
-                       while (cards > 0)
-                               cleanup_dev(hp100_devlist[--cards]);
-
-                       return -ENOMEM;
-               }
-
-               err = hp100_isa_probe(dev, hp100_port[i]);
-               if (!err)
-                       hp100_devlist[cards++] = dev;
-               else
-                       free_netdev(dev);
-       }
-
-       return cards > 0 ? 0 : -ENODEV;
-}
-
-static void hp100_isa_cleanup(void)
-{
-       int i;
-
-       for (i = 0; i < HP100_DEVICES; i++) {
-               struct net_device *dev = hp100_devlist[i];
-               if (dev)
-                       cleanup_dev(dev);
-       }
-}
-#else
-#define hp100_isa_init()       (0)
-#define hp100_isa_cleanup()    do { } while(0)
-#endif
-
-static int __init hp100_module_init(void)
-{
-       int err;
-
-       err = hp100_isa_init();
-       if (err && err != -ENODEV)
-               goto out;
-       err = eisa_driver_register(&hp100_eisa_driver);
-       if (err && err != -ENODEV)
-               goto out2;
-       err = pci_register_driver(&hp100_pci_driver);
-       if (err && err != -ENODEV)
-               goto out3;
- out:
-       return err;
- out3:
-       eisa_driver_unregister (&hp100_eisa_driver);
- out2:
-       hp100_isa_cleanup();
-       goto out;
-}
-
-
-static void __exit hp100_module_exit(void)
-{
-       hp100_isa_cleanup();
-       eisa_driver_unregister (&hp100_eisa_driver);
-       pci_unregister_driver (&hp100_pci_driver);
-}
-
-module_init(hp100_module_init)
-module_exit(hp100_module_exit)
diff --git a/drivers/net/ethernet/hp/hp100.h b/drivers/net/ethernet/hp/hp100.h
deleted file mode 100644 (file)
index 7239b94..0000000
+++ /dev/null
@@ -1,611 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * hp100.h: Hewlett Packard HP10/100VG ANY LAN ethernet driver for Linux.
- *
- * $Id: hp100.h,v 1.51 1997/04/08 14:26:42 floeff Exp floeff $
- *
- * Authors:  Jaroslav Kysela, <perex@pf.jcu.cz>
- *           Siegfried Loeffler <floeff@tunix.mathematik.uni-stuttgart.de>
- *
- * This driver is based on the 'hpfepkt' crynwr packet driver.
- */
-
-/****************************************************************************
- *  Hardware Constants
- ****************************************************************************/
-
-/*
- * Page Identifiers
- * (Swap Paging Register, PAGING, bits 3:0, Offset 0x02)
- */
-
-#define HP100_PAGE_PERFORMANCE 0x0     /* Page 0 */
-#define HP100_PAGE_MAC_ADDRESS 0x1     /* Page 1 */
-#define HP100_PAGE_HW_MAP      0x2     /* Page 2 */
-#define HP100_PAGE_EEPROM_CTRL 0x3     /* Page 3 */
-#define HP100_PAGE_MAC_CTRL    0x4     /* Page 4 */
-#define HP100_PAGE_MMU_CFG     0x5     /* Page 5 */
-#define HP100_PAGE_ID_MAC_ADDR 0x6     /* Page 6 */
-#define HP100_PAGE_MMU_POINTER 0x7     /* Page 7 */
-
-
-/* Registers that are present on all pages  */
-
-#define HP100_REG_HW_ID                0x00    /* R:  (16) Unique card ID           */
-#define HP100_REG_TRACE                0x00    /* W:  (16) Used for debug output    */
-#define HP100_REG_PAGING       0x02    /* R:  (16),15:4 Card ID             */
-                                       /* W:  (16),3:0 Switch pages         */
-#define HP100_REG_OPTION_LSW   0x04    /* RW: (16) Select card functions    */
-#define HP100_REG_OPTION_MSW   0x06    /* RW: (16) Select card functions    */
-
-/*  Page 0 - Performance  */
-
-#define HP100_REG_IRQ_STATUS   0x08    /* RW: (16) Which ints are pending   */
-#define HP100_REG_IRQ_MASK     0x0a    /* RW: (16) Select ints to allow     */
-#define HP100_REG_FRAGMENT_LEN 0x0c    /* W: (16)12:0 Current fragment len */
-/* Note: For 32 bit systems, fragment len and offset registers are available */
-/*       at offset 0x28 and 0x2c, where they can be written as 32bit values. */
-#define HP100_REG_OFFSET       0x0e    /* RW: (16)12:0 Offset to start read */
-#define HP100_REG_DATA32       0x10    /* RW: (32) I/O mode data port       */
-#define HP100_REG_DATA16       0x12    /* RW: WORDs must be read from here  */
-#define HP100_REG_TX_MEM_FREE  0x14    /* RD: (32) Amount of free Tx mem    */
-#define HP100_REG_TX_PDA_L      0x14   /* W: (32) BM: Ptr to PDL, Low Pri  */
-#define HP100_REG_TX_PDA_H      0x1c   /* W: (32) BM: Ptr to PDL, High Pri */
-#define HP100_REG_RX_PKT_CNT   0x18    /* RD: (8) Rx count of pkts on card  */
-#define HP100_REG_TX_PKT_CNT   0x19    /* RD: (8) Tx count of pkts on card  */
-#define HP100_REG_RX_PDL        0x1a   /* R: (8) BM: # rx pdl not executed */
-#define HP100_REG_TX_PDL        0x1b   /* R: (8) BM: # tx pdl not executed */
-#define HP100_REG_RX_PDA        0x18   /* W: (32) BM: Up to 31 addresses */
-                                       /*             which point to a PDL */
-#define HP100_REG_SL_EARLY      0x1c   /*    (32) Enhanced Slave Early Rx */
-#define HP100_REG_STAT_DROPPED  0x20   /* R (12) Dropped Packet Counter */
-#define HP100_REG_STAT_ERRORED  0x22   /* R (8) Errored Packet Counter */
-#define HP100_REG_STAT_ABORT    0x23   /* R (8) Abort Counter/OW Coll. Flag */
-#define HP100_REG_RX_RING       0x24   /* W (32) Slave: RX Ring Pointers */
-#define HP100_REG_32_FRAGMENT_LEN 0x28 /* W (13) Slave: Fragment Length Reg */
-#define HP100_REG_32_OFFSET     0x2c   /* W (16) Slave: Offset Register */
-
-/*  Page 1 - MAC Address/Hash Table  */
-
-#define HP100_REG_MAC_ADDR     0x08    /* RW: (8) Cards MAC address         */
-#define HP100_REG_HASH_BYTE0   0x10    /* RW: (8) Cards multicast filter    */
-
-/*  Page 2 - Hardware Mapping  */
-
-#define HP100_REG_MEM_MAP_LSW  0x08    /* RW: (16) LSW of cards mem addr    */
-#define HP100_REG_MEM_MAP_MSW  0x0a    /* RW: (16) MSW of cards mem addr    */
-#define HP100_REG_IO_MAP       0x0c    /* RW: (8) Cards I/O address         */
-#define HP100_REG_IRQ_CHANNEL  0x0d    /* RW: (8) IRQ and edge/level int    */
-#define HP100_REG_SRAM         0x0e    /* RW: (8) How much RAM on card      */
-#define HP100_REG_BM           0x0f    /* RW: (8) Controls BM functions     */
-
-/* New on Page 2 for ETR chips: */
-#define HP100_REG_MODECTRL1     0x10   /* RW: (8) Mode Control 1 */
-#define HP100_REG_MODECTRL2     0x11   /* RW: (8) Mode Control 2 */
-#define HP100_REG_PCICTRL1      0x12   /* RW: (8) PCI Cfg 1 */
-#define HP100_REG_PCICTRL2      0x13   /* RW: (8) PCI Cfg 2 */
-#define HP100_REG_PCIBUSMLAT    0x15   /* RW: (8) PCI Bus Master Latency */
-#define HP100_REG_EARLYTXCFG    0x16   /* RW: (16) Early TX Cfg/Cntrl Reg */
-#define HP100_REG_EARLYRXCFG    0x18   /* RW: (8) Early RX Cfg/Cntrl Reg */
-#define HP100_REG_ISAPNPCFG1    0x1a   /* RW: (8) ISA PnP Cfg/Cntrl Reg 1 */
-#define HP100_REG_ISAPNPCFG2    0x1b   /* RW: (8) ISA PnP Cfg/Cntrl Reg 2 */
-
-/*  Page 3 - EEPROM/Boot ROM  */
-
-#define HP100_REG_EEPROM_CTRL  0x08    /* RW: (16) Used to load EEPROM      */
-#define HP100_REG_BOOTROM_CTRL  0x0a
-
-/*  Page 4 - LAN Configuration  (MAC_CTRL) */
-
-#define HP100_REG_10_LAN_CFG_1 0x08    /* RW: (8) Set 10M XCVR functions   */
-#define HP100_REG_10_LAN_CFG_2  0x09   /* RW: (8)     10M XCVR functions   */
-#define HP100_REG_VG_LAN_CFG_1 0x0a    /* RW: (8) Set 100M XCVR functions  */
-#define HP100_REG_VG_LAN_CFG_2  0x0b   /* RW: (8) 100M LAN Training cfgregs */
-#define HP100_REG_MAC_CFG_1    0x0c    /* RW: (8) Types of pkts to accept   */
-#define HP100_REG_MAC_CFG_2    0x0d    /* RW: (8) Misc MAC functions        */
-#define HP100_REG_MAC_CFG_3     0x0e   /* RW: (8) Misc MAC functions */
-#define HP100_REG_MAC_CFG_4     0x0f   /* R:  (8) Misc MAC states */
-#define HP100_REG_DROPPED      0x10    /* R:  (16),11:0 Pkts can't fit in mem */
-#define HP100_REG_CRC          0x12    /* R:  (8) Pkts with CRC             */
-#define HP100_REG_ABORT                0x13    /* R:  (8) Aborted Tx pkts           */
-#define HP100_REG_TRAIN_REQUEST 0x14   /* RW: (16) Endnode MAC register. */
-#define HP100_REG_TRAIN_ALLOW   0x16   /* R:  (16) Hub allowed register */
-
-/*  Page 5 - MMU  */
-
-#define HP100_REG_RX_MEM_STOP  0x0c    /* RW: (16) End of Rx ring addr      */
-#define HP100_REG_TX_MEM_STOP  0x0e    /* RW: (16) End of Tx ring addr      */
-#define HP100_REG_PDL_MEM_STOP  0x10   /* Not used by 802.12 devices */
-#define HP100_REG_ECB_MEM_STOP  0x14   /* I've no idea what this is */
-
-/*  Page 6 - Card ID/Physical LAN Address  */
-
-#define HP100_REG_BOARD_ID     0x08    /* R:  (8) EISA/ISA card ID          */
-#define HP100_REG_BOARD_IO_CHCK 0x0c   /* R:  (8) Added to ID to get FFh    */
-#define HP100_REG_SOFT_MODEL   0x0d    /* R:  (8) Config program defined    */
-#define HP100_REG_LAN_ADDR     0x10    /* R:  (8) MAC addr of card          */
-#define HP100_REG_LAN_ADDR_CHCK 0x16   /* R:  (8) Added to addr to get FFh  */
-
-/*  Page 7 - MMU Current Pointers  */
-
-#define HP100_REG_PTR_RXSTART  0x08    /* R:  (16) Current begin of Rx ring */
-#define HP100_REG_PTR_RXEND    0x0a    /* R:  (16) Current end of Rx ring   */
-#define HP100_REG_PTR_TXSTART  0x0c    /* R:  (16) Current begin of Tx ring */
-#define HP100_REG_PTR_TXEND    0x0e    /* R:  (16) Current end of Rx ring   */
-#define HP100_REG_PTR_RPDLSTART 0x10
-#define HP100_REG_PTR_RPDLEND   0x12
-#define HP100_REG_PTR_RINGPTRS  0x14
-#define HP100_REG_PTR_MEMDEBUG  0x1a
-/* ------------------------------------------------------------------------ */
-
-
-/*
- * Hardware ID Register I (Always available, HW_ID, Offset 0x00)
- */
-#define HP100_HW_ID_CASCADE     0x4850 /* Identifies Cascade Chip */
-
-/*
- * Hardware ID Register 2 & Paging Register
- * (Always available, PAGING, Offset 0x02)
- * Bits 15:4 are for the Chip ID
- */
-#define HP100_CHIPID_MASK        0xFFF0
-#define HP100_CHIPID_SHASTA      0x5350        /* Not 802.12 compliant */
-                                        /* EISA BM/SL, MCA16/32 SL, ISA SL */
-#define HP100_CHIPID_RAINIER     0x5360        /* Not 802.12 compliant EISA BM, */
-                                        /* PCI SL, MCA16/32 SL, ISA SL */
-#define HP100_CHIPID_LASSEN      0x5370        /* 802.12 compliant PCI BM, PCI SL */
-                                        /* LRF supported */
-
-/*
- *  Option Registers I and II
- * (Always available, OPTION_LSW, Offset 0x04-0x05)
- */
-#define HP100_DEBUG_EN         0x8000  /* 0:Dis., 1:Enable Debug Dump Ptr. */
-#define HP100_RX_HDR           0x4000  /* 0:Dis., 1:Enable putting pkt into */
-                                       /*   system mem. before Rx interrupt */
-#define HP100_MMAP_DIS         0x2000  /* 0:Enable, 1:Disable mem.mapping. */
-                                       /*   MMAP_DIS must be 0 and MEM_EN */
-                                       /*   must be 1 for memory-mapped */
-                                       /*   mode to be enabled */
-#define HP100_EE_EN            0x1000  /* 0:Disable,1:Enable EEPROM writing */
-#define HP100_BM_WRITE         0x0800  /* 0:Slave, 1:Bus Master for Tx data */
-#define HP100_BM_READ          0x0400  /* 0:Slave, 1:Bus Master for Rx data */
-#define HP100_TRI_INT          0x0200  /* 0:Don't, 1:Do tri-state the int */
-#define HP100_MEM_EN           0x0040  /* Config program set this to */
-                                       /*   0:Disable, 1:Enable mem map. */
-                                       /*   See MMAP_DIS. */
-#define HP100_IO_EN            0x0020  /* 1:Enable I/O transfers */
-#define HP100_BOOT_EN          0x0010  /* 1:Enable boot ROM access */
-#define HP100_FAKE_INT         0x0008  /* 1:int */
-#define HP100_INT_EN           0x0004  /* 1:Enable ints from card */
-#define HP100_HW_RST           0x0002  /* 0:Reset, 1:Out of reset */
-                                       /* NIC reset on 0 to 1 transition */
-
-/*
- *  Option Register III
- * (Always available, OPTION_MSW, Offset 0x06)
- */
-#define HP100_PRIORITY_TX      0x0080  /* 1:Do all Tx pkts as priority */
-#define HP100_EE_LOAD          0x0040  /* 1:EEPROM loading, 0 when done */
-#define HP100_ADV_NXT_PKT      0x0004  /* 1:Advance to next pkt in Rx queue */
-                                       /*   h/w will set to 0 when done */
-#define HP100_TX_CMD           0x0002  /* 1:Tell h/w download done, h/w */
-                                       /*   will set to 0 when done */
-
-/*
- * Interrupt Status Registers I and II
- * (Page PERFORMANCE, IRQ_STATUS, Offset 0x08-0x09)
- * Note: With old chips, these Registers will clear when 1 is written to them
- *       with new chips this depends on setting of CLR_ISMODE
- */
-#define HP100_RX_EARLY_INT      0x2000
-#define HP100_RX_PDA_ZERO       0x1000
-#define HP100_RX_PDL_FILL_COMPL 0x0800
-#define HP100_RX_PACKET                0x0400  /* 0:No, 1:Yes pkt has been Rx */
-#define HP100_RX_ERROR         0x0200  /* 0:No, 1:Yes Rx pkt had error */
-#define HP100_TX_PDA_ZERO       0x0020 /* 1 when PDA count goes to zero */
-#define HP100_TX_SPACE_AVAIL   0x0010  /* 0:<8192, 1:>=8192 Tx free bytes */
-#define HP100_TX_COMPLETE      0x0008  /* 0:No, 1:Yes a Tx has completed */
-#define HP100_MISC_ERROR        0x0004 /* 0:No, 1:Lan Link down or bus error */
-#define HP100_TX_ERROR         0x0002  /* 0:No, 1:Yes Tx pkt had error */
-
-/*
- * Xmit Memory Free Count
- * (Page PERFORMANCE, TX_MEM_FREE, Offset 0x14) (Read only, 32bit)
- */
-#define HP100_AUTO_COMPARE     0x80000000      /* Tx Space avail & pkts<255 */
-#define HP100_FREE_SPACE       0x7fffffe0      /* Tx free memory */
-
-/*
- *  IRQ Channel
- * (Page HW_MAP, IRQ_CHANNEL, Offset 0x0d)
- */
-#define HP100_ZERO_WAIT_EN     0x80    /* 0:No, 1:Yes asserts NOWS signal */
-#define HP100_IRQ_SCRAMBLE      0x40
-#define HP100_BOND_HP           0x20
-#define HP100_LEVEL_IRQ                0x10    /* 0:Edge, 1:Level type interrupts. */
-                                       /* (Only valid on EISA cards) */
-#define HP100_IRQMASK          0x0F    /* Isolate the IRQ bits */
-
-/*
- * SRAM Parameters
- * (Page HW_MAP, SRAM, Offset 0x0e)
- */
-#define HP100_RAM_SIZE_MASK    0xe0    /* AND to get SRAM size index */
-#define HP100_RAM_SIZE_SHIFT   0x05    /* Shift count(put index in lwr bits) */
-
-/*
- * Bus Master Register
- * (Page HW_MAP, BM, Offset 0x0f)
- */
-#define HP100_BM_BURST_RD       0x01   /* EISA only: 1=Use burst trans. fm system */
-                                       /* memory to chip (tx) */
-#define HP100_BM_BURST_WR       0x02   /* EISA only: 1=Use burst trans. fm system */
-                                       /* memory to chip (rx) */
-#define HP100_BM_MASTER                0x04    /* 0:Slave, 1:BM mode */
-#define HP100_BM_PAGE_CK        0x08   /* This bit should be set whenever in */
-                                       /* an EISA system */
-#define HP100_BM_PCI_8CLK       0x40   /* ... cycles 8 clocks apart */
-
-
-/*
- * Mode Control Register I
- * (Page HW_MAP, MODECTRL1, Offset0x10)
- */
-#define HP100_TX_DUALQ          0x10
-   /* If set and BM -> dual tx pda queues */
-#define HP100_ISR_CLRMODE       0x02   /* If set ISR will clear all pending */
-                                      /* interrupts on read (etr only?) */
-#define HP100_EE_NOLOAD         0x04   /* Status whether res will be loaded */
-                                      /* from the eeprom */
-#define HP100_TX_CNT_FLG        0x08   /* Controls Early TX Reg Cnt Field */
-#define HP100_PDL_USE3          0x10   /* If set BM engine will read only */
-                                      /* first three data elements of a PDL */
-                                      /* on the first access. */
-#define HP100_BUSTYPE_MASK      0xe0   /* Three bit bus type info */
-
-/*
- * Mode Control Register II
- * (Page HW_MAP, MODECTRL2, Offset0x11)
- */
-#define HP100_EE_MASK           0x0f   /* Tell EEPROM circuit not to load */
-                                      /* certain resources */
-#define HP100_DIS_CANCEL        0x20   /* For tx dualq mode operation */
-#define HP100_EN_PDL_WB         0x40   /* 1: Status of PDL completion may be */
-                                      /* written back to system mem */
-#define HP100_EN_BUS_FAIL       0x80   /* Enables bus-fail portion of misc */
-                                      /* interrupt */
-
-/*
- * PCI Configuration and Control Register I
- * (Page HW_MAP, PCICTRL1, Offset 0x12)
- */
-#define HP100_LO_MEM            0x01   /* 1: Mapped Mem requested below 1MB */
-#define HP100_NO_MEM            0x02   /* 1: Disables Req for sysmem to PCI */
-                                      /* bios */
-#define HP100_USE_ISA           0x04   /* 1: isa type decodes will occur */
-                                      /* simultaneously with PCI decodes */
-#define HP100_IRQ_HI_MASK       0xf0   /* pgmed by pci bios */
-#define HP100_PCI_IRQ_HI_MASK   0x78   /* Isolate 4 bits for PCI IRQ  */
-
-/*
- * PCI Configuration and Control Register II
- * (Page HW_MAP, PCICTRL2, Offset 0x13)
- */
-#define HP100_RD_LINE_PDL       0x01   /* 1: PCI command Memory Read Line en */
-#define HP100_RD_TX_DATA_MASK   0x06   /* choose PCI memread cmds for TX */
-#define HP100_MWI               0x08   /* 1: en. PCI memory write invalidate */
-#define HP100_ARB_MODE          0x10   /* Select PCI arbitor type */
-#define HP100_STOP_EN           0x20   /* Enables PCI state machine to issue */
-                                      /* pci stop if cascade not ready */
-#define HP100_IGNORE_PAR        0x40   /* 1: PCI state machine ignores parity */
-#define HP100_PCI_RESET         0x80   /* 0->1: Reset PCI block */
-
-/*
- * Early TX Configuration and Control Register
- * (Page HW_MAP, EARLYTXCFG, Offset 0x16)
- */
-#define HP100_EN_EARLY_TX       0x8000 /* 1=Enable Early TX */
-#define HP100_EN_ADAPTIVE       0x4000 /* 1=Enable adaptive mode */
-#define HP100_EN_TX_UR_IRQ      0x2000 /* reserved, must be 0 */
-#define HP100_EN_LOW_TX         0x1000 /* reserved, must be 0 */
-#define HP100_ET_CNT_MASK       0x0fff /* bits 11..0: ET counters */
-
-/*
- * Early RX Configuration and Control Register
- * (Page HW_MAP, EARLYRXCFG, Offset 0x18)
- */
-#define HP100_EN_EARLY_RX       0x80   /* 1=Enable Early RX */
-#define HP100_EN_LOW_RX         0x40   /* reserved, must be 0 */
-#define HP100_RX_TRIP_MASK      0x1f   /* bits 4..0: threshold at which the
-                                        * early rx circuit will start the
-                                        * dma of received packet into system
-                                        * memory for BM */
-
-/*
- *  Serial Devices Control Register
- * (Page EEPROM_CTRL, EEPROM_CTRL, Offset 0x08)
- */
-#define HP100_EEPROM_LOAD      0x0001  /* 0->1 loads EEPROM into registers. */
-                                       /* When it goes back to 0, load is   */
-                                       /* complete. This should take ~600us. */
-
-/*
- * 10MB LAN Control and Configuration Register I
- * (Page MAC_CTRL, 10_LAN_CFG_1, Offset 0x08)
- */
-#define HP100_MAC10_SEL                0xc0    /* Get bits to indicate MAC */
-#define HP100_AUI_SEL          0x20    /* Status of AUI selection */
-#define HP100_LOW_TH           0x10    /* 0:No, 1:Yes allow better cabling */
-#define HP100_LINK_BEAT_DIS    0x08    /* 0:Enable, 1:Disable link beat */
-#define HP100_LINK_BEAT_ST     0x04    /* 0:No, 1:Yes link beat being Rx */
-#define HP100_R_ROL_ST         0x02    /* 0:No, 1:Yes Rx twisted pair has */
-                                       /*             been reversed */
-#define HP100_AUI_ST           0x01    /* 0:No, 1:Yes use AUI on TP card */
-
-/*
- * 10 MB LAN Control and Configuration Register II
- * (Page MAC_CTRL, 10_LAN_CFG_2, Offset 0x09)
- */
-#define HP100_SQU_ST           0x01    /* 0:No, 1:Yes collision signal sent */
-                                       /*       after Tx.Only used for AUI. */
-#define HP100_FULLDUP           0x02   /* 1: LXT901 XCVR fullduplx enabled */
-#define HP100_DOT3_MAC          0x04   /* 1: DOT 3 Mac sel. unless Autosel */
-
-/*
- * MAC Selection, use with MAC10_SEL bits
- */
-#define HP100_AUTO_SEL_10      0x0     /* Auto select */
-#define HP100_XCVR_LXT901_10   0x1     /* LXT901 10BaseT transceiver */
-#define HP100_XCVR_7213                0x2     /* 7213 transceiver */
-#define HP100_XCVR_82503       0x3     /* 82503 transceiver */
-
-/*
- *  100MB LAN Training Register
- * (Page MAC_CTRL, VG_LAN_CFG_2, Offset 0x0b) (old, pre 802.12)
- */
-#define HP100_FRAME_FORMAT     0x08    /* 0:802.3, 1:802.5 frames */
-#define HP100_BRIDGE           0x04    /* 0:No, 1:Yes tell hub i am a bridge */
-#define HP100_PROM_MODE                0x02    /* 0:No, 1:Yes tell hub card is */
-                                       /*         promiscuous */
-#define HP100_REPEATER         0x01    /* 0:No, 1:Yes tell hub MAC wants to */
-                                       /*         be a cascaded repeater */
-
-/*
- * 100MB LAN Control and Configuration Register
- * (Page MAC_CTRL, VG_LAN_CFG_1, Offset 0x0a)
- */
-#define HP100_VG_SEL           0x80    /* 0:No, 1:Yes use 100 Mbit MAC */
-#define HP100_LINK_UP_ST       0x40    /* 0:No, 1:Yes endnode logged in */
-#define HP100_LINK_CABLE_ST    0x20    /* 0:No, 1:Yes cable can hear tones */
-                                       /*         from  hub */
-#define HP100_LOAD_ADDR                0x10    /* 0->1 card addr will be sent  */
-                                       /* 100ms later the link status  */
-                                       /* bits are valid */
-#define HP100_LINK_CMD         0x08    /* 0->1 link will attempt to log in. */
-                                       /* 100ms later the link status */
-                                       /* bits are valid */
-#define HP100_TRN_DONE          0x04   /* NEW ETR-Chips only: Will be reset */
-                                       /* after LinkUp Cmd is given and set */
-                                       /* when training has completed. */
-#define HP100_LINK_GOOD_ST     0x02    /* 0:No, 1:Yes cable passed training */
-#define HP100_VG_RESET         0x01    /* 0:Yes, 1:No reset the 100VG MAC */
-
-
-/*
- *  MAC Configuration Register I
- * (Page MAC_CTRL, MAC_CFG_1, Offset 0x0c)
- */
-#define HP100_RX_IDLE          0x80    /* 0:Yes, 1:No currently receiving pkts */
-#define HP100_TX_IDLE          0x40    /* 0:Yes, 1:No currently Txing pkts */
-#define HP100_RX_EN            0x20    /* 1: allow receiving of pkts */
-#define HP100_TX_EN            0x10    /* 1: allow transmitting of pkts */
-#define HP100_ACC_ERRORED      0x08    /* 0:No, 1:Yes allow Rx of errored pkts */
-#define HP100_ACC_MC           0x04    /* 0:No, 1:Yes allow Rx of multicast pkts */
-#define HP100_ACC_BC           0x02    /* 0:No, 1:Yes allow Rx of broadcast pkts */
-#define HP100_ACC_PHY          0x01    /* 0:No, 1:Yes allow Rx of ALL phys. pkts */
-#define HP100_MAC1MODEMASK     0xf0    /* Hide ACC bits */
-#define HP100_MAC1MODE1                0x00    /* Receive nothing, must also disable RX */
-#define HP100_MAC1MODE2                0x00
-#define HP100_MAC1MODE3                HP100_MAC1MODE2 | HP100_ACC_BC
-#define HP100_MAC1MODE4                HP100_MAC1MODE3 | HP100_ACC_MC
-#define HP100_MAC1MODE5                HP100_MAC1MODE4 /* set mc hash to all ones also */
-#define HP100_MAC1MODE6                HP100_MAC1MODE5 | HP100_ACC_PHY /* Promiscuous */
-/* Note MODE6 will receive all GOOD packets on the LAN. This really needs
-   a mode 7 defined to be LAN Analyzer mode, which will receive errored and
-   runt packets, and keep the CRC bytes. */
-#define HP100_MAC1MODE7                HP100_MAC1MODE6 | HP100_ACC_ERRORED
-
-/*
- *  MAC Configuration Register II
- * (Page MAC_CTRL, MAC_CFG_2, Offset 0x0d)
- */
-#define HP100_TR_MODE          0x80    /* 0:No, 1:Yes support Token Ring formats */
-#define HP100_TX_SAME          0x40    /* 0:No, 1:Yes Tx same packet continuous */
-#define HP100_LBK_XCVR         0x20    /* 0:No, 1:Yes loopback through MAC & */
-                                       /*   transceiver */
-#define HP100_LBK_MAC          0x10    /* 0:No, 1:Yes loopback through MAC */
-#define HP100_CRC_I            0x08    /* 0:No, 1:Yes inhibit CRC on Tx packets */
-#define HP100_ACCNA             0x04   /* 1: For 802.5: Accept only token ring
-                                        * group addr that maches NA mask */
-#define HP100_KEEP_CRC         0x02    /* 0:No, 1:Yes keep CRC on Rx packets. */
-                                       /*   The length will reflect this. */
-#define HP100_ACCFA             0x01   /* 1: For 802.5: Accept only functional
-                                        * addrs that match FA mask (page1) */
-#define HP100_MAC2MODEMASK     0x02
-#define HP100_MAC2MODE1                0x00
-#define HP100_MAC2MODE2                0x00
-#define HP100_MAC2MODE3                0x00
-#define HP100_MAC2MODE4                0x00
-#define HP100_MAC2MODE5                0x00
-#define HP100_MAC2MODE6                0x00
-#define HP100_MAC2MODE7                KEEP_CRC
-
-/*
- * MAC Configuration Register III
- * (Page MAC_CTRL, MAC_CFG_3, Offset 0x0e)
- */
-#define HP100_PACKET_PACE       0x03   /* Packet Pacing:
-                                        * 00: No packet pacing
-                                        * 01: 8 to 16 uS delay
-                                        * 10: 16 to 32 uS delay
-                                        * 11: 32 to 64 uS delay
-                                        */
-#define HP100_LRF_EN            0x04   /* 1: External LAN Rcv Filter and
-                                        * TCP/IP Checksumming enabled. */
-#define HP100_AUTO_MODE         0x10   /* 1: AutoSelect between 10/100 */
-
-/*
- * MAC Configuration Register IV
- * (Page MAC_CTRL, MAC_CFG_4, Offset 0x0f)
- */
-#define HP100_MAC_SEL_ST        0x01   /* (R): Status of external VGSEL
-                                        * Signal, 1=100VG, 0=10Mbit sel. */
-#define HP100_LINK_FAIL_ST      0x02   /* (R): Status of Link Fail portion
-                                        * of the Misc. Interrupt */
-
-/*
- *  100 MB LAN Training Request/Allowed Registers
- * (Page MAC_CTRL, TRAIN_REQUEST and TRAIN_ALLOW, Offset 0x14-0x16)(ETR parts only)
- */
-#define HP100_MACRQ_REPEATER         0x0001    /* 1: MAC tells HUB it wants to be
-                                                *    a cascaded repeater
-                                                * 0: ... wants to be a DTE */
-#define HP100_MACRQ_PROMSC           0x0006    /* 2 bits: Promiscious mode
-                                                * 00: Rcv only unicast packets
-                                                *     specifically addr to this
-                                                *     endnode
-                                                * 10: Rcv all pckts fwded by
-                                                *     the local repeater */
-#define HP100_MACRQ_FRAMEFMT_EITHER  0x0018    /* 11: either format allowed */
-#define HP100_MACRQ_FRAMEFMT_802_3   0x0000    /* 00: 802.3 is requested */
-#define HP100_MACRQ_FRAMEFMT_802_5   0x0010    /* 10: 802.5 format is requested */
-#define HP100_CARD_MACVER            0xe000    /* R: 3 bit Cards 100VG MAC version */
-#define HP100_MALLOW_REPEATER        0x0001    /* If reset, requested access as an
-                                                * end node is allowed */
-#define HP100_MALLOW_PROMSC          0x0004    /* 2 bits: Promiscious mode
-                                                * 00: Rcv only unicast packets
-                                                *     specifically addr to this
-                                                *     endnode
-                                                * 10: Rcv all pckts fwded by
-                                                *     the local repeater */
-#define HP100_MALLOW_FRAMEFMT        0x00e0    /* 2 bits: Frame Format
-                                                * 00: 802.3 format will be used
-                                                * 10: 802.5 format will be used */
-#define HP100_MALLOW_ACCDENIED       0x0400    /* N bit */
-#define HP100_MALLOW_CONFIGURE       0x0f00    /* C bit */
-#define HP100_MALLOW_DUPADDR         0x1000    /* D bit */
-#define HP100_HUB_MACVER             0xe000    /* R: 3 bit 802.12 MAC/RMAC training */
-                                            /*    protocol of repeater */
-
-/* ****************************************************************************** */
-
-/*
- *  Set/Reset bits
- */
-#define HP100_SET_HB           0x0100  /* 0:Set fields to 0 whose mask is 1 */
-#define HP100_SET_LB           0x0001  /* HB sets upper byte, LB sets lower byte */
-#define HP100_RESET_HB         0x0000  /* For readability when resetting bits */
-#define HP100_RESET_LB         0x0000  /* For readability when resetting bits */
-
-/*
- *  Misc. Constants
- */
-#define HP100_LAN_100          100     /* lan_type value for VG */
-#define HP100_LAN_10           10      /* lan_type value for 10BaseT */
-#define HP100_LAN_COAX         9       /* lan_type value for Coax */
-#define HP100_LAN_ERR          (-1)    /* lan_type value for link down */
-
-/*
- * Bus Master Data Structures  ----------------------------------------------
- */
-
-#define MAX_RX_PDL              30     /* Card limit = 31 */
-#define MAX_RX_FRAG             2      /* Don't need more... */
-#define MAX_TX_PDL              29
-#define MAX_TX_FRAG             2      /* Limit = 31 */
-
-/* Define total PDL area size in bytes (should be 4096) */
-/* This is the size of kernel (dma) memory that will be allocated. */
-#define MAX_RINGSIZE ((MAX_RX_FRAG*8+4+4)*MAX_RX_PDL+(MAX_TX_FRAG*8+4+4)*MAX_TX_PDL)+16
-
-/* Ethernet Packet Sizes */
-#define MIN_ETHER_SIZE          60
-#define MAX_ETHER_SIZE          1514   /* Needed for preallocation of */
-                                       /* skb buffer when busmastering */
-
-/* Tx or Rx Ring Entry */
-typedef struct hp100_ring {
-       u_int *pdl;             /* Address of PDLs PDH, dword before
-                                * this address is used for rx hdr */
-       u_int pdl_paddr;        /* Physical address of PDL */
-       struct sk_buff *skb;
-       struct hp100_ring *next;
-} hp100_ring_t;
-
-
-
-/* Mask for Header Descriptor */
-#define HP100_PKT_LEN_MASK     0x1FFF  /* AND with RxLength to get length */
-
-
-/* Receive Packet Status.  Note, the error bits are only valid if ACC_ERRORED
-   bit in the MAC Configuration Register 1 is set. */
-#define HP100_RX_PRI           0x8000  /* 0:No, 1:Yes packet is priority */
-#define HP100_SDF_ERR          0x4000  /* 0:No, 1:Yes start of frame error */
-#define HP100_SKEW_ERR         0x2000  /* 0:No, 1:Yes skew out of range */
-#define HP100_BAD_SYMBOL_ERR   0x1000  /* 0:No, 1:Yes invalid symbol received */
-#define HP100_RCV_IPM_ERR      0x0800  /* 0:No, 1:Yes pkt had an invalid packet */
-                                       /*   marker */
-#define HP100_SYMBOL_BAL_ERR   0x0400  /* 0:No, 1:Yes symbol balance error */
-#define HP100_VG_ALN_ERR       0x0200  /* 0:No, 1:Yes non-octet received */
-#define HP100_TRUNC_ERR                0x0100  /* 0:No, 1:Yes the packet was truncated */
-#define HP100_RUNT_ERR         0x0040  /* 0:No, 1:Yes pkt length < Min Pkt */
-                                       /*   Length Reg. */
-#define HP100_ALN_ERR          0x0010  /* 0:No, 1:Yes align error. */
-#define HP100_CRC_ERR          0x0008  /* 0:No, 1:Yes CRC occurred. */
-
-/* The last three bits indicate the type of destination address */
-
-#define HP100_MULTI_ADDR_HASH  0x0006  /* 110: Addr multicast, matched hash */
-#define HP100_BROADCAST_ADDR   0x0003  /* x11: Addr broadcast */
-#define HP100_MULTI_ADDR_NO_HASH 0x0002        /* 010: Addr multicast, didn't match hash */
-#define HP100_PHYS_ADDR_MATCH  0x0001  /* x01: Addr was physical and mine */
-#define HP100_PHYS_ADDR_NO_MATCH 0x0000        /* x00: Addr was physical but not mine */
-
-/*
- *  macros
- */
-
-#define hp100_inb( reg ) \
-        inb( ioaddr + HP100_REG_##reg )
-#define hp100_inw( reg ) \
-       inw( ioaddr + HP100_REG_##reg )
-#define hp100_inl( reg ) \
-       inl( ioaddr + HP100_REG_##reg )
-#define hp100_outb( data, reg ) \
-       outb( data, ioaddr + HP100_REG_##reg )
-#define hp100_outw( data, reg ) \
-       outw( data, ioaddr + HP100_REG_##reg )
-#define hp100_outl( data, reg ) \
-       outl( data, ioaddr + HP100_REG_##reg )
-#define hp100_orb( data, reg ) \
-       outb( inb( ioaddr + HP100_REG_##reg ) | (data), ioaddr + HP100_REG_##reg )
-#define hp100_orw( data, reg ) \
-       outw( inw( ioaddr + HP100_REG_##reg ) | (data), ioaddr + HP100_REG_##reg )
-#define hp100_andb( data, reg ) \
-       outb( inb( ioaddr + HP100_REG_##reg ) & (data), ioaddr + HP100_REG_##reg )
-#define hp100_andw( data, reg ) \
-       outw( inw( ioaddr + HP100_REG_##reg ) & (data), ioaddr + HP100_REG_##reg )
-
-#define hp100_page( page ) \
-       outw( HP100_PAGE_##page, ioaddr + HP100_REG_PAGING )
-#define hp100_ints_off() \
-       outw( HP100_INT_EN | HP100_RESET_LB, ioaddr + HP100_REG_OPTION_LSW )
-#define hp100_ints_on() \
-       outw( HP100_INT_EN | HP100_SET_LB, ioaddr + HP100_REG_OPTION_LSW )
-#define hp100_mem_map_enable() \
-       outw( HP100_MMAP_DIS | HP100_RESET_HB, ioaddr + HP100_REG_OPTION_LSW )
-#define hp100_mem_map_disable() \
-       outw( HP100_MMAP_DIS | HP100_SET_HB, ioaddr + HP100_REG_OPTION_LSW )
index 9e43c9a..2e40425 100644 (file)
@@ -2849,6 +2849,7 @@ static int emac_init_config(struct emac_instance *dev)
 {
        struct device_node *np = dev->ofdev->dev.of_node;
        const void *p;
+       int err;
 
        /* Read config from device-tree */
        if (emac_read_uint_prop(np, "mal-device", &dev->mal_ph, 1))
@@ -2897,8 +2898,8 @@ static int emac_init_config(struct emac_instance *dev)
                dev->mal_burst_size = 256;
 
        /* PHY mode needs some decoding */
-       dev->phy_mode = of_get_phy_mode(np);
-       if (dev->phy_mode < 0)
+       err = of_get_phy_mode(np, &dev->phy_mode);
+       if (err)
                dev->phy_mode = PHY_INTERFACE_MODE_NA;
 
        /* Check EMAC version */
index e9cda02..89a1b0f 100644 (file)
@@ -171,7 +171,7 @@ struct emac_instance {
        struct mal_commac               commac;
 
        /* PHY infos */
-       int                             phy_mode;
+       phy_interface_t                 phy_mode;
        u32                             phy_map;
        u32                             phy_address;
        u32                             phy_feat_exc;
index b9e821d..57a25c7 100644 (file)
@@ -78,7 +78,8 @@ static inline u32 zmii_mode_mask(int mode, int input)
        }
 }
 
-int zmii_attach(struct platform_device *ofdev, int input, int *mode)
+int zmii_attach(struct platform_device *ofdev, int input,
+               phy_interface_t *mode)
 {
        struct zmii_instance *dev = platform_get_drvdata(ofdev);
        struct zmii_regs __iomem *p = dev->base;
index 41d46e9..65daedc 100644 (file)
@@ -50,7 +50,8 @@ struct zmii_instance {
 
 int zmii_init(void);
 void zmii_exit(void);
-int zmii_attach(struct platform_device *ofdev, int input, int *mode);
+int zmii_attach(struct platform_device *ofdev, int input,
+               phy_interface_t *mode);
 void zmii_detach(struct platform_device *ofdev, int input);
 void zmii_get_mdio(struct platform_device *ofdev, int input);
 void zmii_put_mdio(struct platform_device *ofdev, int input);
index de8c581..adce7e3 100644 (file)
@@ -894,8 +894,9 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
        case e1000_pch2lan:
        case e1000_pch_lpt:
        case e1000_pch_spt:
-               /* fall through */
        case e1000_pch_cnp:
+               /* fall through */
+       case e1000_pch_tgp:
                mask |= BIT(18);
                break;
        default:
@@ -1559,6 +1560,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
        switch (hw->mac.type) {
        case e1000_pch_spt:
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
                fext_nvm11 = er32(FEXTNVM11);
                fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX;
                ew32(FEXTNVM11, fext_nvm11);
index eff75bd..f556163 100644 (file)
@@ -86,6 +86,17 @@ struct e1000_hw;
 #define E1000_DEV_ID_PCH_ICP_I219_V8           0x15E0
 #define E1000_DEV_ID_PCH_ICP_I219_LM9          0x15E1
 #define E1000_DEV_ID_PCH_ICP_I219_V9           0x15E2
+#define E1000_DEV_ID_PCH_CMP_I219_LM10         0x0D4E
+#define E1000_DEV_ID_PCH_CMP_I219_V10          0x0D4F
+#define E1000_DEV_ID_PCH_CMP_I219_LM11         0x0D4C
+#define E1000_DEV_ID_PCH_CMP_I219_V11          0x0D4D
+#define E1000_DEV_ID_PCH_CMP_I219_LM12         0x0D53
+#define E1000_DEV_ID_PCH_CMP_I219_V12          0x0D55
+#define E1000_DEV_ID_PCH_TGP_I219_LM13         0x15FB
+#define E1000_DEV_ID_PCH_TGP_I219_V13          0x15FC
+#define E1000_DEV_ID_PCH_TGP_I219_LM14         0x15F9
+#define E1000_DEV_ID_PCH_TGP_I219_V14          0x15FA
+#define E1000_DEV_ID_PCH_TGP_I219_LM15         0x15F4
 
 #define E1000_REVISION_4       4
 
@@ -109,6 +120,7 @@ enum e1000_mac_type {
        e1000_pch_lpt,
        e1000_pch_spt,
        e1000_pch_cnp,
+       e1000_pch_tgp,
 };
 
 enum e1000_media_type {
index a1fab77..b4135c5 100644 (file)
@@ -316,6 +316,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
        case e1000_pch_lpt:
        case e1000_pch_spt:
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
                if (e1000_phy_is_accessible_pchlan(hw))
                        break;
 
@@ -458,6 +459,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_pch_cnp:
+               case e1000_pch_tgp:
                        /* In case the PHY needs to be in mdio slow mode,
                         * set slow mode and try to get the PHY id again.
                         */
@@ -700,6 +702,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
        case e1000_pch_lpt:
        case e1000_pch_spt:
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
        case e1000_pchlan:
                /* check management mode */
                mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan;
@@ -1638,6 +1641,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
        case e1000_pch_lpt:
        case e1000_pch_spt:
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
                rc = e1000_init_phy_params_pchlan(hw);
                break;
        default:
@@ -2090,6 +2094,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
        case e1000_pch_lpt:
        case e1000_pch_spt:
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
                sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M;
                break;
        default:
@@ -3127,6 +3132,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
        switch (hw->mac.type) {
        case e1000_pch_spt:
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
                bank1_offset = nvm->flash_bank_size;
                act_offset = E1000_ICH_NVM_SIG_WORD;
 
@@ -4070,6 +4076,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
        case e1000_pch_lpt:
        case e1000_pch_spt:
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
                word = NVM_COMPAT;
                valid_csum_mask = NVM_COMPAT_VALID_CSUM;
                break;
index d7d56e4..032b886 100644 (file)
@@ -3538,6 +3538,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
                adapter->cc.shift = shift;
                break;
        case e1000_pch_cnp:
+       case e1000_pch_tgp:
                if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) {
                        /* Stable 24MHz frequency */
                        incperiod = INCPERIOD_24MHZ;
@@ -4049,6 +4050,8 @@ void e1000e_reset(struct e1000_adapter *adapter)
        case e1000_pch_lpt:
        case e1000_pch_spt:
        case e1000_pch_cnp:
+               /* fall-through */
+       case e1000_pch_tgp:
                fc->refresh_time = 0xFFFF;
                fc->pause_time = 0xFFFF;
 
@@ -4715,12 +4718,12 @@ int e1000e_close(struct net_device *netdev)
 
        pm_runtime_get_sync(&pdev->dev);
 
-       if (!test_bit(__E1000_DOWN, &adapter->state)) {
+       if (netif_device_present(netdev)) {
                e1000e_down(adapter, true);
                e1000_free_irq(adapter);
 
                /* Link status message must follow this format */
-               pr_info("%s NIC Link is Down\n", adapter->netdev->name);
+               pr_info("%s NIC Link is Down\n", netdev->name);
        }
 
        napi_disable(&adapter->napi);
@@ -6294,14 +6297,188 @@ fl_out:
        pm_runtime_put_sync(netdev->dev.parent);
 }
 
+#ifdef CONFIG_PM_SLEEP
+/* S0ix implementation */
+static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 mac_data;
+       u16 phy_data;
+
+       /* Disable the periodic inband message,
+        * don't request PCIe clock in K1 page770_17[10:9] = 10b
+        */
+       e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+       phy_data &= ~HV_PM_CTRL_K1_CLK_REQ;
+       phy_data |= BIT(10);
+       e1e_wphy(hw, HV_PM_CTRL, phy_data);
+
+       /* Make sure we don't exit K1 every time a new packet arrives
+        * 772_29[5] = 1 CS_Mode_Stay_In_K1
+        */
+       e1e_rphy(hw, I217_CGFREG, &phy_data);
+       phy_data |= BIT(5);
+       e1e_wphy(hw, I217_CGFREG, phy_data);
+
+       /* Change the MAC/PHY interface to SMBus
+        * Force the SMBus in PHY page769_23[0] = 1
+        * Force the SMBus in MAC CTRL_EXT[11] = 1
+        */
+       e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+       phy_data |= CV_SMB_CTRL_FORCE_SMBUS;
+       e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+       mac_data = er32(CTRL_EXT);
+       mac_data |= E1000_CTRL_EXT_FORCE_SMBUS;
+       ew32(CTRL_EXT, mac_data);
+
+       /* DFT control: PHY bit: page769_20[0] = 1
+        * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1
+        */
+       e1e_rphy(hw, I82579_DFT_CTRL, &phy_data);
+       phy_data |= BIT(0);
+       e1e_wphy(hw, I82579_DFT_CTRL, phy_data);
+
+       mac_data = er32(EXTCNF_CTRL);
+       mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+       ew32(EXTCNF_CTRL, mac_data);
+
+       /* Check MAC Tx/Rx packet buffer pointers.
+        * Reset MAC Tx/Rx packet buffer pointers to suppress any
+        * pending traffic indication that would prevent power gating.
+        */
+       mac_data = er32(TDFH);
+       if (mac_data)
+               ew32(TDFH, 0);
+       mac_data = er32(TDFT);
+       if (mac_data)
+               ew32(TDFT, 0);
+       mac_data = er32(TDFHS);
+       if (mac_data)
+               ew32(TDFHS, 0);
+       mac_data = er32(TDFTS);
+       if (mac_data)
+               ew32(TDFTS, 0);
+       mac_data = er32(TDFPC);
+       if (mac_data)
+               ew32(TDFPC, 0);
+       mac_data = er32(RDFH);
+       if (mac_data)
+               ew32(RDFH, 0);
+       mac_data = er32(RDFT);
+       if (mac_data)
+               ew32(RDFT, 0);
+       mac_data = er32(RDFHS);
+       if (mac_data)
+               ew32(RDFHS, 0);
+       mac_data = er32(RDFTS);
+       if (mac_data)
+               ew32(RDFTS, 0);
+       mac_data = er32(RDFPC);
+       if (mac_data)
+               ew32(RDFPC, 0);
+
+       /* Enable the Dynamic Power Gating in the MAC */
+       mac_data = er32(FEXTNVM7);
+       mac_data |= BIT(22);
+       ew32(FEXTNVM7, mac_data);
+
+       /* Disable the time synchronization clock */
+       mac_data = er32(FEXTNVM7);
+       mac_data |= BIT(31);
+       mac_data &= ~BIT(0);
+       ew32(FEXTNVM7, mac_data);
+
+       /* Dynamic Power Gating Enable */
+       mac_data = er32(CTRL_EXT);
+       mac_data |= BIT(3);
+       ew32(CTRL_EXT, mac_data);
+
+       /* Enable the Dynamic Clock Gating in the DMA and MAC */
+       mac_data = er32(CTRL_EXT);
+       mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
+       ew32(CTRL_EXT, mac_data);
+
+       /* No MAC DPG gating SLP_S0 in modern standby
+        * Switch the logic of the lanphypc to use PMC counter
+        */
+       mac_data = er32(FEXTNVM5);
+       mac_data |= BIT(7);
+       ew32(FEXTNVM5, mac_data);
+}
+
+static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 mac_data;
+       u16 phy_data;
+
+       /* Disable the Dynamic Power Gating in the MAC */
+       mac_data = er32(FEXTNVM7);
+       mac_data &= 0xFFBFFFFF;
+       ew32(FEXTNVM7, mac_data);
+
+       /* Enable the time synchronization clock */
+       mac_data = er32(FEXTNVM7);
+       mac_data |= BIT(0);
+       ew32(FEXTNVM7, mac_data);
+
+       /* Disable Dynamic Power Gating */
+       mac_data = er32(CTRL_EXT);
+       mac_data &= 0xFFFFFFF7;
+       ew32(CTRL_EXT, mac_data);
+
+       /* Disable the Dynamic Clock Gating in the DMA and MAC */
+       mac_data = er32(CTRL_EXT);
+       mac_data &= 0xFFF7FFFF;
+       ew32(CTRL_EXT, mac_data);
+
+       /* Revert the lanphypc logic to use the internal Gbe counter
+        * and not the PMC counter
+        */
+       mac_data = er32(FEXTNVM5);
+       mac_data &= 0xFFFFFF7F;
+       ew32(FEXTNVM5, mac_data);
+
+       /* Enable the periodic inband message,
+        * Request PCIe clock in K1 page770_17[10:9] =01b
+        */
+       e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+       phy_data &= 0xFBFF;
+       phy_data |= HV_PM_CTRL_K1_CLK_REQ;
+       e1e_wphy(hw, HV_PM_CTRL, phy_data);
+
+       /* Return back configuration
+        * 772_29[5] = 0 CS_Mode_Stay_In_K1
+        */
+       e1e_rphy(hw, I217_CGFREG, &phy_data);
+       phy_data &= 0xFFDF;
+       e1e_wphy(hw, I217_CGFREG, phy_data);
+
+       /* Change the MAC/PHY interface to Kumeran
+        * Unforce the SMBus in PHY page769_23[0] = 0
+        * Unforce the SMBus in MAC CTRL_EXT[11] = 0
+        */
+       e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+       phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS;
+       e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+       mac_data = er32(CTRL_EXT);
+       mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+       ew32(CTRL_EXT, mac_data);
+}
+#endif /* CONFIG_PM_SLEEP */
+
 static int e1000e_pm_freeze(struct device *dev)
 {
        struct net_device *netdev = dev_get_drvdata(dev);
        struct e1000_adapter *adapter = netdev_priv(netdev);
+       bool present;
+
+       rtnl_lock();
 
+       present = netif_device_present(netdev);
        netif_device_detach(netdev);
 
-       if (netif_running(netdev)) {
+       if (present && netif_running(netdev)) {
                int count = E1000_CHECK_RESET_COUNT;
 
                while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
@@ -6313,6 +6490,8 @@ static int e1000e_pm_freeze(struct device *dev)
                e1000e_down(adapter, false);
                e1000_free_irq(adapter);
        }
+       rtnl_unlock();
+
        e1000e_reset_interrupt_capability(adapter);
 
        /* Allow time for pending master requests to run */
@@ -6560,6 +6739,30 @@ static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state)
        __e1000e_disable_aspm(pdev, state, 1);
 }
 
+static int e1000e_pm_thaw(struct device *dev)
+{
+       struct net_device *netdev = dev_get_drvdata(dev);
+       struct e1000_adapter *adapter = netdev_priv(netdev);
+       int rc = 0;
+
+       e1000e_set_interrupt_capability(adapter);
+
+       rtnl_lock();
+       if (netif_running(netdev)) {
+               rc = e1000_request_irq(adapter);
+               if (rc)
+                       goto err_irq;
+
+               e1000e_up(adapter);
+       }
+
+       netif_device_attach(netdev);
+err_irq:
+       rtnl_unlock();
+
+       return rc;
+}
+
 #ifdef CONFIG_PM
 static int __e1000_resume(struct pci_dev *pdev)
 {
@@ -6627,29 +6830,12 @@ static int __e1000_resume(struct pci_dev *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int e1000e_pm_thaw(struct device *dev)
-{
-       struct net_device *netdev = dev_get_drvdata(dev);
-       struct e1000_adapter *adapter = netdev_priv(netdev);
-
-       e1000e_set_interrupt_capability(adapter);
-       if (netif_running(netdev)) {
-               u32 err = e1000_request_irq(adapter);
-
-               if (err)
-                       return err;
-
-               e1000e_up(adapter);
-       }
-
-       netif_device_attach(netdev);
-
-       return 0;
-}
-
 static int e1000e_pm_suspend(struct device *dev)
 {
+       struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+       struct e1000_adapter *adapter = netdev_priv(netdev);
        struct pci_dev *pdev = to_pci_dev(dev);
+       struct e1000_hw *hw = &adapter->hw;
        int rc;
 
        e1000e_flush_lpic(pdev);
@@ -6660,14 +6846,25 @@ static int e1000e_pm_suspend(struct device *dev)
        if (rc)
                e1000e_pm_thaw(dev);
 
+       /* Introduce S0ix implementation */
+       if (hw->mac.type >= e1000_pch_cnp)
+               e1000e_s0ix_entry_flow(adapter);
+
        return rc;
 }
 
 static int e1000e_pm_resume(struct device *dev)
 {
+       struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+       struct e1000_adapter *adapter = netdev_priv(netdev);
        struct pci_dev *pdev = to_pci_dev(dev);
+       struct e1000_hw *hw = &adapter->hw;
        int rc;
 
+       /* Introduce S0ix implementation */
+       if (hw->mac.type >= e1000_pch_cnp)
+               e1000e_s0ix_exit_flow(adapter);
+
        rc = __e1000_resume(pdev);
        if (rc)
                return rc;
@@ -6818,16 +7015,11 @@ static void e1000_netpoll(struct net_device *netdev)
 static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
                                                pci_channel_state_t state)
 {
-       struct net_device *netdev = pci_get_drvdata(pdev);
-       struct e1000_adapter *adapter = netdev_priv(netdev);
-
-       netif_device_detach(netdev);
+       e1000e_pm_freeze(&pdev->dev);
 
        if (state == pci_channel_io_perm_failure)
                return PCI_ERS_RESULT_DISCONNECT;
 
-       if (netif_running(netdev))
-               e1000e_down(adapter, true);
        pci_disable_device(pdev);
 
        /* Request a slot slot reset. */
@@ -6893,10 +7085,7 @@ static void e1000_io_resume(struct pci_dev *pdev)
 
        e1000_init_manageability_pt(adapter);
 
-       if (netif_running(netdev))
-               e1000e_up(adapter);
-
-       netif_device_attach(netdev);
+       e1000e_pm_thaw(&pdev->dev);
 
        /* If the controller has AMT, do not set DRV_LOAD until the interface
         * is up.  For all other cases, let the f/w know that the h/w is now
@@ -7407,15 +7596,13 @@ static void e1000_remove(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct e1000_adapter *adapter = netdev_priv(netdev);
-       bool down = test_bit(__E1000_DOWN, &adapter->state);
 
        e1000e_ptp_remove(adapter);
 
        /* The timers may be rescheduled, so explicitly disable them
         * from being rescheduled.
         */
-       if (!down)
-               set_bit(__E1000_DOWN, &adapter->state);
+       set_bit(__E1000_DOWN, &adapter->state);
        del_timer_sync(&adapter->phy_info_timer);
 
        cancel_work_sync(&adapter->reset_task);
@@ -7435,9 +7622,6 @@ static void e1000_remove(struct pci_dev *pdev)
                }
        }
 
-       /* Don't lie to e1000_close() down the road. */
-       if (!down)
-               clear_bit(__E1000_DOWN, &adapter->state);
        unregister_netdev(netdev);
 
        if (pci_dev_run_wake(pdev))
@@ -7567,6 +7751,17 @@ static const struct pci_device_id e1000_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V8), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM9), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V9), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM10), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V10), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM11), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp },
 
        { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */
 };
index 1a4c65d..eaa5a0f 100644 (file)
@@ -295,6 +295,8 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
        case e1000_pch_lpt:
        case e1000_pch_spt:
        case e1000_pch_cnp:
+               /* fall-through */
+       case e1000_pch_tgp:
                if ((hw->mac.type < e1000_pch_lpt) ||
                    (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) {
                        adapter->ptp_clock_info.max_adj = 24000000 - 1;
index 47f5ca7..df59fd1 100644 (file)
@@ -18,6 +18,7 @@
 #define E1000_FEXTNVM  0x00028 /* Future Extended NVM - RW */
 #define E1000_FEXTNVM3 0x0003C /* Future Extended NVM 3 - RW */
 #define E1000_FEXTNVM4 0x00024 /* Future Extended NVM 4 - RW */
+#define E1000_FEXTNVM5 0x00014 /* Future Extended NVM 5 - RW */
 #define E1000_FEXTNVM6 0x00010 /* Future Extended NVM 6 - RW */
 #define E1000_FEXTNVM7 0x000E4 /* Future Extended NVM 7 - RW */
 #define E1000_FEXTNVM9 0x5BB4  /* Future Extended NVM 9 - RW */
 #define E1000_RXMTRL   0x0B634 /* Time sync Rx EtherType and Msg Type - RW */
 #define E1000_RXUDP    0x0B638 /* Time Sync Rx UDP Port - RW */
 
+/* PHY registers */
+#define I82579_DFT_CTRL        PHY_REG(769, 20)
+
 #endif
index b144419..f306084 100644 (file)
@@ -534,6 +534,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev);
 int fm10k_iov_resume(struct pci_dev *pdev);
 void fm10k_iov_disable(struct pci_dev *pdev);
 int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs);
+void fm10k_iov_update_stats(struct fm10k_intfc *interface);
 s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
 int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
@@ -542,6 +543,8 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
                        int __always_unused min_rate, int max_rate);
 int fm10k_ndo_get_vf_config(struct net_device *netdev,
                            int vf_idx, struct ifla_vf_info *ivi);
+int fm10k_ndo_get_vf_stats(struct net_device *netdev,
+                          int vf_idx, struct ifla_vf_stats *stats);
 
 /* DebugFS */
 #ifdef CONFIG_DEBUG_FS
index afe1faf..8c50a12 100644 (file)
@@ -520,6 +520,27 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
        return num_vfs;
 }
 
+/**
+ * fm10k_iov_update_stats - Update stats for all VFs
+ * @interface: device private structure
+ *
+ * Updates the VF statistics for all enabled VFs. Expects to be called by
+ * fm10k_update_stats and assumes that locking via the __FM10K_UPDATING_STATS
+ * bit is already handled.
+ */
+void fm10k_iov_update_stats(struct fm10k_intfc *interface)
+{
+       struct fm10k_iov_data *iov_data = interface->iov_data;
+       struct fm10k_hw *hw = &interface->hw;
+       int i;
+
+       if (!iov_data)
+               return;
+
+       for (i = 0; i < iov_data->num_vfs; i++)
+               hw->iov.ops.update_stats(hw, iov_data->vf_info[i].stats, i);
+}
+
 static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface,
                                       struct fm10k_vf_info *vf_info)
 {
@@ -650,3 +671,30 @@ int fm10k_ndo_get_vf_config(struct net_device *netdev,
 
        return 0;
 }
+
+int fm10k_ndo_get_vf_stats(struct net_device *netdev,
+                          int vf_idx, struct ifla_vf_stats *stats)
+{
+       struct fm10k_intfc *interface = netdev_priv(netdev);
+       struct fm10k_iov_data *iov_data = interface->iov_data;
+       struct fm10k_hw *hw = &interface->hw;
+       struct fm10k_hw_stats_q *hw_stats;
+       u32 idx, qpp;
+
+       /* verify SR-IOV is active and that vf idx is valid */
+       if (!iov_data || vf_idx >= iov_data->num_vfs)
+               return -EINVAL;
+
+       qpp = fm10k_queues_per_pool(hw);
+       hw_stats = iov_data->vf_info[vf_idx].stats;
+
+       for (idx = 0; idx < qpp; idx++) {
+               stats->rx_packets += hw_stats[idx].rx_packets.count;
+               stats->tx_packets += hw_stats[idx].tx_packets.count;
+               stats->rx_bytes += hw_stats[idx].rx_bytes.count;
+               stats->tx_bytes += hw_stats[idx].tx_bytes.count;
+               stats->rx_dropped += hw_stats[idx].rx_drops.count;
+       }
+
+       return 0;
+}
index 2be9222..17738b0 100644 (file)
@@ -11,7 +11,7 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION    "0.26.1-k"
+#define DRV_VERSION    "0.27.1-k"
 #define DRV_SUMMARY    "Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
index 09f7a24..68baee0 100644 (file)
@@ -1643,6 +1643,7 @@ static const struct net_device_ops fm10k_netdev_ops = {
        .ndo_set_vf_vlan        = fm10k_ndo_set_vf_vlan,
        .ndo_set_vf_rate        = fm10k_ndo_set_vf_bw,
        .ndo_get_vf_config      = fm10k_ndo_get_vf_config,
+       .ndo_get_vf_stats       = fm10k_ndo_get_vf_stats,
        .ndo_udp_tunnel_add     = fm10k_udp_tunnel_add,
        .ndo_udp_tunnel_del     = fm10k_udp_tunnel_del,
        .ndo_dfwd_add_station   = fm10k_dfwd_add_station,
index bb236fa..d122d00 100644 (file)
@@ -630,6 +630,9 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
        net_stats->rx_errors = rx_errors;
        net_stats->rx_dropped = interface->stats.nodesc_drop.count;
 
+       /* Update VF statistics */
+       fm10k_iov_update_stats(interface);
+
        clear_bit(__FM10K_UPDATING_STATS, interface->state);
 }
 
index 160bc5b..ceb9b79 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #ifndef _FM10K_TLV_H_
 #define _FM10K_TLV_H_
@@ -76,8 +76,8 @@ struct fm10k_tlv_attr {
 #define FM10K_TLV_ATTR_S32(id)             { id, FM10K_TLV_SIGNED, 4 }
 #define FM10K_TLV_ATTR_S64(id)             { id, FM10K_TLV_SIGNED, 8 }
 #define FM10K_TLV_ATTR_LE_STRUCT(id, len)   { id, FM10K_TLV_LE_STRUCT, len }
-#define FM10K_TLV_ATTR_NESTED(id)          { id, FM10K_TLV_NESTED }
-#define FM10K_TLV_ATTR_LAST                { FM10K_TLV_ERROR }
+#define FM10K_TLV_ATTR_NESTED(id)          { id, FM10K_TLV_NESTED, 0 }
+#define FM10K_TLV_ATTR_LAST                { FM10K_TLV_ERROR, 0, 0 }
 
 struct fm10k_msg_data {
        unsigned int                id;
index 15ac1c7..63968c5 100644 (file)
@@ -581,6 +581,7 @@ struct fm10k_vf_info {
         * at the same offset as the mailbox
         */
        struct fm10k_mbx_info   mbx;            /* PF side of VF mailbox */
+       struct fm10k_hw_stats_q stats[FM10K_MAX_QUEUES_POOL];
        int                     rate;           /* Tx BW cap as defined by OS */
        u16                     glort;          /* resource tag for this VF */
        u16                     sw_vid;         /* Switch API assigned VLAN */
index 2af9f63..cb63673 100644 (file)
@@ -1118,6 +1118,7 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
                                            const u8 *macaddr);
 int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
+int i40e_count_filters(struct i40e_vsi *vsi);
 struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
 void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
 #ifdef CONFIG_I40E_DCB
index 72c0488..9f0a4e9 100644 (file)
@@ -507,6 +507,59 @@ shutdown_arq_out:
        return ret_code;
 }
 
+/**
+ *  i40e_set_hw_flags - set HW flags
+ *  @hw: pointer to the hardware structure
+ **/
+static void i40e_set_hw_flags(struct i40e_hw *hw)
+{
+       struct i40e_adminq_info *aq = &hw->aq;
+
+       hw->flags = 0;
+
+       switch (hw->mac.type) {
+       case I40E_MAC_XL710:
+               if (aq->api_maj_ver > 1 ||
+                   (aq->api_maj_ver == 1 &&
+                    aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710)) {
+                       hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+                       hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+                       /* The ability to RX (not drop) 802.1ad frames */
+                       hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
+               }
+               break;
+       case I40E_MAC_X722:
+               hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE |
+                            I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+
+               if (aq->api_maj_ver > 1 ||
+                   (aq->api_maj_ver == 1 &&
+                    aq->api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722))
+                       hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+               /* fall through */
+       default:
+               break;
+       }
+
+       /* Newer versions of firmware require lock when reading the NVM */
+       if (aq->api_maj_ver > 1 ||
+           (aq->api_maj_ver == 1 &&
+            aq->api_min_ver >= 5))
+               hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+
+       if (aq->api_maj_ver > 1 ||
+           (aq->api_maj_ver == 1 &&
+            aq->api_min_ver >= 8)) {
+               hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT;
+               hw->flags |= I40E_HW_FLAG_DROP_MODE;
+       }
+
+       if (aq->api_maj_ver > 1 ||
+           (aq->api_maj_ver == 1 &&
+            aq->api_min_ver >= 9))
+               hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED;
+}
+
 /**
  *  i40e_init_adminq - main initialization routine for Admin Queue
  *  @hw: pointer to the hardware structure
@@ -571,6 +624,11 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
        if (ret_code != I40E_SUCCESS)
                goto init_adminq_free_arq;
 
+       /* Some features were introduced in different FW API version
+        * for different MAC type.
+        */
+       i40e_set_hw_flags(hw);
+
        /* get the NVM version info */
        i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION,
                           &hw->nvm.version);
@@ -596,25 +654,12 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
                hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
        }
 
-       /* Newer versions of firmware require lock when reading the NVM */
-       if (hw->aq.api_maj_ver > 1 ||
-           (hw->aq.api_maj_ver == 1 &&
-            hw->aq.api_min_ver >= 5))
-               hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
-
        /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */
        if (hw->aq.api_maj_ver > 1 ||
            (hw->aq.api_maj_ver == 1 &&
             hw->aq.api_min_ver >= 7))
                hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
 
-       if (hw->aq.api_maj_ver > 1 ||
-           (hw->aq.api_maj_ver == 1 &&
-            hw->aq.api_min_ver >= 8)) {
-               hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT;
-               hw->flags |= I40E_HW_FLAG_DROP_MODE;
-       }
-
        if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
                ret_code = I40E_ERR_FIRMWARE_API_VERSION;
                goto init_adminq_free_arq;
index 530613f..a23f89f 100644 (file)
@@ -2249,7 +2249,13 @@ struct i40e_aqc_phy_register_access {
 #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL        1
 #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2
        u8      dev_address;
-       u8      reserved1[2];
+       u8      cmd_flags;
+#define I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE   0x01
+#define I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER      0x02
+#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT    2
+#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK     (0x3 << \
+               I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT)
+       u8      reserved1;
        __le32  reg_address;
        __le32  reg_value;
        u8      reserved2[4];
index d37c6e0..a079144 100644 (file)
@@ -29,6 +29,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw)
                case I40E_DEV_ID_QSFP_C:
                case I40E_DEV_ID_10G_BASE_T:
                case I40E_DEV_ID_10G_BASE_T4:
+               case I40E_DEV_ID_10G_BASE_T_BC:
                case I40E_DEV_ID_10G_B:
                case I40E_DEV_ID_10G_SFP:
                case I40E_DEV_ID_20G_KR2:
@@ -933,10 +934,6 @@ i40e_status i40e_init_shared_code(struct i40e_hw *hw)
        else
                hw->pf_id = (u8)(func_rid & 0x7);
 
-       if (hw->mac.type == I40E_MAC_X722)
-               hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE |
-                            I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
-
        status = i40e_init_nvm(hw);
        return status;
 }
@@ -1441,9 +1438,9 @@ static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx)
        u32 gpio_val = 0;
        u32 port;
 
-       if (!hw->func_caps.led[idx])
+       if (!I40E_IS_X710TL_DEVICE(hw->device_id) &&
+           !hw->func_caps.led[idx])
                return 0;
-
        gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(idx));
        port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK) >>
                I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT;
@@ -1462,8 +1459,15 @@ static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx)
 #define I40E_FILTER_ACTIVITY 0xE
 #define I40E_LINK_ACTIVITY 0xC
 #define I40E_MAC_ACTIVITY 0xD
+#define I40E_FW_LED BIT(4)
+#define I40E_LED_MODE_VALID (I40E_GLGEN_GPIO_CTL_LED_MODE_MASK >> \
+                            I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+
 #define I40E_LED0 22
 
+#define I40E_PIN_FUNC_SDP 0x0
+#define I40E_PIN_FUNC_LED 0x1
+
 /**
  * i40e_led_get - return current on/off mode
  * @hw: pointer to the hw struct
@@ -1508,8 +1512,10 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
 {
        int i;
 
-       if (mode & 0xfffffff0)
+       if (mode & ~I40E_LED_MODE_VALID) {
                hw_dbg(hw, "invalid mode passed in %X\n", mode);
+               return;
+       }
 
        /* as per the documentation GPIO 22-29 are the LED
         * GPIO pins named LED0..LED7
@@ -1519,6 +1525,20 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
 
                if (!gpio_val)
                        continue;
+
+               if (I40E_IS_X710TL_DEVICE(hw->device_id)) {
+                       u32 pin_func = 0;
+
+                       if (mode & I40E_FW_LED)
+                               pin_func = I40E_PIN_FUNC_SDP;
+                       else
+                               pin_func = I40E_PIN_FUNC_LED;
+
+                       gpio_val &= ~I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK;
+                       gpio_val |= ((pin_func <<
+                                    I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) &
+                                    I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK);
+               }
                gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
                /* this & is a bit of paranoia, but serves as a range check */
                gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
@@ -2570,9 +2590,16 @@ noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw)
                if (status)
                        return status;
 
-               hw->phy.link_info.req_fec_info =
-                       abilities.fec_cfg_curr_mod_ext_info &
-                       (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS);
+               if (abilities.fec_cfg_curr_mod_ext_info &
+                   I40E_AQ_ENABLE_FEC_AUTO)
+                       hw->phy.link_info.req_fec_info =
+                               (I40E_AQ_REQUEST_FEC_KR |
+                                I40E_AQ_REQUEST_FEC_RS);
+               else
+                       hw->phy.link_info.req_fec_info =
+                               abilities.fec_cfg_curr_mod_ext_info &
+                               (I40E_AQ_REQUEST_FEC_KR |
+                                I40E_AQ_REQUEST_FEC_RS);
 
                memcpy(hw->phy.link_info.module_type, &abilities.module_type,
                       sizeof(hw->phy.link_info.module_type));
@@ -4884,6 +4911,7 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw,
                break;
        case I40E_DEV_ID_10G_BASE_T:
        case I40E_DEV_ID_10G_BASE_T4:
+       case I40E_DEV_ID_10G_BASE_T_BC:
        case I40E_DEV_ID_10G_BASE_T_X722:
        case I40E_DEV_ID_25G_B:
        case I40E_DEV_ID_25G_SFP28:
@@ -5043,7 +5071,7 @@ static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
                status =
                       i40e_aq_get_phy_register(hw,
                                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-                                               I40E_PHY_COM_REG_PAGE,
+                                               I40E_PHY_COM_REG_PAGE, true,
                                                I40E_PHY_LED_PROV_REG_1,
                                                reg_val, NULL);
        } else {
@@ -5076,7 +5104,7 @@ static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
                status =
                       i40e_aq_set_phy_register(hw,
                                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-                                               I40E_PHY_COM_REG_PAGE,
+                                               I40E_PHY_COM_REG_PAGE, true,
                                                I40E_PHY_LED_PROV_REG_1,
                                                reg_val, NULL);
        } else {
@@ -5115,7 +5143,7 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
                status =
                      i40e_aq_get_phy_register(hw,
                                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-                                              I40E_PHY_COM_REG_PAGE,
+                                              I40E_PHY_COM_REG_PAGE, true,
                                               I40E_PHY_LED_PROV_REG_1,
                                               &reg_val_aq, NULL);
                if (status == I40E_SUCCESS)
@@ -5320,20 +5348,49 @@ do_retry:
 }
 
 /**
- * i40e_aq_set_phy_register
+ * i40e_mdio_if_number_selection - MDIO I/F number selection
+ * @hw: pointer to the hw struct
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
+ * @cmd: pointer to PHY Register command structure
+ **/
+static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio,
+                                         u8 mdio_num,
+                                         struct i40e_aqc_phy_register_access *cmd)
+{
+       if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) {
+               if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED)
+                       cmd->cmd_flags |=
+                               I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER |
+                               ((mdio_num <<
+                               I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT) &
+                               I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK);
+               else
+                       i40e_debug(hw, I40E_DEBUG_PHY,
+                                  "MDIO I/F number selection not supported by current FW version.\n");
+       }
+}
+
+/**
+ * i40e_aq_set_phy_register_ext
  * @hw: pointer to the hw struct
  * @phy_select: select which phy should be accessed
  * @dev_addr: PHY device address
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
  * @reg_addr: PHY register address
  * @reg_val: new register value
  * @cmd_details: pointer to command details structure or NULL
  *
  * Write the external PHY register.
+ * NOTE: In common cases MDIO I/F number should not be changed, that's why you
+ * may use simple wrapper i40e_aq_set_phy_register.
  **/
-i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
-                                    u8 phy_select, u8 dev_addr,
-                                    u32 reg_addr, u32 reg_val,
-                                    struct i40e_asq_cmd_details *cmd_details)
+enum i40e_status_code i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+                            u8 phy_select, u8 dev_addr, bool page_change,
+                            bool set_mdio, u8 mdio_num,
+                            u32 reg_addr, u32 reg_val,
+                            struct i40e_asq_cmd_details *cmd_details)
 {
        struct i40e_aq_desc desc;
        struct i40e_aqc_phy_register_access *cmd =
@@ -5348,26 +5405,36 @@ i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
        cmd->reg_address = cpu_to_le32(reg_addr);
        cmd->reg_value = cpu_to_le32(reg_val);
 
+       i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd);
+
+       if (!page_change)
+               cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
        status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
        return status;
 }
 
 /**
- * i40e_aq_get_phy_register
+ * i40e_aq_get_phy_register_ext
  * @hw: pointer to the hw struct
  * @phy_select: select which phy should be accessed
  * @dev_addr: PHY device address
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
  * @reg_addr: PHY register address
  * @reg_val: read register value
  * @cmd_details: pointer to command details structure or NULL
  *
  * Read the external PHY register.
+ * NOTE: In common cases MDIO I/F number should not be changed, that's why you
+ * may use simple wrapper i40e_aq_get_phy_register.
  **/
-i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
-                                    u8 phy_select, u8 dev_addr,
-                                    u32 reg_addr, u32 *reg_val,
-                                    struct i40e_asq_cmd_details *cmd_details)
+enum i40e_status_code i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+                            u8 phy_select, u8 dev_addr, bool page_change,
+                            bool set_mdio, u8 mdio_num,
+                            u32 reg_addr, u32 *reg_val,
+                            struct i40e_asq_cmd_details *cmd_details)
 {
        struct i40e_aq_desc desc;
        struct i40e_aqc_phy_register_access *cmd =
@@ -5381,6 +5448,11 @@ i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
        cmd->dev_address = dev_addr;
        cmd->reg_address = cpu_to_le32(reg_addr);
 
+       i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd);
+
+       if (!page_change)
+               cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
        status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
        if (!status)
                *reg_val = le32_to_cpu(cmd->reg_value);
index 200a1cb..9de503c 100644 (file)
@@ -889,7 +889,9 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
 
                ret = i40e_read_nvm_module_data(hw,
                                                I40E_SR_EMP_SR_SETTINGS_PTR,
-                                               offset, 1,
+                                               offset,
+                                               I40E_LLDP_CURRENT_STATUS_OFFSET,
+                                               I40E_LLDP_CURRENT_STATUS_SIZE,
                                                &lldp_cfg.adminstatus);
        } else {
                ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
index 2a80c5d..ba86ad8 100644 (file)
@@ -32,6 +32,9 @@
 #define I40E_CEE_MAX_FEAT_TYPE         3
 #define I40E_LLDP_CURRENT_STATUS_XL710_OFFSET  0x2B
 #define I40E_LLDP_CURRENT_STATUS_X722_OFFSET   0x31
+#define I40E_LLDP_CURRENT_STATUS_OFFSET                1
+#define I40E_LLDP_CURRENT_STATUS_SIZE          1
+
 /* Defines for LLDP TLV header */
 #define I40E_LLDP_TLV_LEN_SHIFT                0
 #define I40E_LLDP_TLV_LEN_MASK         (0x01FF << I40E_LLDP_TLV_LEN_SHIFT)
index bac4da0..bf15a86 100644 (file)
@@ -23,6 +23,8 @@
 #define I40E_DEV_ID_10G_BASE_T_BC      0x15FF
 #define I40E_DEV_ID_10G_B              0x104F
 #define I40E_DEV_ID_10G_SFP            0x104E
+#define I40E_IS_X710TL_DEVICE(d) \
+       ((d) == I40E_DEV_ID_10G_BASE_T_BC)
 #define I40E_DEV_ID_KX_X722            0x37CE
 #define I40E_DEV_ID_QSFP_X722          0x37CF
 #define I40E_DEV_ID_SFP_X722           0x37D0
index 41e1240..d24d873 100644 (file)
@@ -722,7 +722,14 @@ static void i40e_get_settings_link_up_fec(u8 req_fec_info,
        ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
        ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
 
-       if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
+       if ((I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) &&
+           (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info)) {
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    FEC_NONE);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    FEC_BASER);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+       } else if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
                ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
        } else if (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info) {
                ethtool_link_ksettings_add_link_mode(ks, advertising,
@@ -730,12 +737,6 @@ static void i40e_get_settings_link_up_fec(u8 req_fec_info,
        } else {
                ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                     FEC_NONE);
-               if (I40E_AQ_SET_FEC_AUTO & req_fec_info) {
-                       ethtool_link_ksettings_add_link_mode(ks, advertising,
-                                                            FEC_RS);
-                       ethtool_link_ksettings_add_link_mode(ks, advertising,
-                                                            FEC_BASER);
-               }
        }
 }
 
@@ -1437,6 +1438,7 @@ static int i40e_get_fec_param(struct net_device *netdev,
        struct i40e_hw *hw = &pf->hw;
        i40e_status status = 0;
        int err = 0;
+       u8 fec_cfg;
 
        /* Get the current phy config */
        memset(&abilities, 0, sizeof(abilities));
@@ -1448,18 +1450,16 @@ static int i40e_get_fec_param(struct net_device *netdev,
        }
 
        fecparam->fec = 0;
-       if (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_AUTO)
+       fec_cfg = abilities.fec_cfg_curr_mod_ext_info;
+       if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
                fecparam->fec |= ETHTOOL_FEC_AUTO;
-       if ((abilities.fec_cfg_curr_mod_ext_info &
-            I40E_AQ_SET_FEC_REQUEST_RS) ||
-           (abilities.fec_cfg_curr_mod_ext_info &
-            I40E_AQ_SET_FEC_ABILITY_RS))
+       else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_RS |
+                I40E_AQ_SET_FEC_ABILITY_RS))
                fecparam->fec |= ETHTOOL_FEC_RS;
-       if ((abilities.fec_cfg_curr_mod_ext_info &
-            I40E_AQ_SET_FEC_REQUEST_KR) ||
-           (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_ABILITY_KR))
+       else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_KR |
+                I40E_AQ_SET_FEC_ABILITY_KR))
                fecparam->fec |= ETHTOOL_FEC_BASER;
-       if (abilities.fec_cfg_curr_mod_ext_info == 0)
+       if (fec_cfg == 0)
                fecparam->fec |= ETHTOOL_FEC_OFF;
 
        if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA)
@@ -5112,7 +5112,7 @@ static int i40e_get_module_info(struct net_device *netdev,
        case I40E_MODULE_TYPE_SFP:
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_I2C_EEPROM_DEV_ADDR, true,
                                I40E_MODULE_SFF_8472_COMP,
                                &sff8472_comp, NULL);
                if (status)
@@ -5120,7 +5120,7 @@ static int i40e_get_module_info(struct net_device *netdev,
 
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_I2C_EEPROM_DEV_ADDR, true,
                                I40E_MODULE_SFF_8472_SWAP,
                                &sff8472_swap, NULL);
                if (status)
@@ -5152,7 +5152,7 @@ static int i40e_get_module_info(struct net_device *netdev,
                /* Read from memory page 0. */
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               0,
+                               0, true,
                                I40E_MODULE_REVISION_ADDR,
                                &sff8636_rev, NULL);
                if (status)
@@ -5223,7 +5223,7 @@ static int i40e_get_module_eeprom(struct net_device *netdev,
 
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               addr, offset, &value, NULL);
+                               true, addr, offset, &value, NULL);
                if (status)
                        return -EIO;
                data[i] = value;
@@ -5242,6 +5242,7 @@ static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
 }
 
 static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = {
+       .get_drvinfo            = i40e_get_drvinfo,
        .set_eeprom             = i40e_set_eeprom,
        .get_eeprom_len         = i40e_get_eeprom_len,
        .get_eeprom             = i40e_get_eeprom,
index 6031223..9fac1ce 100644 (file)
@@ -1109,6 +1109,25 @@ void i40e_update_stats(struct i40e_vsi *vsi)
        i40e_update_vsi_stats(vsi);
 }
 
+/**
+ * i40e_count_filters - counts VSI mac filters
+ * @vsi: the VSI to be searched
+ *
+ * Returns count of mac filters
+ **/
+int i40e_count_filters(struct i40e_vsi *vsi)
+{
+       struct i40e_mac_filter *f;
+       struct hlist_node *h;
+       int bkt;
+       int cnt = 0;
+
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+               ++cnt;
+
+       return cnt;
+}
+
 /**
  * i40e_find_filter - Search VSI filter list for specific mac/vlan filter
  * @vsi: the VSI to be searched
@@ -3534,14 +3553,14 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
                q_vector->rx.target_itr =
                        ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
                wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
-                    q_vector->rx.target_itr);
+                    q_vector->rx.target_itr >> 1);
                q_vector->rx.current_itr = q_vector->rx.target_itr;
 
                q_vector->tx.next_update = jiffies + 1;
                q_vector->tx.target_itr =
                        ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
                wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
-                    q_vector->tx.target_itr);
+                    q_vector->tx.target_itr >> 1);
                q_vector->tx.current_itr = q_vector->tx.target_itr;
 
                wr32(hw, I40E_PFINT_RATEN(vector - 1),
@@ -3646,11 +3665,11 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
        /* set the ITR configuration */
        q_vector->rx.next_update = jiffies + 1;
        q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
-       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1);
        q_vector->rx.current_itr = q_vector->rx.target_itr;
        q_vector->tx.next_update = jiffies + 1;
        q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
-       wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+       wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1);
        q_vector->tx.current_itr = q_vector->tx.target_itr;
 
        i40e_enable_misc_int_causes(pf);
@@ -7168,6 +7187,7 @@ static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
                ch->num_queue_pairs = qcnt;
                if (!i40e_setup_channel(pf, vsi, ch)) {
                        ret = -EINVAL;
+                       kfree(ch);
                        goto err_free;
                }
                ch->parent_vsi = vsi;
@@ -11396,7 +11416,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
 
        /* associate no queues to the misc vector */
        wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
-       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K);
+       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1);
 
        i40e_flush(hw);
 
@@ -12850,6 +12870,7 @@ static const struct net_device_ops i40e_netdev_ops = {
        .ndo_set_features       = i40e_set_features,
        .ndo_set_vf_mac         = i40e_ndo_set_vf_mac,
        .ndo_set_vf_vlan        = i40e_ndo_set_vf_port_vlan,
+       .ndo_get_vf_stats       = i40e_get_vf_stats,
        .ndo_set_vf_rate        = i40e_ndo_set_vf_bw,
        .ndo_get_vf_config      = i40e_ndo_get_vf_config,
        .ndo_set_vf_link_state  = i40e_ndo_set_vf_link_state,
@@ -12911,6 +12932,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
                          NETIF_F_GSO_IPXIP6            |
                          NETIF_F_GSO_UDP_TUNNEL        |
                          NETIF_F_GSO_UDP_TUNNEL_CSUM   |
+                         NETIF_F_GSO_UDP_L4            |
                          NETIF_F_SCTP_CRC              |
                          NETIF_F_RXHASH                |
                          NETIF_F_RXCSUM                |
index e4d8d20..7164f4a 100644 (file)
@@ -323,20 +323,24 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 
 /**
  * i40e_read_nvm_module_data - Reads NVM Buffer to specified memory location
- * @hw: pointer to the HW structure
+ * @hw: Pointer to the HW structure
  * @module_ptr: Pointer to module in words with respect to NVM beginning
- * @offset: offset in words from module start
+ * @module_offset: Offset in words from module start
+ * @data_offset: Offset in words from reading data area start
  * @words_data_size: Words to read from NVM
  * @data_ptr: Pointer to memory location where resulting buffer will be stored
  **/
-i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
-                                     u8 module_ptr, u16 offset,
-                                     u16 words_data_size,
-                                     u16 *data_ptr)
+enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw,
+                                               u8 module_ptr,
+                                               u16 module_offset,
+                                               u16 data_offset,
+                                               u16 words_data_size,
+                                               u16 *data_ptr)
 {
        i40e_status status;
+       u16 specific_ptr = 0;
        u16 ptr_value = 0;
-       u32 flat_offset;
+       u32 offset = 0;
 
        if (module_ptr != 0) {
                status = i40e_read_nvm_word(hw, module_ptr, &ptr_value);
@@ -352,36 +356,35 @@ i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
 
        /* Pointer not initialized */
        if (ptr_value == I40E_NVM_INVALID_PTR_VAL ||
-           ptr_value == I40E_NVM_INVALID_VAL)
+           ptr_value == I40E_NVM_INVALID_VAL) {
+               i40e_debug(hw, I40E_DEBUG_ALL, "Pointer not initialized.\n");
                return I40E_ERR_BAD_PTR;
+       }
 
        /* Check whether the module is in SR mapped area or outside */
        if (ptr_value & I40E_PTR_TYPE) {
                /* Pointer points outside of the Shared RAM mapped area */
-               ptr_value &= ~I40E_PTR_TYPE;
+               i40e_debug(hw, I40E_DEBUG_ALL,
+                          "Reading nvm data failed. Pointer points outside of the Shared RAM mapped area.\n");
 
-               /* PtrValue in 4kB units, need to convert to words */
-               ptr_value /= 2;
-               flat_offset = ((u32)ptr_value * 0x1000) + (u32)offset;
-               status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
-               if (!status) {
-                       status = i40e_aq_read_nvm(hw, 0, 2 * flat_offset,
-                                                 2 * words_data_size,
-                                                 data_ptr, true, NULL);
-                       i40e_release_nvm(hw);
-                       if (status) {
-                               i40e_debug(hw, I40E_DEBUG_ALL,
-                                          "Reading nvm aq failed.Error code: %d.\n",
-                                          status);
-                               return I40E_ERR_NVM;
-                       }
-               } else {
-                       return I40E_ERR_NVM;
-               }
+               return I40E_ERR_PARAM;
        } else {
                /* Read from the Shadow RAM */
-               status = i40e_read_nvm_buffer(hw, ptr_value + offset,
-                                             &words_data_size, data_ptr);
+
+               status = i40e_read_nvm_word(hw, ptr_value + module_offset,
+                                           &specific_ptr);
+               if (status) {
+                       i40e_debug(hw, I40E_DEBUG_ALL,
+                                  "Reading nvm word failed.Error code: %d.\n",
+                                  status);
+                       return I40E_ERR_NVM;
+               }
+
+               offset = ptr_value + module_offset + specific_ptr +
+                       data_offset;
+
+               status = i40e_read_nvm_buffer(hw, offset, &words_data_size,
+                                             data_ptr);
                if (status) {
                        i40e_debug(hw, I40E_DEBUG_ALL,
                                   "Reading nvm buffer failed.Error code: %d.\n",
index 5250441..bbb478f 100644 (file)
@@ -315,10 +315,12 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
 void i40e_release_nvm(struct i40e_hw *hw);
 i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
                                         u16 *data);
-i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
-                                     u8 module_ptr, u16 offset,
-                                     u16 words_data_size,
-                                     u16 *data_ptr);
+enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw,
+                                               u8 module_ptr,
+                                               u16 module_offset,
+                                               u16 data_offset,
+                                               u16 words_data_size,
+                                               u16 *data_ptr);
 i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
                                 u16 *words, u16 *data);
 i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw);
@@ -409,14 +411,24 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
                                u32 reg_addr, u32 reg_val,
                                struct i40e_asq_cmd_details *cmd_details);
 void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
-i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
-                                    u8 phy_select, u8 dev_addr,
-                                    u32 reg_addr, u32 reg_val,
-                                    struct i40e_asq_cmd_details *cmd_details);
-i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
-                                    u8 phy_select, u8 dev_addr,
-                                    u32 reg_addr, u32 *reg_val,
-                                    struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code
+i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+                            u8 phy_select, u8 dev_addr, bool page_change,
+                            bool set_mdio, u8 mdio_num,
+                            u32 reg_addr, u32 reg_val,
+                            struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code
+i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+                            u8 phy_select, u8 dev_addr, bool page_change,
+                            bool set_mdio, u8 mdio_num,
+                            u32 reg_addr, u32 *reg_val,
+                            struct i40e_asq_cmd_details *cmd_details);
+
+/* Convenience wrappers for most common use case */
+#define i40e_aq_set_phy_register(hw, ps, da, pc, ra, rv, cd)           \
+       i40e_aq_set_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd)
+#define i40e_aq_get_phy_register(hw, ps, da, pc, ra, rv, cd)           \
+       i40e_aq_get_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd)
 
 i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
                                            u16 reg, u8 phy_addr, u16 *value);
index e3f29dc..b849603 100644 (file)
@@ -2960,10 +2960,16 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
 
        /* remove payload length from inner checksum */
        paylen = skb->len - l4_offset;
-       csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
 
-       /* compute length of segmentation header */
-       *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+       if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+               csum_replace_by_diff(&l4.udp->check, (__force __wsum)htonl(paylen));
+               /* compute length of segmentation header */
+               *hdr_len = sizeof(*l4.udp) + l4_offset;
+       } else {
+               csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+               /* compute length of segmentation header */
+               *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+       }
 
        /* pull values out of skb_shinfo */
        gso_size = skb_shinfo(skb)->gso_size;
index b43ec94..6ea2867 100644 (file)
@@ -624,6 +624,7 @@ struct i40e_hw {
 #define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3)
 #define I40E_HW_FLAG_FW_LLDP_STOPPABLE      BIT_ULL(4)
 #define I40E_HW_FLAG_FW_LLDP_PERSISTENT     BIT_ULL(5)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6)
 #define I40E_HW_FLAG_DROP_MODE              BIT_ULL(7)
        u64 flags;
 
index 3d24408..6a3f0fc 100644 (file)
@@ -955,7 +955,6 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
                i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
                vf->lan_vsi_idx = 0;
                vf->lan_vsi_id = 0;
-               vf->num_mac = 0;
        }
 
        /* do the accounting and remove additional ADq VSI's */
@@ -2548,20 +2547,12 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
                                           struct virtchnl_ether_addr_list *al)
 {
        struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+       int mac2add_cnt = 0;
        int i;
 
-       /* If this VF is not privileged, then we can't add more than a limited
-        * number of addresses. Check to make sure that the additions do not
-        * push us over the limit.
-        */
-       if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
-           (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) {
-               dev_err(&pf->pdev->dev,
-                       "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
-               return -EPERM;
-       }
-
        for (i = 0; i < al->num_elements; i++) {
+               struct i40e_mac_filter *f;
                u8 *addr = al->list[i].addr;
 
                if (is_broadcast_ether_addr(addr) ||
@@ -2585,8 +2576,24 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
                                "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
                        return -EPERM;
                }
+
+               /*count filters that really will be added*/
+               f = i40e_find_mac(vsi, addr);
+               if (!f)
+                       ++mac2add_cnt;
        }
 
+       /* If this VF is not privileged, then we can't add more than a limited
+        * number of addresses. Check to make sure that the additions do not
+        * push us over the limit.
+        */
+       if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+           (i40e_count_filters(vsi) + mac2add_cnt) >
+                   I40E_VC_MAX_MAC_ADDR_PER_VF) {
+               dev_err(&pf->pdev->dev,
+                       "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+               return -EPERM;
+       }
        return 0;
 }
 
@@ -2640,8 +2647,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                                ret = I40E_ERR_PARAM;
                                spin_unlock_bh(&vsi->mac_filter_hash_lock);
                                goto error_param;
-                       } else {
-                               vf->num_mac++;
                        }
                }
        }
@@ -2689,16 +2694,6 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                        ret = I40E_ERR_INVALID_MAC_ADDR;
                        goto error_param;
                }
-
-               if (vf->pf_set_mac &&
-                   ether_addr_equal(al->list[i].addr,
-                                    vf->default_lan_addr.addr)) {
-                       dev_err(&pf->pdev->dev,
-                               "MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n",
-                               vf->default_lan_addr.addr, vf->vf_id);
-                       ret = I40E_ERR_PARAM;
-                       goto error_param;
-               }
        }
        vsi = pf->vsi[vf->lan_vsi_idx];
 
@@ -2709,8 +2704,6 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                        ret = I40E_ERR_INVALID_MAC_ADDR;
                        spin_unlock_bh(&vsi->mac_filter_hash_lock);
                        goto error_param;
-               } else {
-                       vf->num_mac--;
                }
 
        spin_unlock_bh(&vsi->mac_filter_hash_lock);
@@ -4531,3 +4524,51 @@ out:
        clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
        return ret;
 }
+
+/**
+ * i40e_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-127)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
+                     struct ifla_vf_stats *vf_stats)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_eth_stats *stats;
+       struct i40e_vsi *vsi;
+       struct i40e_vf *vf;
+
+       /* validate the request */
+       if (i40e_validate_vf(pf, vf_id))
+               return -EINVAL;
+
+       vf = &pf->vf[vf_id];
+       if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
+               dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+               return -EBUSY;
+       }
+
+       vsi = pf->vsi[vf->lan_vsi_idx];
+       if (!vsi)
+               return -EINVAL;
+
+       i40e_update_eth_stats(vsi);
+       stats = &vsi->eth_stats;
+
+       memset(vf_stats, 0, sizeof(*vf_stats));
+
+       vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+               stats->rx_multicast;
+       vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+               stats->tx_multicast;
+       vf_stats->rx_bytes   = stats->rx_bytes;
+       vf_stats->tx_bytes   = stats->tx_bytes;
+       vf_stats->broadcast  = stats->rx_broadcast;
+       vf_stats->multicast  = stats->rx_multicast;
+       vf_stats->rx_dropped = stats->rx_discards;
+       vf_stats->tx_dropped = stats->tx_discards;
+
+       return 0;
+}
index 7164b9b..631248c 100644 (file)
@@ -101,7 +101,6 @@ struct i40e_vf {
        bool link_up;           /* only valid if VF link is forced */
        bool queues_enabled;    /* true if the VF queues are enabled */
        bool spoofchk;
-       u16 num_mac;
        u16 num_vlan;
 
        /* ADq related variables */
@@ -139,5 +138,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
 
 void i40e_vc_notify_link_state(struct i40e_pf *pf);
 void i40e_vc_notify_reset(struct i40e_pf *pf);
+int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
+                     struct ifla_vf_stats *vf_stats);
 
 #endif /* _I40E_VIRTCHNL_PF_H_ */
index 9edde96..df5a969 100644 (file)
@@ -13,9 +13,12 @@ ice-y := ice_main.o  \
         ice_nvm.o      \
         ice_switch.o   \
         ice_sched.o    \
+        ice_base.o     \
         ice_lib.o      \
+        ice_txrx_lib.o \
         ice_txrx.o     \
         ice_flex_pipe.o        \
         ice_ethtool.o
 ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
 ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
index 45e1006..f552a67 100644 (file)
 #include <linux/ip.h>
 #include <linux/sctp.h>
 #include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
 #include <linux/if_bridge.h>
 #include <linux/ctype.h>
+#include <linux/bpf.h>
 #include <linux/avf/virtchnl.h>
 #include <net/ipv6.h>
+#include <net/xdp_sock.h>
 #include "ice_devids.h"
 #include "ice_type.h"
 #include "ice_txrx.h"
@@ -42,6 +45,7 @@
 #include "ice_sched.h"
 #include "ice_virtchnl_pf.h"
 #include "ice_sriov.h"
+#include "ice_xsk.h"
 
 extern const char ice_drv_ver[];
 #define ICE_BAR0               0
@@ -78,8 +82,7 @@ extern const char ice_drv_ver[];
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-#define ICE_MAX_MTU    (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
-                       (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
+#define ICE_MAX_MTU    (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
 
 #define ICE_UP_TABLE_TRANSLATE(val, i) \
                (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@@ -127,6 +130,14 @@ extern const char ice_drv_ver[];
                                     ICE_PROMISC_VLAN_TX  | \
                                     ICE_PROMISC_VLAN_RX)
 
+struct ice_txq_meta {
+       u32 q_teid;     /* Tx-scheduler element identifier */
+       u16 q_id;       /* Entry in VSI's txq_map bitmap */
+       u16 q_handle;   /* Relative index of Tx queue within TC */
+       u16 vsi_idx;    /* VSI index that Tx queue belongs to */
+       u8 tc;          /* TC number that Tx queue belongs to */
+};
+
 struct ice_tc_info {
        u16 qoffset;
        u16 qcount_tx;
@@ -274,6 +285,13 @@ struct ice_vsi {
        u16 num_rx_desc;
        u16 num_tx_desc;
        struct ice_tc_cfg tc_cfg;
+       struct bpf_prog *xdp_prog;
+       struct ice_ring **xdp_rings;     /* XDP ring array */
+       u16 num_xdp_txq;                 /* Used XDP queues */
+       u8 xdp_mapping_mode;             /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+       struct xdp_umem **xsk_umems;
+       u16 num_xsk_umems_used;
+       u16 num_xsk_umems;
 } ____cacheline_internodealigned_in_smp;
 
 /* struct that defines an interrupt vector */
@@ -313,6 +331,7 @@ enum ice_pf_flags {
        ICE_FLAG_NO_MEDIA,
        ICE_FLAG_FW_LLDP_AGENT,
        ICE_FLAG_ETHTOOL_CTXT,          /* set when ethtool holds RTNL lock */
+       ICE_FLAG_LEGACY_RX,
        ICE_PF_FLAGS_NBITS              /* must be last */
 };
 
@@ -417,6 +436,37 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
        return np->vsi->back;
 }
 
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+       return !!vsi->xdp_prog;
+}
+
+static inline void ice_set_ring_xdp(struct ice_ring *ring)
+{
+       ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_umem - get XDP UMEM bound to a ring
+ * @ring - ring to use
+ *
+ * Returns a pointer to xdp_umem structure if there is an UMEM present,
+ * NULL otherwise.
+ */
+static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
+{
+       struct xdp_umem **umems = ring->vsi->xsk_umems;
+       int qid = ring->q_index;
+
+       if (ice_ring_is_xdp(ring))
+               qid -= ring->vsi->num_xdp_txq;
+
+       if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
+               return NULL;
+
+       return umems[qid];
+}
+
 /**
  * ice_get_main_vsi - Get the PF VSI
  * @pf: PF instance
@@ -443,6 +493,11 @@ int ice_up(struct ice_vsi *vsi);
 int ice_down(struct ice_vsi *vsi);
 int ice_vsi_cfg(struct ice_vsi *vsi);
 struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+            u32 flags);
 int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644 (file)
index 0000000..69d2da1
--- /dev/null
@@ -0,0 +1,857 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_base.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+       int offset, i;
+
+       mutex_lock(qs_cfg->qs_mutex);
+       offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+                                           0, qs_cfg->q_count, 0);
+       if (offset >= qs_cfg->pf_map_size) {
+               mutex_unlock(qs_cfg->qs_mutex);
+               return -ENOMEM;
+       }
+
+       bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+       for (i = 0; i < qs_cfg->q_count; i++)
+               qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
+       mutex_unlock(qs_cfg->qs_mutex);
+
+       return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+       int i, index = 0;
+
+       mutex_lock(qs_cfg->qs_mutex);
+       for (i = 0; i < qs_cfg->q_count; i++) {
+               index = find_next_zero_bit(qs_cfg->pf_map,
+                                          qs_cfg->pf_map_size, index);
+               if (index >= qs_cfg->pf_map_size)
+                       goto err_scatter;
+               set_bit(index, qs_cfg->pf_map);
+               qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
+       }
+       mutex_unlock(qs_cfg->qs_mutex);
+
+       return 0;
+err_scatter:
+       for (index = 0; index < i; index++) {
+               clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+               qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+       }
+       mutex_unlock(qs_cfg->qs_mutex);
+
+       return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+       int i;
+
+       for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+               if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+                             QRX_CTRL_QENA_STAT_M))
+                       return 0;
+
+               usleep_range(20, 40);
+       }
+
+       return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+       struct ice_pf *pf = vsi->back;
+       struct ice_q_vector *q_vector;
+
+       /* allocate q_vector */
+       q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
+       if (!q_vector)
+               return -ENOMEM;
+
+       q_vector->vsi = vsi;
+       q_vector->v_idx = v_idx;
+       if (vsi->type == ICE_VSI_VF)
+               goto out;
+       /* only set affinity_mask if the CPU is online */
+       if (cpu_online(v_idx))
+               cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+       /* This will not be called in the driver load path because the netdev
+        * will not be created yet. All other cases with register the NAPI
+        * handler here (i.e. resume, reset/rebuild, etc.)
+        */
+       if (vsi->netdev)
+               netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+                              NAPI_POLL_WEIGHT);
+
+out:
+       /* tie q_vector and VSI together */
+       vsi->q_vectors[v_idx] = q_vector;
+
+       return 0;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+       struct ice_q_vector *q_vector;
+       struct ice_pf *pf = vsi->back;
+       struct ice_ring *ring;
+
+       if (!vsi->q_vectors[v_idx]) {
+               dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
+                       v_idx);
+               return;
+       }
+       q_vector = vsi->q_vectors[v_idx];
+
+       ice_for_each_ring(ring, q_vector->tx)
+               ring->q_vector = NULL;
+       ice_for_each_ring(ring, q_vector->rx)
+               ring->q_vector = NULL;
+
+       /* only VSI with an associated netdev is set up with NAPI */
+       if (vsi->netdev)
+               netif_napi_del(&q_vector->napi);
+
+       devm_kfree(&pf->pdev->dev, q_vector);
+       vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+       u32 regval = rd32(hw, GLINT_CTL);
+
+       /* no need to update global register if ITR gran is already set */
+       if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+           (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+            GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+           (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+            GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+           (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+            GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+           (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+             GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+               return;
+
+       regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+                 GLINT_CTL_ITR_GRAN_200_M) |
+                ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+                 GLINT_CTL_ITR_GRAN_100_M) |
+                ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+                 GLINT_CTL_ITR_GRAN_50_M) |
+                ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+                 GLINT_CTL_ITR_GRAN_25_M);
+       wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_q_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
+{
+       WARN_ONCE(ice_ring_is_xdp(ring) && tc,
+                 "XDP ring can't belong to TC other than 0");
+
+       /* Idea here for calculation is that we subtract the number of queue
+        * count from TC that ring belongs to from it's absolute queue index
+        * and as a result we get the queue's index within TC.
+        */
+       return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+       struct ice_vsi *vsi = ring->vsi;
+       struct ice_hw *hw = &vsi->back->hw;
+
+       tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+       tlan_ctx->port_num = vsi->port_info->lport;
+
+       /* Transmit Queue Length */
+       tlan_ctx->qlen = ring->count;
+
+       ice_set_cgd_num(tlan_ctx, ring);
+
+       /* PF number */
+       tlan_ctx->pf_num = hw->pf_id;
+
+       /* queue belongs to a specific VSI type
+        * VF / VM index should be programmed per vmvf_type setting:
+        * for vmvf_type = VF, it is VF number between 0-256
+        * for vmvf_type = VM, it is VM number between 0-767
+        * for PF or EMP this field should be set to zero
+        */
+       switch (vsi->type) {
+       case ICE_VSI_LB:
+               /* fall through */
+       case ICE_VSI_PF:
+               tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+               break;
+       case ICE_VSI_VF:
+               /* Firmware expects vmvf_num to be absolute VF ID */
+               tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
+               tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+               break;
+       default:
+               return;
+       }
+
+       /* make sure the context is associated with the right VSI */
+       tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+       tlan_ctx->tso_ena = ICE_TX_LEGACY;
+       tlan_ctx->tso_qnum = pf_q;
+
+       /* Legacy or Advanced Host Interface:
+        * 0: Advanced Host Interface
+        * 1: Legacy Host Interface
+        */
+       tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+       int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+       struct ice_vsi *vsi = ring->vsi;
+       u32 rxdid = ICE_RXDID_FLEX_NIC;
+       struct ice_rlan_ctx rlan_ctx;
+       struct ice_hw *hw;
+       u32 regval;
+       u16 pf_q;
+       int err;
+
+       hw = &vsi->back->hw;
+
+       /* what is Rx queue number in global space of 2K Rx queues */
+       pf_q = vsi->rxq_map[ring->q_index];
+
+       /* clear the context structure first */
+       memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+       ring->rx_buf_len = vsi->rx_buf_len;
+
+       if (ring->vsi->type == ICE_VSI_PF) {
+               if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+                       xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+                                        ring->q_index);
+
+               ring->xsk_umem = ice_xsk_umem(ring);
+               if (ring->xsk_umem) {
+                       xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+                       ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+                                          XDP_PACKET_HEADROOM;
+                       /* For AF_XDP ZC, we disallow packets to span on
+                        * multiple buffers, thus letting us skip that
+                        * handling in the fast-path.
+                        */
+                       chain_len = 1;
+                       ring->zca.free = ice_zca_free;
+                       err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+                                                        MEM_TYPE_ZERO_COPY,
+                                                        &ring->zca);
+                       if (err)
+                               return err;
+
+                       dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+                                ring->q_index);
+               } else {
+                       if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+                               xdp_rxq_info_reg(&ring->xdp_rxq,
+                                                ring->netdev,
+                                                ring->q_index);
+
+                       err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+                                                        MEM_TYPE_PAGE_SHARED,
+                                                        NULL);
+                       if (err)
+                               return err;
+               }
+       }
+       /* Receive Queue Base Address.
+        * Indicates the starting address of the descriptor queue defined in
+        * 128 Byte units.
+        */
+       rlan_ctx.base = ring->dma >> 7;
+
+       rlan_ctx.qlen = ring->count;
+
+       /* Receive Packet Data Buffer Size.
+        * The Packet Data Buffer Size is defined in 128 byte units.
+        */
+       rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+
+       /* use 32 byte descriptors */
+       rlan_ctx.dsize = 1;
+
+       /* Strip the Ethernet CRC bytes before the packet is posted to host
+        * memory.
+        */
+       rlan_ctx.crcstrip = 1;
+
+       /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+       rlan_ctx.l2tsel = 1;
+
+       rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+       rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+       rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+       /* This controls whether VLAN is stripped from inner headers.
+        * The VLAN in the inner L2 header is stripped to the receive
+        * descriptor if enabled by this flag.
+        */
+       rlan_ctx.showiv = 0;
+
+       /* Max packet size for this queue - must not be set to a larger value
+        * than 5 x DBUF
+        */
+       rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+                              chain_len * ring->rx_buf_len);
+
+       /* Rx queue threshold in units of 64 */
+       rlan_ctx.lrxqthresh = 1;
+
+        /* Enable Flexible Descriptors in the queue context which
+         * allows this driver to select a specific receive descriptor format
+         */
+       if (vsi->type != ICE_VSI_VF) {
+               regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+               regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+                       QRXFLXP_CNTXT_RXDID_IDX_M;
+
+               /* increasing context priority to pick up profile ID;
+                * default is 0x01; setting to 0x03 to ensure profile
+                * is programming if prev context is of same priority
+                */
+               regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+                       QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+               wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+       }
+
+       /* Absolute queue number out of 2K needs to be passed */
+       err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+       if (err) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+                       pf_q, err);
+               return -EIO;
+       }
+
+       if (vsi->type == ICE_VSI_VF)
+               return 0;
+
+       /* configure Rx buffer alignment */
+       if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+               ice_clear_ring_build_skb_ena(ring);
+       else
+               ice_set_ring_build_skb_ena(ring);
+
+       /* init queue specific tail register */
+       ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+       writel(0, ring->tail);
+
+       err = ring->xsk_umem ?
+             ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+             ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+       if (err)
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+                        ring->xsk_umem ? "UMEM enabled " : "",
+                        ring->q_index, pf_q);
+
+       return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+       int ret = 0;
+
+       ret = __ice_vsi_get_qs_contig(qs_cfg);
+       if (ret) {
+               /* contig failed, so try with scatter approach */
+               qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+               qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
+                                       qs_cfg->scatter_count);
+               ret = __ice_vsi_get_qs_sc(qs_cfg);
+       }
+       return ret;
+}
+
+/**
+ * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ * @rxq_idx: Rx queue index
+ */
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+       int pf_q = vsi->rxq_map[rxq_idx];
+       struct ice_pf *pf = vsi->back;
+       struct ice_hw *hw = &pf->hw;
+       int ret = 0;
+       u32 rx_reg;
+
+       rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+       /* Skip if the queue is already in the requested state */
+       if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+               return 0;
+
+       /* turn on/off the queue */
+       if (ena)
+               rx_reg |= QRX_CTRL_QENA_REQ_M;
+       else
+               rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+       wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+       /* wait for the change to finish */
+       ret = ice_pf_rxq_wait(pf, pf_q, ena);
+       if (ret)
+               dev_err(&pf->pdev->dev,
+                       "VSI idx %d Rx ring %d %sable timeout\n",
+                       vsi->idx, pf_q, (ena ? "en" : "dis"));
+
+       return ret;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+       struct ice_pf *pf = vsi->back;
+       int v_idx = 0, num_q_vectors;
+       int err;
+
+       if (vsi->q_vectors[0]) {
+               dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+                       vsi->vsi_num);
+               return -EEXIST;
+       }
+
+       num_q_vectors = vsi->num_q_vectors;
+
+       for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+               err = ice_vsi_alloc_q_vector(vsi, v_idx);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       while (v_idx--)
+               ice_free_q_vector(vsi, v_idx);
+
+       dev_err(&pf->pdev->dev,
+               "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+               vsi->num_q_vectors, vsi->vsi_num, err);
+       vsi->num_q_vectors = 0;
+       return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+       int q_vectors = vsi->num_q_vectors;
+       int tx_rings_rem, rx_rings_rem;
+       int v_id;
+
+       /* initially assigning remaining rings count to VSIs num queue value */
+       tx_rings_rem = vsi->num_txq;
+       rx_rings_rem = vsi->num_rxq;
+
+       for (v_id = 0; v_id < q_vectors; v_id++) {
+               struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+               int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+               /* Tx rings mapping to vector */
+               tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+               q_vector->num_ring_tx = tx_rings_per_v;
+               q_vector->tx.ring = NULL;
+               q_vector->tx.itr_idx = ICE_TX_ITR;
+               q_base = vsi->num_txq - tx_rings_rem;
+
+               for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+                       struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+                       tx_ring->q_vector = q_vector;
+                       tx_ring->next = q_vector->tx.ring;
+                       q_vector->tx.ring = tx_ring;
+               }
+               tx_rings_rem -= tx_rings_per_v;
+
+               /* Rx rings mapping to vector */
+               rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+               q_vector->num_ring_rx = rx_rings_per_v;
+               q_vector->rx.ring = NULL;
+               q_vector->rx.itr_idx = ICE_RX_ITR;
+               q_base = vsi->num_rxq - rx_rings_rem;
+
+               for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+                       struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+                       rx_ring->q_vector = q_vector;
+                       rx_ring->next = q_vector->rx.ring;
+                       q_vector->rx.ring = rx_ring;
+               }
+               rx_rings_rem -= rx_rings_per_v;
+       }
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+       int v_idx;
+
+       ice_for_each_q_vector(vsi, v_idx)
+               ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+               struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+       struct ice_tlan_ctx tlan_ctx = { 0 };
+       struct ice_aqc_add_txqs_perq *txq;
+       struct ice_pf *pf = vsi->back;
+       u8 buf_len = sizeof(*qg_buf);
+       enum ice_status status;
+       u16 pf_q;
+       u8 tc;
+
+       pf_q = ring->reg_idx;
+       ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+       /* copy context contents into the qg_buf */
+       qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+       ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+                   ice_tlan_ctx_info);
+
+       /* init queue specific tail reg. It is referred as
+        * transmit comm scheduler queue doorbell.
+        */
+       ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+
+       if (IS_ENABLED(CONFIG_DCB))
+               tc = ring->dcb_tc;
+       else
+               tc = 0;
+
+       /* Add unique software queue handle of the Tx queue per
+        * TC into the VSI Tx ring
+        */
+       ring->q_handle = ice_calc_q_handle(vsi, ring, tc);
+
+       status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+                                1, qg_buf, buf_len, NULL);
+       if (status) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to set LAN Tx queue context, error: %d\n",
+                       status);
+               return -ENODEV;
+       }
+
+       /* Add Tx Queue TEID into the VSI Tx ring from the
+        * response. This will complete configuring and
+        * enabling the queue.
+        */
+       txq = &qg_buf->txqs[0];
+       if (pf_q == le16_to_cpu(txq->txq_id))
+               ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+       return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+       ice_cfg_itr_gran(hw);
+
+       if (q_vector->num_ring_rx) {
+               struct ice_ring_container *rc = &q_vector->rx;
+
+               /* if this value is set then don't overwrite with default */
+               if (!rc->itr_setting)
+                       rc->itr_setting = ICE_DFLT_RX_ITR;
+
+               rc->target_itr = ITR_TO_REG(rc->itr_setting);
+               rc->next_update = jiffies + 1;
+               rc->current_itr = rc->target_itr;
+               wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+                    ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+       }
+
+       if (q_vector->num_ring_tx) {
+               struct ice_ring_container *rc = &q_vector->tx;
+
+               /* if this value is set then don't overwrite with default */
+               if (!rc->itr_setting)
+                       rc->itr_setting = ICE_DFLT_TX_ITR;
+
+               rc->target_itr = ITR_TO_REG(rc->itr_setting);
+               rc->next_update = jiffies + 1;
+               rc->current_itr = rc->target_itr;
+               wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+                    ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+       }
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+       struct ice_pf *pf = vsi->back;
+       struct ice_hw *hw = &pf->hw;
+       u32 val;
+
+       itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+       val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+             ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+       wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+       if (ice_is_xdp_ena_vsi(vsi)) {
+               u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+               wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+                    val);
+       }
+       ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+       struct ice_pf *pf = vsi->back;
+       struct ice_hw *hw = &pf->hw;
+       u32 val;
+
+       itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+       val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+             ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+       wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+       ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+       wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+            (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+            GLINT_DYN_CTL_SWINT_TRIG_M |
+            GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+                    u16 rel_vmvf_num, struct ice_ring *ring,
+                    struct ice_txq_meta *txq_meta)
+{
+       struct ice_pf *pf = vsi->back;
+       struct ice_q_vector *q_vector;
+       struct ice_hw *hw = &pf->hw;
+       enum ice_status status;
+       u32 val;
+
+       /* clear cause_ena bit for disabled queues */
+       val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+       val &= ~QINT_TQCTL_CAUSE_ENA_M;
+       wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+       /* software is expected to wait for 100 ns */
+       ndelay(100);
+
+       /* trigger a software interrupt for the vector
+        * associated to the queue to schedule NAPI handler
+        */
+       q_vector = ring->q_vector;
+       if (q_vector)
+               ice_trigger_sw_intr(hw, q_vector);
+
+       status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+                                txq_meta->tc, 1, &txq_meta->q_handle,
+                                &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+                                rel_vmvf_num, NULL);
+
+       /* if the disable queue command was exercised during an
+        * active reset flow, ICE_ERR_RESET_ONGOING is returned.
+        * This is not an error as the reset operation disables
+        * queues at the hardware level anyway.
+        */
+       if (status == ICE_ERR_RESET_ONGOING) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Reset in progress. LAN Tx queues already disabled\n");
+       } else if (status == ICE_ERR_DOES_NOT_EXIST) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "LAN Tx queues do not exist, nothing to disable\n");
+       } else if (status) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Failed to disable LAN Tx queues, error: %d\n", status);
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+                 struct ice_txq_meta *txq_meta)
+{
+       u8 tc;
+
+       if (IS_ENABLED(CONFIG_DCB))
+               tc = ring->dcb_tc;
+       else
+               tc = 0;
+
+       txq_meta->q_id = ring->reg_idx;
+       txq_meta->q_teid = ring->txq_teid;
+       txq_meta->q_handle = ring->q_handle;
+       txq_meta->vsi_idx = vsi->idx;
+       txq_meta->tc = tc;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644 (file)
index 0000000..407995e
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_setup_rx_ctx(struct ice_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+               struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+                    u16 rel_vmvf_num, struct ice_ring *ring,
+                    struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+                 struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
index 661a6f7..d11a0aa 100644 (file)
@@ -5,6 +5,7 @@
 #define _ICE_DCB_LIB_H_
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 
 #ifdef CONFIG_DCB
index 7e23034..7e77906 100644 (file)
@@ -156,6 +156,7 @@ struct ice_priv_flag {
 static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
        ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
        ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+       ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
 };
 
 #define ICE_PRIV_FLAG_ARRAY_SIZE       ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -623,7 +624,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
                        continue;
 
                rx_buf = &rx_ring->rx_buf[i];
-               received_buf = page_address(rx_buf->page);
+               received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
 
                if (ice_lbtest_check_frame(received_buf))
                        valid_frames++;
@@ -1256,6 +1257,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
                                        "Fail to enable MIB change events\n");
                }
        }
+       if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+               /* down and up VSI so that changes of Rx cfg are reflected. */
+               ice_down(vsi);
+               ice_up(vsi);
+       }
        clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
        return ret;
 }
@@ -2577,6 +2583,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 {
        struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
        struct ice_netdev_priv *np = netdev_priv(netdev);
+       struct ice_ring *xdp_rings = NULL;
        struct ice_vsi *vsi = np->vsi;
        struct ice_pf *pf = vsi->back;
        int i, timeout = 50, err = 0;
@@ -2611,6 +2618,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
                return 0;
        }
 
+       /* If there is an AF_XDP UMEM attached to any of the Rx rings,
+        * disallow changing the number of descriptors -- regardless
+        * if the netdev is running or not.
+        */
+       if (ice_xsk_any_rx_ring_ena(vsi))
+               return -EBUSY;
+
        while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
                timeout--;
                if (!timeout)
@@ -2624,6 +2638,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
                        vsi->tx_rings[i]->count = new_tx_cnt;
                for (i = 0; i < vsi->alloc_rxq; i++)
                        vsi->rx_rings[i]->count = new_rx_cnt;
+               if (ice_is_xdp_ena_vsi(vsi))
+                       for (i = 0; i < vsi->num_xdp_txq; i++)
+                               vsi->xdp_rings[i]->count = new_tx_cnt;
+               vsi->num_tx_desc = new_tx_cnt;
+               vsi->num_rx_desc = new_rx_cnt;
                netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
                goto done;
        }
@@ -2650,15 +2669,43 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
                tx_rings[i].tx_buf = NULL;
                err = ice_setup_tx_ring(&tx_rings[i]);
                if (err) {
-                       while (i) {
-                               i--;
+                       while (i--)
                                ice_clean_tx_ring(&tx_rings[i]);
-                       }
                        devm_kfree(&pf->pdev->dev, tx_rings);
                        goto done;
                }
        }
 
+       if (!ice_is_xdp_ena_vsi(vsi))
+               goto process_rx;
+
+       /* alloc updated XDP resources */
+       netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+                   vsi->xdp_rings[0]->count, new_tx_cnt);
+
+       xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+                                sizeof(*xdp_rings), GFP_KERNEL);
+       if (!xdp_rings) {
+               err = -ENOMEM;
+               goto free_tx;
+       }
+
+       for (i = 0; i < vsi->num_xdp_txq; i++) {
+               /* clone ring and setup updated count */
+               xdp_rings[i] = *vsi->xdp_rings[i];
+               xdp_rings[i].count = new_tx_cnt;
+               xdp_rings[i].desc = NULL;
+               xdp_rings[i].tx_buf = NULL;
+               err = ice_setup_tx_ring(&xdp_rings[i]);
+               if (err) {
+                       while (i--)
+                               ice_clean_tx_ring(&xdp_rings[i]);
+                       devm_kfree(&pf->pdev->dev, xdp_rings);
+                       goto free_tx;
+               }
+               ice_set_ring_xdp(&xdp_rings[i]);
+       }
+
 process_rx:
        if (new_rx_cnt == vsi->rx_rings[0]->count)
                goto process_link;
@@ -2737,6 +2784,16 @@ process_link:
                        devm_kfree(&pf->pdev->dev, rx_rings);
                }
 
+               if (xdp_rings) {
+                       for (i = 0; i < vsi->num_xdp_txq; i++) {
+                               ice_free_tx_ring(vsi->xdp_rings[i]);
+                               *vsi->xdp_rings[i] = xdp_rings[i];
+                       }
+                       devm_kfree(&pf->pdev->dev, xdp_rings);
+               }
+
+               vsi->num_tx_desc = new_tx_cnt;
+               vsi->num_rx_desc = new_rx_cnt;
                ice_up(vsi);
        }
        goto done;
index cc75538..b1e96ca 100644 (file)
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
 
-/**
- * ice_setup_rx_ctx - Configure a receive ring context
- * @ring: The Rx ring to configure
- *
- * Configure the Rx descriptor ring in RLAN context.
- */
-static int ice_setup_rx_ctx(struct ice_ring *ring)
-{
-       struct ice_vsi *vsi = ring->vsi;
-       struct ice_hw *hw = &vsi->back->hw;
-       u32 rxdid = ICE_RXDID_FLEX_NIC;
-       struct ice_rlan_ctx rlan_ctx;
-       u32 regval;
-       u16 pf_q;
-       int err;
-
-       /* what is Rx queue number in global space of 2K Rx queues */
-       pf_q = vsi->rxq_map[ring->q_index];
-
-       /* clear the context structure first */
-       memset(&rlan_ctx, 0, sizeof(rlan_ctx));
-
-       rlan_ctx.base = ring->dma >> 7;
-
-       rlan_ctx.qlen = ring->count;
-
-       /* Receive Packet Data Buffer Size.
-        * The Packet Data Buffer Size is defined in 128 byte units.
-        */
-       rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
-
-       /* use 32 byte descriptors */
-       rlan_ctx.dsize = 1;
-
-       /* Strip the Ethernet CRC bytes before the packet is posted to host
-        * memory.
-        */
-       rlan_ctx.crcstrip = 1;
-
-       /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
-       rlan_ctx.l2tsel = 1;
-
-       rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
-       rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
-       rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
-
-       /* This controls whether VLAN is stripped from inner headers
-        * The VLAN in the inner L2 header is stripped to the receive
-        * descriptor if enabled by this flag.
-        */
-       rlan_ctx.showiv = 0;
-
-       /* Max packet size for this queue - must not be set to a larger value
-        * than 5 x DBUF
-        */
-       rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
-                              ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
-
-       /* Rx queue threshold in units of 64 */
-       rlan_ctx.lrxqthresh = 1;
-
-        /* Enable Flexible Descriptors in the queue context which
-         * allows this driver to select a specific receive descriptor format
-         */
-       if (vsi->type != ICE_VSI_VF) {
-               regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
-               regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
-                       QRXFLXP_CNTXT_RXDID_IDX_M;
-
-               /* increasing context priority to pick up profile ID;
-                * default is 0x01; setting to 0x03 to ensure profile
-                * is programming if prev context is of same priority
-                */
-               regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
-                       QRXFLXP_CNTXT_RXDID_PRIO_M;
-
-               wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
-       }
-
-       /* Absolute queue number out of 2K needs to be passed */
-       err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
-       if (err) {
-               dev_err(&vsi->back->pdev->dev,
-                       "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
-                       pf_q, err);
-               return -EIO;
-       }
-
-       if (vsi->type == ICE_VSI_VF)
-               return 0;
-
-       /* init queue specific tail register */
-       ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
-       writel(0, ring->tail);
-       ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
-
-       return 0;
-}
-
-/**
- * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
- * @ring: The Tx ring to configure
- * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
- * @pf_q: queue index in the PF space
- *
- * Configure the Tx descriptor ring in TLAN context.
- */
-static void
-ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
-{
-       struct ice_vsi *vsi = ring->vsi;
-       struct ice_hw *hw = &vsi->back->hw;
-
-       tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
-
-       tlan_ctx->port_num = vsi->port_info->lport;
-
-       /* Transmit Queue Length */
-       tlan_ctx->qlen = ring->count;
-
-       ice_set_cgd_num(tlan_ctx, ring);
-
-       /* PF number */
-       tlan_ctx->pf_num = hw->pf_id;
-
-       /* queue belongs to a specific VSI type
-        * VF / VM index should be programmed per vmvf_type setting:
-        * for vmvf_type = VF, it is VF number between 0-256
-        * for vmvf_type = VM, it is VM number between 0-767
-        * for PF or EMP this field should be set to zero
-        */
-       switch (vsi->type) {
-       case ICE_VSI_LB:
-               /* fall through */
-       case ICE_VSI_PF:
-               tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
-               break;
-       case ICE_VSI_VF:
-               /* Firmware expects vmvf_num to be absolute VF ID */
-               tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
-               tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
-               break;
-       default:
-               return;
-       }
-
-       /* make sure the context is associated with the right VSI */
-       tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
-
-       tlan_ctx->tso_ena = ICE_TX_LEGACY;
-       tlan_ctx->tso_qnum = pf_q;
-
-       /* Legacy or Advanced Host Interface:
-        * 0: Advanced Host Interface
-        * 1: Legacy Host Interface
-        */
-       tlan_ctx->legacy_int = ICE_TX_LEGACY;
-}
-
-/**
- * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
- * @pf: the PF being configured
- * @pf_q: the PF queue
- * @ena: enable or disable state of the queue
- *
- * This routine will wait for the given Rx queue of the PF to reach the
- * enabled or disabled state.
- * Returns -ETIMEDOUT in case of failing to reach the requested state after
- * multiple retries; else will return 0 in case of success.
- */
-static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
-{
-       int i;
-
-       for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
-               if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
-                             QRX_CTRL_QENA_STAT_M))
-                       return 0;
-
-               usleep_range(20, 40);
-       }
-
-       return -ETIMEDOUT;
-}
-
-/**
- * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
- * @vsi: the VSI being configured
- * @ena: start or stop the Rx rings
- * @rxq_idx: Rx queue index
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
-{
-       int pf_q = vsi->rxq_map[rxq_idx];
-       struct ice_pf *pf = vsi->back;
-       struct ice_hw *hw = &pf->hw;
-       int ret = 0;
-       u32 rx_reg;
-
-       rx_reg = rd32(hw, QRX_CTRL(pf_q));
-
-       /* Skip if the queue is already in the requested state */
-       if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
-               return 0;
-
-       /* turn on/off the queue */
-       if (ena)
-               rx_reg |= QRX_CTRL_QENA_REQ_M;
-       else
-               rx_reg &= ~QRX_CTRL_QENA_REQ_M;
-       wr32(hw, QRX_CTRL(pf_q), rx_reg);
-
-       /* wait for the change to finish */
-       ret = ice_pf_rxq_wait(pf, pf_q, ena);
-       if (ret)
-               dev_err(&pf->pdev->dev,
-                       "VSI idx %d Rx ring %d %sable timeout\n",
-                       vsi->idx, pf_q, (ena ? "en" : "dis"));
-
-       return ret;
-}
-
 /**
  * ice_vsi_ctrl_rx_rings - Start or stop a VSI's Rx rings
  * @vsi: the VSI being configured
@@ -270,7 +46,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
        if (!vsi->rx_rings)
                goto err_rings;
 
-       vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+       /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
+       vsi->txq_map = devm_kcalloc(&pf->pdev->dev, (2 * vsi->alloc_txq),
                                    sizeof(*vsi->txq_map), GFP_KERNEL);
 
        if (!vsi->txq_map)
@@ -281,7 +58,6 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
        if (!vsi->rxq_map)
                goto err_rxq_map;
 
-
        /* There is no need to allocate q_vectors for a loopback VSI. */
        if (vsi->type == ICE_VSI_LB)
                return 0;
@@ -605,88 +381,6 @@ unlock_pf:
        return vsi;
 }
 
-/**
- * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
- * @qs_cfg: gathered variables needed for PF->VSI queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
-{
-       int offset, i;
-
-       mutex_lock(qs_cfg->qs_mutex);
-       offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
-                                           0, qs_cfg->q_count, 0);
-       if (offset >= qs_cfg->pf_map_size) {
-               mutex_unlock(qs_cfg->qs_mutex);
-               return -ENOMEM;
-       }
-
-       bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
-       for (i = 0; i < qs_cfg->q_count; i++)
-               qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
-       mutex_unlock(qs_cfg->qs_mutex);
-
-       return 0;
-}
-
-/**
- * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
-{
-       int i, index = 0;
-
-       mutex_lock(qs_cfg->qs_mutex);
-       for (i = 0; i < qs_cfg->q_count; i++) {
-               index = find_next_zero_bit(qs_cfg->pf_map,
-                                          qs_cfg->pf_map_size, index);
-               if (index >= qs_cfg->pf_map_size)
-                       goto err_scatter;
-               set_bit(index, qs_cfg->pf_map);
-               qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
-       }
-       mutex_unlock(qs_cfg->qs_mutex);
-
-       return 0;
-err_scatter:
-       for (index = 0; index < i; index++) {
-               clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
-               qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
-       }
-       mutex_unlock(qs_cfg->qs_mutex);
-
-       return -ENOMEM;
-}
-
-/**
- * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * This function first tries to find contiguous space. If it is not successful,
- * it tries with the scatter approach.
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
-{
-       int ret = 0;
-
-       ret = __ice_vsi_get_qs_contig(qs_cfg);
-       if (ret) {
-               /* contig failed, so try with scatter approach */
-               qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
-               qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
-                                       qs_cfg->scatter_count);
-               ret = __ice_vsi_get_qs_sc(qs_cfg);
-       }
-       return ret;
-}
-
 /**
  * ice_vsi_get_qs - Assign queues from PF to VSI
  * @vsi: the VSI to assign queues to
@@ -1097,129 +791,6 @@ static int ice_vsi_init(struct ice_vsi *vsi)
        return ret;
 }
 
-/**
- * ice_free_q_vector - Free memory allocated for a specific interrupt vector
- * @vsi: VSI having the memory freed
- * @v_idx: index of the vector to be freed
- */
-static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-       struct ice_q_vector *q_vector;
-       struct ice_pf *pf = vsi->back;
-       struct ice_ring *ring;
-
-       if (!vsi->q_vectors[v_idx]) {
-               dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
-                       v_idx);
-               return;
-       }
-       q_vector = vsi->q_vectors[v_idx];
-
-       ice_for_each_ring(ring, q_vector->tx)
-               ring->q_vector = NULL;
-       ice_for_each_ring(ring, q_vector->rx)
-               ring->q_vector = NULL;
-
-       /* only VSI with an associated netdev is set up with NAPI */
-       if (vsi->netdev)
-               netif_napi_del(&q_vector->napi);
-
-       devm_kfree(&pf->pdev->dev, q_vector);
-       vsi->q_vectors[v_idx] = NULL;
-}
-
-/**
- * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
- * @vsi: the VSI having memory freed
- */
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
-{
-       int v_idx;
-
-       ice_for_each_q_vector(vsi, v_idx)
-               ice_free_q_vector(vsi, v_idx);
-}
-
-/**
- * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
- * @vsi: the VSI being configured
- * @v_idx: index of the vector in the VSI struct
- *
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-       struct ice_pf *pf = vsi->back;
-       struct ice_q_vector *q_vector;
-
-       /* allocate q_vector */
-       q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
-       if (!q_vector)
-               return -ENOMEM;
-
-       q_vector->vsi = vsi;
-       q_vector->v_idx = v_idx;
-       if (vsi->type == ICE_VSI_VF)
-               goto out;
-       /* only set affinity_mask if the CPU is online */
-       if (cpu_online(v_idx))
-               cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
-
-       /* This will not be called in the driver load path because the netdev
-        * will not be created yet. All other cases with register the NAPI
-        * handler here (i.e. resume, reset/rebuild, etc.)
-        */
-       if (vsi->netdev)
-               netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
-                              NAPI_POLL_WEIGHT);
-
-out:
-       /* tie q_vector and VSI together */
-       vsi->q_vectors[v_idx] = q_vector;
-
-       return 0;
-}
-
-/**
- * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
- * @vsi: the VSI being configured
- *
- * We allocate one q_vector per queue interrupt. If allocation fails we
- * return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
-{
-       struct ice_pf *pf = vsi->back;
-       int v_idx = 0, num_q_vectors;
-       int err;
-
-       if (vsi->q_vectors[0]) {
-               dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
-                       vsi->vsi_num);
-               return -EEXIST;
-       }
-
-       num_q_vectors = vsi->num_q_vectors;
-
-       for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-               err = ice_vsi_alloc_q_vector(vsi, v_idx);
-               if (err)
-                       goto err_out;
-       }
-
-       return 0;
-
-err_out:
-       while (v_idx--)
-               ice_free_q_vector(vsi, v_idx);
-
-       dev_err(&pf->pdev->dev,
-               "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
-               vsi->num_q_vectors, vsi->vsi_num, err);
-       vsi->num_q_vectors = 0;
-       return err;
-}
-
 /**
  * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
  * @vsi: ptr to the VSI
@@ -1340,66 +911,6 @@ err_out:
        return -ENOMEM;
 }
 
-/**
- * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
- * @vsi: the VSI being configured
- *
- * This function maps descriptor rings to the queue-specific vectors allotted
- * through the MSI-X enabling code. On a constrained vector budget, we map Tx
- * and Rx rings to the vector as "efficiently" as possible.
- */
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#else
-static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#endif /* CONFIG_DCB */
-{
-       int q_vectors = vsi->num_q_vectors;
-       int tx_rings_rem, rx_rings_rem;
-       int v_id;
-
-       /* initially assigning remaining rings count to VSIs num queue value */
-       tx_rings_rem = vsi->num_txq;
-       rx_rings_rem = vsi->num_rxq;
-
-       for (v_id = 0; v_id < q_vectors; v_id++) {
-               struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
-               int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
-
-               /* Tx rings mapping to vector */
-               tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
-               q_vector->num_ring_tx = tx_rings_per_v;
-               q_vector->tx.ring = NULL;
-               q_vector->tx.itr_idx = ICE_TX_ITR;
-               q_base = vsi->num_txq - tx_rings_rem;
-
-               for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
-                       struct ice_ring *tx_ring = vsi->tx_rings[q_id];
-
-                       tx_ring->q_vector = q_vector;
-                       tx_ring->next = q_vector->tx.ring;
-                       q_vector->tx.ring = tx_ring;
-               }
-               tx_rings_rem -= tx_rings_per_v;
-
-               /* Rx rings mapping to vector */
-               rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
-               q_vector->num_ring_rx = rx_rings_per_v;
-               q_vector->rx.ring = NULL;
-               q_vector->rx.itr_idx = ICE_RX_ITR;
-               q_base = vsi->num_rxq - rx_rings_rem;
-
-               for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
-                       struct ice_ring *rx_ring = vsi->rx_rings[q_id];
-
-                       rx_ring->q_vector = q_vector;
-                       rx_ring->next = q_vector->rx.ring;
-                       q_vector->rx.ring = rx_ring;
-               }
-               rx_rings_rem -= rx_rings_per_v;
-       }
-}
-
 /**
  * ice_vsi_manage_rss_lut - disable/enable RSS
  * @vsi: the VSI being changed
@@ -1673,6 +1184,31 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
        return err;
 }
 
+/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+       if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+               vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+               vsi->rx_buf_len = ICE_RXBUF_2048;
+#if (PAGE_SIZE < 8192)
+       } else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+                  (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+               vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+               vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+       } else {
+               vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+#if (PAGE_SIZE < 8192)
+               vsi->rx_buf_len = ICE_RXBUF_3072;
+#else
+               vsi->rx_buf_len = ICE_RXBUF_2048;
+#endif
+       }
+}
+
 /**
  * ice_vsi_cfg_rxqs - Configure the VSI for Rx
  * @vsi: the VSI being configured
@@ -1687,13 +1223,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
        if (vsi->type == ICE_VSI_VF)
                goto setup_rings;
 
-       if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
-               vsi->max_frame = vsi->netdev->mtu +
-                       ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-       else
-               vsi->max_frame = ICE_RXBUF_2048;
-
-       vsi->rx_buf_len = ICE_RXBUF_2048;
+       ice_vsi_cfg_frame_size(vsi);
 setup_rings:
        /* set up individual rings */
        for (i = 0; i < vsi->num_rxq; i++) {
@@ -1711,102 +1241,35 @@ setup_rings:
        return 0;
 }
 
-/**
- * ice_vsi_cfg_txq - Configure single Tx queue
- * @vsi: the VSI that queue belongs to
- * @ring: Tx ring to be configured
- * @tc_q_idx: queue index within given TC
- * @qg_buf: queue group buffer
- * @tc: TC that Tx ring belongs to
- */
-static int
-ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx,
-               struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc)
-{
-       struct ice_tlan_ctx tlan_ctx = { 0 };
-       struct ice_aqc_add_txqs_perq *txq;
-       struct ice_pf *pf = vsi->back;
-       u8 buf_len = sizeof(*qg_buf);
-       enum ice_status status;
-       u16 pf_q;
-
-       pf_q = ring->reg_idx;
-       ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
-       /* copy context contents into the qg_buf */
-       qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
-       ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
-                   ice_tlan_ctx_info);
-
-       /* init queue specific tail reg. It is referred as
-        * transmit comm scheduler queue doorbell.
-        */
-       ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
-
-       /* Add unique software queue handle of the Tx queue per
-        * TC into the VSI Tx ring
-        */
-       ring->q_handle = tc_q_idx;
-
-       status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
-                                1, qg_buf, buf_len, NULL);
-       if (status) {
-               dev_err(&pf->pdev->dev,
-                       "Failed to set LAN Tx queue context, error: %d\n",
-                       status);
-               return -ENODEV;
-       }
-
-       /* Add Tx Queue TEID into the VSI Tx ring from the
-        * response. This will complete configuring and
-        * enabling the queue.
-        */
-       txq = &qg_buf->txqs[0];
-       if (pf_q == le16_to_cpu(txq->txq_id))
-               ring->txq_teid = le32_to_cpu(txq->q_teid);
-
-       return 0;
-}
-
 /**
  * ice_vsi_cfg_txqs - Configure the VSI for Tx
  * @vsi: the VSI being configured
  * @rings: Tx ring array to be configured
- * @offset: offset within vsi->txq_map
  *
  * Return 0 on success and a negative value on error
  * Configure the Tx VSI for operation.
  */
 static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
 {
        struct ice_aqc_add_tx_qgrp *qg_buf;
-       struct ice_pf *pf = vsi->back;
-       u16 q_idx = 0, i;
+       u16 q_idx = 0;
        int err = 0;
-       u8 tc;
 
-       qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL);
+       qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
        if (!qg_buf)
                return -ENOMEM;
 
        qg_buf->num_txqs = 1;
 
-       /* set up and configure the Tx queues for each enabled TC */
-       ice_for_each_traffic_class(tc) {
-               if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
-                       break;
-
-               for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-                       err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset,
-                                             qg_buf, tc);
-                       if (err)
-                               goto err_cfg_txqs;
-
-                       q_idx++;
-               }
+       for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+               err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+               if (err)
+                       goto err_cfg_txqs;
        }
+
 err_cfg_txqs:
-       devm_kfree(&pf->pdev->dev, qg_buf);
+       kfree(qg_buf);
        return err;
 }
 
@@ -1819,159 +1282,46 @@ err_cfg_txqs:
  */
 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
 {
-       return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0);
-}
-
-/**
- * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
- * @intrl: interrupt rate limit in usecs
- * @gran: interrupt rate limit granularity in usecs
- *
- * This function converts a decimal interrupt rate limit in usecs to the format
- * expected by firmware.
- */
-u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
-{
-       u32 val = intrl / gran;
-
-       if (val)
-               return val | GLINT_RATE_INTRL_ENA_M;
-       return 0;
-}
-
-/**
- * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
- * @hw: board specific structure
- */
-static void ice_cfg_itr_gran(struct ice_hw *hw)
-{
-       u32 regval = rd32(hw, GLINT_CTL);
-
-       /* no need to update global register if ITR gran is already set */
-       if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
-           (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
-            GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
-           (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
-            GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
-           (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
-            GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
-           (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
-             GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
-               return;
-
-       regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
-                 GLINT_CTL_ITR_GRAN_200_M) |
-                ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
-                 GLINT_CTL_ITR_GRAN_100_M) |
-                ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
-                 GLINT_CTL_ITR_GRAN_50_M) |
-                ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
-                 GLINT_CTL_ITR_GRAN_25_M);
-       wr32(hw, GLINT_CTL, regval);
+       return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
 }
 
 /**
- * ice_cfg_itr - configure the initial interrupt throttle values
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector that's being configured
- *
- * Configure interrupt throttling values for the ring containers that are
- * associated with the interrupt vector passed in.
- */
-static void
-ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
-       ice_cfg_itr_gran(hw);
-
-       if (q_vector->num_ring_rx) {
-               struct ice_ring_container *rc = &q_vector->rx;
-
-               /* if this value is set then don't overwrite with default */
-               if (!rc->itr_setting)
-                       rc->itr_setting = ICE_DFLT_RX_ITR;
-
-               rc->target_itr = ITR_TO_REG(rc->itr_setting);
-               rc->next_update = jiffies + 1;
-               rc->current_itr = rc->target_itr;
-               wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-                    ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-       }
-
-       if (q_vector->num_ring_tx) {
-               struct ice_ring_container *rc = &q_vector->tx;
-
-               /* if this value is set then don't overwrite with default */
-               if (!rc->itr_setting)
-                       rc->itr_setting = ICE_DFLT_TX_ITR;
-
-               rc->target_itr = ITR_TO_REG(rc->itr_setting);
-               rc->next_update = jiffies + 1;
-               rc->current_itr = rc->target_itr;
-               wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-                    ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-       }
-}
-
-/**
- * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
  * @vsi: the VSI being configured
- * @txq: Tx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
  *
- * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
- * within the function space.
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
  */
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
 {
-       struct ice_pf *pf = vsi->back;
-       struct ice_hw *hw = &pf->hw;
-       u32 val;
+       int ret;
+       int i;
 
-       itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+       ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+       if (ret)
+               return ret;
 
-       val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
-             ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+       for (i = 0; i < vsi->num_xdp_txq; i++)
+               vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
 
-       wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+       return ret;
 }
 
 /**
- * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
- * @vsi: the VSI being configured
- * @rxq: Rx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
  *
- * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
- * within the function space.
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
  */
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
 {
-       struct ice_pf *pf = vsi->back;
-       struct ice_hw *hw = &pf->hw;
-       u32 val;
-
-       itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
-
-       val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
-             ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
-
-       wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+       u32 val = intrl / gran;
 
-       ice_flush(hw);
+       if (val)
+               return val | GLINT_RATE_INTRL_ENA_M;
+       return 0;
 }
 
 /**
@@ -2133,109 +1483,6 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
        return ice_vsi_ctrl_rx_rings(vsi, false);
 }
 
-/**
- * ice_trigger_sw_intr - trigger a software interrupt
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector to trigger the software interrupt for
- */
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
-       wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
-            (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
-            GLINT_DYN_CTL_SWINT_TRIG_M |
-            GLINT_DYN_CTL_INTENA_M);
-}
-
-/**
- * ice_vsi_stop_tx_ring - Disable single Tx ring
- * @vsi: the VSI being configured
- * @rst_src: reset source
- * @rel_vmvf_num: Relative ID of VF/VM
- * @ring: Tx ring to be stopped
- * @txq_meta: Meta data of Tx ring to be stopped
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-                    u16 rel_vmvf_num, struct ice_ring *ring,
-                    struct ice_txq_meta *txq_meta)
-{
-       struct ice_pf *pf = vsi->back;
-       struct ice_q_vector *q_vector;
-       struct ice_hw *hw = &pf->hw;
-       enum ice_status status;
-       u32 val;
-
-       /* clear cause_ena bit for disabled queues */
-       val = rd32(hw, QINT_TQCTL(ring->reg_idx));
-       val &= ~QINT_TQCTL_CAUSE_ENA_M;
-       wr32(hw, QINT_TQCTL(ring->reg_idx), val);
-
-       /* software is expected to wait for 100 ns */
-       ndelay(100);
-
-       /* trigger a software interrupt for the vector
-        * associated to the queue to schedule NAPI handler
-        */
-       q_vector = ring->q_vector;
-       if (q_vector)
-               ice_trigger_sw_intr(hw, q_vector);
-
-       status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
-                                txq_meta->tc, 1, &txq_meta->q_handle,
-                                &txq_meta->q_id, &txq_meta->q_teid, rst_src,
-                                rel_vmvf_num, NULL);
-
-       /* if the disable queue command was exercised during an
-        * active reset flow, ICE_ERR_RESET_ONGOING is returned.
-        * This is not an error as the reset operation disables
-        * queues at the hardware level anyway.
-        */
-       if (status == ICE_ERR_RESET_ONGOING) {
-               dev_dbg(&vsi->back->pdev->dev,
-                       "Reset in progress. LAN Tx queues already disabled\n");
-       } else if (status == ICE_ERR_DOES_NOT_EXIST) {
-               dev_dbg(&vsi->back->pdev->dev,
-                       "LAN Tx queues do not exist, nothing to disable\n");
-       } else if (status) {
-               dev_err(&vsi->back->pdev->dev,
-                       "Failed to disable LAN Tx queues, error: %d\n", status);
-               return -ENODEV;
-       }
-
-       return 0;
-}
-
-/**
- * ice_fill_txq_meta - Prepare the Tx queue's meta data
- * @vsi: VSI that ring belongs to
- * @ring: ring that txq_meta will be based on
- * @txq_meta: a helper struct that wraps Tx queue's information
- *
- * Set up a helper struct that will contain all the necessary fields that
- * are needed for stopping Tx queue
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-void
-ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
-                 struct ice_txq_meta *txq_meta)
-{
-       u8 tc = 0;
-
-#ifdef CONFIG_DCB
-       tc = ring->dcb_tc;
-#endif /* CONFIG_DCB */
-       txq_meta->q_id = ring->reg_idx;
-       txq_meta->q_teid = ring->txq_teid;
-       txq_meta->q_handle = ring->q_handle;
-       txq_meta->vsi_idx = vsi->idx;
-       txq_meta->tc = tc;
-}
-
 /**
  * ice_vsi_stop_tx_rings - Disable Tx rings
  * @vsi: the VSI being configured
@@ -2247,34 +1494,24 @@ static int
 ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
                      u16 rel_vmvf_num, struct ice_ring **rings)
 {
-       u16 i, q_idx = 0;
-       int status;
-       u8 tc;
+       u16 q_idx;
 
        if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
                return -EINVAL;
 
-       /* set up the Tx queue list to be disabled for each enabled TC */
-       ice_for_each_traffic_class(tc) {
-               if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
-                       break;
-
-               for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-                       struct ice_txq_meta txq_meta = { };
-
-                       if (!rings || !rings[q_idx])
-                               return -EINVAL;
+       for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+               struct ice_txq_meta txq_meta = { };
+               int status;
 
-                       ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
-                       status = ice_vsi_stop_tx_ring(vsi, rst_src,
-                                                     rel_vmvf_num,
-                                                     rings[q_idx], &txq_meta);
+               if (!rings || !rings[q_idx])
+                       return -EINVAL;
 
-                       if (status)
-                               return status;
+               ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+               status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+                                             rings[q_idx], &txq_meta);
 
-                       q_idx++;
-               }
+               if (status)
+                       return status;
        }
 
        return 0;
@@ -2293,6 +1530,15 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
        return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
 }
 
+/**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+       return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+}
+
 /**
  * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
  * @vsi: VSI to enable or disable VLAN pruning on
@@ -2690,6 +1936,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
                wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
                for (q = 0; q < q_vector->num_ring_tx; q++) {
                        wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+                       if (ice_is_xdp_ena_vsi(vsi)) {
+                               u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+                               wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+                       }
                        txq++;
                }
 
@@ -3064,6 +2315,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
                vsi->base_vector = 0;
        }
 
+       if (ice_is_xdp_ena_vsi(vsi))
+               /* return value check can be skipped here, it always returns
+                * 0 if reset is in progress
+                */
+               ice_destroy_xdp_rings(vsi);
        ice_vsi_put_qs(vsi);
        ice_vsi_clear_rings(vsi);
        ice_vsi_free_arrays(vsi);
@@ -3085,7 +2341,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
        if (ret < 0)
                goto err_vsi;
 
-
        switch (vsi->type) {
        case ICE_VSI_PF:
                ret = ice_vsi_alloc_q_vectors(vsi);
@@ -3105,6 +2360,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
                        goto err_vectors;
 
                ice_vsi_map_rings_to_vectors(vsi);
+               if (ice_is_xdp_ena_vsi(vsi)) {
+                       vsi->num_xdp_txq = vsi->alloc_txq;
+                       ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+                       if (ret)
+                               goto err_vectors;
+               }
                /* Do not exit if configuring RSS had an issue, at least
                 * receive traffic on first queue. Hence no need to capture
                 * return value
@@ -3131,9 +2392,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
        }
 
        /* configure VSI nodes based on number of queues and TC's */
-       for (i = 0; i < vsi->tc_cfg.numtc; i++)
+       for (i = 0; i < vsi->tc_cfg.numtc; i++) {
                max_txqs[i] = vsi->alloc_txq;
 
+               if (ice_is_xdp_ena_vsi(vsi))
+                       max_txqs[i] += vsi->num_xdp_txq;
+       }
+
        status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
                                 max_txqs);
        if (status) {
@@ -3270,6 +2535,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
        return buf;
 }
 
+/**
+ * ice_update_ring_stats - Update ring statistics
+ * @ring: ring to update
+ * @cont: used to increment per-vector counters
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void
+ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
+                     u64 pkts, u64 bytes)
+{
+       ring->stats.bytes += bytes;
+       ring->stats.pkts += pkts;
+       cont->total_bytes += bytes;
+       cont->total_pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
+{
+       u64_stats_update_begin(&tx_ring->syncp);
+       ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+       u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
+{
+       u64_stats_update_begin(&rx_ring->syncp);
+       ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+       u64_stats_update_end(&rx_ring->syncp);
+}
+
 /**
  * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
  * @vsi: the VSI being configured MAC filter
index 47bc033..8d5a797 100644 (file)
@@ -6,19 +6,6 @@
 
 #include "ice.h"
 
-struct ice_txq_meta {
-       /* Tx-scheduler element identifier */
-       u32 q_teid;
-       /* Entry in VSI's txq_map bitmap */
-       u16 q_id;
-       /* Relative index of Tx queue within TC */
-       u16 q_handle;
-       /* VSI index that Tx queue belongs to */
-       u16 vsi_idx;
-       /* TC number that Tx queue belongs to */
-       u8 tc;
-};
-
 int
 ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
                    const u8 *macaddr);
@@ -33,24 +20,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
 
 void ice_vsi_cfg_msix(struct ice_vsi *vsi);
 
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
-
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
-
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-                    u16 rel_vmvf_num, struct ice_ring *ring,
-                    struct ice_txq_meta *txq_meta);
-
-void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
-                      struct ice_txq_meta *txq_meta);
-
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
-#endif /* CONFIG_PCI_IOV */
-
 int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
 
 int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@@ -67,6 +36,10 @@ int
 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
                          u16 rel_vmvf_num);
 
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
 int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
 
 void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@@ -98,16 +71,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi);
 
 bool ice_is_reset_in_progress(unsigned long *state);
 
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
-
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
-
 void ice_vsi_put_qs(struct ice_vsi *vsi);
 
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
-#endif /* CONFIG_DCB */
-
 void ice_vsi_dis_irq(struct ice_vsi *vsi);
 
 void ice_vsi_free_irq(struct ice_vsi *vsi);
@@ -118,6 +83,12 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
 
 int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
 
+void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+
 u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
 
 char *ice_nvm_version_str(struct ice_hw *hw);
index 214cd6e..363b284 100644 (file)
@@ -6,6 +6,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
 
@@ -1660,6 +1661,324 @@ free_q_irqs:
        return err;
 }
 
+/**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+       struct device *dev = &vsi->back->pdev->dev;
+       int i;
+
+       for (i = 0; i < vsi->num_xdp_txq; i++) {
+               u16 xdp_q_idx = vsi->alloc_txq + i;
+               struct ice_ring *xdp_ring;
+
+               xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+               if (!xdp_ring)
+                       goto free_xdp_rings;
+
+               xdp_ring->q_index = xdp_q_idx;
+               xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+               xdp_ring->ring_active = false;
+               xdp_ring->vsi = vsi;
+               xdp_ring->netdev = NULL;
+               xdp_ring->dev = dev;
+               xdp_ring->count = vsi->num_tx_desc;
+               vsi->xdp_rings[i] = xdp_ring;
+               if (ice_setup_tx_ring(xdp_ring))
+                       goto free_xdp_rings;
+               ice_set_ring_xdp(xdp_ring);
+               xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+       }
+
+       return 0;
+
+free_xdp_rings:
+       for (; i >= 0; i--)
+               if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+                       ice_free_tx_ring(vsi->xdp_rings[i]);
+       return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+       struct bpf_prog *old_prog;
+       int i;
+
+       old_prog = xchg(&vsi->xdp_prog, prog);
+       if (old_prog)
+               bpf_prog_put(old_prog);
+
+       ice_for_each_rxq(vsi, i)
+               WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+       u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+       int xdp_rings_rem = vsi->num_xdp_txq;
+       struct ice_pf *pf = vsi->back;
+       struct ice_qs_cfg xdp_qs_cfg = {
+               .qs_mutex = &pf->avail_q_mutex,
+               .pf_map = pf->avail_txqs,
+               .pf_map_size = pf->max_pf_txqs,
+               .q_count = vsi->num_xdp_txq,
+               .scatter_count = ICE_MAX_SCATTER_TXQS,
+               .vsi_map = vsi->txq_map,
+               .vsi_map_offset = vsi->alloc_txq,
+               .mapping_mode = ICE_VSI_MAP_CONTIG
+       };
+       enum ice_status status;
+       int i, v_idx;
+
+       vsi->xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+                                     sizeof(*vsi->xdp_rings), GFP_KERNEL);
+       if (!vsi->xdp_rings)
+               return -ENOMEM;
+
+       vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+       if (__ice_vsi_get_qs(&xdp_qs_cfg))
+               goto err_map_xdp;
+
+       if (ice_xdp_alloc_setup_rings(vsi))
+               goto clear_xdp_rings;
+
+       /* follow the logic from ice_vsi_map_rings_to_vectors */
+       ice_for_each_q_vector(vsi, v_idx) {
+               struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+               int xdp_rings_per_v, q_id, q_base;
+
+               xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+                                              vsi->num_q_vectors - v_idx);
+               q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+               for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+                       struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+                       xdp_ring->q_vector = q_vector;
+                       xdp_ring->next = q_vector->tx.ring;
+                       q_vector->tx.ring = xdp_ring;
+               }
+               xdp_rings_rem -= xdp_rings_per_v;
+       }
+
+       /* omit the scheduler update if in reset path; XDP queues will be
+        * taken into account at the end of ice_vsi_rebuild, where
+        * ice_cfg_vsi_lan is being called
+        */
+       if (ice_is_reset_in_progress(pf->state))
+               return 0;
+
+       /* tell the Tx scheduler that right now we have
+        * additional queues
+        */
+       for (i = 0; i < vsi->tc_cfg.numtc; i++)
+               max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+       status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+                                max_txqs);
+       if (status) {
+               dev_err(&pf->pdev->dev,
+                       "Failed VSI LAN queue config for XDP, error:%d\n",
+                       status);
+               goto clear_xdp_rings;
+       }
+       ice_vsi_assign_bpf_prog(vsi, prog);
+
+       return 0;
+clear_xdp_rings:
+       for (i = 0; i < vsi->num_xdp_txq; i++)
+               if (vsi->xdp_rings[i]) {
+                       kfree_rcu(vsi->xdp_rings[i], rcu);
+                       vsi->xdp_rings[i] = NULL;
+               }
+
+err_map_xdp:
+       mutex_lock(&pf->avail_q_mutex);
+       for (i = 0; i < vsi->num_xdp_txq; i++) {
+               clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+               vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+       }
+       mutex_unlock(&pf->avail_q_mutex);
+
+       devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+       return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+       u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+       struct ice_pf *pf = vsi->back;
+       int i, v_idx;
+
+       /* q_vectors are freed in reset path so there's no point in detaching
+        * rings; in case of rebuild being triggered not from reset, reset bits
+        * in pf->state won't be set, so additionally check first q_vector
+        * against NULL
+        */
+       if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+               goto free_qmap;
+
+       ice_for_each_q_vector(vsi, v_idx) {
+               struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+               struct ice_ring *ring;
+
+               ice_for_each_ring(ring, q_vector->tx)
+                       if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+                               break;
+
+               /* restore the value of last node prior to XDP setup */
+               q_vector->tx.ring = ring;
+       }
+
+free_qmap:
+       mutex_lock(&pf->avail_q_mutex);
+       for (i = 0; i < vsi->num_xdp_txq; i++) {
+               clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+               vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+       }
+       mutex_unlock(&pf->avail_q_mutex);
+
+       for (i = 0; i < vsi->num_xdp_txq; i++)
+               if (vsi->xdp_rings[i]) {
+                       if (vsi->xdp_rings[i]->desc)
+                               ice_free_tx_ring(vsi->xdp_rings[i]);
+                       kfree_rcu(vsi->xdp_rings[i], rcu);
+                       vsi->xdp_rings[i] = NULL;
+               }
+
+       devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+       vsi->xdp_rings = NULL;
+
+       if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+               return 0;
+
+       ice_vsi_assign_bpf_prog(vsi, NULL);
+
+       /* notify Tx scheduler that we destroyed XDP queues and bring
+        * back the old number of child nodes
+        */
+       for (i = 0; i < vsi->tc_cfg.numtc; i++)
+               max_txqs[i] = vsi->num_txq;
+
+       return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+                              max_txqs);
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+                  struct netlink_ext_ack *extack)
+{
+       int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+       bool if_running = netif_running(vsi->netdev);
+       int ret = 0, xdp_ring_err = 0;
+
+       if (frame_size > vsi->rx_buf_len) {
+               NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+               return -EOPNOTSUPP;
+       }
+
+       /* need to stop netdev while setting up the program for Rx rings */
+       if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+               ret = ice_down(vsi);
+               if (ret) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Preparing device for XDP attach failed");
+                       return ret;
+               }
+       }
+
+       if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+               vsi->num_xdp_txq = vsi->alloc_txq;
+               xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+               if (xdp_ring_err)
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Setting up XDP Tx resources failed");
+       } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+               xdp_ring_err = ice_destroy_xdp_rings(vsi);
+               if (xdp_ring_err)
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Freeing XDP Tx resources failed");
+       } else {
+               ice_vsi_assign_bpf_prog(vsi, prog);
+       }
+
+       if (if_running)
+               ret = ice_up(vsi);
+
+       if (!ret && prog && vsi->xsk_umems) {
+               int i;
+
+               ice_for_each_rxq(vsi, i) {
+                       struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+                       if (rx_ring->xsk_umem)
+                               napi_schedule(&rx_ring->q_vector->napi);
+               }
+       }
+
+       return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+       struct ice_netdev_priv *np = netdev_priv(dev);
+       struct ice_vsi *vsi = np->vsi;
+
+       if (vsi->type != ICE_VSI_PF) {
+               NL_SET_ERR_MSG_MOD(xdp->extack,
+                                  "XDP can be loaded only on PF VSI");
+               return -EINVAL;
+       }
+
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+       case XDP_QUERY_PROG:
+               xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
+               return 0;
+       case XDP_SETUP_XSK_UMEM:
+               return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
+                                         xdp->xsk.queue_id);
+       default:
+               return -EINVAL;
+       }
+}
+
 /**
  * ice_ena_misc_vector - enable the non-queue interrupts
  * @pf: board private structure
@@ -2219,6 +2538,8 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
                status = -ENODEV;
                goto unroll_vsi_setup;
        }
+       /* netdev has to be configured before setting frame size */
+       ice_vsi_cfg_frame_size(vsi);
 
        /* registering the NAPI handler requires both the queues and
         * netdev to be created, which are done in ice_pf_vsi_setup()
@@ -3505,6 +3826,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
        ice_vsi_cfg_dcb_rings(vsi);
 
        err = ice_vsi_cfg_lan_txqs(vsi);
+       if (!err && ice_is_xdp_ena_vsi(vsi))
+               err = ice_vsi_cfg_xdp_txqs(vsi);
        if (!err)
                err = ice_vsi_cfg_rxqs(vsi);
 
@@ -3920,6 +4243,13 @@ int ice_down(struct ice_vsi *vsi)
                netdev_err(vsi->netdev,
                           "Failed stop Tx rings, VSI %d error %d\n",
                           vsi->vsi_num, tx_err);
+       if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+               tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+               if (tx_err)
+                       netdev_err(vsi->netdev,
+                                  "Failed stop XDP rings, VSI %d error %d\n",
+                                  vsi->vsi_num, tx_err);
+       }
 
        rx_err = ice_vsi_stop_rx_rings(vsi);
        if (rx_err)
@@ -4328,6 +4658,18 @@ clear_recovery:
        dev_err(dev, "Rebuild failed, unload and reload driver\n");
 }
 
+/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+       if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+               return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+       else
+               return ICE_RXBUF_3072;
+}
+
 /**
  * ice_change_mtu - NDO callback to change the MTU
  * @netdev: network interface device structure
@@ -4347,6 +4689,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
                return 0;
        }
 
+       if (ice_is_xdp_ena_vsi(vsi)) {
+               int frame_size = ice_max_xdp_frame_size(vsi);
+
+               if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+                       netdev_err(netdev, "max MTU for XDP usage is %d\n",
+                                  frame_size - ICE_ETH_PKT_HDR_PAD);
+                       return -EINVAL;
+               }
+       }
+
        if (new_mtu < netdev->min_mtu) {
                netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
                           netdev->min_mtu);
@@ -4878,4 +5230,7 @@ static const struct net_device_ops ice_netdev_ops = {
        .ndo_fdb_add = ice_fdb_add,
        .ndo_fdb_del = ice_fdb_del,
        .ndo_tx_timeout = ice_tx_timeout,
+       .ndo_bpf = ice_xdp,
+       .ndo_xdp_xmit = ice_xdp_xmit,
+       .ndo_xsk_wakeup = ice_xsk_wakeup,
 };
index 33dd103..40a29b9 100644 (file)
@@ -5,8 +5,13 @@
 
 #include <linux/prefetch.h>
 #include <linux/mm.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
 #include "ice.h"
 #include "ice_dcb_lib.h"
+#include "ice_xsk.h"
 
 #define ICE_RX_HDR_SIZE                256
 
@@ -19,7 +24,10 @@ static void
 ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
 {
        if (tx_buf->skb) {
-               dev_kfree_skb_any(tx_buf->skb);
+               if (ice_ring_is_xdp(ring))
+                       page_frag_free(tx_buf->raw_buf);
+               else
+                       dev_kfree_skb_any(tx_buf->skb);
                if (dma_unmap_len(tx_buf, len))
                        dma_unmap_single(ring->dev,
                                         dma_unmap_addr(tx_buf, dma),
@@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
 {
        u16 i;
 
+       if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+               ice_xsk_clean_xdp_ring(tx_ring);
+               goto tx_skip_free;
+       }
+
        /* ring already cleared, nothing to do */
        if (!tx_ring->tx_buf)
                return;
@@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
        for (i = 0; i < tx_ring->count; i++)
                ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
 
+tx_skip_free:
        memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
 
        /* Zero out the descriptor ring */
@@ -136,8 +150,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
                total_bytes += tx_buf->bytecount;
                total_pkts += tx_buf->gso_segs;
 
-               /* free the skb */
-               napi_consume_skb(tx_buf->skb, napi_budget);
+               if (ice_ring_is_xdp(tx_ring))
+                       page_frag_free(tx_buf->raw_buf);
+               else
+                       /* free the skb */
+                       napi_consume_skb(tx_buf->skb, napi_budget);
 
                /* unmap skb header data */
                dma_unmap_single(tx_ring->dev,
@@ -188,12 +205,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 
        i += tx_ring->count;
        tx_ring->next_to_clean = i;
-       u64_stats_update_begin(&tx_ring->syncp);
-       tx_ring->stats.bytes += total_bytes;
-       tx_ring->stats.pkts += total_pkts;
-       u64_stats_update_end(&tx_ring->syncp);
-       tx_ring->q_vector->tx.total_bytes += total_bytes;
-       tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+       ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+
+       if (ice_ring_is_xdp(tx_ring))
+               return !!budget;
 
        netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
                                  total_bytes);
@@ -273,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
        if (!rx_ring->rx_buf)
                return;
 
+       if (rx_ring->xsk_umem) {
+               ice_xsk_clean_rx_ring(rx_ring);
+               goto rx_skip_free;
+       }
+
        /* Free all the Rx ring sk_buffs */
        for (i = 0; i < rx_ring->count; i++) {
                struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -289,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
                 */
                dma_sync_single_range_for_cpu(dev, rx_buf->dma,
                                              rx_buf->page_offset,
-                                             ICE_RXBUF_2048, DMA_FROM_DEVICE);
+                                             rx_ring->rx_buf_len,
+                                             DMA_FROM_DEVICE);
 
                /* free resources associated with mapping */
-               dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
+               dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
                                     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
                __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 
@@ -300,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
                rx_buf->page_offset = 0;
        }
 
+rx_skip_free:
        memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
 
        /* Zero out the descriptor ring */
@@ -319,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 void ice_free_rx_ring(struct ice_ring *rx_ring)
 {
        ice_clean_rx_ring(rx_ring);
+       if (rx_ring->vsi->type == ICE_VSI_PF)
+               if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+                       xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+       rx_ring->xdp_prog = NULL;
        devm_kfree(rx_ring->dev, rx_ring->rx_buf);
        rx_ring->rx_buf = NULL;
 
@@ -363,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
 
        rx_ring->next_to_use = 0;
        rx_ring->next_to_clean = 0;
+
+       if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+               WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+       if (rx_ring->vsi->type == ICE_VSI_PF &&
+           !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+               if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+                                    rx_ring->q_index))
+                       goto err;
        return 0;
 
 err:
@@ -372,34 +408,110 @@ err:
 }
 
 /**
- * ice_release_rx_desc - Store the new tail and head values
- * @rx_ring: ring to bump
- * @val: new head index
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
  */
-static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
 {
-       u16 prev_ntu = rx_ring->next_to_use;
+       if (ice_ring_uses_build_skb(rx_ring))
+               return ICE_SKB_PAD;
+       else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+               return XDP_PACKET_HEADROOM;
 
-       rx_ring->next_to_use = val;
+       return 0;
+}
 
-       /* update next to alloc since we have filled the ring */
-       rx_ring->next_to_alloc = val;
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
+           struct bpf_prog *xdp_prog)
+{
+       int err, result = ICE_XDP_PASS;
+       struct ice_ring *xdp_ring;
+       u32 act;
 
-       /* QRX_TAIL will be updated with any tail value, but hardware ignores
-        * the lower 3 bits. This makes it so we only bump tail on meaningful
-        * boundaries. Also, this allows us to bump tail on intervals of 8 up to
-        * the budget depending on the current traffic load.
-        */
-       val &= ~0x7;
-       if (prev_ntu != val) {
-               /* Force memory writes to complete before letting h/w
-                * know there are new descriptors to fetch. (Only
-                * applicable for weak-ordered memory model archs,
-                * such as IA-64).
-                */
-               wmb();
-               writel(val, rx_ring->tail);
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
+       switch (act) {
+       case XDP_PASS:
+               break;
+       case XDP_TX:
+               xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
+               result = ice_xmit_xdp_buff(xdp, xdp_ring);
+               break;
+       case XDP_REDIRECT:
+               err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+               result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               /* fallthrough -- not supported action */
+       case XDP_ABORTED:
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+               /* fallthrough -- handle aborts by dropping frame */
+       case XDP_DROP:
+               result = ICE_XDP_CONSUMED;
+               break;
+       }
+
+       return result;
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ * For error cases, a negative errno code is returned and no frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+            u32 flags)
+{
+       struct ice_netdev_priv *np = netdev_priv(dev);
+       unsigned int queue_index = smp_processor_id();
+       struct ice_vsi *vsi = np->vsi;
+       struct ice_ring *xdp_ring;
+       int drops = 0, i;
+
+       if (test_bit(__ICE_DOWN, vsi->state))
+               return -ENETDOWN;
+
+       if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
+               return -ENXIO;
+
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+               return -EINVAL;
+
+       xdp_ring = vsi->xdp_rings[queue_index];
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+               int err;
+
+               err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+               if (err != ICE_XDP_TX) {
+                       xdp_return_frame_rx_napi(xdpf);
+                       drops++;
+               }
        }
+
+       if (unlikely(flags & XDP_XMIT_FLUSH))
+               ice_xdp_ring_update_tail(xdp_ring);
+
+       return n - drops;
 }
 
 /**
@@ -423,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
        }
 
        /* alloc new page for storage */
-       page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+       page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
        if (unlikely(!page)) {
                rx_ring->rx_stats.alloc_page_failed++;
                return false;
        }
 
        /* map page for use */
-       dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+       dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
                                 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 
        /* if mapping failed free memory back to system since
         * there isn't much point in holding memory we can't use
         */
        if (dma_mapping_error(rx_ring->dev, dma)) {
-               __free_pages(page, 0);
+               __free_pages(page, ice_rx_pg_order(rx_ring));
                rx_ring->rx_stats.alloc_page_failed++;
                return false;
        }
 
        bi->dma = dma;
        bi->page = page;
-       bi->page_offset = 0;
+       bi->page_offset = ice_rx_offset(rx_ring);
        page_ref_add(page, USHRT_MAX - 1);
        bi->pagecnt_bias = USHRT_MAX;
 
@@ -486,7 +598,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
                /* sync the buffer for use by the device */
                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
                                                 bi->page_offset,
-                                                ICE_RXBUF_2048,
+                                                rx_ring->rx_buf_len,
                                                 DMA_FROM_DEVICE);
 
                /* Refresh the desc even if buffer_addrs didn't change
@@ -557,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
  */
 static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 {
-#if (PAGE_SIZE >= 8192)
-       unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
-#endif
        unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
        struct page *page = rx_buf->page;
 
@@ -572,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
        if (unlikely((page_count(page) - pagecnt_bias) > 1))
                return false;
 #else
-       if (rx_buf->page_offset > last_offset)
+#define ICE_LAST_OFFSET \
+       (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+       if (rx_buf->page_offset > ICE_LAST_OFFSET)
                return false;
 #endif /* PAGE_SIZE < 8192) */
 
@@ -590,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 
 /**
  * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: buffer containing page to add
  * @skb: sk_buff to place the data into
  * @size: packet length from rx_desc
@@ -599,13 +711,13 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
  * The function will then update the page offset.
  */
 static void
-ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
-               unsigned int size)
+ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+               struct sk_buff *skb, unsigned int size)
 {
 #if (PAGE_SIZE >= 8192)
-       unsigned int truesize = SKB_DATA_ALIGN(size);
+       unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
 #else
-       unsigned int truesize = ICE_RXBUF_2048;
+       unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #endif
 
        if (!size)
@@ -678,11 +790,65 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
        return rx_buf;
 }
 
+/**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+             struct xdp_buff *xdp)
+{
+       unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+                               SKB_DATA_ALIGN(xdp->data_end -
+                                              xdp->data_hard_start);
+#endif
+       struct sk_buff *skb;
+
+       /* Prefetch first cache line of first page. If xdp->data_meta
+        * is unused, this points exactly as xdp->data, otherwise we
+        * likely have a consumer accessing first few bytes of meta
+        * data, and then actual data.
+        */
+       prefetch(xdp->data_meta);
+#if L1_CACHE_BYTES < 128
+       prefetch((void *)(xdp->data + L1_CACHE_BYTES));
+#endif
+       /* build an skb around the page buffer */
+       skb = build_skb(xdp->data_hard_start, truesize);
+       if (unlikely(!skb))
+               return NULL;
+
+       /* must record the Rx queue, otherwise OS features such as
+        * symmetric queue won't work
+        */
+       skb_record_rx_queue(skb, rx_ring->q_index);
+
+       /* update pointers within the skb to store the data */
+       skb_reserve(skb, xdp->data - xdp->data_hard_start);
+       __skb_put(skb, xdp->data_end - xdp->data);
+       if (metasize)
+               skb_metadata_set(skb, metasize);
+
+       /* buffer is used by skb, update page_offset */
+       ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+
+       return skb;
+}
+
 /**
  * ice_construct_skb - Allocate skb and populate it
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: Rx buffer to pull data from
- * @size: the length of the packet
+ * @xdp: xdp_buff pointing to the data
  *
  * This function allocates an skb. It then populates it with the page
  * data from the current receive descriptor, taking care to set up the
@@ -690,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
  */
 static struct sk_buff *
 ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
-                 unsigned int size)
+                 struct xdp_buff *xdp)
 {
-       void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+       unsigned int size = xdp->data_end - xdp->data;
        unsigned int headlen;
        struct sk_buff *skb;
 
        /* prefetch first cache line of first page */
-       prefetch(va);
+       prefetch(xdp->data);
 #if L1_CACHE_BYTES < 128
-       prefetch((u8 *)va + L1_CACHE_BYTES);
+       prefetch((void *)(xdp->data + L1_CACHE_BYTES));
 #endif /* L1_CACHE_BYTES */
 
        /* allocate a skb to store the frags */
@@ -712,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
        /* Determine available headroom for copy */
        headlen = size;
        if (headlen > ICE_RX_HDR_SIZE)
-               headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
+               headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
 
        /* align pull length to size of long to optimize memcpy performance */
-       memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+       memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+                                                        sizeof(long)));
 
        /* if we exhaust the linear part then add what is left as a frag */
        size -= headlen;
@@ -723,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 #if (PAGE_SIZE >= 8192)
                unsigned int truesize = SKB_DATA_ALIGN(size);
 #else
-               unsigned int truesize = ICE_RXBUF_2048;
+               unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #endif
                skb_add_rx_frag(skb, 0, rx_buf->page,
                                rx_buf->page_offset + headlen, size, truesize);
@@ -745,11 +912,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: Rx buffer to pull data from
  *
- * This function will  clean up the contents of the rx_buf. It will
- * either recycle the buffer or unmap it and free the associated resources.
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
  */
 static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 {
+       u32 ntc = rx_ring->next_to_clean + 1;
+
+       /* fetch, update, and store next to clean */
+       ntc = (ntc < rx_ring->count) ? ntc : 0;
+       rx_ring->next_to_clean = ntc;
+
        if (!rx_buf)
                return;
 
@@ -759,8 +933,9 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
                rx_ring->rx_stats.page_reuse_count++;
        } else {
                /* we are not reusing the buffer so unmap it */
-               dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
-                                    DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+               dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+                                    ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+                                    ICE_RX_DMA_ATTR);
                __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
        }
 
@@ -790,206 +965,31 @@ static bool ice_cleanup_headers(struct sk_buff *skb)
        return false;
 }
 
-/**
- * ice_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static bool
-ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
-{
-       return !!(rx_desc->wb.status_error0 &
-                 cpu_to_le16(stat_err_bits));
-}
-
 /**
  * ice_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
  * @rx_desc: Rx descriptor for current buffer
  * @skb: Current socket buffer containing buffer in progress
  *
- * This function updates next to clean. If the buffer is an EOP buffer
- * this function exits returning false, otherwise it will place the
- * sk_buff in the next buffer to be chained and return true indicating
- * that this is in fact a non-EOP buffer.
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
  */
 static bool
 ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
               struct sk_buff *skb)
 {
-       u32 ntc = rx_ring->next_to_clean + 1;
-
-       /* fetch, update, and store next to clean */
-       ntc = (ntc < rx_ring->count) ? ntc : 0;
-       rx_ring->next_to_clean = ntc;
-
-       prefetch(ICE_RX_DESC(rx_ring, ntc));
-
        /* if we are the last buffer then there is nothing else to do */
 #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
        if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
                return false;
 
        /* place skb in next buffer to be received */
-       rx_ring->rx_buf[ntc].skb = skb;
+       rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
        rx_ring->rx_stats.non_eop_descs++;
 
        return true;
 }
 
-/**
- * ice_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- */
-static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
-{
-       return PKT_HASH_TYPE_NONE;
-}
-
-/**
- * ice_rx_hash - set the hash value in the skb
- * @rx_ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: pointer to current skb
- * @rx_ptype: the ptype value from the descriptor
- */
-static void
-ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
-           struct sk_buff *skb, u8 rx_ptype)
-{
-       struct ice_32b_rx_flex_desc_nic *nic_mdid;
-       u32 hash;
-
-       if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
-               return;
-
-       if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
-               return;
-
-       nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
-       hash = le32_to_cpu(nic_mdid->rss_hash);
-       skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
-}
-
-/**
- * ice_rx_csum - Indicate in skb if checksum is good
- * @ring: the ring we care about
- * @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
- * @ptype: the packet type decoded by hardware
- *
- * skb->protocol must be set before this function is called
- */
-static void
-ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
-           union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
-{
-       struct ice_rx_ptype_decoded decoded;
-       u32 rx_error, rx_status;
-       bool ipv4, ipv6;
-
-       rx_status = le16_to_cpu(rx_desc->wb.status_error0);
-       rx_error = rx_status;
-
-       decoded = ice_decode_rx_desc_ptype(ptype);
-
-       /* Start with CHECKSUM_NONE and by default csum_level = 0 */
-       skb->ip_summed = CHECKSUM_NONE;
-       skb_checksum_none_assert(skb);
-
-       /* check if Rx checksum is enabled */
-       if (!(ring->netdev->features & NETIF_F_RXCSUM))
-               return;
-
-       /* check if HW has decoded the packet and checksum */
-       if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
-               return;
-
-       if (!(decoded.known && decoded.outer_ip))
-               return;
-
-       ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
-              (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
-       ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
-              (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
-
-       if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
-                                BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
-               goto checksum_fail;
-       else if (ipv6 && (rx_status &
-                (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
-               goto checksum_fail;
-
-       /* check for L4 errors and handle packets that were not able to be
-        * checksummed due to arrival speed
-        */
-       if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
-               goto checksum_fail;
-
-       /* Only report checksum unnecessary for TCP, UDP, or SCTP */
-       switch (decoded.inner_prot) {
-       case ICE_RX_PTYPE_INNER_PROT_TCP:
-       case ICE_RX_PTYPE_INNER_PROT_UDP:
-       case ICE_RX_PTYPE_INNER_PROT_SCTP:
-               skb->ip_summed = CHECKSUM_UNNECESSARY;
-       default:
-               break;
-       }
-       return;
-
-checksum_fail:
-       ring->vsi->back->hw_csum_rx_error++;
-}
-
-/**
- * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: Rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- * @ptype: the packet type decoded by hardware
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
- */
-static void
-ice_process_skb_fields(struct ice_ring *rx_ring,
-                      union ice_32b_rx_flex_desc *rx_desc,
-                      struct sk_buff *skb, u8 ptype)
-{
-       ice_rx_hash(rx_ring, rx_desc, skb, ptype);
-
-       /* modifies the skb - consumes the enet header */
-       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
-       ice_rx_csum(rx_ring, skb, rx_desc, ptype);
-}
-
-/**
- * ice_receive_skb - Send a completed packet up the stack
- * @rx_ring: Rx ring in play
- * @skb: packet to send up
- * @vlan_tag: VLAN tag for packet
- *
- * This function sends the completed packet (via. skb) up the stack using
- * gro receive functions (with/without VLAN tag)
- */
-static void
-ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
-{
-       if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-           (vlan_tag & VLAN_VID_MASK))
-               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-       napi_gro_receive(&rx_ring->q_vector->napi, skb);
-}
-
 /**
  * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: Rx descriptor ring to transact packets on
@@ -1006,8 +1006,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 {
        unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
        u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+       unsigned int xdp_res, xdp_xmit = 0;
+       struct bpf_prog *xdp_prog = NULL;
+       struct xdp_buff xdp;
        bool failure;
 
+       xdp.rxq = &rx_ring->xdp_rxq;
+
        /* start the loop to process Rx packets bounded by 'budget' */
        while (likely(total_rx_pkts < (unsigned int)budget)) {
                union ice_32b_rx_flex_desc *rx_desc;
@@ -1042,10 +1047,57 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                /* retrieve a buffer from the ring */
                rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
 
+               if (!size) {
+                       xdp.data = NULL;
+                       xdp.data_end = NULL;
+                       xdp.data_hard_start = NULL;
+                       xdp.data_meta = NULL;
+                       goto construct_skb;
+               }
+
+               xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
+               xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
+               xdp.data_meta = xdp.data;
+               xdp.data_end = xdp.data + size;
+
+               rcu_read_lock();
+               xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+               if (!xdp_prog) {
+                       rcu_read_unlock();
+                       goto construct_skb;
+               }
+
+               xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
+               rcu_read_unlock();
+               if (!xdp_res)
+                       goto construct_skb;
+               if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+                       unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+                       truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+                       truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
+                                                 size);
+#endif
+                       xdp_xmit |= xdp_res;
+                       ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+               } else {
+                       rx_buf->pagecnt_bias++;
+               }
+               total_rx_bytes += size;
+               total_rx_pkts++;
+
+               cleaned_count++;
+               ice_put_rx_buf(rx_ring, rx_buf);
+               continue;
+construct_skb:
                if (skb)
-                       ice_add_rx_frag(rx_buf, skb, size);
+                       ice_add_rx_frag(rx_ring, rx_buf, skb, size);
+               else if (ice_ring_uses_build_skb(rx_ring))
+                       skb = ice_build_skb(rx_ring, rx_buf, &xdp);
                else
-                       skb = ice_construct_skb(rx_ring, rx_buf, size);
+                       skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
 
                /* exit if we failed to retrieve a buffer */
                if (!skb) {
@@ -1099,13 +1151,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
        /* return up to cleaned_count buffers to hardware */
        failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
 
-       /* update queue and vector specific stats */
-       u64_stats_update_begin(&rx_ring->syncp);
-       rx_ring->stats.pkts += total_rx_pkts;
-       rx_ring->stats.bytes += total_rx_bytes;
-       u64_stats_update_end(&rx_ring->syncp);
-       rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
-       rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+       if (xdp_prog)
+               ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+
+       ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
 
        /* guarantee a trip back through this routine if there was a failure */
        return failure ? budget : (int)total_rx_pkts;
@@ -1483,9 +1532,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
        /* Since the actual Tx work is minimal, we can give the Tx a larger
         * budget and be more aggressive about cleaning up the Tx descriptors.
         */
-       ice_for_each_ring(ring, q_vector->tx)
-               if (!ice_clean_tx_irq(ring, budget))
+       ice_for_each_ring(ring, q_vector->tx) {
+               bool wd = ring->xsk_umem ?
+                         ice_clean_tx_irq_zc(ring, budget) :
+                         ice_clean_tx_irq(ring, budget);
+
+               if (!wd)
                        clean_complete = false;
+       }
 
        /* Handle case where we are called by netpoll with a budget of 0 */
        if (unlikely(budget <= 0))
@@ -1505,7 +1559,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
        ice_for_each_ring(ring, q_vector->rx) {
                int cleaned;
 
-               cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+               /* A dedicated path for zero-copy allows making a single
+                * comparison in the irq context instead of many inside the
+                * ice_clean_rx_irq function and makes the codebase cleaner.
+                */
+               cleaned = ring->xsk_umem ?
+                         ice_clean_rx_irq_zc(ring, budget_per_ring) :
+                         ice_clean_rx_irq(ring, budget_per_ring);
                work_done += cleaned;
                /* if we clean as many as budgeted, we must not be done */
                if (cleaned >= budget_per_ring)
@@ -1527,17 +1587,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
        return min_t(int, work_done, budget - 1);
 }
 
-/* helper function for building cmd/type/offset */
-static __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
-{
-       return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
-                          (td_cmd    << ICE_TXD_QW1_CMD_S) |
-                          (td_offset << ICE_TXD_QW1_OFFSET_S) |
-                          ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
-                          (td_tag    << ICE_TXD_QW1_L2TAG1_S));
-}
-
 /**
  * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
  * @tx_ring: the ring to be checked
@@ -1689,9 +1738,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
                i = 0;
 
        /* write last descriptor with RS and EOP bits */
-       td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
-       tx_desc->cmd_type_offset_bsz =
-                       build_ctob(td_cmd, td_offset, size, td_tag);
+       td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+       tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
+                                                 td_tag);
 
        /* Force memory writes to complete before letting h/w know there
         * are new descriptors to fetch.
index 94a9280..a84cc0e 100644 (file)
@@ -4,8 +4,12 @@
 #ifndef _ICE_TXRX_H_
 #define _ICE_TXRX_H_
 
+#include "ice_type.h"
+
 #define ICE_DFLT_IRQ_WORK      256
+#define ICE_RXBUF_3072         3072
 #define ICE_RXBUF_2048         2048
+#define ICE_RXBUF_1536         1536
 #define ICE_MAX_CHAINED_RX_BUFS        5
 #define ICE_MAX_BUF_TXD                8
 #define ICE_MIN_TX_LEN         17
 #define ICE_RX_BUF_WRITE       16      /* Must be power of 2 */
 #define ICE_MAX_TXQ_PER_TXQG   128
 
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room.  From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *       up negative.  In these cases we should fall back to the legacy
+ *       receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * @rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+       int half_page_size;
+
+       /* round rx_buf_len up to the nearest half-page boundary */
+       half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+       return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ *
+ * Return: number of bytes of headroom padding available for the frame
+ */
+static inline int ice_skb_pad(void)
+{
+       int rx_buf_len;
+
+       /* If a 2K buffer cannot handle a standard Ethernet frame then
+        * optimize padding for a 3K buffer instead of a 1.5K buffer.
+        *
+        * For a 3K buffer we need to add enough padding to allow for
+        * tailroom due to NET_IP_ALIGN possibly shifting us out of
+        * cache-line alignment.
+        */
+       if (ICE_2K_TOO_SMALL_WITH_PADDING)
+               rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+       else
+               rx_buf_len = ICE_RXBUF_1536;
+
+       /* if needed make room for NET_IP_ALIGN */
+       rx_buf_len -= NET_IP_ALIGN;
+
+       return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
 /* We are assuming that the cache line is always 64 Bytes here for ice.
  * In order to make sure that is a correct assumption there is a check in probe
  * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
 #define ICE_TX_FLAGS_VLAN_PR_S 29
 #define ICE_TX_FLAGS_VLAN_S    16
 
+#define ICE_XDP_PASS           0
+#define ICE_XDP_CONSUMED       BIT(0)
+#define ICE_XDP_TX             BIT(1)
+#define ICE_XDP_REDIR          BIT(2)
+
 #define ICE_RX_DMA_ATTR \
        (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
+#define ICE_ETH_PKT_HDR_PAD    (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
 struct ice_tx_buf {
        struct ice_tx_desc *next_to_watch;
-       struct sk_buff *skb;
+       union {
+               struct sk_buff *skb;
+               void *raw_buf; /* used for XDP */
+       };
        unsigned int bytecount;
        unsigned short gso_segs;
        u32 tx_flags;
@@ -76,9 +157,17 @@ struct ice_tx_offload_params {
 struct ice_rx_buf {
        struct sk_buff *skb;
        dma_addr_t dma;
-       struct page *page;
-       unsigned int page_offset;
-       u16 pagecnt_bias;
+       union {
+               struct {
+                       struct page *page;
+                       unsigned int page_offset;
+                       u16 pagecnt_bias;
+               };
+               struct {
+                       void *addr;
+                       u64 handle;
+               };
+       };
 };
 
 struct ice_q_stats {
@@ -198,18 +287,44 @@ struct ice_ring {
        };
 
        struct rcu_head rcu;            /* to avoid race on free */
+       struct bpf_prog *xdp_prog;
+       struct xdp_umem *xsk_umem;
+       struct zero_copy_allocator zca;
+       /* CL3 - 3rd cacheline starts here */
+       struct xdp_rxq_info xdp_rxq;
        /* CLX - the below items are only accessed infrequently and should be
         * in their own cache line if possible
         */
+#define ICE_TX_FLAGS_RING_XDP          BIT(0)
+#define ICE_RX_FLAGS_RING_BUILD_SKB    BIT(1)
+       u8 flags;
        dma_addr_t dma;                 /* physical address of ring */
        unsigned int size;              /* length of descriptor ring in bytes */
        u32 txq_teid;                   /* Added Tx queue TEID */
        u16 rx_buf_len;
-#ifdef CONFIG_DCB
        u8 dcb_tc;                      /* Traffic class of ring */
-#endif /* CONFIG_DCB */
 } ____cacheline_internodealigned_in_smp;
 
+/* true if the ring builds skbs around the buffer (build_skb path) */
+static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
+{
+       return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+/* enable the build_skb Rx path on this ring */
+static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring)
+{
+       ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+/* disable the build_skb Rx path on this ring */
+static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring)
+{
+       ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+/* true if this Tx ring is dedicated to XDP transmission */
+static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+{
+       return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
 struct ice_ring_container {
        /* head of linked-list of rings */
        struct ice_ring *ring;
@@ -230,6 +345,19 @@ struct ice_ring_container {
 #define ice_for_each_ring(pos, head) \
        for (pos = (head).ring; pos; pos = pos->next)
 
+/* Rx page allocation order: order-1 (two pages) when the buffer length
+ * exceeds half a page on small-page systems, otherwise order-0
+ */
+static inline unsigned int ice_rx_pg_order(struct ice_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring->rx_buf_len > (PAGE_SIZE / 2))
+               return 1;
+#endif
+       return 0;
+}
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
 bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 void ice_clean_tx_ring(struct ice_ring *tx_ring);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644 (file)
index 0000000..35bbc4f
--- /dev/null
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_txrx_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new tail (next_to_use) value
+ *
+ * Records @val as next_to_use/next_to_alloc and bumps the hardware tail
+ * register, but only on 8-descriptor boundaries.
+ */
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+       u16 prev_ntu = rx_ring->next_to_use;
+
+       rx_ring->next_to_use = val;
+
+       /* update next to alloc since we have filled the ring */
+       rx_ring->next_to_alloc = val;
+
+       /* QRX_TAIL will be updated with any tail value, but hardware ignores
+        * the lower 3 bits. This makes it so we only bump tail on meaningful
+        * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+        * the budget depending on the current traffic load.
+        */
+       val &= ~0x7;
+       if (prev_ntu != val) {
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch. (Only
+                * applicable for weak-ordered memory model archs,
+                * such as IA-64).
+                */
+               wmb();
+               writel(val, rx_ring->tail);
+       }
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+       /* ptype is not decoded yet; report an unknown hash type for now */
+       return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ *
+ * No-op unless the netdev has NETIF_F_RXHASH enabled and the descriptor
+ * uses the flex-NIC metadata layout that carries the RSS hash.
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+           struct sk_buff *skb, u8 rx_ptype)
+{
+       struct ice_32b_rx_flex_desc_nic *nic_mdid;
+       u32 hash;
+
+       if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+               return;
+
+       /* only the flex-NIC descriptor layout carries the RSS hash */
+       if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+               return;
+
+       nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+       hash = le32_to_cpu(nic_mdid->rss_hash);
+       skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+           union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+       struct ice_rx_ptype_decoded decoded;
+       u32 rx_error, rx_status;
+       bool ipv4, ipv6;
+
+       rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+       /* error bits live in the same status_error0 word; kept as a
+        * separate alias for readability of the checks below
+        */
+       rx_error = rx_status;
+
+       decoded = ice_decode_rx_desc_ptype(ptype);
+
+       /* Start with CHECKSUM_NONE and by default csum_level = 0 */
+       skb->ip_summed = CHECKSUM_NONE;
+       skb_checksum_none_assert(skb);
+
+       /* check if Rx checksum is enabled */
+       if (!(ring->netdev->features & NETIF_F_RXCSUM))
+               return;
+
+       /* check if HW has decoded the packet and checksum */
+       if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+               return;
+
+       if (!(decoded.known && decoded.outer_ip))
+               return;
+
+       ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+              (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+       ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+              (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+       if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+                                BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+               goto checksum_fail;
+       else if (ipv6 && (rx_status &
+                (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+               goto checksum_fail;
+
+       /* check for L4 errors and handle packets that were not able to be
+        * checksummed due to arrival speed
+        */
+       if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+               goto checksum_fail;
+
+       /* Only report checksum unnecessary for TCP, UDP, or SCTP */
+       switch (decoded.inner_prot) {
+       case ICE_RX_PTYPE_INNER_PROT_TCP:
+       case ICE_RX_PTYPE_INNER_PROT_UDP:
+       case ICE_RX_PTYPE_INNER_PROT_SCTP:
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+               /* fall through */
+       default:
+               break;
+       }
+       return;
+
+checksum_fail:
+       ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+                      union ice_32b_rx_flex_desc *rx_desc,
+                      struct sk_buff *skb, u8 ptype)
+{
+       ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+       /* modifies the skb - consumes the enet header */
+       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+       /* must come after eth_type_trans(): csum needs skb->protocol set */
+       ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+       /* only set the tag when HW VLAN stripping is on and a VID is present */
+       if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+           (vlan_tag & VLAN_VID_MASK))
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+       napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ *
+ * Returns ICE_XDP_TX on success, ICE_XDP_CONSUMED when the ring is full
+ * or the DMA mapping fails (packet is dropped).
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+       u16 i = xdp_ring->next_to_use;
+       struct ice_tx_desc *tx_desc;
+       struct ice_tx_buf *tx_buf;
+       dma_addr_t dma;
+
+       /* NOTE(review): !unlikely(x) negates the value but leaves the
+        * branch-prediction hint on x itself; unlikely(!x) is the
+        * conventional spelling for a rarely-full ring — confirm intent
+        */
+       if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+               xdp_ring->tx_stats.tx_busy++;
+               return ICE_XDP_CONSUMED;
+       }
+
+       dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(xdp_ring->dev, dma))
+               return ICE_XDP_CONSUMED;
+
+       tx_buf = &xdp_ring->tx_buf[i];
+       tx_buf->bytecount = size;
+       tx_buf->gso_segs = 1;
+       tx_buf->raw_buf = data;
+
+       /* record length, and DMA address */
+       dma_unmap_len_set(tx_buf, len, size);
+       dma_unmap_addr_set(tx_buf, dma, dma);
+
+       tx_desc = ICE_TX_DESC(xdp_ring, i);
+       tx_desc->buf_addr = cpu_to_le64(dma);
+       /* single-descriptor packet: EOP + RS in one shot */
+       tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+                                                 size, 0);
+
+       /* Make certain all of the status bits have been updated
+        * before next_to_watch is written.
+        */
+       smp_wmb();
+
+       i++;
+       if (i == xdp_ring->count)
+               i = 0;
+
+       tx_buf->next_to_watch = tx_desc;
+       xdp_ring->next_to_use = i;
+
+       return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+       struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+       /* conversion fails e.g. when there is no room for the frame header */
+       if (unlikely(!xdpf))
+               return ICE_XDP_CONSUMED;
+
+       return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+       if (xdp_res & ICE_XDP_REDIR)
+               xdp_do_flush_map();
+
+       if (xdp_res & ICE_XDP_TX) {
+               /* each Rx queue has a 1:1 dedicated XDP Tx ring */
+               struct ice_ring *xdp_ring =
+                       rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+               ice_xdp_ring_update_tail(xdp_ring);
+       }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644 (file)
index 0000000..ba9164d
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+{
+       return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+       return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+                          (td_cmd    << ICE_TXD_QW1_CMD_S) |
+                          (td_offset << ICE_TXD_QW1_OFFSET_S) |
+                          ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+                          (td_tag    << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+       /* Force memory writes to complete before letting h/w
+        * know there are new descriptors to fetch.
+        */
+       wmb();
+       writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+                      union ice_32b_rx_flex_desc *rx_desc,
+                      struct sk_buff *skb, u8 ptype);
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
index b45797f..ad75741 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644 (file)
index 0000000..fcffad0
--- /dev/null
@@ -0,0 +1,1181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+       memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+              sizeof(vsi->rx_rings[q_idx]->rx_stats));
+       memset(&vsi->tx_rings[q_idx]->stats, 0,
+              sizeof(vsi->tx_rings[q_idx]->stats));
+       if (ice_is_xdp_ena_vsi(vsi))
+               memset(&vsi->xdp_rings[q_idx]->stats, 0,
+                      sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+       ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+       if (ice_is_xdp_ena_vsi(vsi))
+               ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+       ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+                    bool enable)
+{
+       if (!vsi->netdev || !q_vector)
+               return;
+
+       if (enable)
+               napi_enable(&q_vector->napi);
+       else
+               napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring,
+                struct ice_q_vector *q_vector)
+{
+       struct ice_pf *pf = vsi->back;
+       struct ice_hw *hw = &pf->hw;
+       int base = vsi->base_vector;
+       u16 reg;
+       u32 val;
+
+       /* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+        * here only QINT_RQCTL
+        */
+       reg = rx_ring->reg_idx;
+       val = rd32(hw, QINT_RQCTL(reg));
+       val &= ~QINT_RQCTL_CAUSE_ENA_M;
+       wr32(hw, QINT_RQCTL(reg), val);
+
+       if (q_vector) {
+               u16 v_idx = q_vector->v_idx;
+
+               wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+               ice_flush(hw);
+               synchronize_irq(pf->msix_entries[v_idx + base].vector);
+       }
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+       u16 reg_idx = q_vector->reg_idx;
+       struct ice_pf *pf = vsi->back;
+       struct ice_hw *hw = &pf->hw;
+       struct ice_ring *ring;
+
+       ice_cfg_itr(hw, q_vector);
+
+       wr32(hw, GLINT_RATE(reg_idx),
+            ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+       ice_for_each_ring(ring, q_vector->tx)
+               ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
+                                     q_vector->tx.itr_idx);
+
+       ice_for_each_ring(ring, q_vector->rx)
+               ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx,
+                                     q_vector->rx.itr_idx);
+
+       ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+       struct ice_pf *pf = vsi->back;
+       struct ice_hw *hw = &pf->hw;
+
+       ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+       ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+       struct ice_txq_meta txq_meta = { };
+       struct ice_ring *tx_ring, *rx_ring;
+       struct ice_q_vector *q_vector;
+       int timeout = 50;
+       int err;
+
+       if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+               return -EINVAL;
+
+       tx_ring = vsi->tx_rings[q_idx];
+       rx_ring = vsi->rx_rings[q_idx];
+       q_vector = rx_ring->q_vector;
+
+       while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+               timeout--;
+               if (!timeout)
+                       return -EBUSY;
+               usleep_range(1000, 2000);
+       }
+       netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+       ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+       ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+       err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+       if (err)
+               return err;
+       if (ice_is_xdp_ena_vsi(vsi)) {
+               struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+               memset(&txq_meta, 0, sizeof(txq_meta));
+               ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+               err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+                                          &txq_meta);
+               if (err)
+                       return err;
+       }
+       err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx);
+       if (err)
+               return err;
+
+       ice_qvec_toggle_napi(vsi, q_vector, false);
+       ice_qp_clean_rings(vsi, q_idx);
+       ice_qp_reset_stats(vsi, q_idx);
+
+       return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+       struct ice_aqc_add_tx_qgrp *qg_buf;
+       struct ice_ring *tx_ring, *rx_ring;
+       struct ice_q_vector *q_vector;
+       int err;
+
+       if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+               return -EINVAL;
+
+       qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
+       if (!qg_buf)
+               return -ENOMEM;
+
+       qg_buf->num_txqs = 1;
+
+       tx_ring = vsi->tx_rings[q_idx];
+       rx_ring = vsi->rx_rings[q_idx];
+       q_vector = rx_ring->q_vector;
+
+       err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+       if (err)
+               goto free_buf;
+
+       if (ice_is_xdp_ena_vsi(vsi)) {
+               struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+               memset(qg_buf, 0, sizeof(*qg_buf));
+               qg_buf->num_txqs = 1;
+               err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+               if (err)
+                       goto free_buf;
+               ice_set_ring_xdp(xdp_ring);
+               xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+       }
+
+       err = ice_setup_rx_ctx(rx_ring);
+       if (err)
+               goto free_buf;
+
+       ice_qvec_cfg_msix(vsi, q_vector);
+
+       err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx);
+       if (err)
+               goto free_buf;
+
+       clear_bit(__ICE_CFG_BUSY, vsi->state);
+       ice_qvec_toggle_napi(vsi, q_vector, true);
+       ice_qvec_ena_irq(vsi, q_vector);
+
+       netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+       kfree(qg_buf);
+       return err;
+}
+
+/**
+ * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket
+ * @vsi: VSI to allocate the UMEM on
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
+{
+       if (vsi->xsk_umems)
+               return 0;
+
+       vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),
+                                GFP_KERNEL);
+
+       if (!vsi->xsk_umems) {
+               vsi->num_xsk_umems = 0;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/**
+ * ice_xsk_add_umem - add a UMEM region for XDP sockets
+ * @vsi: VSI to which the UMEM will be added
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+       int err;
+
+       err = ice_xsk_alloc_umems(vsi);
+       if (err)
+               return err;
+
+       vsi->xsk_umems[qid] = umem;
+       vsi->num_xsk_umems_used++;
+
+       return 0;
+}
+
+/**
+ * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
+ * @vsi: VSI from which the UMEM will be removed
+ * @qid: Ring/qid associated with the UMEM
+ */
+static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
+{
+       vsi->xsk_umems[qid] = NULL;
+       vsi->num_xsk_umems_used--;
+
+       if (vsi->num_xsk_umems_used == 0) {
+               kfree(vsi->xsk_umems);
+               vsi->xsk_umems = NULL;
+               vsi->num_xsk_umems = 0;
+       }
+}
+
+/**
+ * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
+ * @vsi: VSI to map the UMEM region
+ * @umem: UMEM to map
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+       struct ice_pf *pf = vsi->back;
+       struct device *dev;
+       unsigned int i;
+
+       dev = &pf->pdev->dev;
+       for (i = 0; i < umem->npgs; i++) {
+               dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
+                                                   PAGE_SIZE,
+                                                   DMA_BIDIRECTIONAL,
+                                                   ICE_RX_DMA_ATTR);
+               if (dma_mapping_error(dev, dma)) {
+                       dev_dbg(dev,
+                               "XSK UMEM DMA mapping error on page num %d", i);
+                       goto out_unmap;
+               }
+
+               umem->pages[i].dma = dma;
+       }
+
+       return 0;
+
+out_unmap:
+       for (; i > 0; i--) {
+               dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+                                    DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+               umem->pages[i].dma = 0;
+       }
+
+       return -EFAULT;
+}
+
+/**
+ * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
+ * @vsi: VSI from which the UMEM will be unmapped
+ * @umem: UMEM to unmap
+ */
+static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+       struct ice_pf *pf = vsi->back;
+       struct device *dev;
+       unsigned int i;
+
+       dev = &pf->pdev->dev;
+       for (i = 0; i < umem->npgs; i++) {
+               dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+                                    DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+
+               umem->pages[i].dma = 0;
+       }
+}
+
+/**
+ * ice_xsk_umem_disable - disable a UMEM region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
+{
+       if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||
+           !vsi->xsk_umems[qid])
+               return -EINVAL;
+
+       ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+       ice_xsk_remove_umem(vsi, qid);
+
+       return 0;
+}
+
+/**
+ * ice_xsk_umem_enable - enable a UMEM region
+ * @vsi: Current VSI
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+       struct xdp_umem_fq_reuse *reuseq;
+       int err;
+
+       if (vsi->type != ICE_VSI_PF)
+               return -EINVAL;
+
+       vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
+       if (qid >= vsi->num_xsk_umems)
+               return -EINVAL;
+
+       if (vsi->xsk_umems && vsi->xsk_umems[qid])
+               return -EBUSY;
+
+       reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+       if (!reuseq)
+               return -ENOMEM;
+
+       xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+       err = ice_xsk_umem_dma_map(vsi, umem);
+       if (err)
+               return err;
+
+       err = ice_xsk_add_umem(vsi, umem, qid);
+       if (err)
+               return err;
+
+       return 0;
+}
+
+/**
+ * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state
+ * @vsi: Current VSI
+ * @umem: UMEM to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+       bool if_running, umem_present = !!umem;
+       int ret = 0, umem_failure = 0;
+
+       if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+       if (if_running) {
+               ret = ice_qp_dis(vsi, qid);
+               if (ret) {
+                       netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret);
+                       goto xsk_umem_if_up;
+               }
+       }
+
+       umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) :
+                                     ice_xsk_umem_disable(vsi, qid);
+
+xsk_umem_if_up:
+       if (if_running) {
+               ret = ice_qp_ena(vsi, qid);
+               if (!ret && umem_present)
+                       napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+               else if (ret)
+                       netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret);
+       }
+
+       if (umem_failure) {
+               netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d",
+                          umem_present ? "en" : "dis", umem_failure);
+               return umem_failure;
+       }
+
+       return ret;
+}
+
+/**
+ * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
+ * @zca: zero-copy allocator
+ * @handle: Buffer handle
+ */
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+       struct ice_rx_buf *rx_buf;
+       struct ice_ring *rx_ring;
+       struct xdp_umem *umem;
+       u64 hr, mask;
+       u16 nta;
+
+       rx_ring = container_of(zca, struct ice_ring, zca);
+       umem = rx_ring->xsk_umem;
+       hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+       mask = umem->chunk_mask;
+
+       nta = rx_ring->next_to_alloc;
+       rx_buf = &rx_ring->rx_buf[nta];
+
+       nta++;
+       rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+       handle &= mask;
+
+       rx_buf->dma = xdp_umem_get_dma(umem, handle);
+       rx_buf->dma += hr;
+
+       rx_buf->addr = xdp_umem_get_data(umem, handle);
+       rx_buf->addr += hr;
+
+       rx_buf->handle = (u64)handle + umem->headroom;
+}
+
+/**
+ * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the hot path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+       struct xdp_umem *umem = rx_ring->xsk_umem;
+       void *addr = rx_buf->addr;
+       u64 handle, hr;
+
+       if (addr) {
+               rx_ring->rx_stats.page_reuse_count++;
+               return true;
+       }
+
+       if (!xsk_umem_peek_addr(umem, &handle)) {
+               rx_ring->rx_stats.alloc_page_failed++;
+               return false;
+       }
+
+       hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+       rx_buf->dma = xdp_umem_get_dma(umem, handle);
+       rx_buf->dma += hr;
+
+       rx_buf->addr = xdp_umem_get_data(umem, handle);
+       rx_buf->addr += hr;
+
+       rx_buf->handle = handle + umem->headroom;
+
+       xsk_umem_discard_addr(umem);
+       return true;
+}
+
+/**
+ * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the slow path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+       struct xdp_umem *umem = rx_ring->xsk_umem;
+       u64 handle, headroom;
+
+       if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+               rx_ring->rx_stats.alloc_page_failed++;
+               return false;
+       }
+
+       handle &= umem->chunk_mask;
+       headroom = umem->headroom + XDP_PACKET_HEADROOM;
+
+       rx_buf->dma = xdp_umem_get_dma(umem, handle);
+       rx_buf->dma += headroom;
+
+       rx_buf->addr = xdp_umem_get_data(umem, handle);
+       rx_buf->addr += headroom;
+
+       rx_buf->handle = handle + umem->headroom;
+
+       xsk_umem_discard_addr_rq(umem);
+       return true;
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ * @alloc: the function pointer to call for allocation
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns false if all allocations were successful, true if any fail.
+ */
+static bool
+ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
+                    bool alloc(struct ice_ring *, struct ice_rx_buf *))
+{
+       union ice_32b_rx_flex_desc *rx_desc;
+       u16 ntu = rx_ring->next_to_use;
+       struct ice_rx_buf *rx_buf;
+       bool ret = false;
+
+       if (!count)
+               return false;
+
+       rx_desc = ICE_RX_DESC(rx_ring, ntu);
+       rx_buf = &rx_ring->rx_buf[ntu];
+
+       do {
+               if (!alloc(rx_ring, rx_buf)) {
+                       ret = true;
+                       break;
+               }
+
+               dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
+                                                rx_ring->rx_buf_len,
+                                                DMA_BIDIRECTIONAL);
+
+               rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+               rx_desc->wb.status_error0 = 0;
+
+               rx_desc++;
+               rx_buf++;
+               ntu++;
+
+               if (unlikely(ntu == rx_ring->count)) {
+                       rx_desc = ICE_RX_DESC(rx_ring, 0);
+                       rx_buf = rx_ring->rx_buf;
+                       ntu = 0;
+               }
+       } while (--count);
+
+       if (rx_ring->next_to_use != ntu)
+               ice_release_rx_desc(rx_ring, ntu);
+
+       return ret;
+}
+
+/**
+ * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
+{
+       return ice_alloc_rx_bufs_zc(rx_ring, count,
+                                   ice_alloc_buf_fast_zc);
+}
+
+/**
+ * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
+{
+       return ice_alloc_rx_bufs_zc(rx_ring, count,
+                                   ice_alloc_buf_slow_zc);
+}
+
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_ring *rx_ring)
+{
+       int ntc = rx_ring->next_to_clean + 1;
+
+       ntc = (ntc < rx_ring->count) ? ntc : 0;
+       rx_ring->next_to_clean = ntc;
+       prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_get_rx_buf_zc - Fetch the current Rx buffer
+ * @rx_ring: Rx ring
+ * @size: size of a buffer
+ *
+ * This function returns the current, received Rx buffer and does
+ * DMA synchronization.
+ *
+ * Returns a pointer to the received Rx buffer.
+ */
+static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
+{
+       struct ice_rx_buf *rx_buf;
+
+       rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+
+       dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
+                                     size, DMA_BIDIRECTIONAL);
+
+       return rx_buf;
+}
+
+/**
+ * ice_reuse_rx_buf_zc - reuse an Rx buffer
+ * @rx_ring: Rx ring
+ * @old_buf: The buffer to recycle
+ *
+ * This function recycles a finished Rx buffer, and places it on the recycle
+ * queue (next_to_alloc).
+ */
+static void
+ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+       unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
+       u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+       u16 nta = rx_ring->next_to_alloc;
+       struct ice_rx_buf *new_buf;
+
+       new_buf = &rx_ring->rx_buf[nta++];
+       rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+       new_buf->dma = old_buf->dma & mask;
+       new_buf->dma += hr;
+
+       new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
+       new_buf->addr += hr;
+
+       new_buf->handle = old_buf->handle & mask;
+       new_buf->handle += rx_ring->xsk_umem->headroom;
+
+       old_buf->addr = NULL;
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @rx_buf: zero-copy Rx buffer
+ * @xdp: XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+                    struct xdp_buff *xdp)
+{
+       unsigned int metasize = xdp->data - xdp->data_meta;
+       unsigned int datasize = xdp->data_end - xdp->data;
+       unsigned int datasize_hard = xdp->data_end -
+                                    xdp->data_hard_start;
+       struct sk_buff *skb;
+
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+                              GFP_ATOMIC | __GFP_NOWARN);
+       if (unlikely(!skb))
+               return NULL;
+
+       skb_reserve(skb, xdp->data - xdp->data_hard_start);
+       memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+       if (metasize)
+               skb_metadata_set(skb, metasize);
+
+       ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+
+       return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
+{
+       int err, result = ICE_XDP_PASS;
+       struct bpf_prog *xdp_prog;
+       struct ice_ring *xdp_ring;
+       u32 act;
+
+       rcu_read_lock();
+       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+       if (!xdp_prog) {
+               rcu_read_unlock();
+               return ICE_XDP_PASS;
+       }
+
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
+       xdp->handle += xdp->data - xdp->data_hard_start;
+       switch (act) {
+       case XDP_PASS:
+               break;
+       case XDP_TX:
+               xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
+               result = ice_xmit_xdp_buff(xdp, xdp_ring);
+               break;
+       case XDP_REDIRECT:
+               err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+               result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               /* fallthrough -- not supported action */
+       case XDP_ABORTED:
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+               /* fallthrough -- handle aborts by dropping frame */
+       case XDP_DROP:
+               result = ICE_XDP_CONSUMED;
+               break;
+       }
+
+       rcu_read_unlock();
+       return result;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+{
+       unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+       u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+       unsigned int xdp_xmit = 0;
+       struct xdp_buff xdp;
+       bool failure = 0;
+
+       xdp.rxq = &rx_ring->xdp_rxq;
+
+       while (likely(total_rx_packets < (unsigned int)budget)) {
+               union ice_32b_rx_flex_desc *rx_desc;
+               unsigned int size, xdp_res = 0;
+               struct ice_rx_buf *rx_buf;
+               struct sk_buff *skb;
+               u16 stat_err_bits;
+               u16 vlan_tag = 0;
+               u8 rx_ptype;
+
+               if (cleaned_count >= ICE_RX_BUF_WRITE) {
+                       failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
+                                                            cleaned_count);
+                       cleaned_count = 0;
+               }
+
+               rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+               stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+               if (!ice_test_staterr(rx_desc, stat_err_bits))
+                       break;
+
+               /* This memory barrier is needed to keep us from reading
+                * any other fields out of the rx_desc until we have
+                * verified the descriptor has been written back.
+                */
+               dma_rmb();
+
+               size = le16_to_cpu(rx_desc->wb.pkt_len) &
+                                  ICE_RX_FLX_DESC_PKT_LEN_M;
+               if (!size)
+                       break;
+
+               rx_buf = ice_get_rx_buf_zc(rx_ring, size);
+               if (!rx_buf->addr)
+                       break;
+
+               xdp.data = rx_buf->addr;
+               xdp.data_meta = xdp.data;
+               xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+               xdp.data_end = xdp.data + size;
+               xdp.handle = rx_buf->handle;
+
+               xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+               if (xdp_res) {
+                       if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+                               xdp_xmit |= xdp_res;
+                               rx_buf->addr = NULL;
+                       } else {
+                               ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+                       }
+
+                       total_rx_bytes += size;
+                       total_rx_packets++;
+                       cleaned_count++;
+
+                       ice_bump_ntc(rx_ring);
+                       continue;
+               }
+
+               /* XDP_PASS path */
+               skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+               if (!skb) {
+                       rx_ring->rx_stats.alloc_buf_failed++;
+                       break;
+               }
+
+               cleaned_count++;
+               ice_bump_ntc(rx_ring);
+
+               if (eth_skb_pad(skb)) {
+                       skb = NULL;
+                       continue;
+               }
+
+               total_rx_bytes += skb->len;
+               total_rx_packets++;
+
+               stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+               if (ice_test_staterr(rx_desc, stat_err_bits))
+                       vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+               rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+                                      ICE_RX_FLEX_DESC_PTYPE_M;
+
+               ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+               ice_receive_skb(rx_ring, skb, vlan_tag);
+       }
+
+       ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+       ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+       return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: max number of frames to xmit
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
+{
+       struct ice_tx_desc *tx_desc = NULL;
+       bool work_done = true;
+       struct xdp_desc desc;
+       dma_addr_t dma;
+
+       while (likely(budget-- > 0)) {
+               struct ice_tx_buf *tx_buf;
+
+               if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+                       xdp_ring->tx_stats.tx_busy++;
+                       work_done = false;
+                       break;
+               }
+
+               tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+
+               if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+                       break;
+
+               dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+               dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+                                          DMA_BIDIRECTIONAL);
+
+               tx_buf->bytecount = desc.len;
+
+               tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+               tx_desc->buf_addr = cpu_to_le64(dma);
+               tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD,
+                                                         0, desc.len, 0);
+
+               xdp_ring->next_to_use++;
+               if (xdp_ring->next_to_use == xdp_ring->count)
+                       xdp_ring->next_to_use = 0;
+       }
+
+       if (tx_desc) {
+               ice_xdp_ring_update_tail(xdp_ring);
+               xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+       }
+
+       return budget > 0 && work_done;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+       xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+       dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+                        dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+       dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
+{
+       int total_packets = 0, total_bytes = 0;
+       s16 ntc = xdp_ring->next_to_clean;
+       struct ice_tx_desc *tx_desc;
+       struct ice_tx_buf *tx_buf;
+       bool xmit_done = true;
+       u32 xsk_frames = 0;
+
+       tx_desc = ICE_TX_DESC(xdp_ring, ntc);
+       tx_buf = &xdp_ring->tx_buf[ntc];
+       ntc -= xdp_ring->count;
+
+       do {
+               if (!(tx_desc->cmd_type_offset_bsz &
+                     cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+                       break;
+
+               total_bytes += tx_buf->bytecount;
+               total_packets++;
+
+               if (tx_buf->raw_buf) {
+                       ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+                       tx_buf->raw_buf = NULL;
+               } else {
+                       xsk_frames++;
+               }
+
+               tx_desc->cmd_type_offset_bsz = 0;
+               tx_buf++;
+               tx_desc++;
+               ntc++;
+
+               if (unlikely(!ntc)) {
+                       ntc -= xdp_ring->count;
+                       tx_buf = xdp_ring->tx_buf;
+                       tx_desc = ICE_TX_DESC(xdp_ring, 0);
+               }
+
+               prefetch(tx_desc);
+
+       } while (likely(--budget));
+
+       ntc += xdp_ring->count;
+       xdp_ring->next_to_clean = ntc;
+
+       if (xsk_frames)
+               xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+
+       ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
+       xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
+
+       return budget > 0 && xmit_done;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+              u32 __always_unused flags)
+{
+       struct ice_netdev_priv *np = netdev_priv(netdev);
+       struct ice_q_vector *q_vector;
+       struct ice_vsi *vsi = np->vsi;
+       struct ice_ring *ring;
+
+       if (test_bit(__ICE_DOWN, vsi->state))
+               return -ENETDOWN;
+
+       if (!ice_is_xdp_ena_vsi(vsi))
+               return -ENXIO;
+
+       if (queue_id >= vsi->num_txq)
+               return -ENXIO;
+
+       if (!vsi->xdp_rings[queue_id]->xsk_umem)
+               return -ENXIO;
+
+       ring = vsi->xdp_rings[queue_id];
+
+       /* The idea here is that if NAPI is running, mark a miss, so
+        * it will run again. If not, trigger an interrupt and
+        * schedule the NAPI from interrupt context. If NAPI would be
+        * scheduled here, the interrupt affinity would not be
+        * honored.
+        */
+       q_vector = ring->q_vector;
+       if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+               ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+       return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+       int i;
+
+       if (!vsi->xsk_umems)
+               return false;
+
+       for (i = 0; i < vsi->num_xsk_umems; i++) {
+               if (vsi->xsk_umems[i])
+                       return true;
+       }
+
+       return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+{
+       u16 i;
+
+       for (i = 0; i < rx_ring->count; i++) {
+               struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+               if (!rx_buf->addr)
+                       continue;
+
+               xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
+               rx_buf->addr = NULL;
+       }
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
+{
+       u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+       u32 xsk_frames = 0;
+
+       while (ntc != ntu) {
+               struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+               if (tx_buf->raw_buf)
+                       ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+               else
+                       xsk_frames++;
+
+               tx_buf->raw_buf = NULL;
+
+               ntc++;
+               if (ntc >= xdp_ring->count)
+                       ntc = 0;
+       }
+
+       if (xsk_frames)
+               xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644 (file)
index 0000000..3479e1d
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+#include "ice.h"
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
+#else
+static inline int
+ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
+                  struct xdp_umem __always_unused *umem,
+                  u16 __always_unused qid)
+{
+       return -ENOTSUPP;
+}
+
+static inline void
+ice_zca_free(struct zero_copy_allocator __always_unused *zca,
+            unsigned long __always_unused handle)
+{
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
+                   int __always_unused budget)
+{
+       return 0;
+}
+
+static inline bool
+ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
+                   int __always_unused budget)
+{
+       return false;
+}
+
+static inline bool
+ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
+                         u16 __always_unused count)
+{
+       return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+       return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+              u32 __always_unused queue_id, u32 __always_unused flags)
+{
+       return -ENOTSUPP;
+}
+
+#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
+#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */
index 6ad775b..63ec253 100644 (file)
@@ -127,6 +127,7 @@ struct e1000_adv_tx_context_desc {
 };
 
 #define E1000_ADVTXD_MACLEN_SHIFT    9  /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000  /* L4 Packet TYPE of UDP */
 #define E1000_ADVTXD_TUCMD_IPV4    0x00000400  /* IP Packet Type: 1=IPv4 */
 #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800  /* L4 Packet TYPE of TCP */
 #define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */
index 9148c62..48a40e4 100644 (file)
@@ -2518,6 +2518,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
        if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN))
                return features & ~(NETIF_F_HW_CSUM |
                                    NETIF_F_SCTP_CRC |
+                                   NETIF_F_GSO_UDP_L4 |
                                    NETIF_F_HW_VLAN_CTAG_TX |
                                    NETIF_F_TSO |
                                    NETIF_F_TSO6);
@@ -2526,6 +2527,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
        if (unlikely(network_hdr_len >  IGB_MAX_NETWORK_HDR_LEN))
                return features & ~(NETIF_F_HW_CSUM |
                                    NETIF_F_SCTP_CRC |
+                                   NETIF_F_GSO_UDP_L4 |
                                    NETIF_F_TSO |
                                    NETIF_F_TSO6);
 
@@ -3122,7 +3124,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                            NETIF_F_HW_CSUM;
 
        if (hw->mac.type >= e1000_82576)
-               netdev->features |= NETIF_F_SCTP_CRC;
+               netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
 
        if (hw->mac.type >= e1000_i350)
                netdev->features |= NETIF_F_HW_TC;
@@ -5696,6 +5698,7 @@ static int igb_tso(struct igb_ring *tx_ring,
        } ip;
        union {
                struct tcphdr *tcp;
+               struct udphdr *udp;
                unsigned char *hdr;
        } l4;
        u32 paylen, l4_offset;
@@ -5715,7 +5718,8 @@ static int igb_tso(struct igb_ring *tx_ring,
        l4.hdr = skb_checksum_start(skb);
 
        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
-       type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+       type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
+                     E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP;
 
        /* initialize outer IP header fields */
        if (ip.v4->version == 4) {
@@ -5743,12 +5747,19 @@ static int igb_tso(struct igb_ring *tx_ring,
        /* determine offset of inner transport header */
        l4_offset = l4.hdr - skb->data;
 
-       /* compute length of segmentation header */
-       *hdr_len = (l4.tcp->doff * 4) + l4_offset;
-
        /* remove payload length from inner checksum */
        paylen = skb->len - l4_offset;
-       csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
+       if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) {
+               /* compute length of segmentation header */
+               *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+               csum_replace_by_diff(&l4.tcp->check,
+                       (__force __wsum)htonl(paylen));
+       } else {
+               /* compute length of segmentation header */
+               *hdr_len = sizeof(*l4.udp) + l4_offset;
+               csum_replace_by_diff(&l4.udp->check,
+                                    (__force __wsum)htonl(paylen));
+       }
 
        /* update gso size and bytecount with header size */
        first->gso_segs = skb_shinfo(skb)->gso_segs;
index 7e16345..0868677 100644 (file)
@@ -411,7 +411,6 @@ struct igc_adapter {
        u32 tx_hwtstamp_timeouts;
        u32 tx_hwtstamp_skipped;
        u32 rx_hwtstamp_cleared;
-       u32 *shadow_vfta;
 
        u32 rss_queues;
        u32 rss_indir_tbl_init;
index f3f2325..f3788f0 100644 (file)
 #define IGC_RCTL_BAM           0x00008000 /* broadcast enable */
 
 /* Receive Descriptor bit definitions */
-#define IGC_RXD_STAT_EOP       0x02    /* End of Packet */
+#define IGC_RXD_STAT_EOP       0x02    /* End of Packet */
+#define IGC_RXD_STAT_IXSM      0x04    /* Ignore checksum */
+#define IGC_RXD_STAT_UDPCS     0x10    /* UDP xsum calculated */
+#define IGC_RXD_STAT_TCPCS     0x20    /* TCP xsum calculated */
 
 #define IGC_RXDEXT_STATERR_CE          0x01000000
 #define IGC_RXDEXT_STATERR_SE          0x02000000
 #define IGC_ADVTXD_TUCMD_L4T_TCP       0x00000800  /* L4 Packet Type of TCP */
 #define IGC_ADVTXD_TUCMD_L4T_SCTP      0x00001000 /* L4 packet TYPE of SCTP */
 
+/* Maximum size of the MTA register table in all supported adapters */
+#define MAX_MTA_REG                    128
+
 #endif /* _IGC_DEFINES_H_ */
index abb2d72..20f7106 100644 (file)
@@ -91,6 +91,7 @@ struct igc_mac_info {
        u16 mta_reg_count;
        u16 uta_reg_count;
 
+       u32 mta_shadow[MAX_MTA_REG];
        u16 rar_entry_count;
 
        u8 forced_speed_duplex;
index 5eeb4c8..12aa6b5 100644 (file)
@@ -784,3 +784,107 @@ bool igc_enable_mng_pass_thru(struct igc_hw *hw)
 out:
        return ret_val;
 }
+
+/**
+ *  igc_hash_mc_addr - Generate a multicast hash value
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: pointer to a multicast address
+ *
+ *  Generates a multicast address hash value which is used to determine
+ *  the multicast filter table array address and new table value.  See
+ *  igc_mta_set()
+ **/
+static u32 igc_hash_mc_addr(struct igc_hw *hw, u8 *mc_addr)
+{
+       u32 hash_value, hash_mask;
+       u8 bit_shift = 0;
+
+       /* Register count multiplied by bits per register */
+       hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+       /* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+        * where 0xFF would still fall within the hash mask.
+        */
+       while (hash_mask >> bit_shift != 0xFF)
+               bit_shift++;
+
+       /* The portion of the address that is used for the hash table
+        * is determined by the mc_filter_type setting.
+        * The algorithm is such that there is a total of 8 bits of shifting.
+        * The bit_shift for a mc_filter_type of 0 represents the number of
+        * left-shifts where the MSB of mc_addr[5] would still fall within
+        * the hash_mask.  Case 0 does this exactly.  Since there are a total
+        * of 8 bits of shifting, then mc_addr[4] will shift right the
+        * remaining number of bits. Thus 8 - bit_shift.  The rest of the
+        * cases are a variation of this algorithm...essentially raising the
+        * number of bits to shift mc_addr[5] left, while still keeping the
+        * 8-bit shifting total.
+        *
+        * For example, given the following Destination MAC Address and an
+        * MTA register count of 128 (thus a 4096-bit vector and 0xFFF mask),
+        * we can see that the bit_shift for case 0 is 4.  These are the hash
+        * values resulting from each mc_filter_type...
+        * [0] [1] [2] [3] [4] [5]
+        * 01  AA  00  12  34  56
+        * LSB                 MSB
+        *
+        * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
+        * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
+        * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
+        * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
+        */
+       switch (hw->mac.mc_filter_type) {
+       default:
+       case 0:
+               break;
+       case 1:
+               bit_shift += 1;
+               break;
+       case 2:
+               bit_shift += 2;
+               break;
+       case 3:
+               bit_shift += 4;
+               break;
+       }
+
+       hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+                                 (((u16)mc_addr[5]) << bit_shift)));
+
+       return hash_value;
+}
+
+/**
+ *  igc_update_mc_addr_list - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates entire Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void igc_update_mc_addr_list(struct igc_hw *hw,
+                            u8 *mc_addr_list, u32 mc_addr_count)
+{
+       u32 hash_value, hash_bit, hash_reg;
+       int i;
+
+       /* clear mta_shadow */
+       memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+       /* update mta_shadow from mc_addr_list */
+       for (i = 0; (u32)i < mc_addr_count; i++) {
+               hash_value = igc_hash_mc_addr(hw, mc_addr_list);
+
+               hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+               hash_bit = hash_value & 0x1F;
+
+               hw->mac.mta_shadow[hash_reg] |= BIT(hash_bit);
+               mc_addr_list += ETH_ALEN;
+       }
+
+       /* replace the entire MTA table */
+       for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+               array_wr32(IGC_MTA, i, hw->mac.mta_shadow[i]);
+       wrfl();
+}
index 782bc99..832ccce 100644 (file)
@@ -29,6 +29,8 @@ s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
                                    u16 *duplex);
 
 bool igc_enable_mng_pass_thru(struct igc_hw *hw);
+void igc_update_mc_addr_list(struct igc_hw *hw,
+                            u8 *mc_addr_list, u32 mc_addr_count);
 
 enum igc_mng_mode {
        igc_mng_mode_none = 0,
index 8e424df..6105c6d 100644 (file)
@@ -795,6 +795,44 @@ static int igc_set_mac(struct net_device *netdev, void *p)
        return 0;
 }
 
+/**
+ *  igc_write_mc_addr_list - write multicast addresses to MTA
+ *  @netdev: network interface device structure
+ *
+ *  Writes multicast address list to the MTA hash table.
+ *  Returns: -ENOMEM on failure
+ *           0 on no addresses written
+ *           X on writing X addresses to MTA
+ **/
+static int igc_write_mc_addr_list(struct net_device *netdev)
+{
+       struct igc_adapter *adapter = netdev_priv(netdev);
+       struct igc_hw *hw = &adapter->hw;
+       struct netdev_hw_addr *ha;
+       u8  *mta_list;
+       int i;
+
+       if (netdev_mc_empty(netdev)) {
+               /* nothing to program, so clear mc list */
+               igc_update_mc_addr_list(hw, NULL, 0);
+               return 0;
+       }
+
+       mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
+       if (!mta_list)
+               return -ENOMEM;
+
+       /* The shared function expects a packed array of only addresses. */
+       i = 0;
+       netdev_for_each_mc_addr(ha, netdev)
+               memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+
+       igc_update_mc_addr_list(hw, mta_list, i);
+       kfree(mta_list);
+
+       return netdev_mc_count(netdev);
+}
+
 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
                            struct igc_tx_buffer *first,
                            u32 vlan_macip_lens, u32 type_tucmd,
@@ -1163,6 +1201,46 @@ static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
        return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
 }
 
+static void igc_rx_checksum(struct igc_ring *ring,
+                           union igc_adv_rx_desc *rx_desc,
+                           struct sk_buff *skb)
+{
+       skb_checksum_none_assert(skb);
+
+       /* Ignore Checksum bit is set */
+       if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
+               return;
+
+       /* Rx checksum disabled via ethtool */
+       if (!(ring->netdev->features & NETIF_F_RXCSUM))
+               return;
+
+       /* TCP/UDP checksum error bit is set */
+       if (igc_test_staterr(rx_desc,
+                            IGC_RXDEXT_STATERR_TCPE |
+                            IGC_RXDEXT_STATERR_IPE)) {
+               /* work around errata with sctp packets where the TCPE aka
+                * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
+                * packets (aka let the stack check the crc32c)
+                */
+               if (!(skb->len == 60 &&
+                     test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
+                       u64_stats_update_begin(&ring->rx_syncp);
+                       ring->rx_stats.csum_err++;
+                       u64_stats_update_end(&ring->rx_syncp);
+               }
+               /* let the stack verify checksum errors */
+               return;
+       }
+       /* It must be a TCP or UDP packet with a valid checksum */
+       if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
+                                     IGC_RXD_STAT_UDPCS))
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+       dev_dbg(ring->dev, "cksum success: bits %08X\n",
+               le32_to_cpu(rx_desc->wb.upper.status_error));
+}
+
 static inline void igc_rx_hash(struct igc_ring *ring,
                               union igc_adv_rx_desc *rx_desc,
                               struct sk_buff *skb)
@@ -1189,6 +1267,8 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring,
 {
        igc_rx_hash(rx_ring, rx_desc, skb);
 
+       igc_rx_checksum(rx_ring, rx_desc, skb);
+
        skb_record_rx_queue(skb, rx_ring->queue_index);
 
        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
@@ -2518,6 +2598,110 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter,
                                        IGC_MAC_STATE_QUEUE_STEERING | flags);
 }
 
+/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
+ * 'flags' is used to indicate what kind of match is made, match is by
+ * default for the destination address, if matching by source address
+ * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igc_add_mac_filter(struct igc_adapter *adapter,
+                             const u8 *addr, const u8 queue)
+{
+       struct igc_hw *hw = &adapter->hw;
+       int rar_entries = hw->mac.rar_entry_count;
+       int i;
+
+       if (is_zero_ether_addr(addr))
+               return -EINVAL;
+
+       /* Search for the first empty entry in the MAC table.
+        * Do not touch entries at the end of the table reserved for the VF MAC
+        * addresses.
+        */
+       for (i = 0; i < rar_entries; i++) {
+               if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
+                                              addr, 0))
+                       continue;
+
+               ether_addr_copy(adapter->mac_table[i].addr, addr);
+               adapter->mac_table[i].queue = queue;
+               adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE;
+
+               igc_rar_set_index(adapter, i);
+               return i;
+       }
+
+       return -ENOSPC;
+}
+
+/* Remove a MAC filter for 'addr' directing matching traffic to
+ * 'queue', 'flags' is used to indicate what kind of match need to be
+ * removed, match is by default for the destination address, if
+ * matching by source address is to be removed the flag
+ * IGC_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igc_del_mac_filter(struct igc_adapter *adapter,
+                             const u8 *addr, const u8 queue)
+{
+       struct igc_hw *hw = &adapter->hw;
+       int rar_entries = hw->mac.rar_entry_count;
+       int i;
+
+       if (is_zero_ether_addr(addr))
+               return -EINVAL;
+
+       /* Search for matching entry in the MAC table based on given address
+        * and queue. Do not touch entries at the end of the table reserved
+        * for the VF MAC addresses.
+        */
+       for (i = 0; i < rar_entries; i++) {
+               if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
+                       continue;
+               if (adapter->mac_table[i].state != 0)
+                       continue;
+               if (adapter->mac_table[i].queue != queue)
+                       continue;
+               if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
+                       continue;
+
+               /* When a filter for the default address is "deleted",
+                * we return it to its initial configuration
+                */
+               if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
+                       adapter->mac_table[i].state =
+                               IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+                       adapter->mac_table[i].queue = 0;
+               } else {
+                       adapter->mac_table[i].state = 0;
+                       adapter->mac_table[i].queue = 0;
+                       memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+               }
+
+               igc_rar_set_index(adapter, i);
+               return 0;
+       }
+
+       return -ENOENT;
+}
+
+static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+       struct igc_adapter *adapter = netdev_priv(netdev);
+       int ret;
+
+       ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues);
+
+       return min_t(int, ret, 0);
+}
+
+static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+       struct igc_adapter *adapter = netdev_priv(netdev);
+
+       igc_del_mac_filter(adapter, addr, adapter->num_rx_queues);
+
+       return 0;
+}
+
 /**
  * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
  * @netdev: network interface device structure
@@ -2529,6 +2713,44 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter,
  */
 static void igc_set_rx_mode(struct net_device *netdev)
 {
+       struct igc_adapter *adapter = netdev_priv(netdev);
+       struct igc_hw *hw = &adapter->hw;
+       u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
+       int count;
+
+       /* Check for Promiscuous and All Multicast modes */
+       if (netdev->flags & IFF_PROMISC) {
+               rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
+       } else {
+               if (netdev->flags & IFF_ALLMULTI) {
+                       rctl |= IGC_RCTL_MPE;
+               } else {
+                       /* Write addresses to the MTA, if the attempt fails
+                        * then we should just turn on promiscuous mode so
+                        * that we can at least receive multicast traffic
+                        */
+                       count = igc_write_mc_addr_list(netdev);
+                       if (count < 0)
+                               rctl |= IGC_RCTL_MPE;
+               }
+       }
+
+       /* Write addresses to available RAR registers, if there is not
+        * sufficient space to store all the addresses then enable
+        * unicast promiscuous mode
+        */
+       if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
+               rctl |= IGC_RCTL_UPE;
+
+       /* update state of unicast and multicast */
+       rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
+       wr32(IGC_RCTL, rctl);
+
+#if (PAGE_SIZE < 8192)
+       if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
+               rlpml = IGC_MAX_FRAME_BUILD_SKB;
+#endif
+       wr32(IGC_RLPML, rlpml);
 }
 
 /**
@@ -3982,6 +4204,7 @@ static const struct net_device_ops igc_netdev_ops = {
        .ndo_open               = igc_open,
        .ndo_stop               = igc_close,
        .ndo_start_xmit         = igc_xmit_frame,
+       .ndo_set_rx_mode        = igc_set_rx_mode,
        .ndo_set_mac_address    = igc_set_mac,
        .ndo_change_mtu         = igc_change_mtu,
        .ndo_get_stats          = igc_get_stats,
@@ -4211,7 +4434,9 @@ static int igc_probe(struct pci_dev *pdev,
                goto err_sw_init;
 
        /* Add supported features to the features list*/
+       netdev->features |= NETIF_F_RXCSUM;
        netdev->features |= NETIF_F_HW_CSUM;
+       netdev->features |= NETIF_F_SCTP_CRC;
 
        /* setup the private structure */
        err = igc_sw_init(adapter);
@@ -4349,7 +4574,6 @@ static void igc_remove(struct pci_dev *pdev)
        pci_release_mem_regions(pdev);
 
        kfree(adapter->mac_table);
-       kfree(adapter->shadow_vfta);
        free_netdev(netdev);
 
        pci_disable_pcie_error_reporting(pdev);
index cc3196a..fd9f5d4 100644 (file)
@@ -832,9 +832,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
                                int xdp_count, int xdp_idx,
                                int rxr_count, int rxr_idx)
 {
+       int node = dev_to_node(&adapter->pdev->dev);
        struct ixgbe_q_vector *q_vector;
        struct ixgbe_ring *ring;
-       int node = NUMA_NO_NODE;
        int cpu = -1;
        int ring_count;
        u8 tcs = adapter->hw_tcs;
@@ -845,10 +845,8 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
        if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
                u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
                if (rss_i > 1 && adapter->atr_sample_rate) {
-                       if (cpu_online(v_idx)) {
-                               cpu = v_idx;
-                               node = cpu_to_node(cpu);
-                       }
+                       cpu = cpumask_local_spread(v_idx, node);
+                       node = cpu_to_node(cpu);
                }
        }
 
index 91b3780..1129ae7 100644 (file)
@@ -7945,6 +7945,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
        } ip;
        union {
                struct tcphdr *tcp;
+               struct udphdr *udp;
                unsigned char *hdr;
        } l4;
        u32 paylen, l4_offset;
@@ -7968,7 +7969,8 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
        l4.hdr = skb_checksum_start(skb);
 
        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
-       type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP;
+       type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
+                     IXGBE_ADVTXD_TUCMD_L4T_UDP : IXGBE_ADVTXD_TUCMD_L4T_TCP;
 
        /* initialize outer IP header fields */
        if (ip.v4->version == 4) {
@@ -7998,12 +8000,20 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
        /* determine offset of inner transport header */
        l4_offset = l4.hdr - skb->data;
 
-       /* compute length of segmentation header */
-       *hdr_len = (l4.tcp->doff * 4) + l4_offset;
-
        /* remove payload length from inner checksum */
        paylen = skb->len - l4_offset;
-       csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+
+       if (type_tucmd & IXGBE_ADVTXD_TUCMD_L4T_TCP) {
+               /* compute length of segmentation header */
+               *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+               csum_replace_by_diff(&l4.tcp->check,
+                                    (__force __wsum)htonl(paylen));
+       } else {
+               /* compute length of segmentation header */
+               *hdr_len = sizeof(*l4.udp) + l4_offset;
+               csum_replace_by_diff(&l4.udp->check,
+                                    (__force __wsum)htonl(paylen));
+       }
 
        /* update gso size and bytecount with header size */
        first->gso_segs = skb_shinfo(skb)->gso_segs;
@@ -8639,7 +8649,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
            adapter->ptp_clock) {
-               if (!test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
+               if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
+                   !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
                                           &adapter->state)) {
                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
                        tx_flags |= IXGBE_TX_FLAGS_TSTAMP;
@@ -10189,6 +10200,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
        if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN))
                return features & ~(NETIF_F_HW_CSUM |
                                    NETIF_F_SCTP_CRC |
+                                   NETIF_F_GSO_UDP_L4 |
                                    NETIF_F_HW_VLAN_CTAG_TX |
                                    NETIF_F_TSO |
                                    NETIF_F_TSO6);
@@ -10197,6 +10209,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
        if (unlikely(network_hdr_len >  IXGBE_MAX_NETWORK_HDR_LEN))
                return features & ~(NETIF_F_HW_CSUM |
                                    NETIF_F_SCTP_CRC |
+                                   NETIF_F_GSO_UDP_L4 |
                                    NETIF_F_TSO |
                                    NETIF_F_TSO6);
 
@@ -10906,7 +10919,7 @@ skip_sriov:
                            IXGBE_GSO_PARTIAL_FEATURES;
 
        if (hw->mac.type >= ixgbe_mac_82599EB)
-               netdev->features |= NETIF_F_SCTP_CRC;
+               netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
 
 #ifdef CONFIG_IXGBE_IPSEC
 #define IXGBE_ESP_FEATURES     (NETIF_F_HW_ESP | \
index fb94216..3d5caea 100644 (file)
@@ -61,6 +61,7 @@ config MVNETA
        depends on ARCH_MVEBU || COMPILE_TEST
        select MVMDIO
        select PHYLINK
+       select PAGE_POOL
        ---help---
          This driver supports the network interface units in the
          Marvell ARMADA XP, ARMADA 370, ARMADA 38x and
index 82ea55a..d5b6441 100644 (file)
@@ -2959,15 +2959,16 @@ static void set_params(struct mv643xx_eth_private *mp,
 static int get_phy_mode(struct mv643xx_eth_private *mp)
 {
        struct device *dev = mp->dev->dev.parent;
-       int iface = -1;
+       phy_interface_t iface;
+       int err;
 
        if (dev->of_node)
-               iface = of_get_phy_mode(dev->of_node);
+               err = of_get_phy_mode(dev->of_node, &iface);
 
        /* Historical default if unspecified. We could also read/write
         * the interface state in the PSC1
         */
-       if (iface < 0)
+       if (!dev->of_node || err)
                iface = PHY_INTERFACE_MODE_GMII;
        return iface;
 }
index e498206..274ac39 100644 (file)
@@ -37,6 +37,8 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/tso.h>
+#include <net/page_pool.h>
+#include <linux/bpf_trace.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
              ETH_HLEN + ETH_FCS_LEN,                        \
              cache_line_size())
 
+#define MVNETA_SKB_HEADROOM    (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
+                                NET_IP_ALIGN)
+#define MVNETA_SKB_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \
+                        MVNETA_SKB_HEADROOM))
+#define MVNETA_SKB_SIZE(len)   (SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD)
+#define MVNETA_MAX_RX_BUF_SIZE (PAGE_SIZE - MVNETA_SKB_PAD)
+
 #define IS_TSO_HEADER(txq, addr) \
        ((addr >= txq->tso_hdrs_phys) && \
         (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
@@ -346,6 +355,11 @@ struct mvneta_statistic {
 #define T_REG_64       64
 #define T_SW           1
 
+#define MVNETA_XDP_PASS                BIT(0)
+#define MVNETA_XDP_DROPPED     BIT(1)
+#define MVNETA_XDP_TX          BIT(2)
+#define MVNETA_XDP_REDIR       BIT(3)
+
 static const struct mvneta_statistic mvneta_statistics[] = {
        { 0x3000, T_REG_64, "good_octets_received", },
        { 0x3010, T_REG_32, "good_frames_received", },
@@ -425,6 +439,8 @@ struct mvneta_port {
        u32 cause_rx_tx;
        struct napi_struct napi;
 
+       struct bpf_prog *xdp_prog;
+
        /* Core clock */
        struct clk *clk;
        /* AXI clock */
@@ -545,6 +561,20 @@ struct mvneta_rx_desc {
 };
 #endif
 
+enum mvneta_tx_buf_type {
+       MVNETA_TYPE_SKB,
+       MVNETA_TYPE_XDP_TX,
+       MVNETA_TYPE_XDP_NDO,
+};
+
+struct mvneta_tx_buf {
+       enum mvneta_tx_buf_type type;
+       union {
+               struct xdp_frame *xdpf;
+               struct sk_buff *skb;
+       };
+};
+
 struct mvneta_tx_queue {
        /* Number of this TX queue, in the range 0-7 */
        u8 id;
@@ -560,8 +590,8 @@ struct mvneta_tx_queue {
        int tx_stop_threshold;
        int tx_wake_threshold;
 
-       /* Array of transmitted skb */
-       struct sk_buff **tx_skb;
+       /* Array of transmitted buffers */
+       struct mvneta_tx_buf *buf;
 
        /* Index of last TX DMA descriptor that was inserted */
        int txq_put_index;
@@ -603,6 +633,10 @@ struct mvneta_rx_queue {
        u32 pkts_coal;
        u32 time_coal;
 
+       /* page_pool */
+       struct page_pool *page_pool;
+       struct xdp_rxq_info xdp_rxq;
+
        /* Virtual address of the RX buffer */
        void  **buf_virt_addr;
 
@@ -641,7 +675,6 @@ static int txq_number = 8;
 static int rxq_def;
 
 static int rx_copybreak __read_mostly = 256;
-static int rx_header_size __read_mostly = 128;
 
 /* HW BM need that each port be identify by a unique ID */
 static int global_port_id;
@@ -1761,24 +1794,25 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
        int i;
 
        for (i = 0; i < num; i++) {
+               struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
                struct mvneta_tx_desc *tx_desc = txq->descs +
                        txq->txq_get_index;
-               struct sk_buff *skb = txq->tx_skb[txq->txq_get_index];
-
-               if (skb) {
-                       bytes_compl += skb->len;
-                       pkts_compl++;
-               }
 
                mvneta_txq_inc_get(txq);
 
-               if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
+               if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) &&
+                   buf->type != MVNETA_TYPE_XDP_TX)
                        dma_unmap_single(pp->dev->dev.parent,
                                         tx_desc->buf_phys_addr,
                                         tx_desc->data_size, DMA_TO_DEVICE);
-               if (!skb)
-                       continue;
-               dev_kfree_skb_any(skb);
+               if (buf->type == MVNETA_TYPE_SKB && buf->skb) {
+                       bytes_compl += buf->skb->len;
+                       pkts_compl++;
+                       dev_kfree_skb_any(buf->skb);
+               } else if (buf->type == MVNETA_TYPE_XDP_TX ||
+                          buf->type == MVNETA_TYPE_XDP_NDO) {
+                       xdp_return_frame(buf->xdpf);
+               }
        }
 
        netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
@@ -1812,23 +1846,21 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
                            struct mvneta_rx_queue *rxq,
                            gfp_t gfp_mask)
 {
+       enum dma_data_direction dma_dir;
        dma_addr_t phys_addr;
        struct page *page;
 
-       page = __dev_alloc_page(gfp_mask);
+       page = page_pool_alloc_pages(rxq->page_pool,
+                                    gfp_mask | __GFP_NOWARN);
        if (!page)
                return -ENOMEM;
 
-       /* map page for use */
-       phys_addr = dma_map_page(pp->dev->dev.parent, page, 0, PAGE_SIZE,
-                                DMA_FROM_DEVICE);
-       if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
-               __free_page(page);
-               return -ENOMEM;
-       }
-
-       phys_addr += pp->rx_offset_correction;
+       phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction;
+       dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+       dma_sync_single_for_device(pp->dev->dev.parent, phys_addr,
+                                  MVNETA_MAX_RX_BUF_SIZE, dma_dir);
        mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);
+
        return 0;
 }
 
@@ -1894,10 +1926,29 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
                if (!data || !(rx_desc->buf_phys_addr))
                        continue;
 
-               dma_unmap_page(pp->dev->dev.parent, rx_desc->buf_phys_addr,
-                              PAGE_SIZE, DMA_FROM_DEVICE);
-               __free_page(data);
+               page_pool_put_page(rxq->page_pool, data, false);
+       }
+       if (xdp_rxq_info_is_reg(&rxq->xdp_rxq))
+               xdp_rxq_info_unreg(&rxq->xdp_rxq);
+       page_pool_destroy(rxq->page_pool);
+       rxq->page_pool = NULL;
+}
+
+static void
+mvneta_update_stats(struct mvneta_port *pp, u32 pkts,
+                   u32 len, bool tx)
+{
+       struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+       u64_stats_update_begin(&stats->syncp);
+       if (tx) {
+               stats->tx_packets += pkts;
+               stats->tx_bytes += len;
+       } else {
+               stats->rx_packets += pkts;
+               stats->rx_bytes += len;
        }
+       u64_stats_update_end(&stats->syncp);
 }
 
 static inline
@@ -1925,43 +1976,292 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
        return i;
 }
 
+static int
+mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
+                       struct xdp_frame *xdpf, bool dma_map)
+{
+       struct mvneta_tx_desc *tx_desc;
+       struct mvneta_tx_buf *buf;
+       dma_addr_t dma_addr;
+
+       if (txq->count >= txq->tx_stop_threshold)
+               return MVNETA_XDP_DROPPED;
+
+       tx_desc = mvneta_txq_next_desc_get(txq);
+
+       buf = &txq->buf[txq->txq_put_index];
+       if (dma_map) {
+               /* ndo_xdp_xmit */
+               dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
+                                         xdpf->len, DMA_TO_DEVICE);
+               if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
+                       mvneta_txq_desc_put(txq);
+                       return MVNETA_XDP_DROPPED;
+               }
+               buf->type = MVNETA_TYPE_XDP_NDO;
+       } else {
+               struct page *page = virt_to_page(xdpf->data);
+
+               dma_addr = page_pool_get_dma_addr(page) +
+                          sizeof(*xdpf) + xdpf->headroom;
+               dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
+                                          xdpf->len, DMA_BIDIRECTIONAL);
+               buf->type = MVNETA_TYPE_XDP_TX;
+       }
+       buf->xdpf = xdpf;
+
+       tx_desc->command = MVNETA_TXD_FLZ_DESC;
+       tx_desc->buf_phys_addr = dma_addr;
+       tx_desc->data_size = xdpf->len;
+
+       mvneta_update_stats(pp, 1, xdpf->len, true);
+       mvneta_txq_inc_put(txq);
+       txq->pending++;
+       txq->count++;
+
+       return MVNETA_XDP_TX;
+}
+
+static int
+mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
+{
+       struct mvneta_tx_queue *txq;
+       struct netdev_queue *nq;
+       struct xdp_frame *xdpf;
+       int cpu;
+       u32 ret;
+
+       xdpf = convert_to_xdp_frame(xdp);
+       if (unlikely(!xdpf))
+               return MVNETA_XDP_DROPPED;
+
+       cpu = smp_processor_id();
+       txq = &pp->txqs[cpu % txq_number];
+       nq = netdev_get_tx_queue(pp->dev, txq->id);
+
+       __netif_tx_lock(nq, cpu);
+       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+       if (ret == MVNETA_XDP_TX)
+               mvneta_txq_pend_desc_add(pp, txq, 0);
+       __netif_tx_unlock(nq);
+
+       return ret;
+}
+
+static int
+mvneta_xdp_xmit(struct net_device *dev, int num_frame,
+               struct xdp_frame **frames, u32 flags)
+{
+       struct mvneta_port *pp = netdev_priv(dev);
+       int cpu = smp_processor_id();
+       struct mvneta_tx_queue *txq;
+       struct netdev_queue *nq;
+       int i, drops = 0;
+       u32 ret;
+
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+               return -EINVAL;
+
+       txq = &pp->txqs[cpu % txq_number];
+       nq = netdev_get_tx_queue(pp->dev, txq->id);
+
+       __netif_tx_lock(nq, cpu);
+       for (i = 0; i < num_frame; i++) {
+               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+               if (ret != MVNETA_XDP_TX) {
+                       xdp_return_frame_rx_napi(frames[i]);
+                       drops++;
+               }
+       }
+
+       if (unlikely(flags & XDP_XMIT_FLUSH))
+               mvneta_txq_pend_desc_add(pp, txq, 0);
+       __netif_tx_unlock(nq);
+
+       return num_frame - drops;
+}
+
+static int
+mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+              struct bpf_prog *prog, struct xdp_buff *xdp)
+{
+       u32 ret, act = bpf_prog_run_xdp(prog, xdp);
+
+       switch (act) {
+       case XDP_PASS:
+               ret = MVNETA_XDP_PASS;
+               break;
+       case XDP_REDIRECT: {
+               int err;
+
+               err = xdp_do_redirect(pp->dev, xdp, prog);
+               if (err) {
+                       ret = MVNETA_XDP_DROPPED;
+                       xdp_return_buff(xdp);
+               } else {
+                       ret = MVNETA_XDP_REDIR;
+               }
+               break;
+       }
+       case XDP_TX:
+               ret = mvneta_xdp_xmit_back(pp, xdp);
+               if (ret != MVNETA_XDP_TX)
+                       xdp_return_buff(xdp);
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               /* fall through */
+       case XDP_ABORTED:
+               trace_xdp_exception(pp->dev, prog, act);
+               /* fall through */
+       case XDP_DROP:
+               page_pool_recycle_direct(rxq->page_pool,
+                                        virt_to_head_page(xdp->data));
+               ret = MVNETA_XDP_DROPPED;
+               break;
+       }
+
+       return ret;
+}
+
+static int
+mvneta_swbm_rx_frame(struct mvneta_port *pp,
+                    struct mvneta_rx_desc *rx_desc,
+                    struct mvneta_rx_queue *rxq,
+                    struct xdp_buff *xdp,
+                    struct bpf_prog *xdp_prog,
+                    struct page *page, u32 *xdp_ret)
+{
+       unsigned char *data = page_address(page);
+       int data_len = -MVNETA_MH_SIZE, len;
+       struct net_device *dev = pp->dev;
+       enum dma_data_direction dma_dir;
+
+       if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) {
+               len = MVNETA_MAX_RX_BUF_SIZE;
+               data_len += len;
+       } else {
+               len = rx_desc->data_size;
+               data_len += len - ETH_FCS_LEN;
+       }
+
+       dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+       dma_sync_single_for_cpu(dev->dev.parent,
+                               rx_desc->buf_phys_addr,
+                               len, dma_dir);
+
+       /* Prefetch header */
+       prefetch(data);
+
+       xdp->data_hard_start = data;
+       xdp->data = data + MVNETA_SKB_HEADROOM + MVNETA_MH_SIZE;
+       xdp->data_end = xdp->data + data_len;
+       xdp_set_data_meta_invalid(xdp);
+
+       if (xdp_prog) {
+               u32 ret;
+
+               ret = mvneta_run_xdp(pp, rxq, xdp_prog, xdp);
+               if (ret != MVNETA_XDP_PASS) {
+                       mvneta_update_stats(pp, 1,
+                                           xdp->data_end - xdp->data,
+                                           false);
+                       rx_desc->buf_phys_addr = 0;
+                       *xdp_ret |= ret;
+                       return ret;
+               }
+       }
+
+       rxq->skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
+       if (unlikely(!rxq->skb)) {
+               netdev_err(dev,
+                          "Can't allocate skb on queue %d\n",
+                          rxq->id);
+               dev->stats.rx_dropped++;
+               rxq->skb_alloc_err++;
+               return -ENOMEM;
+       }
+       page_pool_release_page(rxq->page_pool, page);
+
+       skb_reserve(rxq->skb,
+                   xdp->data - xdp->data_hard_start);
+       skb_put(rxq->skb, xdp->data_end - xdp->data);
+       mvneta_rx_csum(pp, rx_desc->status, rxq->skb);
+
+       rxq->left_size = rx_desc->data_size - len;
+       rx_desc->buf_phys_addr = 0;
+
+       return 0;
+}
+
+static void
+mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
+                           struct mvneta_rx_desc *rx_desc,
+                           struct mvneta_rx_queue *rxq,
+                           struct page *page)
+{
+       struct net_device *dev = pp->dev;
+       enum dma_data_direction dma_dir;
+       int data_len, len;
+
+       if (rxq->left_size > MVNETA_MAX_RX_BUF_SIZE) {
+               len = MVNETA_MAX_RX_BUF_SIZE;
+               data_len = len;
+       } else {
+               len = rxq->left_size;
+               data_len = len - ETH_FCS_LEN;
+       }
+       dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+       dma_sync_single_for_cpu(dev->dev.parent,
+                               rx_desc->buf_phys_addr,
+                               len, dma_dir);
+       if (data_len > 0) {
+               /* refill descriptor with new buffer later */
+               skb_add_rx_frag(rxq->skb,
+                               skb_shinfo(rxq->skb)->nr_frags,
+                               page, MVNETA_SKB_HEADROOM, data_len,
+                               PAGE_SIZE);
+       }
+       page_pool_release_page(rxq->page_pool, page);
+       rx_desc->buf_phys_addr = 0;
+       rxq->left_size -= len;
+}
+
 /* Main rx processing when using software buffer management */
 static int mvneta_rx_swbm(struct napi_struct *napi,
                          struct mvneta_port *pp, int budget,
                          struct mvneta_rx_queue *rxq)
 {
+       int rcvd_pkts = 0, rcvd_bytes = 0, rx_proc = 0;
        struct net_device *dev = pp->dev;
-       int rx_todo, rx_proc;
-       int refill = 0;
-       u32 rcvd_pkts = 0;
-       u32 rcvd_bytes = 0;
+       struct bpf_prog *xdp_prog;
+       struct xdp_buff xdp_buf;
+       int rx_todo, refill;
+       u32 xdp_ret = 0;
 
        /* Get number of received packets */
        rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
-       rx_proc = 0;
+
+       rcu_read_lock();
+       xdp_prog = READ_ONCE(pp->xdp_prog);
+       xdp_buf.rxq = &rxq->xdp_rxq;
 
        /* Fairness NAPI loop */
-       while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
+       while (rx_proc < budget && rx_proc < rx_todo) {
                struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
-               unsigned char *data;
-               struct page *page;
-               dma_addr_t phys_addr;
                u32 rx_status, index;
-               int rx_bytes, skb_size, copy_size;
-               int frag_num, frag_size, frag_offset;
+               struct page *page;
 
                index = rx_desc - rxq->descs;
                page = (struct page *)rxq->buf_virt_addr[index];
-               data = page_address(page);
-               /* Prefetch header */
-               prefetch(data);
 
-               phys_addr = rx_desc->buf_phys_addr;
                rx_status = rx_desc->status;
                rx_proc++;
                rxq->refill_num++;
 
                if (rx_status & MVNETA_RXD_FIRST_DESC) {
+                       int err;
+
                        /* Check errors only for FIRST descriptor */
                        if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
                                mvneta_rx_error(pp, rx_desc);
@@ -1969,85 +2269,18 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                                /* leave the descriptor untouched */
                                continue;
                        }
-                       rx_bytes = rx_desc->data_size -
-                                  (ETH_FCS_LEN + MVNETA_MH_SIZE);
 
-                       /* Allocate small skb for each new packet */
-                       skb_size = max(rx_copybreak, rx_header_size);
-                       rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
-                       if (unlikely(!rxq->skb)) {
-                               netdev_err(dev,
-                                          "Can't allocate skb on queue %d\n",
-                                          rxq->id);
-                               dev->stats.rx_dropped++;
-                               rxq->skb_alloc_err++;
+                       err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
+                                                  xdp_prog, page, &xdp_ret);
+                       if (err)
                                continue;
-                       }
-                       copy_size = min(skb_size, rx_bytes);
-
-                       /* Copy data from buffer to SKB, skip Marvell header */
-                       memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
-                              copy_size);
-                       skb_put(rxq->skb, copy_size);
-                       rxq->left_size = rx_bytes - copy_size;
-
-                       mvneta_rx_csum(pp, rx_status, rxq->skb);
-                       if (rxq->left_size == 0) {
-                               int size = copy_size + MVNETA_MH_SIZE;
-
-                               dma_sync_single_range_for_cpu(dev->dev.parent,
-                                                             phys_addr, 0,
-                                                             size,
-                                                             DMA_FROM_DEVICE);
-
-                               /* leave the descriptor and buffer untouched */
-                       } else {
-                               /* refill descriptor with new buffer later */
-                               rx_desc->buf_phys_addr = 0;
-
-                               frag_num = 0;
-                               frag_offset = copy_size + MVNETA_MH_SIZE;
-                               frag_size = min(rxq->left_size,
-                                               (int)(PAGE_SIZE - frag_offset));
-                               skb_add_rx_frag(rxq->skb, frag_num, page,
-                                               frag_offset, frag_size,
-                                               PAGE_SIZE);
-                               dma_unmap_page(dev->dev.parent, phys_addr,
-                                              PAGE_SIZE, DMA_FROM_DEVICE);
-                               rxq->left_size -= frag_size;
-                       }
                } else {
-                       /* Middle or Last descriptor */
                        if (unlikely(!rxq->skb)) {
                                pr_debug("no skb for rx_status 0x%x\n",
                                         rx_status);
                                continue;
                        }
-                       if (!rxq->left_size) {
-                               /* last descriptor has only FCS */
-                               /* and can be discarded */
-                               dma_sync_single_range_for_cpu(dev->dev.parent,
-                                                             phys_addr, 0,
-                                                             ETH_FCS_LEN,
-                                                             DMA_FROM_DEVICE);
-                               /* leave the descriptor and buffer untouched */
-                       } else {
-                               /* refill descriptor with new buffer later */
-                               rx_desc->buf_phys_addr = 0;
-
-                               frag_num = skb_shinfo(rxq->skb)->nr_frags;
-                               frag_offset = 0;
-                               frag_size = min(rxq->left_size,
-                                               (int)(PAGE_SIZE - frag_offset));
-                               skb_add_rx_frag(rxq->skb, frag_num, page,
-                                               frag_offset, frag_size,
-                                               PAGE_SIZE);
-
-                               dma_unmap_page(dev->dev.parent, phys_addr,
-                                              PAGE_SIZE, DMA_FROM_DEVICE);
-
-                               rxq->left_size -= frag_size;
-                       }
+                       mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, page);
                } /* Middle or Last descriptor */
 
                if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2072,17 +2305,14 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 
                /* clean uncomplete skb pointer in queue */
                rxq->skb = NULL;
-               rxq->left_size = 0;
        }
+       rcu_read_unlock();
 
-       if (rcvd_pkts) {
-               struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+       if (xdp_ret & MVNETA_XDP_REDIR)
+               xdp_do_flush_map();
 
-               u64_stats_update_begin(&stats->syncp);
-               stats->rx_packets += rcvd_pkts;
-               stats->rx_bytes   += rcvd_bytes;
-               u64_stats_update_end(&stats->syncp);
-       }
+       if (rcvd_pkts)
+               mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);
 
        /* return some buffers to hardware queue, one at a time is too slow */
        refill = mvneta_rx_refill_queue(pp, rxq);
@@ -2206,14 +2436,8 @@ err_drop_frame:
                napi_gro_receive(napi, skb);
        }
 
-       if (rcvd_pkts) {
-               struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
-
-               u64_stats_update_begin(&stats->syncp);
-               stats->rx_packets += rcvd_pkts;
-               stats->rx_bytes   += rcvd_bytes;
-               u64_stats_update_end(&stats->syncp);
-       }
+       if (rcvd_pkts)
+               mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);
 
        /* Update rxq management counters */
        mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
@@ -2225,16 +2449,19 @@ static inline void
 mvneta_tso_put_hdr(struct sk_buff *skb,
                   struct mvneta_port *pp, struct mvneta_tx_queue *txq)
 {
-       struct mvneta_tx_desc *tx_desc;
        int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+       struct mvneta_tx_desc *tx_desc;
 
-       txq->tx_skb[txq->txq_put_index] = NULL;
        tx_desc = mvneta_txq_next_desc_get(txq);
        tx_desc->data_size = hdr_len;
        tx_desc->command = mvneta_skb_tx_csum(pp, skb);
        tx_desc->command |= MVNETA_TXD_F_DESC;
        tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
                                 txq->txq_put_index * TSO_HEADER_SIZE;
+       buf->type = MVNETA_TYPE_SKB;
+       buf->skb = NULL;
+
        mvneta_txq_inc_put(txq);
 }
 
@@ -2243,6 +2470,7 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
                    struct sk_buff *skb, char *data, int size,
                    bool last_tcp, bool is_last)
 {
+       struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
        struct mvneta_tx_desc *tx_desc;
 
        tx_desc = mvneta_txq_next_desc_get(txq);
@@ -2256,7 +2484,8 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
        }
 
        tx_desc->command = 0;
-       txq->tx_skb[txq->txq_put_index] = NULL;
+       buf->type = MVNETA_TYPE_SKB;
+       buf->skb = NULL;
 
        if (last_tcp) {
                /* last descriptor in the TCP packet */
@@ -2264,7 +2493,7 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
 
                /* last descriptor in SKB */
                if (is_last)
-                       txq->tx_skb[txq->txq_put_index] = skb;
+                       buf->skb = skb;
        }
        mvneta_txq_inc_put(txq);
        return 0;
@@ -2349,6 +2578,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
        int i, nr_frags = skb_shinfo(skb)->nr_frags;
 
        for (i = 0; i < nr_frags; i++) {
+               struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
                void *addr = skb_frag_address(frag);
 
@@ -2368,12 +2598,13 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
                if (i == nr_frags - 1) {
                        /* Last descriptor */
                        tx_desc->command = MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
-                       txq->tx_skb[txq->txq_put_index] = skb;
+                       buf->skb = skb;
                } else {
                        /* Descriptor in the middle: Not First, Not Last */
                        tx_desc->command = 0;
-                       txq->tx_skb[txq->txq_put_index] = NULL;
+                       buf->skb = NULL;
                }
+               buf->type = MVNETA_TYPE_SKB;
                mvneta_txq_inc_put(txq);
        }
 
@@ -2401,6 +2632,7 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
        struct mvneta_port *pp = netdev_priv(dev);
        u16 txq_id = skb_get_queue_mapping(skb);
        struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
+       struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
        struct mvneta_tx_desc *tx_desc;
        int len = skb->len;
        int frags = 0;
@@ -2433,16 +2665,17 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
                goto out;
        }
 
+       buf->type = MVNETA_TYPE_SKB;
        if (frags == 1) {
                /* First and Last descriptor */
                tx_cmd |= MVNETA_TXD_FLZ_DESC;
                tx_desc->command = tx_cmd;
-               txq->tx_skb[txq->txq_put_index] = skb;
+               buf->skb = skb;
                mvneta_txq_inc_put(txq);
        } else {
                /* First but not Last */
                tx_cmd |= MVNETA_TXD_F_DESC;
-               txq->tx_skb[txq->txq_put_index] = NULL;
+               buf->skb = NULL;
                mvneta_txq_inc_put(txq);
                tx_desc->command = tx_cmd;
                /* Continue with other skb fragments */
@@ -2459,7 +2692,6 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 
 out:
        if (frags > 0) {
-               struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
                struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 
                netdev_tx_sent_queue(nq, len);
@@ -2474,10 +2706,7 @@ out:
                else
                        txq->pending += frags;
 
-               u64_stats_update_begin(&stats->syncp);
-               stats->tx_packets++;
-               stats->tx_bytes  += len;
-               u64_stats_update_end(&stats->syncp);
+               mvneta_update_stats(pp, 1, len, true);
        } else {
                dev->stats.tx_dropped++;
                dev_kfree_skb_any(skb);
@@ -2830,11 +3059,55 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
        return rx_done;
 }
 
+static int mvneta_create_page_pool(struct mvneta_port *pp,
+                                  struct mvneta_rx_queue *rxq, int size)
+{
+       struct bpf_prog *xdp_prog = READ_ONCE(pp->xdp_prog);
+       struct page_pool_params pp_params = {
+               .order = 0,
+               .flags = PP_FLAG_DMA_MAP,
+               .pool_size = size,
+               .nid = cpu_to_node(0),
+               .dev = pp->dev->dev.parent,
+               .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+       };
+       int err;
+
+       rxq->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(rxq->page_pool)) {
+               err = PTR_ERR(rxq->page_pool);
+               rxq->page_pool = NULL;
+               return err;
+       }
+
+       err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id);
+       if (err < 0)
+               goto err_free_pp;
+
+       err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
+                                        rxq->page_pool);
+       if (err)
+               goto err_unregister_rxq;
+
+       return 0;
+
+err_unregister_rxq:
+       xdp_rxq_info_unreg(&rxq->xdp_rxq);
+err_free_pp:
+       page_pool_destroy(rxq->page_pool);
+       rxq->page_pool = NULL;
+       return err;
+}
+
 /* Handle rxq fill: allocates rxq skbs; called when initializing a port */
 static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
                           int num)
 {
-       int i;
+       int i, err;
+
+       err = mvneta_create_page_pool(pp, rxq, num);
+       if (err < 0)
+               return err;
 
        for (i = 0; i < num; i++) {
                memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
@@ -2908,7 +3181,7 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
                /* Set Offset */
                mvneta_rxq_offset_set(pp, rxq, 0);
                mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ?
-                                       PAGE_SIZE :
+                                       MVNETA_MAX_RX_BUF_SIZE :
                                        MVNETA_RX_BUF_SIZE(pp->pkt_size));
                mvneta_rxq_bm_disable(pp, rxq);
                mvneta_rxq_fill(pp, rxq, rxq->size);
@@ -2989,9 +3262,8 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
 
        txq->last_desc = txq->size - 1;
 
-       txq->tx_skb = kmalloc_array(txq->size, sizeof(*txq->tx_skb),
-                                   GFP_KERNEL);
-       if (!txq->tx_skb) {
+       txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL);
+       if (!txq->buf) {
                dma_free_coherent(pp->dev->dev.parent,
                                  txq->size * MVNETA_DESC_ALIGNED_SIZE,
                                  txq->descs, txq->descs_phys);
@@ -3003,7 +3275,7 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
                                           txq->size * TSO_HEADER_SIZE,
                                           &txq->tso_hdrs_phys, GFP_KERNEL);
        if (!txq->tso_hdrs) {
-               kfree(txq->tx_skb);
+               kfree(txq->buf);
                dma_free_coherent(pp->dev->dev.parent,
                                  txq->size * MVNETA_DESC_ALIGNED_SIZE,
                                  txq->descs, txq->descs_phys);
@@ -3056,7 +3328,7 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
 {
        struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
 
-       kfree(txq->tx_skb);
+       kfree(txq->buf);
 
        if (txq->tso_hdrs)
                dma_free_coherent(pp->dev->dev.parent,
@@ -3263,6 +3535,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
                mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
        }
 
+       if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+               return -EINVAL;
+       }
+
        dev->mtu = mtu;
 
        if (!netif_running(dev)) {
@@ -3932,6 +4209,47 @@ static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        return phylink_mii_ioctl(pp->phylink, ifr, cmd);
 }
 
+static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
+                           struct netlink_ext_ack *extack)
+{
+       bool need_update, running = netif_running(dev);
+       struct mvneta_port *pp = netdev_priv(dev);
+       struct bpf_prog *old_prog;
+
+       if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
+               return -EOPNOTSUPP;
+       }
+
+       need_update = !!pp->xdp_prog != !!prog;
+       if (running && need_update)
+               mvneta_stop(dev);
+
+       old_prog = xchg(&pp->xdp_prog, prog);
+       if (old_prog)
+               bpf_prog_put(old_prog);
+
+       if (running && need_update)
+               return mvneta_open(dev);
+
+       return 0;
+}
+
+static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+       struct mvneta_port *pp = netdev_priv(dev);
+
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
+       case XDP_QUERY_PROG:
+               xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
 /* Ethtool methods */
 
 /* Set link ksettings (phy address, speed) for ethtools */
@@ -4328,6 +4646,8 @@ static const struct net_device_ops mvneta_netdev_ops = {
        .ndo_fix_features    = mvneta_fix_features,
        .ndo_get_stats64     = mvneta_get_stats64,
        .ndo_do_ioctl        = mvneta_ioctl,
+       .ndo_bpf             = mvneta_xdp,
+       .ndo_xdp_xmit        = mvneta_xdp_xmit,
 };
 
 static const struct ethtool_ops mvneta_eth_tool_ops = {
@@ -4477,9 +4797,9 @@ static int mvneta_probe(struct platform_device *pdev)
        struct phy *comphy;
        const char *dt_mac_addr;
        char hw_mac_addr[ETH_ALEN];
+       phy_interface_t phy_mode;
        const char *mac_from;
        int tx_csum_limit;
-       int phy_mode;
        int err;
        int cpu;
 
@@ -4492,10 +4812,9 @@ static int mvneta_probe(struct platform_device *pdev)
        if (dev->irq == 0)
                return -EINVAL;
 
-       phy_mode = of_get_phy_mode(dn);
-       if (phy_mode < 0) {
+       err = of_get_phy_mode(dn, &phy_mode);
+       if (err) {
                dev_err(&pdev->dev, "incorrect phy-mode\n");
-               err = -EINVAL;
                goto err_free_irq;
        }
 
@@ -4618,7 +4937,7 @@ static int mvneta_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
 
        pp->id = global_port_id++;
-       pp->rx_offset_correction = 0; /* not relevant for SW BM */
+       pp->rx_offset_correction = MVNETA_SKB_HEADROOM;
 
        /* Obtain access to BM resources if enabled and already initialized */
        bm_node = of_parse_phandle(dn, "buffer-manager", 0);
index 111b3b8..17e24c1 100644 (file)
@@ -2863,7 +2863,7 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
        skb->ip_summed = CHECKSUM_NONE;
 }
 
-/* Reuse skb if possible, or allocate a new skb and add it to BM pool */
+/* Allocate a new skb and add it to BM pool */
 static int mvpp2_rx_refill(struct mvpp2_port *port,
                           struct mvpp2_bm_pool *bm_pool, int pool)
 {
@@ -2871,7 +2871,6 @@ static int mvpp2_rx_refill(struct mvpp2_port *port,
        phys_addr_t phys_addr;
        void *buf;
 
-       /* No recycle or too many buffers are in use, so allocate a new skb */
        buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr, &phys_addr,
                              GFP_ATOMIC);
        if (!buf)
@@ -2957,14 +2956,13 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
                 * by the hardware, and the information about the buffer is
                 * comprised by the RX descriptor.
                 */
-               if (rx_status & MVPP2_RXD_ERR_SUMMARY) {
-err_drop_frame:
-                       dev->stats.rx_errors++;
-                       mvpp2_rx_error(port, rx_desc);
-                       /* Return the buffer to the pool */
-                       mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
-                       continue;
-               }
+               if (rx_status & MVPP2_RXD_ERR_SUMMARY)
+                       goto err_drop_frame;
+
+               dma_sync_single_for_cpu(dev->dev.parent, dma_addr,
+                                       rx_bytes + MVPP2_MH_SIZE,
+                                       DMA_FROM_DEVICE);
+               prefetch(data);
 
                if (bm_pool->frag_size > PAGE_SIZE)
                        frag_size = 0;
@@ -2983,8 +2981,9 @@ err_drop_frame:
                        goto err_drop_frame;
                }
 
-               dma_unmap_single(dev->dev.parent, dma_addr,
-                                bm_pool->buf_size, DMA_FROM_DEVICE);
+               dma_unmap_single_attrs(dev->dev.parent, dma_addr,
+                                      bm_pool->buf_size, DMA_FROM_DEVICE,
+                                      DMA_ATTR_SKIP_CPU_SYNC);
 
                rcvd_pkts++;
                rcvd_bytes += rx_bytes;
@@ -2995,6 +2994,13 @@ err_drop_frame:
                mvpp2_rx_csum(port, rx_status, skb);
 
                napi_gro_receive(napi, skb);
+               continue;
+
+err_drop_frame:
+               dev->stats.rx_errors++;
+               mvpp2_rx_error(port, rx_desc);
+               /* Return the buffer to the pool */
+               mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
        }
 
        if (rcvd_pkts) {
index 51b77c2..3fb7ee3 100644 (file)
@@ -1489,8 +1489,10 @@ static int pxa168_eth_probe(struct platform_device *pdev)
                        goto err_netdev;
                }
                of_property_read_u32(np, "reg", &pep->phy_addr);
-               pep->phy_intf = of_get_phy_mode(pdev->dev.of_node);
                of_node_put(np);
+               err = of_get_phy_mode(pdev->dev.of_node, &pep->phy_intf);
+               if (err && err != -ENODEV)
+                       goto err_netdev;
        }
 
        /* Hardware supports only 3 ports */
index ef11cf3..0fe9715 100644 (file)
@@ -57,7 +57,7 @@ static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
        default:
                updated = false;
                break;
-       };
+       }
 
        if (updated) {
                val = mtk_r32(eth, MTK_MAC_MISC);
@@ -143,7 +143,7 @@ static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
        default:
                updated = false;
                break;
-       };
+       }
 
        if (updated)
                regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
@@ -174,7 +174,7 @@ static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, int path)
                break;
        default:
                updated = false;
-       };
+       }
 
        if (updated)
                regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
index 703adb9..385a4ab 100644 (file)
@@ -2758,9 +2758,10 @@ static const struct net_device_ops mtk_netdev_ops = {
 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 {
        const __be32 *_id = of_get_property(np, "reg", NULL);
+       phy_interface_t phy_mode;
        struct phylink *phylink;
-       int phy_mode, id, err;
        struct mtk_mac *mac;
+       int id, err;
 
        if (!_id) {
                dev_err(eth->dev, "missing mac id\n");
@@ -2805,10 +2806,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
        mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
        /* phylink create */
-       phy_mode = of_get_phy_mode(np);
-       if (phy_mode < 0) {
+       err = of_get_phy_mode(np, &phy_mode);
+       if (err) {
                dev_err(eth->dev, "incorrect phy-mode\n");
-               err = -EINVAL;
                goto free_netdev;
        }
 
index 4db27df..32d8342 100644 (file)
@@ -93,7 +93,7 @@ int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id,
        case SPEED_1000:
                val |= SGMII_SPEED_1000;
                break;
-       };
+       }
 
        if (state->duplex == DUPLEX_FULL)
                val |= SGMII_DUPLEX_FULL;
index fce9b3a..22c72fb 100644 (file)
@@ -3935,13 +3935,17 @@ static void mlx4_restart_one_down(struct pci_dev *pdev);
 static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
                               struct devlink *devlink);
 
-static int mlx4_devlink_reload_down(struct devlink *devlink,
+static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
                                    struct netlink_ext_ack *extack)
 {
        struct mlx4_priv *priv = devlink_priv(devlink);
        struct mlx4_dev *dev = &priv->dev;
        struct mlx4_dev_persistent *persist = dev->persist;
 
+       if (netns_change) {
+               NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported");
+               return -EOPNOTSUPP;
+       }
        if (persist->num_vfs)
                mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
        mlx4_restart_one_down(persist->pdev);
index 5708fcc..a6f390f 100644 (file)
@@ -70,7 +70,7 @@ mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/t
 
 mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
                                        steering/dr_matcher.o steering/dr_rule.o \
-                                       steering/dr_icm_pool.o steering/dr_crc32.o \
+                                       steering/dr_icm_pool.o \
                                        steering/dr_ste.o steering/dr_send.o \
                                        steering/dr_cmd.o steering/dr_fw.o \
                                        steering/dr_action.o steering/fs_dr.o
index ea934cd..34cba97 100644 (file)
@@ -866,7 +866,7 @@ static void cmd_work_handler(struct work_struct *work)
        if (!ent->page_queue) {
                alloc_ret = alloc_ent(cmd);
                if (alloc_ret < 0) {
-                       mlx5_core_err(dev, "failed to allocate command entry\n");
+                       mlx5_core_err_rl(dev, "failed to allocate command entry\n");
                        if (ent->callback) {
                                ent->callback(-EAGAIN, ent->context);
                                mlx5_free_cmd_msg(dev, ent->out);
index 633b117..7b672ad 100644 (file)
@@ -175,7 +175,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
  *     @port_buffer: <output> port receive buffer configuration
  *     @change: <output>
  *
- *     Update buffer configuration based on pfc configuraiton and
+ *     Update buffer configuration based on pfc configuration and
  *     priority to buffer mapping.
  *     Buffer's lossy bit is changed to:
  *             lossless if there is at least one PFC enabled priority
index b860569..6c72b59 100644 (file)
@@ -222,7 +222,8 @@ static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 }
 
 static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
-                                    void *context)
+                                    void *context,
+                                    struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
        struct mlx5e_err_ctx *err_ctx = context;
@@ -301,7 +302,8 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
 }
 
 static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
-                                     struct devlink_fmsg *fmsg)
+                                     struct devlink_fmsg *fmsg,
+                                     struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
        struct mlx5e_params *params = &priv->channels.params;
index bfed558..b468549 100644 (file)
@@ -135,7 +135,8 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 }
 
 static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
-                                    void *context)
+                                    void *context,
+                                    struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
        struct mlx5e_err_ctx *err_ctx = context;
@@ -205,7 +206,8 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
 }
 
 static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
-                                     struct devlink_fmsg *fmsg)
+                                     struct devlink_fmsg *fmsg,
+                                     struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
        struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
index fda0b37..bb970b2 100644 (file)
@@ -2241,13 +2241,14 @@ out_err:
 
 struct mlx5_fields {
        u8  field;
-       u8  size;
+       u8  field_bsize;
+       u32 field_mask;
        u32 offset;
        u32 match_offset;
 };
 
-#define OFFLOAD(fw_field, size, field, off, match_field) \
-               {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
+#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
+               {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
                 offsetof(struct pedit_headers, field) + (off), \
                 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
 
@@ -2265,18 +2266,18 @@ struct mlx5_fields {
 })
 
 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
-                        void *matchmaskp, int size)
+                        void *matchmaskp, u8 bsize)
 {
        bool same = false;
 
-       switch (size) {
-       case sizeof(u8):
+       switch (bsize) {
+       case 8:
                same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
                break;
-       case sizeof(u16):
+       case 16:
                same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
                break;
-       case sizeof(u32):
+       case 32:
                same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
                break;
        }
@@ -2285,41 +2286,43 @@ static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
 }
 
 static struct mlx5_fields fields[] = {
-       OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
-       OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0, dmac_15_0),
-       OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
-       OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0, smac_15_0),
-       OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0, ethertype),
-       OFFLOAD(FIRST_VID,  2, vlan.h_vlan_TCI, 0, first_vid),
-
-       OFFLOAD(IP_TTL, 1, ip4.ttl,   0, ttl_hoplimit),
-       OFFLOAD(SIPV4,  4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
-       OFFLOAD(DIPV4,  4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-
-       OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
+       OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
+       OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
+       OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
+       OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
+       OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
+       OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
+
+       OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
+       OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
+       OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+       OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+
+       OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
-       OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0,
+       OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
-       OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0,
+       OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
-       OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0,
+       OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
-       OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
+       OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
-       OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0,
+       OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
-       OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0,
+       OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
-       OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0,
+       OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
-       OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),
+       OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
 
-       OFFLOAD(TCP_SPORT, 2, tcp.source,  0, tcp_sport),
-       OFFLOAD(TCP_DPORT, 2, tcp.dest,    0, tcp_dport),
-       OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),
+       OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
+       OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
+       /* in linux iphdr tcp_flags is 8 bits long */
+       OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
 
-       OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
-       OFFLOAD(UDP_DPORT, 2, udp.dest,   0, udp_dport),
+       OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
+       OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
 };
 
 /* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
@@ -2332,19 +2335,17 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                                struct netlink_ext_ack *extack)
 {
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
-       void *headers_c = get_match_headers_criteria(*action_flags,
-                                                    &parse_attr->spec);
-       void *headers_v = get_match_headers_value(*action_flags,
-                                                 &parse_attr->spec);
        int i, action_size, nactions, max_actions, first, last, next_z;
-       void *s_masks_p, *a_masks_p, *vals_p;
+       void *headers_c, *headers_v, *action, *vals_p;
+       u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
        struct mlx5_fields *f;
-       u8 cmd, field_bsize;
-       u32 s_mask, a_mask;
        unsigned long mask;
        __be32 mask_be32;
        __be16 mask_be16;
-       void *action;
+       u8 cmd;
+
+       headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
+       headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
 
        set_masks = &hdrs[0].masks;
        add_masks = &hdrs[1].masks;
@@ -2369,8 +2370,8 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                s_masks_p = (void *)set_masks + f->offset;
                a_masks_p = (void *)add_masks + f->offset;
 
-               memcpy(&s_mask, s_masks_p, f->size);
-               memcpy(&a_mask, a_masks_p, f->size);
+               s_mask = *s_masks_p & f->field_mask;
+               a_mask = *a_masks_p & f->field_mask;
 
                if (!s_mask && !a_mask) /* nothing to offload here */
                        continue;
@@ -2399,38 +2400,34 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                        vals_p = (void *)set_vals + f->offset;
                        /* don't rewrite if we have a match on the same value */
                        if (cmp_val_mask(vals_p, s_masks_p, match_val,
-                                        match_mask, f->size))
+                                        match_mask, f->field_bsize))
                                skip = true;
                        /* clear to denote we consumed this field */
-                       memset(s_masks_p, 0, f->size);
+                       *s_masks_p &= ~f->field_mask;
                } else {
-                       u32 zero = 0;
-
                        cmd  = MLX5_ACTION_TYPE_ADD;
                        mask = a_mask;
                        vals_p = (void *)add_vals + f->offset;
                        /* add 0 is no change */
-                       if (!memcmp(vals_p, &zero, f->size))
+                       if ((*(u32 *)vals_p & f->field_mask) == 0)
                                skip = true;
                        /* clear to denote we consumed this field */
-                       memset(a_masks_p, 0, f->size);
+                       *a_masks_p &= ~f->field_mask;
                }
                if (skip)
                        continue;
 
-               field_bsize = f->size * BITS_PER_BYTE;
-
-               if (field_bsize == 32) {
+               if (f->field_bsize == 32) {
                        mask_be32 = *(__be32 *)&mask;
                        mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
-               } else if (field_bsize == 16) {
+               } else if (f->field_bsize == 16) {
                        mask_be16 = *(__be16 *)&mask;
                        mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
                }
 
-               first = find_first_bit(&mask, field_bsize);
-               next_z = find_next_zero_bit(&mask, field_bsize, first);
-               last  = find_last_bit(&mask, field_bsize);
+               first = find_first_bit(&mask, f->field_bsize);
+               next_z = find_next_zero_bit(&mask, f->field_bsize, first);
+               last  = find_last_bit(&mask, f->field_bsize);
                if (first < next_z && next_z < last) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "rewrite of few sub-fields isn't supported");
@@ -2443,16 +2440,22 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                MLX5_SET(set_action_in, action, field, f->field);
 
                if (cmd == MLX5_ACTION_TYPE_SET) {
-                       MLX5_SET(set_action_in, action, offset, first);
+                       int start;
+
+                       /* if field is bit sized it can start not from first bit */
+                       start = find_first_bit((unsigned long *)&f->field_mask,
+                                              f->field_bsize);
+
+                       MLX5_SET(set_action_in, action, offset, first - start);
                        /* length is num of bits to be written, zero means length of 32 */
                        MLX5_SET(set_action_in, action, length, (last - first + 1));
                }
 
-               if (field_bsize == 32)
+               if (f->field_bsize == 32)
                        MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
-               else if (field_bsize == 16)
+               else if (f->field_bsize == 16)
                        MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
-               else if (field_bsize == 8)
+               else if (f->field_bsize == 8)
                        MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
 
                action += action_size;
@@ -3443,6 +3446,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        }
 
+       if (!(attr->action &
+             (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+               NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action");
+               return -EOPNOTSUPP;
+       }
+
        if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "current firmware doesn't support split rule for port mirroring");
index 67dc4f0..66951ff 100644 (file)
@@ -461,8 +461,14 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
                        if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
                                              &sq->state)) {
+                               struct mlx5e_tx_wqe_info *wi;
+                               u16 ci;
+
+                               ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+                               wi = &sq->db.wqe_info[ci];
                                mlx5e_dump_error_cqe(sq,
                                                     (struct mlx5_err_cqe *)cqe);
+                               mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
                                queue_work(cq->channel->priv->wq,
                                           &sq->recover_work);
                        }
index 30aae76..89a2806 100644 (file)
@@ -1831,6 +1831,15 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
        flush_workqueue(esw->work_queue);
 }
 
+static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
+{
+       struct mlx5_vport *vport;
+       int i;
+
+       mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+               memset(&vport->info, 0, sizeof(vport->info));
+}
+
 /* Public E-Switch API */
 #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
 
@@ -1923,7 +1932,7 @@ abort:
        return err;
 }
 
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 {
        int old_mode;
 
@@ -1952,6 +1961,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
        }
+       if (clear_vf)
+               mlx5_eswitch_clear_vf_vports_info(esw);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
index 6bd6f58..804a7ed 100644 (file)
@@ -270,7 +270,7 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
 int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
                               u16 vport, u8 mac[ETH_ALEN]);
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -603,7 +603,7 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
 static inline int  mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
-static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
 static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
 static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
 static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
index 369499e..2276bb1 100644 (file)
@@ -1369,7 +1369,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
                return -EINVAL;
        }
 
-       mlx5_eswitch_disable(esw);
+       mlx5_eswitch_disable(esw, false);
        mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
        err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
        if (err) {
@@ -2195,7 +2195,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
 {
        int err, err1;
 
-       mlx5_eswitch_disable(esw);
+       mlx5_eswitch_disable(esw, false);
        err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
index eb8b0fe..11621d2 100644 (file)
 
 #include <linux/mlx5/driver.h>
 
-enum mlx5_fpga_device_id {
-       MLX5_FPGA_DEVICE_UNKNOWN = 0,
-       MLX5_FPGA_DEVICE_KU040 = 1,
-       MLX5_FPGA_DEVICE_KU060 = 2,
-       MLX5_FPGA_DEVICE_KU060_2 = 3,
+enum mlx5_fpga_id {
+       MLX5_FPGA_NEWTON = 0,
+       MLX5_FPGA_EDISON = 1,
+       MLX5_FPGA_MORSE = 2,
+       MLX5_FPGA_MORSEQ = 3,
 };
 
 enum mlx5_fpga_image {
index d046d1e..2ce4241 100644 (file)
@@ -81,19 +81,28 @@ static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
        }
 }
 
-static const char *mlx5_fpga_device_name(u32 device)
+static const char *mlx5_fpga_name(u32 fpga_id)
 {
-       switch (device) {
-       case MLX5_FPGA_DEVICE_KU040:
-               return "ku040";
-       case MLX5_FPGA_DEVICE_KU060:
-               return "ku060";
-       case MLX5_FPGA_DEVICE_KU060_2:
-               return "ku060_2";
-       case MLX5_FPGA_DEVICE_UNKNOWN:
-       default:
-               return "unknown";
+       static char ret[32];
+
+       switch (fpga_id) {
+       case MLX5_FPGA_NEWTON:
+               return "Newton";
+       case MLX5_FPGA_EDISON:
+               return "Edison";
+       case MLX5_FPGA_MORSE:
+               return "Morse";
+       case MLX5_FPGA_MORSEQ:
+               return "MorseQ";
        }
+
+       snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
+       return ret;
+}
+
+static int mlx5_is_fpga_lookaside(u32 fpga_id)
+{
+       return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
 }
 
 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
@@ -110,8 +119,12 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
        fdev->last_admin_image = query.admin_image;
        fdev->last_oper_image = query.oper_image;
 
-       mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
-                     query.status, query.admin_image, query.oper_image);
+       mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
+                      query.status, query.admin_image, query.oper_image);
+
+       /* for FPGA lookaside projects FPGA load status is not important */
+       if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
+               return 0;
 
        if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
                mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
@@ -167,25 +180,30 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
        struct mlx5_fpga_device *fdev = mdev->fpga;
        unsigned int max_num_qps;
        unsigned long flags;
-       u32 fpga_device_id;
+       u32 fpga_id;
        int err;
 
        if (!fdev)
                return 0;
 
-       err = mlx5_fpga_device_load_check(fdev);
+       err = mlx5_fpga_caps(fdev->mdev);
        if (err)
                goto out;
 
-       err = mlx5_fpga_caps(fdev->mdev);
+       err = mlx5_fpga_device_load_check(fdev);
        if (err)
                goto out;
 
-       fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
-       mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
-                      mlx5_fpga_device_name(fpga_device_id),
-                      fpga_device_id,
+       fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
+       mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
+
+       /* No QPs if FPGA does not participate in net processing */
+       if (mlx5_is_fpga_lookaside(fpga_id))
+               goto out;
+
+       mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
                       mlx5_fpga_image_name(fdev->last_oper_image),
+                      fdev->last_oper_image,
                       MLX5_CAP_FPGA(fdev->mdev, image_version),
                       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
                       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
@@ -264,6 +282,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
        if (!fdev)
                return;
 
+       if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
+               return;
+
        spin_lock_irqsave(&fdev->state_lock, flags);
        if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
                spin_unlock_irqrestore(&fdev->state_lock, flags);
index 3bbb493..0246f5c 100644 (file)
@@ -531,9 +531,16 @@ static void del_hw_fte(struct fs_node *node)
        }
 }
 
+static void del_sw_fte_rcu(struct rcu_head *head)
+{
+       struct fs_fte *fte = container_of(head, struct fs_fte, rcu);
+       struct mlx5_flow_steering *steering = get_steering(&fte->node);
+
+       kmem_cache_free(steering->ftes_cache, fte);
+}
+
 static void del_sw_fte(struct fs_node *node)
 {
-       struct mlx5_flow_steering *steering = get_steering(node);
        struct mlx5_flow_group *fg;
        struct fs_fte *fte;
        int err;
@@ -546,7 +553,8 @@ static void del_sw_fte(struct fs_node *node)
                                     rhash_fte);
        WARN_ON(err);
        ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
-       kmem_cache_free(steering->ftes_cache, fte);
+
+       call_rcu(&fte->rcu, del_sw_fte_rcu);
 }
 
 static void del_hw_flow_group(struct fs_node *node)
@@ -1623,22 +1631,47 @@ static u64 matched_fgs_get_version(struct list_head *match_head)
 }
 
 static struct fs_fte *
-lookup_fte_locked(struct mlx5_flow_group *g,
-                 const u32 *match_value,
-                 bool take_write)
+lookup_fte_for_write_locked(struct mlx5_flow_group *g, const u32 *match_value)
 {
        struct fs_fte *fte_tmp;
 
-       if (take_write)
-               nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
-       else
-               nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
-       fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
-                                        rhash_fte);
+       nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+
+       fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, rhash_fte);
+       if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+               fte_tmp = NULL;
+               goto out;
+       }
+
+       if (!fte_tmp->node.active) {
+               tree_put_node(&fte_tmp->node, false);
+               fte_tmp = NULL;
+               goto out;
+       }
+       nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+
+out:
+       up_write_ref_node(&g->node, false);
+       return fte_tmp;
+}
+
+static struct fs_fte *
+lookup_fte_for_read_locked(struct mlx5_flow_group *g, const u32 *match_value)
+{
+       struct fs_fte *fte_tmp;
+
+       if (!tree_get_node(&g->node))
+               return NULL;
+
+       rcu_read_lock();
+       fte_tmp = rhashtable_lookup(&g->ftes_hash, match_value, rhash_fte);
        if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+               rcu_read_unlock();
                fte_tmp = NULL;
                goto out;
        }
+       rcu_read_unlock();
+
        if (!fte_tmp->node.active) {
                tree_put_node(&fte_tmp->node, false);
                fte_tmp = NULL;
@@ -1646,14 +1679,21 @@ lookup_fte_locked(struct mlx5_flow_group *g,
        }
 
        nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+
 out:
-       if (take_write)
-               up_write_ref_node(&g->node, false);
-       else
-               up_read_ref_node(&g->node);
+       tree_put_node(&g->node, false);
        return fte_tmp;
 }
 
+static struct fs_fte *
+lookup_fte_locked(struct mlx5_flow_group *g, const u32 *match_value, bool write)
+{
+       if (write)
+               return lookup_fte_for_write_locked(g, match_value);
+       else
+               return lookup_fte_for_read_locked(g, match_value);
+}
+
 static struct mlx5_flow_handle *
 try_add_to_existing_fg(struct mlx5_flow_table *ft,
                       struct list_head *match_head,
@@ -1814,6 +1854,13 @@ search_again_locked:
                return rule;
        }
 
+       fte = alloc_fte(ft, spec, flow_act);
+       if (IS_ERR(fte)) {
+               up_write_ref_node(&ft->node, false);
+               err = PTR_ERR(fte);
+               goto err_alloc_fte;
+       }
+
        nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
        up_write_ref_node(&ft->node, false);
 
@@ -1821,17 +1868,9 @@ search_again_locked:
        if (err)
                goto err_release_fg;
 
-       fte = alloc_fte(ft, spec, flow_act);
-       if (IS_ERR(fte)) {
-               err = PTR_ERR(fte);
-               goto err_release_fg;
-       }
-
        err = insert_fte(g, fte);
-       if (err) {
-               kmem_cache_free(steering->ftes_cache, fte);
+       if (err)
                goto err_release_fg;
-       }
 
        nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
        up_write_ref_node(&g->node, false);
@@ -1843,6 +1882,8 @@ search_again_locked:
 
 err_release_fg:
        up_write_ref_node(&g->node, false);
+       kmem_cache_free(steering->ftes_cache, fte);
+err_alloc_fte:
        tree_put_node(&g->node, false);
        return ERR_PTR(err);
 }
index 00717eb..f278298 100644 (file)
@@ -202,6 +202,7 @@ struct fs_fte {
        enum fs_fte_status              status;
        struct mlx5_fc                  *counter;
        struct rhash_head               hash;
+       struct rcu_head rcu;
        int                             modify_mask;
 };
 
index c07f315..e718170 100644 (file)
@@ -390,7 +390,8 @@ static void print_health_info(struct mlx5_core_dev *dev)
 
 static int
 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
-                         struct devlink_fmsg *fmsg)
+                         struct devlink_fmsg *fmsg,
+                         struct netlink_ext_ack *extack)
 {
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
        struct mlx5_core_health *health = &dev->priv.health;
@@ -491,7 +492,8 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
 
 static int
 mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
-                     struct devlink_fmsg *fmsg, void *priv_ctx)
+                     struct devlink_fmsg *fmsg, void *priv_ctx,
+                     struct netlink_ext_ack *extack)
 {
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
        int err;
@@ -545,7 +547,8 @@ static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
 
 static int
 mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
-                              void *priv_ctx)
+                              void *priv_ctx,
+                              struct netlink_ext_ack *extack)
 {
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 
@@ -555,7 +558,8 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
 #define MLX5_CR_DUMP_CHUNK_SIZE 256
 static int
 mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
-                           struct devlink_fmsg *fmsg, void *priv_ctx)
+                           struct devlink_fmsg *fmsg, void *priv_ctx,
+                           struct netlink_ext_ack *extack)
 {
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
        u32 crdump_size = dev->priv.health.crdump_size;
index c5ef2ff..fc0d958 100644 (file)
@@ -145,34 +145,35 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 {
        *port1 = 1;
        *port2 = 2;
-       if (!tracker->netdev_state[0].tx_enabled ||
-           !tracker->netdev_state[0].link_up) {
+       if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled ||
+           !tracker->netdev_state[MLX5_LAG_P1].link_up) {
                *port1 = 2;
                return;
        }
 
-       if (!tracker->netdev_state[1].tx_enabled ||
-           !tracker->netdev_state[1].link_up)
+       if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled ||
+           !tracker->netdev_state[MLX5_LAG_P2].link_up)
                *port2 = 1;
 }
 
 void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker)
 {
-       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u8 v2p_port1, v2p_port2;
        int err;
 
        mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
                                       &v2p_port2);
 
-       if (v2p_port1 != ldev->v2p_map[0] ||
-           v2p_port2 != ldev->v2p_map[1]) {
-               ldev->v2p_map[0] = v2p_port1;
-               ldev->v2p_map[1] = v2p_port2;
+       if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
+           v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
+               ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
+               ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
 
                mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
-                              ldev->v2p_map[0], ldev->v2p_map[1]);
+                              ldev->v2p_map[MLX5_LAG_P1],
+                              ldev->v2p_map[MLX5_LAG_P2]);
 
                err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
                if (err)
@@ -185,16 +186,17 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker)
 {
-       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;
 
-       mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
-                                      &ldev->v2p_map[1]);
+       mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
+                                      &ldev->v2p_map[MLX5_LAG_P2]);
 
        mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
-                      ldev->v2p_map[0], ldev->v2p_map[1]);
+                      ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
 
-       err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+       err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
+                                 ldev->v2p_map[MLX5_LAG_P2]);
        if (err)
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
@@ -207,7 +209,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
                      u8 flags)
 {
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
-       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;
 
        err = mlx5_create_lag(ldev, tracker);
@@ -229,7 +231,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 
 static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 {
-       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        bool roce_lag = __mlx5_lag_is_roce(ldev);
        int err;
 
@@ -252,14 +254,15 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 
 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 {
-       if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+       if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
                return false;
 
 #ifdef CONFIG_MLX5_ESWITCH
-       return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+       return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
+                                  ldev->pf[MLX5_LAG_P2].dev);
 #else
-       return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
-               !mlx5_sriov_is_enabled(ldev->pf[1].dev));
+       return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
+               !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
 #endif
 }
 
@@ -285,8 +288,8 @@ static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
 {
-       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
-       struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        struct lag_tracker tracker;
        bool do_bond, roce_lag;
        int err;
@@ -692,10 +695,11 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
                goto unlock;
 
        if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
-               ndev = ldev->tracker.netdev_state[0].tx_enabled ?
-                      ldev->pf[0].netdev : ldev->pf[1].netdev;
+               ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
+                      ldev->pf[MLX5_LAG_P1].netdev :
+                      ldev->pf[MLX5_LAG_P2].netdev;
        } else {
-               ndev = ldev->pf[0].netdev;
+               ndev = ldev->pf[MLX5_LAG_P1].netdev;
        }
        if (ndev)
                dev_hold(ndev);
@@ -717,7 +721,8 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
                return true;
 
        ldev = mlx5_lag_dev_get(dev);
-       if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
+       if (!ldev || !__mlx5_lag_is_roce(ldev) ||
+           ldev->pf[MLX5_LAG_P1].dev == dev)
                return true;
 
        /* If bonded, we do not add an IB device for PF1. */
@@ -746,11 +751,11 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
        ldev = mlx5_lag_dev_get(dev);
        if (ldev && __mlx5_lag_is_roce(ldev)) {
                num_ports = MLX5_MAX_PORTS;
-               mdev[0] = ldev->pf[0].dev;
-               mdev[1] = ldev->pf[1].dev;
+               mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
+               mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
        } else {
                num_ports = 1;
-               mdev[0] = dev;
+               mdev[MLX5_LAG_P1] = dev;
        }
 
        for (i = 0; i < num_ports; ++i) {
index 1dea0b1..f1068aa 100644 (file)
@@ -7,6 +7,11 @@
 #include "mlx5_core.h"
 #include "lag_mp.h"
 
+enum {
+       MLX5_LAG_P1,
+       MLX5_LAG_P2,
+};
+
 enum {
        MLX5_LAG_FLAG_ROCE   = 1 << 0,
        MLX5_LAG_FLAG_SRIOV  = 1 << 1,
index 5d20d61..b70afa3 100644 (file)
 
 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
 {
-       if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+       if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
                return false;
 
-       return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+       return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
+                                        ldev->pf[MLX5_LAG_P2].dev);
 }
 
 static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
@@ -43,7 +44,8 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
  *     2 - set affinity to port 2.
  *
  **/
-static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
+static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
+                                      enum mlx5_lag_port_affinity port)
 {
        struct lag_tracker tracker;
 
@@ -51,37 +53,37 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
                return;
 
        switch (port) {
-       case 0:
-               tracker.netdev_state[0].tx_enabled = true;
-               tracker.netdev_state[1].tx_enabled = true;
-               tracker.netdev_state[0].link_up = true;
-               tracker.netdev_state[1].link_up = true;
+       case MLX5_LAG_NORMAL_AFFINITY:
+               tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+               tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+               tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+               tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
-       case 1:
-               tracker.netdev_state[0].tx_enabled = true;
-               tracker.netdev_state[0].link_up = true;
-               tracker.netdev_state[1].tx_enabled = false;
-               tracker.netdev_state[1].link_up = false;
+       case MLX5_LAG_P1_AFFINITY:
+               tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+               tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+               tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
+               tracker.netdev_state[MLX5_LAG_P2].link_up = false;
                break;
-       case 2:
-               tracker.netdev_state[0].tx_enabled = false;
-               tracker.netdev_state[0].link_up = false;
-               tracker.netdev_state[1].tx_enabled = true;
-               tracker.netdev_state[1].link_up = true;
+       case MLX5_LAG_P2_AFFINITY:
+               tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
+               tracker.netdev_state[MLX5_LAG_P1].link_up = false;
+               tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+               tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        default:
-               mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
-                              port);
+               mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+                              "Invalid affinity port %d", port);
                return;
        }
 
-       if (tracker.netdev_state[0].tx_enabled)
-               mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
+       if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
+               mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);
 
-       if (tracker.netdev_state[1].tx_enabled)
-               mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
+       if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
+               mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);
 
@@ -141,11 +143,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
        /* Verify next hops are ports of the same hca */
        fib_nh0 = fib_info_nh(fi, 0);
        fib_nh1 = fib_info_nh(fi, 1);
-       if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
-             fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
-           !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
-             fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
-               mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
+       if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
+             fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
+           !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
+             fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
+               mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+                              "Multipath offload require two ports of the same HCA\n");
                return;
        }
 
@@ -157,7 +160,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
                mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
        }
 
-       mlx5_lag_set_port_affinity(ldev, 0);
+       mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        mp->mfi = fi;
 }
 
@@ -182,7 +185,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
                }
        } else if (event == FIB_EVENT_NH_ADD &&
                   fib_info_num_path(fi) == 2) {
-               mlx5_lag_set_port_affinity(ldev, 0);
+               mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        }
 }
 
@@ -248,9 +251,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
        struct net_device *fib_dev;
        struct fib_info *fi;
 
-       if (!net_eq(info->net, &init_net))
-               return NOTIFY_DONE;
-
        if (info->family != AF_INET)
                return NOTIFY_DONE;
 
@@ -270,8 +270,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
                        return notifier_from_errno(-EINVAL);
                }
                fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
-               if (fib_dev != ldev->pf[0].netdev &&
-                   fib_dev != ldev->pf[1].netdev) {
+               if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
+                   fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
                        return NOTIFY_DONE;
                }
                fib_work = mlx5_lag_init_fib_work(ldev, event);
@@ -311,8 +311,8 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
                return 0;
 
        mp->fib_nb.notifier_call = mlx5_lag_fib_event;
-       err = register_fib_notifier(&mp->fib_nb,
-                                   mlx5_lag_fib_event_flush);
+       err = register_fib_notifier(&init_net, &mp->fib_nb,
+                                   mlx5_lag_fib_event_flush, NULL);
        if (err)
                mp->fib_nb.notifier_call = NULL;
 
@@ -326,6 +326,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
        if (!mp->fib_nb.notifier_call)
                return;
 
-       unregister_fib_notifier(&mp->fib_nb);
+       unregister_fib_notifier(&init_net, &mp->fib_nb);
        mp->fib_nb.notifier_call = NULL;
 }
index 6d14b11..79be89e 100644 (file)
@@ -7,6 +7,12 @@
 #include "lag.h"
 #include "mlx5_core.h"
 
+enum mlx5_lag_port_affinity {
+       MLX5_LAG_NORMAL_AFFINITY,
+       MLX5_LAG_P1_AFFINITY,
+       MLX5_LAG_P2_AFFINITY,
+};
+
 struct lag_mp {
        struct notifier_block     fib_nb;
        struct fib_info           *mfi; /* used in tracking fib events */
index e47dd7c..c9a091d 100644 (file)
@@ -1228,8 +1228,6 @@ function_teardown:
 
 static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 {
-       int err = 0;
-
        if (cleanup) {
                mlx5_unregister_device(dev);
                mlx5_drain_health_wq(dev);
@@ -1257,7 +1255,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
        mlx5_function_teardown(dev, cleanup);
 out:
        mutex_unlock(&dev->intf_state_mutex);
-       return err;
+       return 0;
 }
 
 static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
index 61fcfd8..f641f13 100644 (file)
@@ -108,7 +108,7 @@ enable_vfs_hca:
        return 0;
 }
 
-static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
+static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev, bool clear_vf)
 {
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
        int num_vfs = pci_num_vf(dev->pdev);
@@ -127,7 +127,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
        }
 
        if (MLX5_ESWITCH_MANAGER(dev))
-               mlx5_eswitch_disable(dev->priv.eswitch);
+               mlx5_eswitch_disable(dev->priv.eswitch, clear_vf);
 
        if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
                mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
@@ -147,7 +147,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
        err = pci_enable_sriov(pdev, num_vfs);
        if (err) {
                mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
-               mlx5_device_disable_sriov(dev);
+               mlx5_device_disable_sriov(dev, true);
        }
        return err;
 }
@@ -157,7 +157,7 @@ static void mlx5_sriov_disable(struct pci_dev *pdev)
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 
        pci_disable_sriov(pdev);
-       mlx5_device_disable_sriov(dev);
+       mlx5_device_disable_sriov(dev, true);
 }
 
 int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
@@ -192,7 +192,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
        if (!mlx5_core_is_pf(dev))
                return;
 
-       mlx5_device_disable_sriov(dev);
+       mlx5_device_disable_sriov(dev, false);
 }
 
 static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
deleted file mode 100644 (file)
index 9e2eccb..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-/* Copyright (c) 2011-2015 Stephan Brumme. All rights reserved.
- * Slicing-by-16 contributed by Bulat Ziganshin
- *
- * This software is provided 'as-is', without any express or implied warranty.
- * In no event will the author be held liable for any damages arising from the
- * of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- *    claim that you wrote the original software.
- * 2. If you use this software in a product, an acknowledgment in the product
- *    documentation would be appreciated but is not required.
- * 3. Altered source versions must be plainly marked as such, and must not be
- *    misrepresented as being the original software.
- *
- * Taken from http://create.stephan-brumme.com/crc32/ and adapted.
- */
-
-#include "dr_types.h"
-
-#define DR_STE_CRC_POLY 0xEDB88320L
-
-static u32 dr_ste_crc_tab32[8][256];
-
-static void dr_crc32_calc_lookup_entry(u32 (*tbl)[256], u8 i, u8 j)
-{
-       tbl[i][j] = (tbl[i - 1][j] >> 8) ^ tbl[0][tbl[i - 1][j] & 0xff];
-}
-
-void mlx5dr_crc32_init_table(void)
-{
-       u32 crc, i, j;
-
-       for (i = 0; i < 256; i++) {
-               crc = i;
-               for (j = 0; j < 8; j++) {
-                       if (crc & 0x00000001L)
-                               crc = (crc >> 1) ^ DR_STE_CRC_POLY;
-                       else
-                               crc = crc >> 1;
-               }
-               dr_ste_crc_tab32[0][i] = crc;
-       }
-
-       /* Init CRC lookup tables according to crc_slice_8 algorithm */
-       for (i = 0; i < 256; i++) {
-               dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 1, i);
-               dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 2, i);
-               dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 3, i);
-               dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 4, i);
-               dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 5, i);
-               dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 6, i);
-               dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 7, i);
-       }
-}
-
-/* Compute CRC32 (Slicing-by-8 algorithm) */
-u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length)
-{
-       const u32 *curr = (const u32 *)input_data;
-       const u8 *curr_char;
-       u32 crc = 0, one, two;
-
-       if (!input_data)
-               return 0;
-
-       /* Process eight bytes at once (Slicing-by-8) */
-       while (length >= 8) {
-               one = *curr++ ^ crc;
-               two = *curr++;
-
-               crc = dr_ste_crc_tab32[0][(two >> 24) & 0xff]
-                       ^ dr_ste_crc_tab32[1][(two >> 16) & 0xff]
-                       ^ dr_ste_crc_tab32[2][(two >> 8) & 0xff]
-                       ^ dr_ste_crc_tab32[3][two & 0xff]
-                       ^ dr_ste_crc_tab32[4][(one >> 24) & 0xff]
-                       ^ dr_ste_crc_tab32[5][(one >> 16) & 0xff]
-                       ^ dr_ste_crc_tab32[6][(one >> 8) & 0xff]
-                       ^ dr_ste_crc_tab32[7][one & 0xff];
-
-               length -= 8;
-       }
-
-       curr_char = (const u8 *)curr;
-       /* Remaining 1 to 7 bytes (standard algorithm) */
-       while (length-- != 0)
-               crc = (crc >> 8) ^ dr_ste_crc_tab32[0][(crc & 0xff)
-                       ^ *curr_char++];
-
-       return ((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
-               ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
-}
index 5b24732..a9da961 100644 (file)
@@ -326,9 +326,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
                goto uninit_resourses;
        }
 
-       /* Init CRC table for htbl CRC calculation */
-       mlx5dr_crc32_init_table();
-
        return dmn;
 
 uninit_resourses:
index 67dea76..5db947d 100644 (file)
@@ -146,17 +146,15 @@ dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_domain *dmn)
 
 int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
                                   struct mlx5dr_matcher_rx_tx *nic_matcher,
-                                  bool ipv6)
+                                  enum mlx5dr_ipv outer_ipv,
+                                  enum mlx5dr_ipv inner_ipv)
 {
-       if (ipv6) {
-               nic_matcher->ste_builder = nic_matcher->ste_builder6;
-               nic_matcher->num_of_builders = nic_matcher->num_of_builders6;
-       } else {
-               nic_matcher->ste_builder = nic_matcher->ste_builder4;
-               nic_matcher->num_of_builders = nic_matcher->num_of_builders4;
-       }
+       nic_matcher->ste_builder =
+               nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
+       nic_matcher->num_of_builders =
+               nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv];
 
-       if (!nic_matcher->num_of_builders) {
+       if (!nic_matcher->ste_builder) {
                mlx5dr_dbg(matcher->tbl->dmn,
                           "Rule not supported on this matcher due to IP related fields\n");
                return -EINVAL;
@@ -167,26 +165,19 @@ int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
 
 static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
                                       struct mlx5dr_matcher_rx_tx *nic_matcher,
-                                      bool ipv6)
+                                      enum mlx5dr_ipv outer_ipv,
+                                      enum mlx5dr_ipv inner_ipv)
 {
        struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
        struct mlx5dr_domain *dmn = matcher->tbl->dmn;
        struct mlx5dr_match_param mask = {};
        struct mlx5dr_match_misc3 *misc3;
        struct mlx5dr_ste_build *sb;
-       u8 *num_of_builders;
        bool inner, rx;
        int idx = 0;
        int ret, i;
 
-       if (ipv6) {
-               sb = nic_matcher->ste_builder6;
-               num_of_builders = &nic_matcher->num_of_builders6;
-       } else {
-               sb = nic_matcher->ste_builder4;
-               num_of_builders = &nic_matcher->num_of_builders4;
-       }
-
+       sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
        rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
 
        /* Create a temporary mask to track and clear used mask fields */
@@ -249,7 +240,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
                if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer))
                        mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
 
-               if (ipv6) {
+               if (outer_ipv == DR_RULE_IPV6) {
                        if (dr_mask_is_dst_addr_set(&mask.outer))
                                mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
                                                                 inner, rx);
@@ -325,7 +316,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
                if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner))
                        mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
 
-               if (ipv6) {
+               if (inner_ipv == DR_RULE_IPV6) {
                        if (dr_mask_is_dst_addr_set(&mask.inner))
                                mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
                                                                 inner, rx);
@@ -373,7 +364,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
                }
        }
 
-       *num_of_builders = idx;
+       nic_matcher->ste_builder = sb;
+       nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv] = idx;
 
        return 0;
 }
@@ -524,24 +516,33 @@ static void dr_matcher_uninit(struct mlx5dr_matcher *matcher)
        }
 }
 
-static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
-                              struct mlx5dr_matcher_rx_tx *nic_matcher)
+static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher,
+                                          struct mlx5dr_matcher_rx_tx *nic_matcher)
 {
        struct mlx5dr_domain *dmn = matcher->tbl->dmn;
-       int ret, ret_v4, ret_v6;
 
-       ret_v4 = dr_matcher_set_ste_builders(matcher, nic_matcher, false);
-       ret_v6 = dr_matcher_set_ste_builders(matcher, nic_matcher, true);
+       dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV4);
+       dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV6);
+       dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV4);
+       dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6);
 
-       if (ret_v4 && ret_v6) {
+       if (!nic_matcher->ste_builder) {
                mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n");
                return -EINVAL;
        }
 
-       if (!ret_v4)
-               nic_matcher->ste_builder = nic_matcher->ste_builder4;
-       else
-               nic_matcher->ste_builder = nic_matcher->ste_builder6;
+       return 0;
+}
+
+static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
+                              struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+       struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+       int ret;
+
+       ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher);
+       if (ret)
+               return ret;
 
        nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
                                                      DR_CHUNK_SIZE_1,
index e8b6560..90c79a1 100644 (file)
@@ -954,12 +954,12 @@ static int dr_rule_destroy_rule(struct mlx5dr_rule *rule)
        return 0;
 }
 
-static bool dr_rule_is_ipv6(struct mlx5dr_match_param *param)
+static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec)
 {
-       return (param->outer.ip_version == 6 ||
-               param->inner.ip_version == 6 ||
-               param->outer.ethertype == ETH_P_IPV6 ||
-               param->inner.ethertype == ETH_P_IPV6);
+       if (spec->ip_version == 6 || spec->ethertype == ETH_P_IPV6)
+               return DR_RULE_IPV6;
+
+       return DR_RULE_IPV4;
 }
 
 static bool dr_rule_skip(enum mlx5dr_domain_type domain,
@@ -1023,7 +1023,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
 
        ret = mlx5dr_matcher_select_builders(matcher,
                                             nic_matcher,
-                                            dr_rule_is_ipv6(param));
+                                            dr_rule_get_ipv(&param->outer),
+                                            dr_rule_get_ipv(&param->inner));
        if (ret)
                goto out_err;
 
index 4efe1b0..7e9d6cf 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 Mellanox Technologies. */
 
 #include <linux/types.h>
+#include <linux/crc32.h>
 #include "dr_types.h"
 
 #define DR_STE_CRC_POLY 0xEDB88320L
@@ -107,6 +108,13 @@ struct dr_hw_ste_format {
        u8 mask[DR_STE_SIZE_MASK];
 };
 
+static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
+{
+       u32 crc = crc32(0, input_data, length);
+
+       return htonl(crc);
+}
+
 u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
 {
        struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
@@ -128,7 +136,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
                bit = bit >> 1;
        }
 
-       crc32 = mlx5dr_crc32_slice8_calc(masked, DR_STE_SIZE_TAG);
+       crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG);
        index = crc32 & (htbl->chunk->num_of_entries - 1);
 
        return index;
index 1cb3769..c1f45a6 100644 (file)
@@ -106,6 +106,12 @@ enum mlx5dr_action_type {
        DR_ACTION_TYP_MAX,
 };
 
+enum mlx5dr_ipv {
+       DR_RULE_IPV4,
+       DR_RULE_IPV6,
+       DR_RULE_IPV_MAX,
+};
+
 struct mlx5dr_icm_pool;
 struct mlx5dr_icm_chunk;
 struct mlx5dr_icm_bucket;
@@ -679,11 +685,11 @@ struct mlx5dr_matcher_rx_tx {
        struct mlx5dr_ste_htbl *s_htbl;
        struct mlx5dr_ste_htbl *e_anchor;
        struct mlx5dr_ste_build *ste_builder;
-       struct mlx5dr_ste_build ste_builder4[DR_RULE_MAX_STES];
-       struct mlx5dr_ste_build ste_builder6[DR_RULE_MAX_STES];
+       struct mlx5dr_ste_build ste_builder_arr[DR_RULE_IPV_MAX]
+                                              [DR_RULE_IPV_MAX]
+                                              [DR_RULE_MAX_STES];
        u8 num_of_builders;
-       u8 num_of_builders4;
-       u8 num_of_builders6;
+       u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX];
        u64 default_icm_addr;
        struct mlx5dr_table_rx_tx *nic_tbl;
 };
@@ -812,7 +818,8 @@ mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps)
 
 int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
                                   struct mlx5dr_matcher_rx_tx *nic_matcher,
-                                  bool ipv6);
+                                  enum mlx5dr_ipv outer_ipv,
+                                  enum mlx5dr_ipv inner_ipv);
 
 static inline u32
 mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size)
@@ -962,9 +969,6 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
                           struct mlx5dr_match_param *set_param,
                           struct mlx5dr_match_parameters *mask);
 
-void mlx5dr_crc32_init_table(void);
-u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length);
-
 struct mlx5dr_qp {
        struct mlx5_core_dev *mdev;
        struct mlx5_wq_qp wq;
index dd2315c..f2a0e72 100644 (file)
 #include "wq.h"
 #include "mlx5_core.h"
 
-u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
-{
-       return (u32)wq->fbc.sz_m1 + 1;
-}
-
-u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
-{
-       return wq->fbc.sz_m1 + 1;
-}
-
-u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq)
-{
-       return wq->fbc.log_stride;
-}
-
-u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
-{
-       return (u32)wq->fbc.sz_m1 + 1;
-}
-
 static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
 {
        return ((u32)1 << log_sz) << log_stride;
@@ -96,6 +76,24 @@ err_db_free:
        return err;
 }
 
+void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides)
+{
+       size_t len;
+       void *wqe;
+
+       if (!net_ratelimit())
+               return;
+
+       nstrides = max_t(u8, nstrides, 1);
+
+       len = nstrides << wq->fbc.log_stride;
+       wqe = mlx5_wq_cyc_get_wqe(wq, ix);
+
+       pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %ld\n",
+               mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len);
+       print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false);
+}
+
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *qpc, struct mlx5_wq_qp *wq,
                      struct mlx5_wq_ctrl *wq_ctrl)
index 55791f7..d9a94bc 100644 (file)
@@ -79,7 +79,7 @@ struct mlx5_wq_ll {
 int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                       void *wqc, struct mlx5_wq_cyc *wq,
                       struct mlx5_wq_ctrl *wq_ctrl);
-u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
+void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *qpc, struct mlx5_wq_qp *wq,
@@ -88,16 +88,18 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                     void *cqc, struct mlx5_cqwq *wq,
                     struct mlx5_wq_ctrl *wq_ctrl);
-u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
-u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *wqc, struct mlx5_wq_ll *wq,
                      struct mlx5_wq_ctrl *wq_ctrl);
-u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
 
+static inline u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
+{
+       return (u32)wq->fbc.sz_m1 + 1;
+}
+
 static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq)
 {
        return wq->cur_sz == wq->sz;
@@ -168,6 +170,16 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
        return !equal && !smaller;
 }
 
+static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
+{
+       return wq->fbc.sz_m1 + 1;
+}
+
+static inline u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq)
+{
+       return wq->fbc.log_stride;
+}
+
 static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr)
 {
        return ctr & wq->fbc.sz_m1;
@@ -224,6 +236,11 @@ static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
        return cqe;
 }
 
+static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
+{
+       return (u32)wq->fbc.sz_m1 + 1;
+}
+
 static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
 {
        return wq->cur_sz == wq->fbc.sz_m1;
index 4421ab2..e1a90f5 100644 (file)
@@ -127,6 +127,16 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core)
 }
 EXPORT_SYMBOL(mlxsw_core_res_query_enabled);
 
+bool
+mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
+                                         const struct mlxsw_fw_rev *req_rev)
+{
+       return rev->minor > req_rev->minor ||
+              (rev->minor == req_rev->minor &&
+               rev->subminor >= req_rev->subminor);
+}
+EXPORT_SYMBOL(mlxsw_core_fw_rev_minor_subminor_validate);
+
 struct mlxsw_rx_listener_item {
        struct list_head list;
        struct mlxsw_rx_listener rxl;
@@ -985,6 +995,7 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 
 static int
 mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
+                                         bool netns_change,
                                          struct netlink_ext_ack *extack)
 {
        struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -1005,7 +1016,7 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink,
        return mlxsw_core_bus_device_register(mlxsw_core->bus_info,
                                              mlxsw_core->bus,
                                              mlxsw_core->bus_priv, true,
-                                             devlink);
+                                             devlink, extack);
 }
 
 static int mlxsw_devlink_flash_update(struct devlink *devlink,
@@ -1098,7 +1109,8 @@ static int
 __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                                 const struct mlxsw_bus *mlxsw_bus,
                                 void *bus_priv, bool reload,
-                                struct devlink *devlink)
+                                struct devlink *devlink,
+                                struct netlink_ext_ack *extack)
 {
        const char *device_kind = mlxsw_bus_info->device_kind;
        struct mlxsw_core *mlxsw_core;
@@ -1172,7 +1184,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
        }
 
        if (mlxsw_driver->init) {
-               err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+               err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack);
                if (err)
                        goto err_driver_init;
        }
@@ -1223,14 +1235,16 @@ err_devlink_alloc:
 int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                                   const struct mlxsw_bus *mlxsw_bus,
                                   void *bus_priv, bool reload,
-                                  struct devlink *devlink)
+                                  struct devlink *devlink,
+                                  struct netlink_ext_ack *extack)
 {
        bool called_again = false;
        int err;
 
 again:
        err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus,
-                                              bus_priv, reload, devlink);
+                                              bus_priv, reload,
+                                              devlink, extack);
        /* -EAGAIN is returned in case the FW was updated. FW needs
         * a reset, so lets try to call __mlxsw_core_bus_device_register()
         * again.
@@ -2003,6 +2017,35 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get);
 
+int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module)
+{
+       enum mlxsw_reg_pmtm_module_type module_type;
+       char pmtm_pl[MLXSW_REG_PMTM_LEN];
+       int err;
+
+       mlxsw_reg_pmtm_pack(pmtm_pl, module);
+       err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
+       if (err)
+               return err;
+       mlxsw_reg_pmtm_unpack(pmtm_pl, &module_type);
+
+       /* Here we need to get the module width according to the module type. */
+
+       switch (module_type) {
+       case MLXSW_REG_PMTM_MODULE_TYPE_BP_4X: /* fall through */
+       case MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP:
+               return 4;
+       case MLXSW_REG_PMTM_MODULE_TYPE_BP_2X:
+               return 2;
+       case MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP: /* fall through */
+       case MLXSW_REG_PMTM_MODULE_TYPE_BP_1X:
+               return 1;
+       default:
+               return -EINVAL;
+       }
+}
+EXPORT_SYMBOL(mlxsw_core_module_max_width);
+
 static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
                                    const char *buf, size_t size)
 {
index 5d7d2ab..0d18bee 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
+#include <linux/net_namespace.h>
 #include <net/devlink.h>
 
 #include "trap.h"
@@ -23,6 +24,7 @@ struct mlxsw_core_port;
 struct mlxsw_driver;
 struct mlxsw_bus;
 struct mlxsw_bus_info;
+struct mlxsw_fw_rev;
 
 unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core);
 
@@ -30,13 +32,18 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
 
 bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core);
 
+bool
+mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
+                                         const struct mlxsw_fw_rev *req_rev);
+
 int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver);
 void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver);
 
 int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                                   const struct mlxsw_bus *mlxsw_bus,
                                   void *bus_priv, bool reload,
-                                  struct devlink *devlink);
+                                  struct devlink *devlink,
+                                  struct netlink_ext_ack *extack);
 void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, bool reload);
 
 struct mlxsw_tx_info {
@@ -193,6 +200,7 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
 struct devlink_port *
 mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
                                 u8 local_port);
+int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module);
 
 int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
 bool mlxsw_core_schedule_work(struct work_struct *work);
@@ -252,7 +260,8 @@ struct mlxsw_driver {
        const char *kind;
        size_t priv_size;
        int (*init)(struct mlxsw_core *mlxsw_core,
-                   const struct mlxsw_bus_info *mlxsw_bus_info);
+                   const struct mlxsw_bus_info *mlxsw_bus_info,
+                   struct netlink_ext_ack *extack);
        void (*fini)(struct mlxsw_core *mlxsw_core);
        int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core);
        int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port,
@@ -350,6 +359,11 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
 #define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id)                   \
        mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id)
 
+static inline struct net *mlxsw_core_net(struct mlxsw_core *mlxsw_core)
+{
+       return devlink_net(priv_to_devlink(mlxsw_core));
+}
+
 #define MLXSW_BUS_F_TXRX       BIT(0)
 #define MLXSW_BUS_F_RESET      BIT(1)
 
index d2c7ce6..08215fe 100644 (file)
@@ -50,6 +50,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
        char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
        char mcia_pl[MLXSW_REG_MCIA_LEN];
        u16 i2c_addr;
+       u8 page = 0;
        int status;
        int err;
 
@@ -62,11 +63,21 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
 
        i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW;
        if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) {
-               i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH;
-               offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH;
+               page = MLXSW_REG_MCIA_PAGE_GET(offset);
+               offset -= MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH * page;
+               /* When reading upper pages 1, 2 and 3 the offset starts at
+                * 128. Please refer to "QSFP+ Memory Map" figure in SFF-8436
+                * specification for graphical depiction.
+                * MCIA register accepts buffer size <= 48. Page of size 128
+                * should be read by chunks of size 48, 48, 32. Align the size
+                * of the last chunk to avoid reading after the end of the
+                * page.
+                */
+               if (offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH)
+                       size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset;
        }
 
-       mlxsw_reg_mcia_pack(mcia_pl, module, 0, 0, offset, size, i2c_addr);
+       mlxsw_reg_mcia_pack(mcia_pl, module, 0, page, offset, size, i2c_addr);
 
        err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl);
        if (err)
@@ -168,7 +179,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
        switch (module_id) {
        case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP:
                modinfo->type       = ETH_MODULE_SFF_8436;
-               modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
                break;
        case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */
        case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28:
@@ -176,10 +187,10 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
                    module_rev_id >=
                    MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) {
                        modinfo->type       = ETH_MODULE_SFF_8636;
-                       modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
                } else {
                        modinfo->type       = ETH_MODULE_SFF_8436;
-                       modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
                }
                break;
        case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP:
index 5b00726..9bf8da5 100644 (file)
@@ -41,7 +41,7 @@ struct mlxsw_hwmon {
        struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
        unsigned int attrs_count;
        u8 sensor_count;
-       u8 module_sensor_count;
+       u8 module_sensor_max;
 };
 
 static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
@@ -56,7 +56,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
        int err;
 
        index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
-                                          mlxsw_hwmon->module_sensor_count);
+                                          mlxsw_hwmon->module_sensor_max);
        mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
        err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
        if (err) {
@@ -79,7 +79,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
        int err;
 
        index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
-                                          mlxsw_hwmon->module_sensor_count);
+                                          mlxsw_hwmon->module_sensor_max);
        mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
        err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
        if (err) {
@@ -109,7 +109,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
                return -EINVAL;
 
        index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
-                                          mlxsw_hwmon->module_sensor_count);
+                                          mlxsw_hwmon->module_sensor_max);
        mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true);
        err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
        if (err) {
@@ -336,7 +336,7 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev,
                        container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
        struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
        int index = mlwsw_hwmon_attr->type_index -
-                   mlxsw_hwmon->module_sensor_count + 1;
+                   mlxsw_hwmon->module_sensor_max + 1;
 
        return sprintf(buf, "gearbox %03u\n", index);
 }
@@ -528,51 +528,45 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
 
 static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
 {
-       unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core);
-       char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0};
-       int i, index;
-       u8 width;
-       int err;
+       char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+       u8 module_sensor_max;
+       int i, err;
 
        if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core))
                return 0;
 
+       mlxsw_reg_mgpir_pack(mgpir_pl);
+       err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
+       if (err)
+               return err;
+
+       mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
+                              &module_sensor_max);
+
        /* Add extra attributes for module temperature. Sensor index is
         * assigned to sensor_count value, while all indexed before
         * sensor_count are already utilized by the sensors connected through
         * mtmp register by mlxsw_hwmon_temp_init().
         */
-       index = mlxsw_hwmon->sensor_count;
-       for (i = 1; i < module_count; i++) {
-               mlxsw_reg_pmlp_pack(pmlp_pl, i);
-               err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp),
-                                     pmlp_pl);
-               if (err) {
-                       dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n",
-                               i);
-                       return err;
-               }
-               width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-               if (!width)
-                       continue;
+       mlxsw_hwmon->module_sensor_max = mlxsw_hwmon->sensor_count +
+                                        module_sensor_max;
+       for (i = mlxsw_hwmon->sensor_count;
+            i < mlxsw_hwmon->module_sensor_max; i++) {
                mlxsw_hwmon_attr_add(mlxsw_hwmon,
-                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index,
-                                    index);
+                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, i, i);
                mlxsw_hwmon_attr_add(mlxsw_hwmon,
                                     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT,
-                                    index, index);
+                                    i, i);
                mlxsw_hwmon_attr_add(mlxsw_hwmon,
-                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
-                                    index, index);
+                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, i,
+                                    i);
                mlxsw_hwmon_attr_add(mlxsw_hwmon,
                                     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
-                                    index, index);
+                                    i, i);
                mlxsw_hwmon_attr_add(mlxsw_hwmon,
                                     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
-                                    index, index);
-               index++;
+                                    i, i);
        }
-       mlxsw_hwmon->module_sensor_count = index;
 
        return 0;
 }
@@ -590,14 +584,14 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
        if (err)
                return err;
 
-       mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL);
+       mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL, NULL);
        if (!gbox_num)
                return 0;
 
-       index = mlxsw_hwmon->module_sensor_count;
-       max_index = mlxsw_hwmon->module_sensor_count + gbox_num;
+       index = mlxsw_hwmon->module_sensor_max;
+       max_index = mlxsw_hwmon->module_sensor_max + gbox_num;
        while (index < max_index) {
-               sensor_index = index % mlxsw_hwmon->module_sensor_count +
+               sensor_index = index % mlxsw_hwmon->module_sensor_max +
                               MLXSW_REG_MTMP_GBOX_INDEX_MIN;
                mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true);
                err = mlxsw_reg_write(mlxsw_hwmon->core,
index 35a1dc8..c721b17 100644 (file)
@@ -112,6 +112,7 @@ struct mlxsw_thermal {
        struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
        enum thermal_device_mode mode;
        struct mlxsw_thermal_module *tz_module_arr;
+       u8 tz_module_num;
        struct mlxsw_thermal_module *tz_gearbox_arr;
        u8 tz_gearbox_num;
        unsigned int tz_highest_score;
@@ -775,23 +776,10 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
 
 static int
 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
-                         struct mlxsw_thermal *thermal, u8 local_port)
+                         struct mlxsw_thermal *thermal, u8 module)
 {
        struct mlxsw_thermal_module *module_tz;
-       char pmlp_pl[MLXSW_REG_PMLP_LEN];
-       u8 width, module;
-       int err;
-
-       mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
-       err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl);
-       if (err)
-               return err;
 
-       width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-       if (!width)
-               return 0;
-
-       module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
        module_tz = &thermal->tz_module_arr[module];
        /* Skip if parent is already set (case of port split). */
        if (module_tz->parent)
@@ -819,26 +807,34 @@ static int
 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
                           struct mlxsw_thermal *thermal)
 {
-       unsigned int module_count = mlxsw_core_max_ports(core);
        struct mlxsw_thermal_module *module_tz;
+       char mgpir_pl[MLXSW_REG_MGPIR_LEN];
        int i, err;
 
        if (!mlxsw_core_res_query_enabled(core))
                return 0;
 
-       thermal->tz_module_arr = kcalloc(module_count,
+       mlxsw_reg_mgpir_pack(mgpir_pl);
+       err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
+       if (err)
+               return err;
+
+       mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
+                              &thermal->tz_module_num);
+
+       thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
                                         sizeof(*thermal->tz_module_arr),
                                         GFP_KERNEL);
        if (!thermal->tz_module_arr)
                return -ENOMEM;
 
-       for (i = 1; i < module_count; i++) {
+       for (i = 0; i < thermal->tz_module_num; i++) {
                err = mlxsw_thermal_module_init(dev, core, thermal, i);
                if (err)
                        goto err_unreg_tz_module_arr;
        }
 
-       for (i = 0; i < module_count - 1; i++) {
+       for (i = 0; i < thermal->tz_module_num; i++) {
                module_tz = &thermal->tz_module_arr[i];
                if (!module_tz->parent)
                        continue;
@@ -850,7 +846,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
        return 0;
 
 err_unreg_tz_module_arr:
-       for (i = module_count - 1; i >= 0; i--)
+       for (i = thermal->tz_module_num - 1; i >= 0; i--)
                mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
        kfree(thermal->tz_module_arr);
        return err;
@@ -859,13 +855,12 @@ err_unreg_tz_module_arr:
 static void
 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
 {
-       unsigned int module_count = mlxsw_core_max_ports(thermal->core);
        int i;
 
        if (!mlxsw_core_res_query_enabled(thermal->core))
                return;
 
-       for (i = module_count - 1; i >= 0; i--)
+       for (i = thermal->tz_module_num - 1; i >= 0; i--)
                mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
        kfree(thermal->tz_module_arr);
 }
@@ -913,7 +908,8 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
        if (err)
                return err;
 
-       mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL);
+       mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL,
+                              NULL);
        if (!thermal->tz_gearbox_num)
                return 0;
 
index 95f408d..34566eb 100644 (file)
@@ -640,7 +640,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
 
        err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
                                             &mlxsw_i2c_bus, mlxsw_i2c, false,
-                                            NULL);
+                                            NULL, NULL);
        if (err) {
                dev_err(&client->dev, "Fail to register core bus\n");
                return err;
index 471b0ca..2b54391 100644 (file)
 
 static const char mlxsw_m_driver_name[] = "mlxsw_minimal";
 
+#define MLXSW_M_FWREV_MINOR    2000
+#define MLXSW_M_FWREV_SUBMINOR 1886
+
+static const struct mlxsw_fw_rev mlxsw_m_fw_rev = {
+       .minor = MLXSW_M_FWREV_MINOR,
+       .subminor = MLXSW_M_FWREV_SUBMINOR,
+};
+
 struct mlxsw_m_port;
 
 struct mlxsw_m {
@@ -172,6 +180,7 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module)
        }
 
        SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev);
+       dev_net_set(dev, mlxsw_core_net(mlxsw_m->core));
        mlxsw_m_port = netdev_priv(dev);
        mlxsw_m_port->dev = dev;
        mlxsw_m_port->mlxsw_m = mlxsw_m;
@@ -325,8 +334,27 @@ static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
        kfree(mlxsw_m->ports);
 }
 
+static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
+{
+       const struct mlxsw_fw_rev *rev = &mlxsw_m->bus_info->fw_rev;
+
+       /* Validate driver and FW are compatible.
+        * Do not check major version, since it defines chip type, while
+        * driver is supposed to support any type.
+        */
+       if (mlxsw_core_fw_rev_minor_subminor_validate(rev, &mlxsw_m_fw_rev))
+               return 0;
+
+       dev_err(mlxsw_m->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n",
+               rev->major, rev->minor, rev->subminor, rev->major,
+               mlxsw_m_fw_rev.minor, mlxsw_m_fw_rev.subminor);
+
+       return -EINVAL;
+}
+
 static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
-                       const struct mlxsw_bus_info *mlxsw_bus_info)
+                       const struct mlxsw_bus_info *mlxsw_bus_info,
+                       struct netlink_ext_ack *extack)
 {
        struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
        int err;
@@ -334,6 +362,10 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
        mlxsw_m->core = mlxsw_core;
        mlxsw_m->bus_info = mlxsw_bus_info;
 
+       err = mlxsw_m_fw_rev_validate(mlxsw_m);
+       if (err)
+               return err;
+
        err = mlxsw_m_base_mac_get(mlxsw_m);
        if (err) {
                dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n");
index 615455a..914c33e 100644 (file)
@@ -284,15 +284,18 @@ static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q,
 static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
                              struct mlxsw_pci_queue *q)
 {
+       int tclass;
        int i;
        int err;
 
        q->producer_counter = 0;
        q->consumer_counter = 0;
+       tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC :
+                                                     MLXSW_PCI_SDQ_CTL_TC;
 
        /* Set CQ of same number of this SDQ. */
        mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num);
-       mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 3);
+       mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass);
        mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
        for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
                dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
@@ -963,6 +966,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
        eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox);
 
        if (num_sdqs + num_rdqs > num_cqs ||
+           num_sdqs < MLXSW_PCI_SDQS_MIN ||
            num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) {
                dev_err(&pdev->dev, "Unsupported number of queues\n");
                return -EINVAL;
@@ -1520,7 +1524,15 @@ static struct mlxsw_pci_queue *
 mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci,
                   const struct mlxsw_tx_info *tx_info)
 {
-       u8 sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci);
+       u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1;
+       u8 sdqn;
+
+       if (tx_info->is_emad) {
+               sdqn = MLXSW_PCI_SDQ_EMAD_INDEX;
+       } else {
+               BUILD_BUG_ON(MLXSW_PCI_SDQ_EMAD_INDEX != 0);
+               sdqn = 1 + (tx_info->local_port % ctl_sdq_count);
+       }
 
        return mlxsw_pci_sdq_get(mlxsw_pci, sdqn);
 }
@@ -1790,7 +1802,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
                                             &mlxsw_pci_bus, mlxsw_pci, false,
-                                            NULL);
+                                            NULL, NULL);
        if (err) {
                dev_err(&pdev->dev, "cannot register bus device\n");
                goto err_bus_device_register;
index e57e42e..e0d7d2d 100644 (file)
@@ -27,7 +27,7 @@
 
 #define MLXSW_PCI_SW_RESET                     0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT             BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       20000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       900000
 #define MLXSW_PCI_SW_RESET_WAIT_MSECS          100
 #define MLXSW_PCI_FW_READY                     0xA1844
 #define MLXSW_PCI_FW_READY_MASK                        0xFFFF
 #define MLXSW_PCI_EQ_ASYNC_NUM 0
 #define MLXSW_PCI_EQ_COMP_NUM  1
 
+#define MLXSW_PCI_SDQS_MIN     2 /* EMAD and control traffic */
+#define MLXSW_PCI_SDQ_EMAD_INDEX       0
+#define MLXSW_PCI_SDQ_EMAD_TC  0
+#define MLXSW_PCI_SDQ_CTL_TC   3
+
 #define MLXSW_PCI_AQ_PAGES     8
 #define MLXSW_PCI_AQ_SIZE      (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
 #define MLXSW_PCI_WQE_SIZE     32 /* 32 bytes per element */
index a33eeef..741fd29 100644 (file)
@@ -24,8 +24,6 @@
 
 #define MLXSW_PORT_DONT_CARE           0xFF
 
-#define MLXSW_PORT_MODULE_MAX_WIDTH    4
-
 enum mlxsw_port_admin_status {
        MLXSW_PORT_ADMIN_STATUS_UP = 1,
        MLXSW_PORT_ADMIN_STATUS_DOWN = 2,
index 5494cf9..bec035e 100644 (file)
@@ -3969,6 +3969,7 @@ MLXSW_ITEM32(reg, pmlp, local_port, 0x00, 16, 8);
  * 1 - Lane 0 is used.
  * 2 - Lanes 0 and 1 are used.
  * 4 - Lanes 0, 1, 2 and 3 are used.
+ * 8 - Lanes 0-7 are used.
  * Access: RW
  */
 MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8);
@@ -3983,14 +3984,14 @@ MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false);
  * Tx Lane. When rxtx field is cleared, this field is used for Rx as well.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 4, 0x04, 0x00, false);
 
 /* reg_pmlp_rx_lane
  * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is
  * equal to Tx lane.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 4, 0x04, 0x00, false);
 
 static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
 {
@@ -4111,6 +4112,7 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
 #define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4           BIT(9)
 #define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2                BIT(10)
 #define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4                BIT(12)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8                         BIT(15)
 
 /* reg_ptys_ext_eth_proto_cap
  * Extended Ethernet port supported speeds and protocols.
@@ -5373,6 +5375,55 @@ static inline void mlxsw_reg_pplr_pack(char *payload, u8 local_port,
                                 MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0);
 }
 
+/* PMTM - Port Module Type Mapping Register
+ * ----------------------------------------
+ * The PMTM allows query or configuration of module types.
+ */
+#define MLXSW_REG_PMTM_ID 0x5067
+#define MLXSW_REG_PMTM_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN);
+
+/* reg_pmtm_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmtm_module_type {
+       /* Backplane with 4 lanes */
+       MLXSW_REG_PMTM_MODULE_TYPE_BP_4X,
+       /* QSFP */
+       MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP,
+       /* SFP */
+       MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP,
+       /* Backplane with single lane */
+       MLXSW_REG_PMTM_MODULE_TYPE_BP_1X = 4,
+       /* Backplane with two lanes */
+       MLXSW_REG_PMTM_MODULE_TYPE_BP_2X = 8,
+       /* Chip2Chip */
+       MLXSW_REG_PMTM_MODULE_TYPE_C2C = 10,
+};
+
+/* reg_pmtm_module_type
+ * Module type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 4);
+
+static inline void mlxsw_reg_pmtm_pack(char *payload, u8 module)
+{
+       MLXSW_REG_ZERO(pmtm, payload);
+       mlxsw_reg_pmtm_module_set(payload, module);
+}
+
+static inline void
+mlxsw_reg_pmtm_unpack(char *payload,
+                     enum mlxsw_reg_pmtm_module_type *module_type)
+{
+       *module_type = mlxsw_reg_pmtm_module_type_get(payload);
+}
+
 /* HTGT - Host Trap Group Table
  * ----------------------------
  * Configures the properties for forwarding to CPU.
@@ -8411,6 +8462,7 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16);
 MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
 
 #define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH      256
+#define MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH   128
 #define MLXSW_REG_MCIA_EEPROM_SIZE             48
 #define MLXSW_REG_MCIA_I2C_ADDR_LOW            0x50
 #define MLXSW_REG_MCIA_I2C_ADDR_HIGH           0x51
@@ -8446,6 +8498,14 @@ enum mlxsw_reg_mcia_eeprom_module_info {
  */
 MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
 
+/* This is used to access the optional upper pages (1-3) in the QSFP+
+ * memory map. Page 1 is available on offset 256 through 383, page 2 -
+ * on offset 384 through 511, page 3 - on offset 512 through 639.
+ */
+#define MLXSW_REG_MCIA_PAGE_GET(off) (((off) - \
+                               MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) / \
+                               MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1)
+
 static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock,
                                       u8 page_number, u16 device_addr,
                                       u8 size, u8 i2c_device_addr)
@@ -8670,7 +8730,7 @@ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
  * properties.
  */
 #define MLXSW_REG_MPAR_ID 0x901B
-#define MLXSW_REG_MPAR_LEN 0x08
+#define MLXSW_REG_MPAR_LEN 0x0C
 
 MLXSW_REG_DEFINE(mpar, MLXSW_REG_MPAR_ID, MLXSW_REG_MPAR_LEN);
 
@@ -9531,6 +9591,12 @@ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
  */
 MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
 
+/* num_of_modules
+ * Number of modules.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8);
+
 static inline void mlxsw_reg_mgpir_pack(char *payload)
 {
        MLXSW_REG_ZERO(mgpir, payload);
@@ -9539,7 +9605,7 @@ static inline void mlxsw_reg_mgpir_pack(char *payload)
 static inline void
 mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
                       enum mlxsw_reg_mgpir_device_type *device_type,
-                      u8 *devices_per_flash)
+                      u8 *devices_per_flash, u8 *num_of_modules)
 {
        if (num_of_devices)
                *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
@@ -9548,6 +9614,8 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
        if (devices_per_flash)
                *devices_per_flash =
                                mlxsw_reg_mgpir_devices_per_flash_get(payload);
+       if (num_of_modules)
+               *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload);
 }
 
 /* TNGCR - Tunneling NVE General Configuration Register
@@ -10526,6 +10594,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(pbmc),
        MLXSW_REG(pspa),
        MLXSW_REG(pplr),
+       MLXSW_REG(pmtm),
        MLXSW_REG(htgt),
        MLXSW_REG(hpkt),
        MLXSW_REG(rgcr),
index 33a9fc9..6534184 100644 (file)
@@ -26,7 +26,8 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_MAX_LAG_MEMBERS,
        MLXSW_RES_ID_LOCAL_PORTS_IN_1X,
        MLXSW_RES_ID_LOCAL_PORTS_IN_2X,
-       MLXSW_RES_ID_MAX_BUFFER_SIZE,
+       MLXSW_RES_ID_LOCAL_PORTS_IN_4X,
+       MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER,
        MLXSW_RES_ID_CELL_SIZE,
        MLXSW_RES_ID_MAX_HEADROOM_SIZE,
        MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS,
@@ -82,7 +83,8 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
        [MLXSW_RES_ID_LOCAL_PORTS_IN_1X] = 0x2610,
        [MLXSW_RES_ID_LOCAL_PORTS_IN_2X] = 0x2611,
-       [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802,        /* Bytes */
+       [MLXSW_RES_ID_LOCAL_PORTS_IN_4X] = 0x2612,
+       [MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER] = 0x2805,       /* Bytes */
        [MLXSW_RES_ID_CELL_SIZE] = 0x2803,      /* Bytes */
        [MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811,      /* Bytes */
        [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901,
index dcf9562..ea4cc2a 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/inetdevice.h>
 #include <linux/netlink.h>
 #include <linux/jhash.h>
+#include <linux/log2.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
@@ -48,7 +49,7 @@
 
 #define MLXSW_SP1_FWREV_MAJOR 13
 #define MLXSW_SP1_FWREV_MINOR 2000
-#define MLXSW_SP1_FWREV_SUBMINOR 1886
+#define MLXSW_SP1_FWREV_SUBMINOR 2308
 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
 
 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
@@ -63,6 +64,21 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
        "." __stringify(MLXSW_SP1_FWREV_MINOR) \
        "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2"
 
+#define MLXSW_SP2_FWREV_MAJOR 29
+#define MLXSW_SP2_FWREV_MINOR 2000
+#define MLXSW_SP2_FWREV_SUBMINOR 2308
+
+static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = {
+       .major = MLXSW_SP2_FWREV_MAJOR,
+       .minor = MLXSW_SP2_FWREV_MINOR,
+       .subminor = MLXSW_SP2_FWREV_SUBMINOR,
+};
+
+#define MLXSW_SP2_FW_FILENAME \
+       "mellanox/mlxsw_spectrum2-" __stringify(MLXSW_SP2_FWREV_MAJOR) \
+       "." __stringify(MLXSW_SP2_FWREV_MINOR) \
+       "." __stringify(MLXSW_SP2_FWREV_SUBMINOR) ".mfa2"
+
 static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum";
 static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2";
 static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3";
@@ -409,9 +425,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
        }
        if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
            MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) &&
-           (rev->minor > req_rev->minor ||
-            (rev->minor == req_rev->minor &&
-             rev->subminor >= req_rev->subminor)))
+           mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev))
                return 0;
 
        dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
@@ -735,35 +749,69 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl);
 }
 
-static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
-                                        u8 local_port, u8 *p_module,
-                                        u8 *p_width, u8 *p_lane)
+static int
+mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+                             struct mlxsw_sp_port_mapping *port_mapping)
 {
        char pmlp_pl[MLXSW_REG_PMLP_LEN];
+       bool separate_rxtx;
+       u8 module;
+       u8 width;
        int err;
+       int i;
 
        mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl);
        if (err)
                return err;
-       *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
-       *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-       *p_lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0);
+       module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+       width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+       separate_rxtx = mlxsw_reg_pmlp_rxtx_get(pmlp_pl);
+
+       if (width && !is_power_of_2(width)) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: width value is not power of 2\n",
+                       local_port);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < width; i++) {
+               if (mlxsw_reg_pmlp_module_get(pmlp_pl, i) != module) {
+                       dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: contains multiple modules\n",
+                               local_port);
+                       return -EINVAL;
+               }
+               if (separate_rxtx &&
+                   mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) !=
+                   mlxsw_reg_pmlp_rx_lane_get(pmlp_pl, i)) {
+                       dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are different\n",
+                               local_port);
+                       return -EINVAL;
+               }
+               if (mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) != i) {
+                       dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are not sequential\n",
+                               local_port);
+                       return -EINVAL;
+               }
+       }
+
+       port_mapping->module = module;
+       port_mapping->width = width;
+       port_mapping->lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0);
        return 0;
 }
 
-static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port,
-                                   u8 module, u8 width, u8 lane)
+static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+       struct mlxsw_sp_port_mapping *port_mapping = &mlxsw_sp_port->mapping;
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        char pmlp_pl[MLXSW_REG_PMLP_LEN];
        int i;
 
        mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port);
-       mlxsw_reg_pmlp_width_set(pmlp_pl, width);
-       for (i = 0; i < width; i++) {
-               mlxsw_reg_pmlp_module_set(pmlp_pl, i, module);
-               mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, lane + i);  /* Rx & Tx */
+       mlxsw_reg_pmlp_width_set(pmlp_pl, port_mapping->width);
+       for (i = 0; i < port_mapping->width; i++) {
+               mlxsw_reg_pmlp_module_set(pmlp_pl, i, port_mapping->module);
+               mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, port_mapping->lane + i); /* Rx & Tx */
        }
 
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl);
@@ -2914,9 +2962,22 @@ mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = {
 #define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \
        ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4)
 
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_400gaui_8[] = {
+       ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT,
+       ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT,
+       ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT,
+       ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT,
+       ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN \
+       ARRAY_SIZE(mlxsw_sp2_mask_ethtool_400gaui_8)
+
 #define MLXSW_SP_PORT_MASK_WIDTH_1X    BIT(0)
 #define MLXSW_SP_PORT_MASK_WIDTH_2X    BIT(1)
 #define MLXSW_SP_PORT_MASK_WIDTH_4X    BIT(2)
+#define MLXSW_SP_PORT_MASK_WIDTH_8X    BIT(3)
 
 static u8 mlxsw_sp_port_mask_width_get(u8 width)
 {
@@ -2927,6 +2988,8 @@ static u8 mlxsw_sp_port_mask_width_get(u8 width)
                return MLXSW_SP_PORT_MASK_WIDTH_2X;
        case 4:
                return MLXSW_SP_PORT_MASK_WIDTH_4X;
+       case 8:
+               return MLXSW_SP_PORT_MASK_WIDTH_8X;
        default:
                WARN_ON_ONCE(1);
                return 0;
@@ -2948,7 +3011,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN,
                .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_1X |
                                  MLXSW_SP_PORT_MASK_WIDTH_2X |
-                                 MLXSW_SP_PORT_MASK_WIDTH_4X,
+                                 MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_100,
        },
        {
@@ -2957,7 +3021,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN,
                .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_1X |
                                  MLXSW_SP_PORT_MASK_WIDTH_2X |
-                                 MLXSW_SP_PORT_MASK_WIDTH_4X,
+                                 MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_1000,
        },
        {
@@ -2966,7 +3031,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN,
                .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_1X |
                                  MLXSW_SP_PORT_MASK_WIDTH_2X |
-                                 MLXSW_SP_PORT_MASK_WIDTH_4X,
+                                 MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_2500,
        },
        {
@@ -2975,7 +3041,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN,
                .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_1X |
                                  MLXSW_SP_PORT_MASK_WIDTH_2X |
-                                 MLXSW_SP_PORT_MASK_WIDTH_4X,
+                                 MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_5000,
        },
        {
@@ -2984,14 +3051,16 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN,
                .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_1X |
                                  MLXSW_SP_PORT_MASK_WIDTH_2X |
-                                 MLXSW_SP_PORT_MASK_WIDTH_4X,
+                                 MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_10000,
        },
        {
                .mask           = MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G,
                .mask_ethtool   = mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g,
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN,
-               .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_4X,
+               .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_40000,
        },
        {
@@ -3000,7 +3069,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN,
                .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_1X |
                                  MLXSW_SP_PORT_MASK_WIDTH_2X |
-                                 MLXSW_SP_PORT_MASK_WIDTH_4X,
+                                 MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_25000,
        },
        {
@@ -3008,7 +3078,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .mask_ethtool   = mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2,
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN,
                .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_2X |
-                                 MLXSW_SP_PORT_MASK_WIDTH_4X,
+                                 MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_50000,
        },
        {
@@ -3022,7 +3093,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .mask           = MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4,
                .mask_ethtool   = mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4,
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN,
-               .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_4X,
+               .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_100000,
        },
        {
@@ -3036,9 +3108,17 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
                .mask           = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4,
                .mask_ethtool   = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4,
                .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN,
-               .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_4X,
+               .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_4X |
+                                 MLXSW_SP_PORT_MASK_WIDTH_8X,
                .speed          = SPEED_200000,
        },
+       {
+               .mask           = MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8,
+               .mask_ethtool   = mlxsw_sp2_mask_ethtool_400gaui_8,
+               .m_ethtool_len  = MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN,
+               .mask_width     = MLXSW_SP_PORT_MASK_WIDTH_8X,
+               .speed          = SPEED_400000,
+       },
 };
 
 #define MLXSW_SP2_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp2_port_link_mode)
@@ -3435,7 +3515,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
 };
 
 static int
-mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
+mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        const struct mlxsw_sp_port_type_speed_ops *ops;
@@ -3451,7 +3531,7 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
                                   &base_speed);
        if (err)
                return err;
-       upper_speed = base_speed * width;
+       upper_speed = base_speed * mlxsw_sp_port->mapping.width;
 
        eth_proto_admin = ops->to_ptys_upper_speed(mlxsw_sp, upper_speed);
        ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
@@ -3612,15 +3692,18 @@ static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-                               bool split, u8 module, u8 width, u8 lane)
+                               u8 split_base_local_port,
+                               struct mlxsw_sp_port_mapping *port_mapping)
 {
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+       bool split = !!split_base_local_port;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct net_device *dev;
        int err;
 
        err = mlxsw_core_port_init(mlxsw_sp->core, local_port,
-                                  module + 1, split, lane / width,
+                                  port_mapping->module + 1, split,
+                                  port_mapping->lane / port_mapping->width,
                                   mlxsw_sp->base_mac,
                                   sizeof(mlxsw_sp->base_mac));
        if (err) {
@@ -3635,15 +3718,15 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_alloc_etherdev;
        }
        SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev);
+       dev_net_set(dev, mlxsw_sp_net(mlxsw_sp));
        mlxsw_sp_port = netdev_priv(dev);
        mlxsw_sp_port->dev = dev;
        mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
        mlxsw_sp_port->local_port = local_port;
        mlxsw_sp_port->pvid = MLXSW_SP_DEFAULT_VID;
        mlxsw_sp_port->split = split;
-       mlxsw_sp_port->mapping.module = module;
-       mlxsw_sp_port->mapping.width = width;
-       mlxsw_sp_port->mapping.lane = lane;
+       mlxsw_sp_port->split_base_local_port = split_base_local_port;
+       mlxsw_sp_port->mapping = *port_mapping;
        mlxsw_sp_port->link.autoneg = 1;
        INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list);
        INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list);
@@ -3668,7 +3751,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
        dev->netdev_ops = &mlxsw_sp_port_netdev_ops;
        dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops;
 
-       err = mlxsw_sp_port_module_map(mlxsw_sp_port, module, width, lane);
+       err = mlxsw_sp_port_module_map(mlxsw_sp_port);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to map module\n",
                        mlxsw_sp_port->local_port);
@@ -3710,7 +3793,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_port_system_port_mapping_set;
        }
 
-       err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width);
+       err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n",
                        mlxsw_sp_port->local_port);
@@ -3933,14 +4016,13 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
                if (mlxsw_sp_port_created(mlxsw_sp, i))
                        mlxsw_sp_port_remove(mlxsw_sp, i);
        mlxsw_sp_cpu_port_remove(mlxsw_sp);
-       kfree(mlxsw_sp->port_to_module);
        kfree(mlxsw_sp->ports);
 }
 
 static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 {
        unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
-       u8 module, width, lane;
+       struct mlxsw_sp_port_mapping *port_mapping;
        size_t alloc_size;
        int i;
        int err;
@@ -3950,66 +4032,98 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
        if (!mlxsw_sp->ports)
                return -ENOMEM;
 
-       mlxsw_sp->port_to_module = kmalloc_array(max_ports, sizeof(int),
-                                                GFP_KERNEL);
-       if (!mlxsw_sp->port_to_module) {
-               err = -ENOMEM;
-               goto err_port_to_module_alloc;
-       }
-
        err = mlxsw_sp_cpu_port_create(mlxsw_sp);
        if (err)
                goto err_cpu_port_create;
 
        for (i = 1; i < max_ports; i++) {
-               /* Mark as invalid */
-               mlxsw_sp->port_to_module[i] = -1;
-
-               err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module,
-                                                   &width, &lane);
-               if (err)
-                       goto err_port_module_info_get;
-               if (!width)
+               port_mapping = mlxsw_sp->port_mapping[i];
+               if (!port_mapping)
                        continue;
-               mlxsw_sp->port_to_module[i] = module;
-               err = mlxsw_sp_port_create(mlxsw_sp, i, false,
-                                          module, width, lane);
+               err = mlxsw_sp_port_create(mlxsw_sp, i, 0, port_mapping);
                if (err)
                        goto err_port_create;
        }
        return 0;
 
 err_port_create:
-err_port_module_info_get:
        for (i--; i >= 1; i--)
                if (mlxsw_sp_port_created(mlxsw_sp, i))
                        mlxsw_sp_port_remove(mlxsw_sp, i);
        mlxsw_sp_cpu_port_remove(mlxsw_sp);
 err_cpu_port_create:
-       kfree(mlxsw_sp->port_to_module);
-err_port_to_module_alloc:
        kfree(mlxsw_sp->ports);
        return err;
 }
 
-static u8 mlxsw_sp_cluster_base_port_get(u8 local_port)
+static int mlxsw_sp_port_module_info_init(struct mlxsw_sp *mlxsw_sp)
 {
-       u8 offset = (local_port - 1) % MLXSW_SP_PORTS_PER_CLUSTER_MAX;
+       unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+       struct mlxsw_sp_port_mapping port_mapping;
+       int i;
+       int err;
+
+       mlxsw_sp->port_mapping = kcalloc(max_ports,
+                                        sizeof(struct mlxsw_sp_port_mapping *),
+                                        GFP_KERNEL);
+       if (!mlxsw_sp->port_mapping)
+               return -ENOMEM;
+
+       for (i = 1; i < max_ports; i++) {
+               err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &port_mapping);
+               if (err)
+                       goto err_port_module_info_get;
+               if (!port_mapping.width)
+                       continue;
+
+               err = -ENOMEM;
+               mlxsw_sp->port_mapping[i] = kmemdup(&port_mapping,
+                                                   sizeof(port_mapping), GFP_KERNEL);
+               if (!mlxsw_sp->port_mapping[i])
+                       goto err_port_module_info_dup;
+       }
+       return 0;
+
+err_port_module_info_get:
+err_port_module_info_dup:
+       for (i--; i >= 1; i--)
+               kfree(mlxsw_sp->port_mapping[i]);
+       kfree(mlxsw_sp->port_mapping);
+       return err;
+}
+
+static void mlxsw_sp_port_module_info_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       int i;
+
+       for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
+               kfree(mlxsw_sp->port_mapping[i]);
+       kfree(mlxsw_sp->port_mapping);
+}
+
+static u8 mlxsw_sp_cluster_base_port_get(u8 local_port, unsigned int max_width)
+{
+       u8 offset = (local_port - 1) % max_width;
 
        return local_port - offset;
 }
 
-static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
-                                     u8 module, unsigned int count, u8 offset)
+static int
+mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
+                          struct mlxsw_sp_port_mapping *port_mapping,
+                          unsigned int count, u8 offset)
 {
-       u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count;
+       struct mlxsw_sp_port_mapping split_port_mapping;
        int err, i;
 
+       split_port_mapping = *port_mapping;
+       split_port_mapping.width /= count;
        for (i = 0; i < count; i++) {
                err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * offset,
-                                          true, module, width, i * width);
+                                          base_port, &split_port_mapping);
                if (err)
                        goto err_port_create;
+               split_port_mapping.lane += split_port_mapping.width;
        }
 
        return 0;
@@ -4022,45 +4136,55 @@ err_port_create:
 }
 
 static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp,
-                                        u8 base_port, unsigned int count)
+                                        u8 base_port,
+                                        unsigned int count, u8 offset)
 {
-       u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH;
+       struct mlxsw_sp_port_mapping *port_mapping;
        int i;
 
-       /* Split by four means we need to re-create two ports, otherwise
-        * only one.
-        */
-       count = count / 2;
-
-       for (i = 0; i < count; i++) {
-               local_port = base_port + i * 2;
-               if (mlxsw_sp->port_to_module[local_port] < 0)
+       /* Go over original unsplit ports in the gap and recreate them. */
+       for (i = 0; i < count * offset; i++) {
+               port_mapping = mlxsw_sp->port_mapping[base_port + i];
+               if (!port_mapping)
                        continue;
-               module = mlxsw_sp->port_to_module[local_port];
-
-               mlxsw_sp_port_create(mlxsw_sp, local_port, false, module,
-                                    width, 0);
+               mlxsw_sp_port_create(mlxsw_sp, base_port + i, 0, port_mapping);
        }
 }
 
+static int mlxsw_sp_local_ports_offset(struct mlxsw_core *mlxsw_core,
+                                      unsigned int count,
+                                      unsigned int max_width)
+{
+       enum mlxsw_res_id local_ports_in_x_res_id;
+       int split_width = max_width / count;
+
+       if (split_width == 1)
+               local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_1X;
+       else if (split_width == 2)
+               local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_2X;
+       else if (split_width == 4)
+               local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_4X;
+       else
+               return -EINVAL;
+
+       if (!mlxsw_core_res_valid(mlxsw_core, local_ports_in_x_res_id))
+               return -EINVAL;
+       return mlxsw_core_res_get(mlxsw_core, local_ports_in_x_res_id);
+}
+
 static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
                               unsigned int count,
                               struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-       u8 local_ports_in_1x, local_ports_in_2x, offset;
+       struct mlxsw_sp_port_mapping port_mapping;
        struct mlxsw_sp_port *mlxsw_sp_port;
-       u8 module, cur_width, base_port;
+       int max_width;
+       u8 base_port;
+       int offset;
        int i;
        int err;
 
-       if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) ||
-           !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X))
-               return -EIO;
-
-       local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X);
-       local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X);
-
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port) {
                dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
@@ -4069,47 +4193,70 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
                return -EINVAL;
        }
 
-       module = mlxsw_sp_port->mapping.module;
-       cur_width = mlxsw_sp_port->mapping.width;
+       /* Split ports cannot be split. */
+       if (mlxsw_sp_port->split) {
+               netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n");
+               NL_SET_ERR_MSG_MOD(extack, "Port cannot be split further");
+               return -EINVAL;
+       }
+
+       max_width = mlxsw_core_module_max_width(mlxsw_core,
+                                               mlxsw_sp_port->mapping.module);
+       if (max_width < 0) {
+               netdev_err(mlxsw_sp_port->dev, "Cannot get max width of port module\n");
+               NL_SET_ERR_MSG_MOD(extack, "Cannot get max width of port module");
+               return max_width;
+       }
 
-       if (count != 2 && count != 4) {
-               netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n");
-               NL_SET_ERR_MSG_MOD(extack, "Port can only be split into 2 or 4 ports");
+       /* Split port with non-max and 1 module width cannot be split. */
+       if (mlxsw_sp_port->mapping.width != max_width || max_width == 1) {
+               netdev_err(mlxsw_sp_port->dev, "Port cannot be split\n");
+               NL_SET_ERR_MSG_MOD(extack, "Port cannot be split");
                return -EINVAL;
        }
 
-       if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) {
-               netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n");
-               NL_SET_ERR_MSG_MOD(extack, "Port cannot be split further");
+       if (count == 1 || !is_power_of_2(count) || count > max_width) {
+               netdev_err(mlxsw_sp_port->dev, "Invalid split count\n");
+               NL_SET_ERR_MSG_MOD(extack, "Invalid split count");
                return -EINVAL;
        }
 
-       /* Make sure we have enough slave (even) ports for the split. */
-       if (count == 2) {
-               offset = local_ports_in_2x;
-               base_port = local_port;
-               if (mlxsw_sp->ports[base_port + local_ports_in_2x]) {
-                       netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n");
-                       NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration");
-                       return -EINVAL;
-               }
-       } else {
-               offset = local_ports_in_1x;
-               base_port = mlxsw_sp_cluster_base_port_get(local_port);
-               if (mlxsw_sp->ports[base_port + 1] ||
-                   mlxsw_sp->ports[base_port + 3]) {
+       offset = mlxsw_sp_local_ports_offset(mlxsw_core, count, max_width);
+       if (offset < 0) {
+               netdev_err(mlxsw_sp_port->dev, "Cannot obtain local port offset\n");
+               NL_SET_ERR_MSG_MOD(extack, "Cannot obtain local port offset");
+               return -EINVAL;
+       }
+
+       /* Only in case max split is being done, the local port and
+        * base port may differ.
+        */
+       base_port = count == max_width ?
+                   mlxsw_sp_cluster_base_port_get(local_port, max_width) :
+                   local_port;
+
+       for (i = 0; i < count * offset; i++) {
+               /* Expect base port to exist and also the one in the middle in
+                * case of maximal split count.
+                */
+               if (i == 0 || (count == max_width && i == count / 2))
+                       continue;
+
+               if (mlxsw_sp_port_created(mlxsw_sp, base_port + i)) {
                        netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n");
                        NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration");
                        return -EINVAL;
                }
        }
 
+       port_mapping = mlxsw_sp_port->mapping;
+
        for (i = 0; i < count; i++)
                if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset))
                        mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset);
 
-       err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count,
-                                        offset);
+       err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, &port_mapping,
+                                        count, offset);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n");
                goto err_port_split_create;
@@ -4118,7 +4265,7 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
        return 0;
 
 err_port_split_create:
-       mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
+       mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count, offset);
        return err;
 }
 
@@ -4126,19 +4273,13 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port,
                                 struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-       u8 local_ports_in_1x, local_ports_in_2x, offset;
        struct mlxsw_sp_port *mlxsw_sp_port;
-       u8 cur_width, base_port;
        unsigned int count;
+       int max_width;
+       u8 base_port;
+       int offset;
        int i;
 
-       if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) ||
-           !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X))
-               return -EIO;
-
-       local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X);
-       local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X);
-
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port) {
                dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
@@ -4153,25 +4294,30 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port,
                return -EINVAL;
        }
 
-       cur_width = mlxsw_sp_port->mapping.width;
-       count = cur_width == 1 ? 4 : 2;
+       max_width = mlxsw_core_module_max_width(mlxsw_core,
+                                               mlxsw_sp_port->mapping.module);
+       if (max_width < 0) {
+               netdev_err(mlxsw_sp_port->dev, "Cannot get max width of port module\n");
+               NL_SET_ERR_MSG_MOD(extack, "Cannot get max width of port module");
+               return max_width;
+       }
 
-       if (count == 2)
-               offset = local_ports_in_2x;
-       else
-               offset = local_ports_in_1x;
+       count = max_width / mlxsw_sp_port->mapping.width;
 
-       base_port = mlxsw_sp_cluster_base_port_get(local_port);
+       offset = mlxsw_sp_local_ports_offset(mlxsw_core, count, max_width);
+       if (WARN_ON(offset < 0)) {
+               netdev_err(mlxsw_sp_port->dev, "Cannot obtain local port offset\n");
+               NL_SET_ERR_MSG_MOD(extack, "Cannot obtain local port offset");
+               return -EINVAL;
+       }
 
-       /* Determine which ports to remove. */
-       if (count == 2 && local_port >= base_port + 2)
-               base_port = base_port + 2;
+       base_port = mlxsw_sp_port->split_base_local_port;
 
        for (i = 0; i < count; i++)
                if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset))
                        mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset);
 
-       mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
+       mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count, offset);
 
        return 0;
 }
@@ -4738,7 +4884,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
                                    unsigned long event, void *ptr);
 
 static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
-                        const struct mlxsw_bus_info *mlxsw_bus_info)
+                        const struct mlxsw_bus_info *mlxsw_bus_info,
+                        struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
        int err;
@@ -4831,7 +4978,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_acl_init;
        }
 
-       err = mlxsw_sp_router_init(mlxsw_sp);
+       err = mlxsw_sp_router_init(mlxsw_sp, extack);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
                goto err_router_init;
@@ -4864,7 +5011,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
         * respin.
         */
        mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
-       err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+       err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
+                                             &mlxsw_sp->netdevice_nb);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n");
                goto err_netdev_notifier;
@@ -4876,6 +5024,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_dpipe_init;
        }
 
+       err = mlxsw_sp_port_module_info_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to init port module info\n");
+               goto err_port_module_info_init;
+       }
+
        err = mlxsw_sp_ports_create(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
@@ -4885,9 +5039,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
        return 0;
 
 err_ports_create:
+       mlxsw_sp_port_module_info_fini(mlxsw_sp);
+err_port_module_info_init:
        mlxsw_sp_dpipe_fini(mlxsw_sp);
 err_dpipe_init:
-       unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+       unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
+                                         &mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
        if (mlxsw_sp->clock)
                mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
@@ -4924,7 +5081,8 @@ err_fids_init:
 }
 
 static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
-                         const struct mlxsw_bus_info *mlxsw_bus_info)
+                         const struct mlxsw_bus_info *mlxsw_bus_info,
+                         struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
@@ -4944,14 +5102,17 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
        mlxsw_sp->listeners = mlxsw_sp1_listener;
        mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
 
-       return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+       return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
 }
 
 static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
-                         const struct mlxsw_bus_info *mlxsw_bus_info)
+                         const struct mlxsw_bus_info *mlxsw_bus_info,
+                         struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+       mlxsw_sp->req_rev = &mlxsw_sp2_fw_rev;
+       mlxsw_sp->fw_filename = MLXSW_SP2_FW_FILENAME;
        mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
        mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
        mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
@@ -4964,7 +5125,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
        mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
        mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
 
-       return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+       return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
 }
 
 static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
@@ -4972,8 +5133,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
        mlxsw_sp_ports_remove(mlxsw_sp);
+       mlxsw_sp_port_module_info_fini(mlxsw_sp);
        mlxsw_sp_dpipe_fini(mlxsw_sp);
-       unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+       unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
+                                         &mlxsw_sp->netdevice_nb);
        if (mlxsw_sp->clock) {
                mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
                mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
@@ -5165,14 +5328,61 @@ static int mlxsw_sp2_resources_kvd_register(struct mlxsw_core *mlxsw_core)
                                         &kvd_size_params);
 }
 
+static int mlxsw_sp_resources_span_register(struct mlxsw_core *mlxsw_core)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_core);
+       struct devlink_resource_size_params span_size_params;
+       u32 max_span;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SPAN))
+               return -EIO;
+
+       max_span = MLXSW_CORE_RES_GET(mlxsw_core, MAX_SPAN);
+       devlink_resource_size_params_init(&span_size_params, max_span, max_span,
+                                         1, DEVLINK_RESOURCE_UNIT_ENTRY);
+
+       return devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_SPAN,
+                                        max_span, MLXSW_SP_RESOURCE_SPAN,
+                                        DEVLINK_RESOURCE_ID_PARENT_TOP,
+                                        &span_size_params);
+}
+
 static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
 {
-       return mlxsw_sp1_resources_kvd_register(mlxsw_core);
+       int err;
+
+       err = mlxsw_sp1_resources_kvd_register(mlxsw_core);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_resources_span_register(mlxsw_core);
+       if (err)
+               goto err_resources_span_register;
+
+       return 0;
+
+err_resources_span_register:
+       devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL);
+       return err;
 }
 
 static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
 {
-       return mlxsw_sp2_resources_kvd_register(mlxsw_core);
+       int err;
+
+       err = mlxsw_sp2_resources_kvd_register(mlxsw_core);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_resources_span_register(mlxsw_core);
+       if (err)
+               goto err_resources_span_register;
+
+       return 0;
+
+err_resources_span_register:
+       devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL);
+       return err;
 }
 
 static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
@@ -6565,3 +6775,4 @@ MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table);
 MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table);
 MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table);
 MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
+MODULE_FIRMWARE(MLXSW_SP2_FW_FILENAME);
index b2a0028..347bec9 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/dcbnl.h>
 #include <linux/in6.h>
 #include <linux/notifier.h>
+#include <linux/net_namespace.h>
 #include <net/psample.h>
 #include <net/pkt_cls.h>
 #include <net/red.h>
@@ -31,8 +32,6 @@
 
 #define MLXSW_SP_MID_MAX 7000
 
-#define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4
-
 #define MLXSW_SP_PORT_BASE_SPEED_25G 25000 /* Mb/s */
 #define MLXSW_SP_PORT_BASE_SPEED_50G 50000 /* Mb/s */
 
@@ -47,6 +46,8 @@
 #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
 #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
 
+#define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents"
+
 enum mlxsw_sp_resource_id {
        MLXSW_SP_RESOURCE_KVD = 1,
        MLXSW_SP_RESOURCE_KVD_LINEAR,
@@ -55,6 +56,7 @@ enum mlxsw_sp_resource_id {
        MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
        MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
        MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+       MLXSW_SP_RESOURCE_SPAN,
 };
 
 struct mlxsw_sp_port;
@@ -139,6 +141,12 @@ struct mlxsw_sp_port_type_speed_ops;
 struct mlxsw_sp_ptp_state;
 struct mlxsw_sp_ptp_ops;
 
+struct mlxsw_sp_port_mapping {
+       u8 module;
+       u8 width;
+       u8 lane;
+};
+
 struct mlxsw_sp {
        struct mlxsw_sp_port **ports;
        struct mlxsw_core *core;
@@ -146,7 +154,7 @@ struct mlxsw_sp {
        unsigned char base_mac[ETH_ALEN];
        const unsigned char *mac_mask;
        struct mlxsw_sp_upper *lags;
-       int *port_to_module;
+       struct mlxsw_sp_port_mapping **port_mapping;
        struct mlxsw_sp_sb *sb;
        struct mlxsw_sp_bridge *bridge;
        struct mlxsw_sp_router *router;
@@ -255,11 +263,11 @@ struct mlxsw_sp_port {
                struct ieee_pfc *pfc;
                enum mlxsw_reg_qpts_trust_state trust_state;
        } dcb;
-       struct {
-               u8 module;
-               u8 width;
-               u8 lane;
-       } mapping;
+       struct mlxsw_sp_port_mapping mapping; /* mapping is constant for the
+                                              * lifetime of this mlxsw_sp_port,
+                                              * but the same local port may be
+                                              * re-created later with a
+                                              * different mapping. */
        /* TC handles */
        struct list_head mall_tc_list;
        struct {
@@ -283,6 +291,7 @@ struct mlxsw_sp_port {
                u16 egr_types;
                struct mlxsw_sp_ptp_port_stats stats;
        } ptp;
+       u8 split_base_local_port;
 };
 
 struct mlxsw_sp_port_type_speed_ops {
@@ -524,7 +533,8 @@ union mlxsw_sp_l3addr {
        struct in6_addr addr6;
 };
 
-int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
+                        struct netlink_ext_ack *extack);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
                                         unsigned long event, void *ptr);
@@ -982,4 +992,9 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core,
 int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
                             const struct devlink_trap_group *group);
 
+static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp)
+{
+       return mlxsw_core_net(mlxsw_sp->core);
+}
+
 #endif
index b9eeae3..968f090 100644 (file)
@@ -35,6 +35,7 @@ struct mlxsw_sp_sb_cm {
 };
 
 #define MLXSW_SP_SB_INFI -1U
+#define MLXSW_SP_SB_REST -2U
 
 struct mlxsw_sp_sb_pm {
        u32 min_buff;
@@ -421,19 +422,16 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp)
                .freeze_size = _freeze_size,                            \
        }
 
-#define MLXSW_SP1_SB_PR_INGRESS_SIZE   12440000
-#define MLXSW_SP1_SB_PR_EGRESS_SIZE    13232000
 #define MLXSW_SP1_SB_PR_CPU_SIZE       (256 * 1000)
 
 /* Order according to mlxsw_sp1_sb_pool_dess */
 static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = {
-       MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-                      MLXSW_SP1_SB_PR_INGRESS_SIZE),
+       MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
-       MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
-                          MLXSW_SP1_SB_PR_EGRESS_SIZE, true, false),
+       MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST,
+                          true, false),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
@@ -445,19 +443,16 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = {
                           MLXSW_SP1_SB_PR_CPU_SIZE, true, false),
 };
 
-#define MLXSW_SP2_SB_PR_INGRESS_SIZE   35297568
-#define MLXSW_SP2_SB_PR_EGRESS_SIZE    35297568
 #define MLXSW_SP2_SB_PR_CPU_SIZE       (256 * 1000)
 
 /* Order according to mlxsw_sp2_sb_pool_dess */
 static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = {
-       MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-                      MLXSW_SP2_SB_PR_INGRESS_SIZE),
+       MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
-       MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
-                          MLXSW_SP2_SB_PR_EGRESS_SIZE, true, false),
+       MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST,
+                          true, false),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
@@ -471,11 +466,33 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = {
 
 static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
                                const struct mlxsw_sp_sb_pr *prs,
+                               const struct mlxsw_sp_sb_pool_des *pool_dess,
                                size_t prs_len)
 {
+       /* Round down, unlike mlxsw_sp_bytes_cells(). */
+       u32 sb_cells = div_u64(mlxsw_sp->sb->sb_size, mlxsw_sp->sb->cell_size);
+       u32 rest_cells[2] = {sb_cells, sb_cells};
        int i;
        int err;
 
+       /* Calculate how much space to give to the "REST" pools in either
+        * direction.
+        */
+       for (i = 0; i < prs_len; i++) {
+               enum mlxsw_reg_sbxx_dir dir = pool_dess[i].dir;
+               u32 size = prs[i].size;
+               u32 size_cells;
+
+               if (size == MLXSW_SP_SB_INFI || size == MLXSW_SP_SB_REST)
+                       continue;
+
+               size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size);
+               if (WARN_ON_ONCE(size_cells > rest_cells[dir]))
+                       continue;
+
+               rest_cells[dir] -= size_cells;
+       }
+
        for (i = 0; i < prs_len; i++) {
                u32 size = prs[i].size;
                u32 size_cells;
@@ -483,6 +500,10 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
                if (size == MLXSW_SP_SB_INFI) {
                        err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
                                                   0, true);
+               } else if (size == MLXSW_SP_SB_REST) {
+                       size_cells = rest_cells[pool_dess[i].dir];
+                       err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
+                                                  size_cells, false);
                } else {
                        size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size);
                        err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
@@ -904,7 +925,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
                return -EIO;
 
-       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE))
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, GUARANTEED_SHARED_BUFFER))
                return -EIO;
 
        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_HEADROOM_SIZE))
@@ -915,7 +936,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
                return -ENOMEM;
        mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE);
        mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
-                                                  MAX_BUFFER_SIZE);
+                                                  GUARANTEED_SHARED_BUFFER);
        max_headroom_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
                                               MAX_HEADROOM_SIZE);
        /* Round down, because this limit must not be overstepped. */
@@ -926,6 +947,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
        if (err)
                goto err_sb_ports_init;
        err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp->sb_vals->prs,
+                                  mlxsw_sp->sb_vals->pool_dess,
                                   mlxsw_sp->sb_vals->pool_count);
        if (err)
                goto err_sb_prs_init;
@@ -1013,7 +1035,8 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
        mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
        pr = &mlxsw_sp->sb_vals->prs[pool_index];
 
-       if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+       if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                     GUARANTEED_SHARED_BUFFER)) {
                NL_SET_ERR_MSG_MOD(extack, "Exceeded shared buffer size");
                return -EINVAL;
        }
@@ -1021,12 +1044,12 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
        if (pr->freeze_mode && pr->mode != mode) {
                NL_SET_ERR_MSG_MOD(extack, "Changing this pool's threshold type is forbidden");
                return -EINVAL;
-       };
+       }
 
        if (pr->freeze_size && pr->size != size) {
                NL_SET_ERR_MSG_MOD(extack, "Changing this pool's size is forbidden");
                return -EINVAL;
-       };
+       }
 
        return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode,
                                    pool_size, false);
index 17f334b..2153bcc 100644 (file)
@@ -870,7 +870,7 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
                    mlxsw_sp_fid_vni(fid, &vni)))
                goto out;
 
-       nve_dev = dev_get_by_index(&init_net, nve_ifindex);
+       nve_dev = dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex);
        if (!nve_dev)
                goto out;
 
index bdf53cf..68cc673 100644 (file)
@@ -305,7 +305,8 @@ mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
                        p->max);
                return -EINVAL;
        }
-       if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+       if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                       GUARANTEED_SHARED_BUFFER)) {
                dev_err(mlxsw_sp->bus_info->dev,
                        "spectrum: RED: max value %u is too big\n", p->max);
                return -EINVAL;
index a330b36..0e99b64 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/if_macvlan.h>
 #include <linux/refcount.h>
 #include <linux/jhash.h>
+#include <linux/net_namespace.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
@@ -2551,14 +2552,14 @@ static int mlxsw_sp_router_schedule_work(struct net *net,
        struct mlxsw_sp_netevent_work *net_work;
        struct mlxsw_sp_router *router;
 
-       if (!net_eq(net, &init_net))
+       router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
+       if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
                return NOTIFY_DONE;
 
        net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
        if (!net_work)
                return NOTIFY_BAD;
 
-       router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
        INIT_WORK(&net_work->work, cb);
        net_work->mlxsw_sp = router->mlxsw_sp;
        mlxsw_core_schedule_work(&net_work->work);
@@ -6019,12 +6020,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
                mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
                fib_info_put(fib_work->fen_info.fi);
                break;
-       case FIB_EVENT_RULE_ADD:
-               /* if we get here, a rule was added that we do not support.
-                * just do the fib_abort
-                */
-               mlxsw_sp_router_fib_abort(mlxsw_sp);
-               break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
                mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
@@ -6065,12 +6060,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
                                         fib_work->fib6_work.nrt6);
                mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
                break;
-       case FIB_EVENT_RULE_ADD:
-               /* if we get here, a rule was added that we do not support.
-                * just do the fib_abort
-                */
-               mlxsw_sp_router_fib_abort(mlxsw_sp);
-               break;
        }
        rtnl_unlock();
        kfree(fib_work);
@@ -6112,12 +6101,6 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
                                              &fib_work->ven_info);
                dev_put(fib_work->ven_info.dev);
                break;
-       case FIB_EVENT_RULE_ADD:
-               /* if we get here, a rule was added that we do not support.
-                * just do the fib_abort
-                */
-               mlxsw_sp_router_fib_abort(mlxsw_sp);
-               break;
        }
        rtnl_unlock();
        kfree(fib_work);
@@ -6213,7 +6196,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
        rule = fr_info->rule;
 
        /* Rule only affects locally generated traffic */
-       if (rule->iifindex == info->net->loopback_dev->ifindex)
+       if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
                return 0;
 
        switch (info->family) {
@@ -6250,8 +6233,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
        struct mlxsw_sp_router *router;
        int err;
 
-       if (!net_eq(info->net, &init_net) ||
-           (info->family != AF_INET && info->family != AF_INET6 &&
+       if ((info->family != AF_INET && info->family != AF_INET6 &&
             info->family != RTNL_FAMILY_IPMR &&
             info->family != RTNL_FAMILY_IP6MR))
                return NOTIFY_DONE;
@@ -6263,9 +6245,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
        case FIB_EVENT_RULE_DEL:
                err = mlxsw_sp_router_fib_rule_event(event, info,
                                                     router->mlxsw_sp);
-               if (!err || info->extack)
-                       return notifier_from_errno(err);
-               break;
+               return notifier_from_errno(err);
        case FIB_EVENT_ENTRY_ADD:
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_APPEND:  /* fall through */
@@ -7974,9 +7954,10 @@ static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
        mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
 }
 
-static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
+static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
 {
-       bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
+       struct net *net = mlxsw_sp_net(mlxsw_sp);
+       bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;
 
        mlxsw_sp_mp_hash_header_set(recr2_pl,
                                    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
@@ -7991,9 +7972,9 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
        mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
 }
 
-static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
+static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
 {
-       bool only_l3 = !ip6_multipath_hash_policy(&init_net);
+       bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));
 
        mlxsw_sp_mp_hash_header_set(recr2_pl,
                                    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
@@ -8021,8 +8002,8 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 
        seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
        mlxsw_reg_recr2_pack(recr2_pl, seed);
-       mlxsw_sp_mp4_hash_init(recr2_pl);
-       mlxsw_sp_mp6_hash_init(recr2_pl);
+       mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
+       mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);
 
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
 }
@@ -8053,7 +8034,8 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
 
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
-       bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
+       struct net *net = mlxsw_sp_net(mlxsw_sp);
+       bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
        char rgcr_pl[MLXSW_REG_RGCR_LEN];
        u64 max_rifs;
        int err;
@@ -8079,7 +8061,8 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 }
 
-int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
+                        struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp_router *router;
        int err;
@@ -8155,8 +8138,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
                goto err_dscp_init;
 
        mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
-       err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
-                                   mlxsw_sp_router_fib_dump_flush);
+       err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
+                                   &mlxsw_sp->router->fib_nb,
+                                   mlxsw_sp_router_fib_dump_flush, extack);
        if (err)
                goto err_register_fib_notifier;
 
@@ -8195,7 +8179,8 @@ err_register_inetaddr_notifier:
 
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 {
-       unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
+       unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
+                               &mlxsw_sp->router->fib_nb);
        unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
        mlxsw_sp_neigh_fini(mlxsw_sp);
        mlxsw_sp_vrs_fini(mlxsw_sp);
index 560a60e..200d324 100644 (file)
 #include "spectrum_span.h"
 #include "spectrum_switchdev.h"
 
+static u64 mlxsw_sp_span_occ_get(void *priv)
+{
+       const struct mlxsw_sp *mlxsw_sp = priv;
+       u64 occ = 0;
+       int i;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               if (mlxsw_sp->span.entries[i].ref_count)
+                       occ++;
+       }
+
+       return occ;
+}
+
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 {
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
        int i;
 
        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
@@ -36,13 +51,19 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
                curr->id = i;
        }
 
+       devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN,
+                                         mlxsw_sp_span_occ_get, mlxsw_sp);
+
        return 0;
 }
 
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 {
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
        int i;
 
+       devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN);
+
        for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
                struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
 
index 5ecb451..a3af171 100644 (file)
@@ -2591,7 +2591,7 @@ __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp,
        if (err)
                return err;
 
-       dev = __dev_get_by_index(&init_net, nve_ifindex);
+       dev = __dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex);
        if (!dev)
                return -EINVAL;
        *nve_dev = dev;
index 0d9356b..4ff1e62 100644 (file)
@@ -446,7 +446,8 @@ static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
 }
 
 static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core,
-                         const struct mlxsw_bus_info *mlxsw_bus_info)
+                         const struct mlxsw_bus_info *mlxsw_bus_info,
+                         struct netlink_ext_ack *extack)
 {
        struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
        int err;
index 1c14c05..de6cb22 100644 (file)
@@ -992,6 +992,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
        if (!dev)
                return -ENOMEM;
        SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev);
+       dev_net_set(dev, mlxsw_core_net(mlxsw_sx->core));
        mlxsw_sx_port = netdev_priv(dev);
        mlxsw_sx_port->dev = dev;
        mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
@@ -1563,7 +1564,8 @@ static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
 }
 
 static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
-                        const struct mlxsw_bus_info *mlxsw_bus_info)
+                        const struct mlxsw_bus_info *mlxsw_bus_info,
+                        struct netlink_ext_ack *extack)
 {
        struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
        int err;
index aac1151..723724b 100644 (file)
@@ -364,12 +364,12 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 
        for_each_available_child_of_node(ports, portnp) {
                struct device_node *phy_node;
+               phy_interface_t phy_mode;
                struct phy_device *phy;
                struct resource *res;
                struct phy *serdes;
                void __iomem *regs;
                char res_name[8];
-               int phy_mode;
                u32 port;
 
                if (of_property_read_u32(portnp, "reg", &port))
@@ -398,11 +398,11 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
                        goto out_put_ports;
                }
 
-               phy_mode = of_get_phy_mode(portnp);
-               if (phy_mode < 0)
-                       ocelot->ports[port]->phy_mode = PHY_INTERFACE_MODE_NA;
-               else
-                       ocelot->ports[port]->phy_mode = phy_mode;
+               err = of_get_phy_mode(portnp, &phy_mode);
+               if (err && err != -ENODEV)
+                       goto out_put_ports;
+
+               ocelot->ports[port]->phy_mode = phy_mode;
 
                switch (ocelot->ports[port]->phy_mode) {
                case PHY_INTERFACE_MODE_NA:
index 5afcb3c..c80bb83 100644 (file)
@@ -3952,7 +3952,7 @@ static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
 {
        struct nfp_insn_meta *meta1, *meta2;
-       const s32 exp_mask[] = {
+       static const s32 exp_mask[] = {
                [BPF_B] = 0x000000ffU,
                [BPF_H] = 0x0000ffffU,
                [BPF_W] = 0xffffffffU,
index 61aabff..bcdcd6d 100644 (file)
@@ -872,7 +872,8 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
 
                /* jump forward, a TX may have gotten lost, need to sync TX */
                if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4)
-                       tls_offload_tx_resync_request(nskb->sk);
+                       tls_offload_tx_resync_request(nskb->sk, seq,
+                                                     ntls->next_seq);
 
                *nr_frags = 0;
                return nskb;
index 2761f3a..49c7987 100644 (file)
@@ -1346,10 +1346,9 @@ static int nixge_probe(struct platform_device *pdev)
                }
        }
 
-       priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-       if ((int)priv->phy_mode < 0) {
+       err = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode);
+       if (err) {
                netdev_err(ndev, "not find \"phy-mode\" property\n");
-               err = -EINVAL;
                goto unregister_mdio;
        }
 
index 544012a..ebb81d6 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/etherdevice.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
@@ -391,6 +392,7 @@ struct rx_status_t {
 struct netdata_local {
        struct platform_device  *pdev;
        struct net_device       *ndev;
+       struct device_node      *phy_node;
        spinlock_t              lock;
        void __iomem            *net_base;
        u32                     msg_enable;
@@ -749,22 +751,26 @@ static void lpc_handle_link_change(struct net_device *ndev)
 static int lpc_mii_probe(struct net_device *ndev)
 {
        struct netdata_local *pldat = netdev_priv(ndev);
-       struct phy_device *phydev = phy_find_first(pldat->mii_bus);
-
-       if (!phydev) {
-               netdev_err(ndev, "no PHY found\n");
-               return -ENODEV;
-       }
+       struct phy_device *phydev;
 
        /* Attach to the PHY */
        if (lpc_phy_interface_mode(&pldat->pdev->dev) == PHY_INTERFACE_MODE_MII)
                netdev_info(ndev, "using MII interface\n");
        else
                netdev_info(ndev, "using RMII interface\n");
+
+       if (pldat->phy_node)
+               phydev =  of_phy_find_device(pldat->phy_node);
+       else
+               phydev = phy_find_first(pldat->mii_bus);
+       if (!phydev) {
+               netdev_err(ndev, "no PHY found\n");
+               return -ENODEV;
+       }
+
        phydev = phy_connect(ndev, phydev_name(phydev),
                             &lpc_handle_link_change,
                             lpc_phy_interface_mode(&pldat->pdev->dev));
-
        if (IS_ERR(phydev)) {
                netdev_err(ndev, "Could not attach to PHY\n");
                return PTR_ERR(phydev);
@@ -783,6 +789,7 @@ static int lpc_mii_probe(struct net_device *ndev)
 
 static int lpc_mii_init(struct netdata_local *pldat)
 {
+       struct device_node *node;
        int err = -ENXIO;
 
        pldat->mii_bus = mdiobus_alloc();
@@ -812,7 +819,10 @@ static int lpc_mii_init(struct netdata_local *pldat)
 
        platform_set_drvdata(pldat->pdev, pldat->mii_bus);
 
-       if (mdiobus_register(pldat->mii_bus))
+       node = of_get_child_by_name(pldat->pdev->dev.of_node, "mdio");
+       err = of_mdiobus_register(pldat->mii_bus, node);
+       of_node_put(node);
+       if (err)
                goto err_out_unregister_bus;
 
        if (lpc_mii_probe(pldat->ndev) != 0)
@@ -1345,6 +1355,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
        netdev_dbg(ndev, "DMA buffer V address :0x%p\n",
                        pldat->dma_buff_base_v);
 
+       pldat->phy_node = of_parse_phandle(np, "phy-handle", 0);
+
        /* Get MAC address from current HW setting (POR state is all zeros) */
        __lpc_get_mac(pldat, ndev->dev_addr);
 
index 7a70606..98e102a 100644 (file)
@@ -12,7 +12,7 @@ struct ionic_lif;
 
 #define IONIC_DRV_NAME         "ionic"
 #define IONIC_DRV_DESCRIPTION  "Pensando Ethernet NIC Driver"
-#define IONIC_DRV_VERSION      "0.15.0-k"
+#define IONIC_DRV_VERSION      "0.18.0-k"
 
 #define PCI_VENDOR_ID_PENSANDO                 0x1dd8
 
@@ -46,6 +46,8 @@ struct ionic {
        DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
        struct work_struct nb_work;
        struct notifier_block nb;
+       struct timer_list watchdog_timer;
+       int watchdog_period;
 };
 
 struct ionic_admin_ctx {
index d168a64..5f9d2ec 100644 (file)
 #include "ionic_dev.h"
 #include "ionic_lif.h"
 
+static void ionic_watchdog_cb(struct timer_list *t)
+{
+       struct ionic *ionic = from_timer(ionic, t, watchdog_timer);
+
+       mod_timer(&ionic->watchdog_timer,
+                 round_jiffies(jiffies + ionic->watchdog_period));
+
+       ionic_heartbeat_check(ionic);
+}
+
 void ionic_init_devinfo(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
@@ -72,6 +82,11 @@ int ionic_dev_setup(struct ionic *ionic)
                return -EFAULT;
        }
 
+       timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
+       ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+       mod_timer(&ionic->watchdog_timer,
+                 round_jiffies(jiffies + ionic->watchdog_period));
+
        idev->db_pages = bar->vaddr;
        idev->phy_db_pages = bar->bus_addr;
 
@@ -80,10 +95,53 @@ int ionic_dev_setup(struct ionic *ionic)
 
 void ionic_dev_teardown(struct ionic *ionic)
 {
-       /* place holder */
+       del_timer_sync(&ionic->watchdog_timer);
 }
 
 /* Devcmd Interface */
+int ionic_heartbeat_check(struct ionic *ionic)
+{
+       struct ionic_dev *idev = &ionic->idev;
+       unsigned long hb_time;
+       u32 fw_status;
+       u32 hb;
+
+       /* wait a little more than one second before testing again */
+       hb_time = jiffies;
+       if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
+               return 0;
+
+       /* firmware is useful only if fw_status is non-zero */
+       fw_status = ioread32(&idev->dev_info_regs->fw_status);
+       if (!fw_status)
+               return -ENXIO;
+
+       /* early FW has no heartbeat, else FW will return non-zero */
+       hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
+       if (!hb)
+               return 0;
+
+       /* are we stalled? */
+       if (hb == idev->last_hb) {
+               /* only complain once for each stall seen */
+               if (idev->last_hb_time != 1) {
+                       dev_info(ionic->dev, "FW heartbeat stalled at %d\n",
+                                idev->last_hb);
+                       idev->last_hb_time = 1;
+               }
+
+               return -ENXIO;
+       }
+
+       if (idev->last_hb_time == 1)
+               dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb);
+
+       idev->last_hb = hb;
+       idev->last_hb_time = hb_time;
+
+       return 0;
+}
+
 u8 ionic_dev_cmd_status(struct ionic_dev *idev)
 {
        return ioread8(&idev->dev_cmd_regs->comp.comp.status);
index 9610aeb..4665c5d 100644 (file)
@@ -16,6 +16,7 @@
 #define IONIC_MIN_TXRX_DESC            16
 #define IONIC_DEF_TXRX_DESC            4096
 #define IONIC_LIFS_MAX                 1024
+#define IONIC_WATCHDOG_SECS            5
 #define IONIC_ITR_COAL_USEC_DEFAULT    64
 
 #define IONIC_DEV_CMD_REG_VERSION      1
@@ -123,6 +124,9 @@ struct ionic_dev {
        union ionic_dev_info_regs __iomem *dev_info_regs;
        union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
 
+       unsigned long last_hb_time;
+       u32 last_hb;
+
        u64 __iomem *db_pages;
        dma_addr_t phy_db_pages;
 
@@ -151,12 +155,19 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
                              struct ionic_desc_info *desc_info,
                              struct ionic_cq_info *cq_info, void *cb_arg);
 
+struct ionic_page_info {
+       struct page *page;
+       dma_addr_t dma_addr;
+};
+
 struct ionic_desc_info {
        void *desc;
        void *sg_desc;
        struct ionic_desc_info *next;
        unsigned int index;
        unsigned int left;
+       unsigned int npages;
+       struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
        ionic_desc_cb cb;
        void *cb_arg;
 };
@@ -295,5 +306,6 @@ void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
 void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
 void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
                     unsigned int stop_index);
+int ionic_heartbeat_check(struct ionic *ionic);
 
 #endif /* _IONIC_DEV_H_ */
index af1647a..6fb27dc 100644 (file)
@@ -19,31 +19,30 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 
        err = devlink_info_driver_name_put(req, IONIC_DRV_NAME);
        if (err)
-               goto info_out;
+               return err;
 
        err = devlink_info_version_running_put(req,
                                               DEVLINK_INFO_VERSION_GENERIC_FW,
                                               idev->dev_info.fw_version);
        if (err)
-               goto info_out;
+               return err;
 
        snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_type);
        err = devlink_info_version_fixed_put(req,
                                             DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
                                             buf);
        if (err)
-               goto info_out;
+               return err;
 
        snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_rev);
        err = devlink_info_version_fixed_put(req,
                                             DEVLINK_INFO_VERSION_GENERIC_ASIC_REV,
                                             buf);
        if (err)
-               goto info_out;
+               return err;
 
        err = devlink_info_serial_number_put(req, idev->dev_info.serial_num);
 
-info_out:
        return err;
 }
 
index 7d10265..f778fff 100644 (file)
@@ -254,12 +254,9 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        struct ionic_dev *idev;
-       u32 req_rs, req_fc;
-       u8 fec_type;
        int err = 0;
 
        idev = &lif->ionic->idev;
-       fec_type = IONIC_PORT_FEC_TYPE_NONE;
 
        /* set autoneg */
        if (ks->base.autoneg != idev->port_info->config.an_enable) {
@@ -281,29 +278,6 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
                        return err;
        }
 
-       /* set FEC */
-       req_rs = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_RS);
-       req_fc = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_BASER);
-       if (req_rs && req_fc) {
-               netdev_info(netdev, "Only select one FEC mode at a time\n");
-               return -EINVAL;
-       } else if (req_fc) {
-               fec_type = IONIC_PORT_FEC_TYPE_FC;
-       } else if (req_rs) {
-               fec_type = IONIC_PORT_FEC_TYPE_RS;
-       } else if (!(req_rs | req_fc)) {
-               fec_type = IONIC_PORT_FEC_TYPE_NONE;
-       }
-
-       if (fec_type != idev->port_info->config.fec_type) {
-               mutex_lock(&ionic->dev_cmd_lock);
-               ionic_dev_cmd_port_fec(idev, fec_type);
-               err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
-               mutex_unlock(&ionic->dev_cmd_lock);
-               if (err)
-                       return err;
-       }
-
        return 0;
 }
 
@@ -353,6 +327,70 @@ static int ionic_set_pauseparam(struct net_device *netdev,
        return 0;
 }
 
+static int ionic_get_fecparam(struct net_device *netdev,
+                             struct ethtool_fecparam *fec)
+{
+       struct ionic_lif *lif = netdev_priv(netdev);
+
+       switch (lif->ionic->idev.port_info->config.fec_type) {
+       case IONIC_PORT_FEC_TYPE_NONE:
+               fec->active_fec = ETHTOOL_FEC_OFF;
+               break;
+       case IONIC_PORT_FEC_TYPE_RS:
+               fec->active_fec = ETHTOOL_FEC_RS;
+               break;
+       case IONIC_PORT_FEC_TYPE_FC:
+               fec->active_fec = ETHTOOL_FEC_BASER;
+               break;
+       }
+
+       fec->fec = ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS | ETHTOOL_FEC_BASER;
+
+       return 0;
+}
+
+static int ionic_set_fecparam(struct net_device *netdev,
+                             struct ethtool_fecparam *fec)
+{
+       struct ionic_lif *lif = netdev_priv(netdev);
+       u8 fec_type;
+       int ret = 0;
+
+       if (lif->ionic->idev.port_info->config.an_enable) {
+               netdev_err(netdev, "FEC request not allowed while autoneg is enabled\n");
+               return -EINVAL;
+       }
+
+       switch (fec->fec) {
+       case ETHTOOL_FEC_NONE:
+               fec_type = IONIC_PORT_FEC_TYPE_NONE;
+               break;
+       case ETHTOOL_FEC_OFF:
+               fec_type = IONIC_PORT_FEC_TYPE_NONE;
+               break;
+       case ETHTOOL_FEC_RS:
+               fec_type = IONIC_PORT_FEC_TYPE_RS;
+               break;
+       case ETHTOOL_FEC_BASER:
+               fec_type = IONIC_PORT_FEC_TYPE_FC;
+               break;
+       case ETHTOOL_FEC_AUTO:
+       default:
+               netdev_err(netdev, "FEC request 0x%04x not supported\n",
+                          fec->fec);
+               return -EINVAL;
+       }
+
+       if (fec_type != lif->ionic->idev.port_info->config.fec_type) {
+               mutex_lock(&lif->ionic->dev_cmd_lock);
+               ionic_dev_cmd_port_fec(&lif->ionic->idev, fec_type);
+               ret = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+               mutex_unlock(&lif->ionic->dev_cmd_lock);
+       }
+
+       return ret;
+}
+
 static int ionic_get_coalesce(struct net_device *netdev,
                              struct ethtool_coalesce *coalesce)
 {
@@ -372,7 +410,6 @@ static int ionic_set_coalesce(struct net_device *netdev,
        struct ionic_identity *ident;
        struct ionic_qcq *qcq;
        unsigned int i;
-       u32 usecs;
        u32 coal;
 
        if (coalesce->rx_max_coalesced_frames ||
@@ -410,26 +447,27 @@ static int ionic_set_coalesce(struct net_device *netdev,
                return -EINVAL;
        }
 
+       /* Convert the usec request to a HW useable value.  If they asked
+        * for non-zero and it resolved to zero, bump it up
+        */
        coal = ionic_coal_usec_to_hw(lif->ionic, coalesce->rx_coalesce_usecs);
-
-       if (coal > IONIC_INTR_CTRL_COAL_MAX)
-               return -ERANGE;
-
-       /* If they asked for non-zero and it resolved to zero, bump it up */
        if (!coal && coalesce->rx_coalesce_usecs)
                coal = 1;
 
-       /* Convert it back to get device resolution */
-       usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+       if (coal > IONIC_INTR_CTRL_COAL_MAX)
+               return -ERANGE;
 
-       if (usecs != lif->rx_coalesce_usecs) {
-               lif->rx_coalesce_usecs = usecs;
+       /* Save the new value */
+       lif->rx_coalesce_usecs = coalesce->rx_coalesce_usecs;
+       if (coal != lif->rx_coalesce_hw) {
+               lif->rx_coalesce_hw = coal;
 
                if (test_bit(IONIC_LIF_UP, lif->state)) {
                        for (i = 0; i < lif->nxqs; i++) {
                                qcq = lif->rxqcqs[i].qcq;
                                ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-                                                    qcq->intr.index, coal);
+                                                    qcq->intr.index,
+                                                    lif->rx_coalesce_hw);
                        }
                }
        }
@@ -453,6 +491,7 @@ static int ionic_set_ringparam(struct net_device *netdev,
 {
        struct ionic_lif *lif = netdev_priv(netdev);
        bool running;
+       int err;
 
        if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
                netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n");
@@ -470,8 +509,9 @@ static int ionic_set_ringparam(struct net_device *netdev,
            ring->rx_pending == lif->nrxq_descs)
                return 0;
 
-       if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
-               return -EBUSY;
+       err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET);
+       if (err)
+               return err;
 
        running = test_bit(IONIC_LIF_UP, lif->state);
        if (running)
@@ -504,6 +544,7 @@ static int ionic_set_channels(struct net_device *netdev,
 {
        struct ionic_lif *lif = netdev_priv(netdev);
        bool running;
+       int err;
 
        if (!ch->combined_count || ch->other_count ||
            ch->rx_count || ch->tx_count)
@@ -512,8 +553,9 @@ static int ionic_set_channels(struct net_device *netdev,
        if (ch->combined_count == lif->nxqs)
                return 0;
 
-       if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
-               return -EBUSY;
+       err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET);
+       if (err)
+               return err;
 
        running = test_bit(IONIC_LIF_UP, lif->state);
        if (running)
@@ -747,6 +789,7 @@ static const struct ethtool_ops ionic_ethtool_ops = {
        .get_regs               = ionic_get_regs,
        .get_link               = ethtool_op_get_link,
        .get_link_ksettings     = ionic_get_link_ksettings,
+       .set_link_ksettings     = ionic_set_link_ksettings,
        .get_coalesce           = ionic_get_coalesce,
        .set_coalesce           = ionic_set_coalesce,
        .get_ringparam          = ionic_get_ringparam,
@@ -769,7 +812,8 @@ static const struct ethtool_ops ionic_ethtool_ops = {
        .get_module_eeprom      = ionic_get_module_eeprom,
        .get_pauseparam         = ionic_get_pauseparam,
        .set_pauseparam         = ionic_set_pauseparam,
-       .set_link_ksettings     = ionic_set_link_ksettings,
+       .get_fecparam           = ionic_get_fecparam,
+       .set_fecparam           = ionic_set_fecparam,
        .nway_reset             = ionic_nway_reset,
 };
 
index 5bfdda1..dbdb7c5 100644 (file)
@@ -111,7 +111,7 @@ struct ionic_admin_cmd {
 };
 
 /**
- * struct admin_comp - General admin command completion format
+ * struct ionic_admin_comp - General admin command completion format
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
@@ -134,7 +134,7 @@ static inline u8 color_match(u8 color, u8 done_color)
 }
 
 /**
- * struct nop_cmd - NOP command
+ * struct ionic_nop_cmd - NOP command
  * @opcode: opcode
  */
 struct ionic_nop_cmd {
@@ -143,7 +143,7 @@ struct ionic_nop_cmd {
 };
 
 /**
- * struct nop_comp - NOP command completion
+ * struct ionic_nop_comp - NOP command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_nop_comp {
@@ -152,7 +152,7 @@ struct ionic_nop_comp {
 };
 
 /**
- * struct dev_init_cmd - Device init command
+ * struct ionic_dev_init_cmd - Device init command
  * @opcode:    opcode
  * @type:      device type
  */
@@ -172,7 +172,7 @@ struct ionic_dev_init_comp {
 };
 
 /**
- * struct dev_reset_cmd - Device reset command
+ * struct ionic_dev_reset_cmd - Device reset command
  * @opcode: opcode
  */
 struct ionic_dev_reset_cmd {
@@ -192,7 +192,7 @@ struct ionic_dev_reset_comp {
 #define IONIC_IDENTITY_VERSION_1       1
 
 /**
- * struct dev_identify_cmd - Driver/device identify command
+ * struct ionic_dev_identify_cmd - Driver/device identify command
  * @opcode:  opcode
  * @ver:     Highest version of identify supported by driver
  */
@@ -284,7 +284,7 @@ enum ionic_lif_type {
 };
 
 /**
- * struct lif_identify_cmd - lif identify command
+ * struct ionic_lif_identify_cmd - lif identify command
  * @opcode:  opcode
  * @type:    lif type (enum lif_type)
  * @ver:     version of identify returned by device
@@ -297,7 +297,7 @@ struct ionic_lif_identify_cmd {
 };
 
 /**
- * struct lif_identify_comp - lif identify command completion
+ * struct ionic_lif_identify_comp - lif identify command completion
  * @status:  status of the command (enum status_code)
  * @ver:     version of identify returned by device
  */
@@ -325,7 +325,7 @@ enum ionic_logical_qtype {
 };
 
 /**
- * struct lif_logical_qtype - Descriptor of logical to hardware queue type.
+ * struct ionic_lif_logical_qtype - Descriptor of logical to hardware queue type.
  * @qtype:          Hardware Queue Type.
  * @qid_count:      Number of Queue IDs of the logical type.
  * @qid_base:       Minimum Queue ID of the logical type.
@@ -349,7 +349,7 @@ enum ionic_lif_state {
  * @name:           lif name
  * @mtu:            mtu
  * @mac:            station mac address
- * @features:       features (enum eth_hw_features)
+ * @features:       features (enum ionic_eth_hw_features)
  * @queue_count:    queue counts per queue-type
  */
 union ionic_lif_config {
@@ -367,7 +367,7 @@ union ionic_lif_config {
 };
 
 /**
- * struct lif_identity - lif identity information (type-specific)
+ * struct ionic_lif_identity - lif identity information (type-specific)
  *
  * @capabilities    LIF capabilities
  *
@@ -441,11 +441,11 @@ union ionic_lif_identity {
 };
 
 /**
- * struct lif_init_cmd - LIF init command
+ * struct ionic_lif_init_cmd - LIF init command
  * @opcode:       opcode
  * @type:         LIF type (enum lif_type)
  * @index:        LIF index
- * @info_pa:      destination address for lif info (struct lif_info)
+ * @info_pa:      destination address for lif info (struct ionic_lif_info)
  */
 struct ionic_lif_init_cmd {
        u8     opcode;
@@ -457,7 +457,7 @@ struct ionic_lif_init_cmd {
 };
 
 /**
- * struct lif_init_comp - LIF init command completion
+ * struct ionic_lif_init_comp - LIF init command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_lif_init_comp {
@@ -468,7 +468,7 @@ struct ionic_lif_init_comp {
 };
 
 /**
- * struct q_init_cmd - Queue init command
+ * struct ionic_q_init_cmd - Queue init command
  * @opcode:       opcode
  * @type:         Logical queue type
  * @ver:          Queue version (defines opcode/descriptor scope)
@@ -525,7 +525,7 @@ struct ionic_q_init_cmd {
 };
 
 /**
- * struct q_init_comp - Queue init command completion
+ * struct ionic_q_init_comp - Queue init command completion
  * @status:     The status of the command (enum status_code)
  * @ver:        Queue version (defines opcode/descriptor scope)
  * @comp_index: The index in the descriptor ring for which this
@@ -556,7 +556,7 @@ enum ionic_txq_desc_opcode {
 };
 
 /**
- * struct txq_desc - Ethernet Tx queue descriptor format
+ * struct ionic_txq_desc - Ethernet Tx queue descriptor format
  * @opcode:       Tx operation, see TXQ_DESC_OPCODE_*:
  *
  *                   IONIC_TXQ_DESC_OPCODE_CSUM_NONE:
@@ -735,7 +735,7 @@ static inline void decode_txq_desc_cmd(u64 cmd, u8 *opcode, u8 *flags,
 #define IONIC_RX_MAX_SG_ELEMS  8
 
 /**
- * struct txq_sg_desc - Transmit scatter-gather (SG) list
+ * struct ionic_txq_sg_desc - Transmit scatter-gather (SG) list
  * @addr:      DMA address of SG element data buffer
  * @len:       Length of SG element data buffer, in bytes
  */
@@ -748,7 +748,7 @@ struct ionic_txq_sg_desc {
 };
 
 /**
- * struct txq_comp - Ethernet transmit queue completion descriptor
+ * struct ionic_txq_comp - Ethernet transmit queue completion descriptor
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *                 is the completion.
@@ -768,7 +768,7 @@ enum ionic_rxq_desc_opcode {
 };
 
 /**
- * struct rxq_desc - Ethernet Rx queue descriptor format
+ * struct ionic_rxq_desc - Ethernet Rx queue descriptor format
  * @opcode:       Rx operation, see RXQ_DESC_OPCODE_*:
  *
  *                   RXQ_DESC_OPCODE_SIMPLE:
@@ -789,7 +789,7 @@ struct ionic_rxq_desc {
 };
 
 /**
- * struct rxq_sg_desc - Receive scatter-gather (SG) list
+ * struct ionic_rxq_sg_desc - Receive scatter-gather (SG) list
  * @addr:      DMA address of SG element data buffer
  * @len:       Length of SG element data buffer, in bytes
  */
@@ -802,7 +802,7 @@ struct ionic_rxq_sg_desc {
 };
 
 /**
- * struct rxq_comp - Ethernet receive queue completion descriptor
+ * struct ionic_rxq_comp - Ethernet receive queue completion descriptor
  * @status:       The status of the command (enum status_code)
  * @num_sg_elems: Number of SG elements used by this descriptor
  * @comp_index:   The index in the descriptor ring for which this
@@ -896,7 +896,7 @@ enum ionic_eth_hw_features {
 };
 
 /**
- * struct q_control_cmd - Queue control command
+ * struct ionic_q_control_cmd - Queue control command
  * @opcode:     opcode
  * @type:       Queue type
  * @lif_index:  LIF index
@@ -1033,8 +1033,8 @@ enum ionic_port_loopback_mode {
 
 /**
  * Transceiver Status information
- * @state:    Transceiver status (enum xcvr_state)
- * @phy:      Physical connection type (enum phy_type)
+ * @state:    Transceiver status (enum ionic_xcvr_state)
+ * @phy:      Physical connection type (enum ionic_phy_type)
  * @pid:      Transceiver link mode (enum pid)
  * @sprom:    Transceiver sprom contents
  */
@@ -1051,9 +1051,9 @@ struct ionic_xcvr_status {
  * @mtu:                mtu
  * @state:              port admin state (enum port_admin_state)
  * @an_enable:          autoneg enable
- * @fec_type:           fec type (enum port_fec_type)
- * @pause_type:         pause type (enum port_pause_type)
- * @loopback_mode:      loopback mode (enum port_loopback_mode)
+ * @fec_type:           fec type (enum ionic_port_fec_type)
+ * @pause_type:         pause type (enum ionic_port_pause_type)
+ * @loopback_mode:      loopback mode (enum ionic_port_loopback_mode)
  */
 union ionic_port_config {
        struct {
@@ -1080,7 +1080,7 @@ union ionic_port_config {
 
 /**
  * Port Status information
- * @status:             link status (enum port_oper_status)
+ * @status:             link status (enum ionic_port_oper_status)
  * @id:                 port id
  * @speed:              link speed (in Mbps)
  * @xcvr:               tranceiver status
@@ -1094,7 +1094,7 @@ struct ionic_port_status {
 };
 
 /**
- * struct port_identify_cmd - Port identify command
+ * struct ionic_port_identify_cmd - Port identify command
  * @opcode:     opcode
  * @index:      port index
  * @ver:        Highest version of identify supported by driver
@@ -1107,7 +1107,7 @@ struct ionic_port_identify_cmd {
 };
 
 /**
- * struct port_identify_comp - Port identify command completion
+ * struct ionic_port_identify_comp - Port identify command completion
  * @status: The status of the command (enum status_code)
  * @ver:    Version of identify returned by device
  */
@@ -1118,10 +1118,10 @@ struct ionic_port_identify_comp {
 };
 
 /**
- * struct port_init_cmd - Port initialization command
+ * struct ionic_port_init_cmd - Port initialization command
  * @opcode:     opcode
  * @index:      port index
- * @info_pa:    destination address for port info (struct port_info)
+ * @info_pa:    destination address for port info (struct ionic_port_info)
  */
 struct ionic_port_init_cmd {
        u8     opcode;
@@ -1132,7 +1132,7 @@ struct ionic_port_init_cmd {
 };
 
 /**
- * struct port_init_comp - Port initialization command completion
+ * struct ionic_port_init_comp - Port initialization command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_port_init_comp {
@@ -1141,7 +1141,7 @@ struct ionic_port_init_comp {
 };
 
 /**
- * struct port_reset_cmd - Port reset command
+ * struct ionic_port_reset_cmd - Port reset command
  * @opcode:     opcode
  * @index:      port index
  */
@@ -1152,7 +1152,7 @@ struct ionic_port_reset_cmd {
 };
 
 /**
- * struct port_reset_comp - Port reset command completion
+ * struct ionic_port_reset_comp - Port reset command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_port_reset_comp {
@@ -1183,7 +1183,7 @@ enum ionic_port_attr {
 };
 
 /**
- * struct port_setattr_cmd - Set port attributes on the NIC
+ * struct ionic_port_setattr_cmd - Set port attributes on the NIC
  * @opcode:     Opcode
  * @index:      port index
  * @attr:       Attribute type (enum ionic_port_attr)
@@ -1207,7 +1207,7 @@ struct ionic_port_setattr_cmd {
 };
 
 /**
- * struct port_setattr_comp - Port set attr command completion
+ * struct ionic_port_setattr_comp - Port set attr command completion
  * @status:     The status of the command (enum status_code)
  * @color:      Color bit
  */
@@ -1218,7 +1218,7 @@ struct ionic_port_setattr_comp {
 };
 
 /**
- * struct port_getattr_cmd - Get port attributes from the NIC
+ * struct ionic_port_getattr_cmd - Get port attributes from the NIC
  * @opcode:     Opcode
  * @index:      port index
  * @attr:       Attribute type (enum ionic_port_attr)
@@ -1231,7 +1231,7 @@ struct ionic_port_getattr_cmd {
 };
 
 /**
- * struct port_getattr_comp - Port get attr command completion
+ * struct ionic_port_getattr_comp - Port get attr command completion
  * @status:     The status of the command (enum status_code)
  * @color:      Color bit
  */
@@ -1252,10 +1252,10 @@ struct ionic_port_getattr_comp {
 };
 
 /**
- * struct lif_status - Lif status register
+ * struct ionic_lif_status - Lif status register
  * @eid:             most recent NotifyQ event id
  * @port_num:        port the lif is connected to
- * @link_status:     port status (enum port_oper_status)
+ * @link_status:     port status (enum ionic_port_oper_status)
  * @link_speed:      speed of link in Mbps
  * @link_down_count: number of times link status changes
  */
@@ -1270,7 +1270,7 @@ struct ionic_lif_status {
 };
 
 /**
- * struct lif_reset_cmd - LIF reset command
+ * struct ionic_lif_reset_cmd - LIF reset command
  * @opcode:    opcode
  * @index:     LIF index
  */
@@ -1290,7 +1290,7 @@ enum ionic_dev_state {
 };
 
 /**
- * enum dev_attr - List of device attributes
+ * enum ionic_dev_attr - List of device attributes
  */
 enum ionic_dev_attr {
        IONIC_DEV_ATTR_STATE    = 0,
@@ -1299,10 +1299,10 @@ enum ionic_dev_attr {
 };
 
 /**
- * struct dev_setattr_cmd - Set Device attributes on the NIC
+ * struct ionic_dev_setattr_cmd - Set Device attributes on the NIC
  * @opcode:     Opcode
- * @attr:       Attribute type (enum dev_attr)
- * @state:      Device state (enum dev_state)
+ * @attr:       Attribute type (enum ionic_dev_attr)
+ * @state:      Device state (enum ionic_dev_state)
  * @name:       The bus info, e.g. PCI slot-device-function, 0 terminated
  * @features:   Device features
  */
@@ -1319,7 +1319,7 @@ struct ionic_dev_setattr_cmd {
 };
 
 /**
- * struct dev_setattr_comp - Device set attr command completion
+ * struct ionic_dev_setattr_comp - Device set attr command completion
  * @status:     The status of the command (enum status_code)
  * @features:   Device features
  * @color:      Color bit
@@ -1335,9 +1335,9 @@ struct ionic_dev_setattr_comp {
 };
 
 /**
- * struct dev_getattr_cmd - Get Device attributes from the NIC
+ * struct ionic_dev_getattr_cmd - Get Device attributes from the NIC
  * @opcode:     opcode
- * @attr:       Attribute type (enum dev_attr)
+ * @attr:       Attribute type (enum ionic_dev_attr)
  */
 struct ionic_dev_getattr_cmd {
        u8     opcode;
@@ -1346,7 +1346,7 @@ struct ionic_dev_getattr_cmd {
 };
 
 /**
- * struct dev_setattr_comp - Device set attr command completion
+ * struct ionic_dev_setattr_comp - Device set attr command completion
  * @status:     The status of the command (enum status_code)
  * @features:   Device features
  * @color:      Color bit
@@ -1376,7 +1376,7 @@ enum ionic_rss_hash_types {
 };
 
 /**
- * enum lif_attr - List of LIF attributes
+ * enum ionic_lif_attr - List of LIF attributes
  */
 enum ionic_lif_attr {
        IONIC_LIF_ATTR_STATE        = 0,
@@ -1389,15 +1389,15 @@ enum ionic_lif_attr {
 };
 
 /**
- * struct lif_setattr_cmd - Set LIF attributes on the NIC
+ * struct ionic_lif_setattr_cmd - Set LIF attributes on the NIC
  * @opcode:     Opcode
- * @type:       Attribute type (enum lif_attr)
+ * @type:       Attribute type (enum ionic_lif_attr)
  * @index:      LIF index
  * @state:      lif state (enum lif_state)
  * @name:       The netdev name string, 0 terminated
  * @mtu:        Mtu
  * @mac:        Station mac
- * @features:   Features (enum eth_hw_features)
+ * @features:   Features (enum ionic_eth_hw_features)
  * @rss:        RSS properties
  *              @types:     The hash types to enable (see rss_hash_types).
  *              @key:       The hash secret key.
@@ -1426,11 +1426,11 @@ struct ionic_lif_setattr_cmd {
 };
 
 /**
- * struct lif_setattr_comp - LIF set attr command completion
+ * struct ionic_lif_setattr_comp - LIF set attr command completion
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
- * @features:   features (enum eth_hw_features)
+ * @features:   features (enum ionic_eth_hw_features)
  * @color:      Color bit
  */
 struct ionic_lif_setattr_comp {
@@ -1445,9 +1445,9 @@ struct ionic_lif_setattr_comp {
 };
 
 /**
- * struct lif_getattr_cmd - Get LIF attributes from the NIC
+ * struct ionic_lif_getattr_cmd - Get LIF attributes from the NIC
  * @opcode:     Opcode
- * @attr:       Attribute type (enum lif_attr)
+ * @attr:       Attribute type (enum ionic_lif_attr)
  * @index:      LIF index
  */
 struct ionic_lif_getattr_cmd {
@@ -1458,7 +1458,7 @@ struct ionic_lif_getattr_cmd {
 };
 
 /**
- * struct lif_getattr_comp - LIF get attr command completion
+ * struct ionic_lif_getattr_comp - LIF get attr command completion
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
@@ -1466,7 +1466,7 @@ struct ionic_lif_getattr_cmd {
  * @name:       The netdev name string, 0 terminated
  * @mtu:        Mtu
  * @mac:        Station mac
- * @features:   Features (enum eth_hw_features)
+ * @features:   Features (enum ionic_eth_hw_features)
  * @color:      Color bit
  */
 struct ionic_lif_getattr_comp {
@@ -1492,7 +1492,7 @@ enum ionic_rx_mode {
 };
 
 /**
- * struct rx_mode_set_cmd - Set LIF's Rx mode command
+ * struct ionic_rx_mode_set_cmd - Set LIF's Rx mode command
  * @opcode:     opcode
  * @lif_index:  LIF index
  * @rx_mode:    Rx mode flags:
@@ -1519,7 +1519,7 @@ enum ionic_rx_filter_match_type {
 };
 
 /**
- * struct rx_filter_add_cmd - Add LIF Rx filter command
+ * struct ionic_rx_filter_add_cmd - Add LIF Rx filter command
  * @opcode:     opcode
  * @qtype:      Queue type
  * @lif_index:  LIF index
@@ -1550,7 +1550,7 @@ struct ionic_rx_filter_add_cmd {
 };
 
 /**
- * struct rx_filter_add_comp - Add LIF Rx filter command completion
+ * struct ionic_rx_filter_add_comp - Add LIF Rx filter command completion
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
@@ -1567,7 +1567,7 @@ struct ionic_rx_filter_add_comp {
 };
 
 /**
- * struct rx_filter_del_cmd - Delete LIF Rx filter command
+ * struct ionic_rx_filter_del_cmd - Delete LIF Rx filter command
  * @opcode:     opcode
  * @lif_index:  LIF index
  * @filter_id:  Filter ID
@@ -1583,7 +1583,7 @@ struct ionic_rx_filter_del_cmd {
 typedef struct ionic_admin_comp ionic_rx_filter_del_comp;
 
 /**
- * struct qos_identify_cmd - QoS identify command
+ * struct ionic_qos_identify_cmd - QoS identify command
  * @opcode:    opcode
  * @ver:     Highest version of identify supported by driver
  *
@@ -1595,7 +1595,7 @@ struct ionic_qos_identify_cmd {
 };
 
 /**
- * struct qos_identify_comp - QoS identify command completion
+ * struct ionic_qos_identify_comp - QoS identify command completion
  * @status: The status of the command (enum status_code)
  * @ver:    Version of identify returned by device
  */
@@ -1610,7 +1610,7 @@ struct ionic_qos_identify_comp {
 #define IONIC_QOS_DSCP_MAX_VALUES      64
 
 /**
- * enum qos_class
+ * enum ionic_qos_class
  */
 enum ionic_qos_class {
        IONIC_QOS_CLASS_DEFAULT         = 0,
@@ -1623,7 +1623,7 @@ enum ionic_qos_class {
 };
 
 /**
- * enum qos_class_type - Traffic classification criteria
+ * enum ionic_qos_class_type - Traffic classification criteria
  */
 enum ionic_qos_class_type {
        IONIC_QOS_CLASS_TYPE_NONE       = 0,
@@ -1632,7 +1632,7 @@ enum ionic_qos_class_type {
 };
 
 /**
- * enum qos_sched_type - Qos class scheduling type
+ * enum ionic_qos_sched_type - Qos class scheduling type
  */
 enum ionic_qos_sched_type {
        IONIC_QOS_SCHED_TYPE_STRICT     = 0,    /* Strict priority */
@@ -1640,15 +1640,15 @@ enum ionic_qos_sched_type {
 };
 
 /**
- * union qos_config - Qos configuration structure
+ * union ionic_qos_config - Qos configuration structure
  * @flags:             Configuration flags
  *     IONIC_QOS_CONFIG_F_ENABLE               enable
  *     IONIC_QOS_CONFIG_F_DROP                 drop/nodrop
  *     IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP         enable dot1q pcp rewrite
  *     IONIC_QOS_CONFIG_F_RW_IP_DSCP           enable ip dscp rewrite
- * @sched_type:                Qos class scheduling type (enum qos_sched_type)
- * @class_type:                Qos class type (enum qos_class_type)
- * @pause_type:                Qos pause type (enum qos_pause_type)
+ * @sched_type:                Qos class scheduling type (enum ionic_qos_sched_type)
+ * @class_type:                Qos class type (enum ionic_qos_class_type)
+ * @pause_type:                Qos pause type (enum ionic_qos_pause_type)
  * @name:              Qos class name
  * @mtu:               MTU of the class
  * @pfc_dot1q_pcp:     Pcp value for pause frames (valid iff F_NODROP)
@@ -1697,7 +1697,7 @@ union ionic_qos_config {
 };
 
 /**
- * union qos_identity - QoS identity structure
+ * union ionic_qos_identity - QoS identity structure
  * @version:   Version of the identify structure
  * @type:      QoS system type
  * @nclasses:  Number of usable QoS classes
@@ -1730,7 +1730,7 @@ struct ionic_qos_init_cmd {
 typedef struct ionic_admin_comp ionic_qos_init_comp;
 
 /**
- * struct qos_reset_cmd - Qos config reset command
+ * struct ionic_qos_reset_cmd - Qos config reset command
  * @opcode:    Opcode
  */
 struct ionic_qos_reset_cmd {
@@ -1742,7 +1742,7 @@ struct ionic_qos_reset_cmd {
 typedef struct ionic_admin_comp ionic_qos_reset_comp;
 
 /**
- * struct fw_download_cmd - Firmware download command
+ * struct ionic_fw_download_cmd - Firmware download command
  * @opcode:    opcode
  * @addr:      dma address of the firmware buffer
  * @offset:    offset of the firmware buffer within the full image
@@ -1765,9 +1765,9 @@ enum ionic_fw_control_oper {
 };
 
 /**
- * struct fw_control_cmd - Firmware control command
+ * struct ionic_fw_control_cmd - Firmware control command
  * @opcode:    opcode
- * @oper:      firmware control operation (enum fw_control_oper)
+ * @oper:      firmware control operation (enum ionic_fw_control_oper)
  * @slot:      slot to activate
  */
 struct ionic_fw_control_cmd {
@@ -1779,7 +1779,7 @@ struct ionic_fw_control_cmd {
 };
 
 /**
- * struct fw_control_comp - Firmware control copletion
+ * struct ionic_fw_control_comp - Firmware control copletion
  * @opcode:    opcode
  * @slot:      slot where the firmware was installed
  */
@@ -1797,13 +1797,13 @@ struct ionic_fw_control_comp {
  ******************************************************************/
 
 /**
- * struct rdma_reset_cmd - Reset RDMA LIF cmd
+ * struct ionic_rdma_reset_cmd - Reset RDMA LIF cmd
  * @opcode:        opcode
  * @lif_index:     lif index
  *
  * There is no rdma specific dev command completion struct.  Completion uses
- * the common struct admin_comp.  Only the status is indicated.  Nonzero status
- * means the LIF does not support rdma.
+ * the common struct ionic_admin_comp.  Only the status is indicated.
+ * Nonzero status means the LIF does not support rdma.
  **/
 struct ionic_rdma_reset_cmd {
        u8     opcode;
@@ -1813,7 +1813,7 @@ struct ionic_rdma_reset_cmd {
 };
 
 /**
- * struct rdma_queue_cmd - Create RDMA Queue command
+ * struct ionic_rdma_queue_cmd - Create RDMA Queue command
  * @opcode:        opcode, 52, 53
  * @lif_index      lif index
  * @qid_ver:       (qid | (rdma version << 24))
@@ -1839,7 +1839,7 @@ struct ionic_rdma_reset_cmd {
  * memory registration.
  *
  * There is no rdma specific dev command completion struct.  Completion uses
- * the common struct admin_comp.  Only the status is indicated.
+ * the common struct ionic_admin_comp.  Only the status is indicated.
  **/
 struct ionic_rdma_queue_cmd {
        u8     opcode;
@@ -1860,7 +1860,7 @@ struct ionic_rdma_queue_cmd {
  ******************************************************************/
 
 /**
- * struct notifyq_event
+ * struct ionic_notifyq_event
  * @eid:   event number
  * @ecode: event code
  * @data:  unspecified data about the event
@@ -1875,7 +1875,7 @@ struct ionic_notifyq_event {
 };
 
 /**
- * struct link_change_event
+ * struct ionic_link_change_event
  * @eid:               event number
  * @ecode:             event code = EVENT_OPCODE_LINK_CHANGE
  * @link_status:       link up or down, with error bits (enum port_status)
@@ -1892,7 +1892,7 @@ struct ionic_link_change_event {
 };
 
 /**
- * struct reset_event
+ * struct ionic_reset_event
  * @eid:               event number
  * @ecode:             event code = EVENT_OPCODE_RESET
  * @reset_code:                reset type
@@ -1910,7 +1910,7 @@ struct ionic_reset_event {
 };
 
 /**
- * struct heartbeat_event
+ * struct ionic_heartbeat_event
  * @eid:       event number
  * @ecode:     event code = EVENT_OPCODE_HEARTBEAT
  *
@@ -1923,7 +1923,7 @@ struct ionic_heartbeat_event {
 };
 
 /**
- * struct log_event
+ * struct ionic_log_event
  * @eid:       event number
  * @ecode:     event code = EVENT_OPCODE_LOG
  * @data:      log data
@@ -1937,7 +1937,7 @@ struct ionic_log_event {
 };
 
 /**
- * struct port_stats
+ * struct ionic_port_stats
  */
 struct ionic_port_stats {
        __le64 frames_rx_ok;
@@ -2067,7 +2067,7 @@ struct ionic_mgmt_port_stats {
 };
 
 /**
- * struct port_identity - port identity structure
+ * struct ionic_port_identity - port identity structure
  * @version:        identity structure version
  * @type:           type of port (enum port_type)
  * @num_lanes:      number of lanes for the port
@@ -2099,7 +2099,7 @@ union ionic_port_identity {
 };
 
 /**
- * struct port_info - port info structure
+ * struct ionic_port_info - port info structure
  * @port_status:     port status
  * @port_stats:      port stats
  */
@@ -2110,7 +2110,7 @@ struct ionic_port_info {
 };
 
 /**
- * struct lif_stats
+ * struct ionic_lif_stats
  */
 struct ionic_lif_stats {
        /* RX */
@@ -2264,7 +2264,7 @@ struct ionic_lif_stats {
 };
 
 /**
- * struct lif_info - lif info structure
+ * struct ionic_lif_info - lif info structure
  */
 struct ionic_lif_info {
        union ionic_lif_config config;
@@ -2357,7 +2357,7 @@ union ionic_dev_info_regs {
 };
 
 /**
- * union dev_cmd_regs - Device command register format (read-write)
+ * union ionic_dev_cmd_regs - Device command register format (read-write)
  * @doorbell:        Device Cmd Doorbell, write-only.
  *                   Write a 1 to signal device to process cmd,
  *                   poll done for completion.
@@ -2379,7 +2379,7 @@ union ionic_dev_cmd_regs {
 };
 
 /**
- * union dev_regs - Device register format in for bar 0 page 0
+ * union ionic_dev_regs - Device register format in for bar 0 page 0
  * @info:            Device info registers
  * @devcmd:          Device command registers
  */
@@ -2433,7 +2433,7 @@ union ionic_adminq_comp {
 #define IONIC_ASIC_TYPE_CAPRI                  0
 
 /**
- * struct doorbell - Doorbell register layout
+ * struct ionic_doorbell - Doorbell register layout
  * @p_index: Producer index
  * @ring:    Selects the specific ring of the queue to update.
  *           Type-specific meaning:
index 20faa8d..60fd14d 100644 (file)
@@ -244,6 +244,21 @@ static int ionic_qcq_disable(struct ionic_qcq *qcq)
        return ionic_adminq_post_wait(lif, &ctx);
 }
 
+static void ionic_lif_quiesce(struct ionic_lif *lif)
+{
+       struct ionic_admin_ctx ctx = {
+               .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+               .cmd.lif_setattr = {
+                       .opcode = IONIC_CMD_LIF_SETATTR,
+                       .attr = IONIC_LIF_ATTR_STATE,
+                       .index = lif->index,
+                       .state = IONIC_LIF_DISABLE
+               },
+       };
+
+       ionic_adminq_post_wait(lif, &ctx);
+}
+
 static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq)
 {
        struct ionic_dev *idev = &lif->ionic->idev;
@@ -609,12 +624,14 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
                        .lif_index = cpu_to_le16(lif->index),
                        .type = q->type,
                        .index = cpu_to_le32(q->index),
-                       .flags = cpu_to_le16(IONIC_QINIT_F_IRQ),
+                       .flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+                                            IONIC_QINIT_F_SG),
                        .intr_index = cpu_to_le16(cq->bound_intr->index),
                        .pid = cpu_to_le16(q->pid),
                        .ring_size = ilog2(q->num_descs),
                        .ring_base = cpu_to_le64(q->base_pa),
                        .cq_ring_base = cpu_to_le64(cq->base_pa),
+                       .sg_ring_base = cpu_to_le64(q->sg_base_pa),
                },
        };
        int err;
@@ -1432,7 +1449,6 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
        unsigned int flags;
        unsigned int i;
        int err = 0;
-       u32 coal;
 
        flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG;
        for (i = 0; i < lif->nxqs; i++) {
@@ -1448,21 +1464,22 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
                lif->txqcqs[i].qcq->stats = lif->txqcqs[i].stats;
        }
 
-       flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_INTR;
-       coal = ionic_coal_usec_to_hw(lif->ionic, lif->rx_coalesce_usecs);
+       flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG | IONIC_QCQ_F_INTR;
        for (i = 0; i < lif->nxqs; i++) {
                err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
                                      lif->nrxq_descs,
                                      sizeof(struct ionic_rxq_desc),
                                      sizeof(struct ionic_rxq_comp),
-                                     0, lif->kern_pid, &lif->rxqcqs[i].qcq);
+                                     sizeof(struct ionic_rxq_sg_desc),
+                                     lif->kern_pid, &lif->rxqcqs[i].qcq);
                if (err)
                        goto err_out;
 
                lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats;
 
                ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-                                    lif->rxqcqs[i].qcq->intr.index, coal);
+                                    lif->rxqcqs[i].qcq->intr.index,
+                                    lif->rx_coalesce_hw);
                ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq,
                                          lif->txqcqs[i].qcq);
        }
@@ -1592,6 +1609,7 @@ int ionic_stop(struct net_device *netdev)
        netif_tx_disable(netdev);
 
        ionic_txrx_disable(lif);
+       ionic_lif_quiesce(lif);
        ionic_txrx_deinit(lif);
        ionic_txrx_free(lif);
 
@@ -1621,8 +1639,9 @@ int ionic_reset_queues(struct ionic_lif *lif)
        /* Put off the next watchdog timeout */
        netif_trans_update(lif->netdev);
 
-       if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
-               return -EBUSY;
+       err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET);
+       if (err)
+               return err;
 
        running = netif_running(lif->netdev);
        if (running)
@@ -1641,7 +1660,6 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
        struct net_device *netdev;
        struct ionic_lif *lif;
        int tbl_sz;
-       u32 coal;
        int err;
 
        netdev = alloc_etherdev_mqs(sizeof(*lif),
@@ -1672,8 +1690,9 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
        lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
 
        /* Convert the default coalesce value to actual hw resolution */
-       coal = ionic_coal_usec_to_hw(lif->ionic, IONIC_ITR_COAL_USEC_DEFAULT);
-       lif->rx_coalesce_usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+       lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT;
+       lif->rx_coalesce_hw = ionic_coal_usec_to_hw(lif->ionic,
+                                                   lif->rx_coalesce_usecs);
 
        snprintf(lif->name, sizeof(lif->name), "lif%u", index);
 
index 6a95b42..a55fd1f 100644 (file)
@@ -175,7 +175,9 @@ struct ionic_lif {
        unsigned long *dbid_inuse;
        unsigned int dbid_count;
        struct dentry *dentry;
-       u32 rx_coalesce_usecs;
+       u32 rx_coalesce_usecs;          /* what the user asked for */
+       u32 rx_coalesce_hw;             /* what the hw is using */
+
        u32 flags;
        struct work_struct tx_timeout_work;
 };
@@ -187,15 +189,10 @@ struct ionic_lif {
 #define lif_to_txq(lif, i)     (&lif_to_txqcq((lif), i)->q)
 #define lif_to_rxq(lif, i)     (&lif_to_txqcq((lif), i)->q)
 
+/* return 0 if successfully set the bit, else non-zero */
 static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname)
 {
-       unsigned long tlimit = jiffies + HZ;
-
-       while (test_and_set_bit(bitname, lif->state) &&
-              time_before(jiffies, tlimit))
-               usleep_range(100, 200);
-
-       return test_bit(bitname, lif->state);
+       return wait_on_bit_lock(lif->state, bitname, TASK_INTERRUPTIBLE);
 }
 
 static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
index aab3114..3590ea7 100644 (file)
@@ -247,6 +247,10 @@ static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
                goto err_out;
        }
 
+       err = ionic_heartbeat_check(lif->ionic);
+       if (err)
+               goto err_out;
+
        memcpy(adminq->head->desc, &ctx->cmd, sizeof(ctx->cmd));
 
        dev_dbg(&lif->netdev->dev, "post admin queue command:\n");
@@ -307,6 +311,14 @@ int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
        return work_done;
 }
 
+static void ionic_dev_cmd_clean(struct ionic *ionic)
+{
+       union ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
+
+       iowrite32(0, &regs->doorbell);
+       memset_io(&regs->cmd, 0, sizeof(regs->cmd));
+}
+
 int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
 {
        struct ionic_dev *idev = &ionic->idev;
@@ -316,6 +328,7 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
        int opcode;
        int done;
        int err;
+       int hb;
 
        WARN_ON(in_interrupt());
 
@@ -330,7 +343,8 @@ try_again:
                if (done)
                        break;
                msleep(20);
-       } while (!done && time_before(jiffies, max_wait));
+               hb = ionic_heartbeat_check(ionic);
+       } while (!done && !hb && time_before(jiffies, max_wait));
        duration = jiffies - start_time;
 
        opcode = idev->dev_cmd_regs->cmd.cmd.opcode;
@@ -338,7 +352,15 @@ try_again:
                ionic_opcode_to_str(opcode), opcode,
                done, duration / HZ, duration);
 
+       if (!done && hb) {
+               ionic_dev_cmd_clean(ionic);
+               dev_warn(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
+                        ionic_opcode_to_str(opcode), opcode);
+               return -ENXIO;
+       }
+
        if (!done && !time_before(jiffies, max_wait)) {
+               ionic_dev_cmd_clean(ionic);
                dev_warn(ionic->dev, "DEVCMD %s (%d) timeout after %ld secs\n",
                         ionic_opcode_to_str(opcode), opcode, max_seconds);
                return -ETIMEDOUT;
index ab6663d..97e7994 100644 (file)
@@ -34,52 +34,110 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
        return netdev_get_tx_queue(q->lif->netdev, q->index);
 }
 
-static void ionic_rx_recycle(struct ionic_queue *q, struct ionic_desc_info *desc_info,
-                            struct sk_buff *skb)
+static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
+                                         unsigned int len, bool frags)
 {
-       struct ionic_rxq_desc *old = desc_info->desc;
-       struct ionic_rxq_desc *new = q->head->desc;
+       struct ionic_lif *lif = q->lif;
+       struct ionic_rx_stats *stats;
+       struct net_device *netdev;
+       struct sk_buff *skb;
+
+       netdev = lif->netdev;
+       stats = q_to_rx_stats(q);
+
+       if (frags)
+               skb = napi_get_frags(&q_to_qcq(q)->napi);
+       else
+               skb = netdev_alloc_skb_ip_align(netdev, len);
 
-       new->addr = old->addr;
-       new->len = old->len;
+       if (unlikely(!skb)) {
+               net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+                                    netdev->name, q->name);
+               stats->alloc_err++;
+               return NULL;
+       }
 
-       ionic_rxq_post(q, true, ionic_rx_clean, skb);
+       return skb;
 }
 
-static bool ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info,
-                              struct ionic_cq_info *cq_info, struct sk_buff **skb)
+static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
+                                     struct ionic_desc_info *desc_info,
+                                     struct ionic_cq_info *cq_info)
 {
        struct ionic_rxq_comp *comp = cq_info->cq_desc;
-       struct ionic_rxq_desc *desc = desc_info->desc;
-       struct net_device *netdev = q->lif->netdev;
        struct device *dev = q->lif->ionic->dev;
-       struct sk_buff *new_skb;
-       u16 clen, dlen;
-
-       clen = le16_to_cpu(comp->len);
-       dlen = le16_to_cpu(desc->len);
-       if (clen > q->lif->rx_copybreak) {
-               dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
-                                dlen, DMA_FROM_DEVICE);
-               return false;
-       }
+       struct ionic_page_info *page_info;
+       struct sk_buff *skb;
+       unsigned int i;
+       u16 frag_len;
+       u16 len;
 
-       new_skb = netdev_alloc_skb_ip_align(netdev, clen);
-       if (!new_skb) {
-               dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
-                                dlen, DMA_FROM_DEVICE);
-               return false;
-       }
+       page_info = &desc_info->pages[0];
+       len = le16_to_cpu(comp->len);
 
-       dma_sync_single_for_cpu(dev, (dma_addr_t)le64_to_cpu(desc->addr),
-                               clen, DMA_FROM_DEVICE);
+       prefetch(page_address(page_info->page) + NET_IP_ALIGN);
 
-       memcpy(new_skb->data, (*skb)->data, clen);
+       skb = ionic_rx_skb_alloc(q, len, true);
+       if (unlikely(!skb))
+               return NULL;
 
-       ionic_rx_recycle(q, desc_info, *skb);
-       *skb = new_skb;
+       i = comp->num_sg_elems + 1;
+       do {
+               if (unlikely(!page_info->page)) {
+                       struct napi_struct *napi = &q_to_qcq(q)->napi;
 
-       return true;
+                       napi->skb = NULL;
+                       dev_kfree_skb(skb);
+                       return NULL;
+               }
+
+               frag_len = min(len, (u16)PAGE_SIZE);
+               len -= frag_len;
+
+               dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
+                              PAGE_SIZE, DMA_FROM_DEVICE);
+               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+                               page_info->page, 0, frag_len, PAGE_SIZE);
+               page_info->page = NULL;
+               page_info++;
+               i--;
+       } while (i > 0);
+
+       return skb;
+}
+
+static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
+                                         struct ionic_desc_info *desc_info,
+                                         struct ionic_cq_info *cq_info)
+{
+       struct ionic_rxq_comp *comp = cq_info->cq_desc;
+       struct device *dev = q->lif->ionic->dev;
+       struct ionic_page_info *page_info;
+       struct sk_buff *skb;
+       u16 len;
+
+       page_info = &desc_info->pages[0];
+       len = le16_to_cpu(comp->len);
+
+       skb = ionic_rx_skb_alloc(q, len, false);
+       if (unlikely(!skb))
+               return NULL;
+
+       if (unlikely(!page_info->page)) {
+               dev_kfree_skb(skb);
+               return NULL;
+       }
+
+       dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
+                               len, DMA_FROM_DEVICE);
+       skb_copy_to_linear_data(skb, page_address(page_info->page), len);
+       dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
+                                  len, DMA_FROM_DEVICE);
+
+       skb_put(skb, len);
+       skb->protocol = eth_type_trans(skb, q->lif->netdev);
+
+       return skb;
 }
 
 static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
@@ -87,35 +145,34 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
 {
        struct ionic_rxq_comp *comp = cq_info->cq_desc;
        struct ionic_qcq *qcq = q_to_qcq(q);
-       struct sk_buff *skb = cb_arg;
        struct ionic_rx_stats *stats;
        struct net_device *netdev;
+       struct sk_buff *skb;
 
        stats = q_to_rx_stats(q);
        netdev = q->lif->netdev;
 
-       if (comp->status) {
-               ionic_rx_recycle(q, desc_info, skb);
+       if (comp->status)
                return;
-       }
 
-       if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) {
-               /* no packet processing while resetting */
-               ionic_rx_recycle(q, desc_info, skb);
+       /* no packet processing while resetting */
+       if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state)))
                return;
-       }
 
        stats->pkts++;
        stats->bytes += le16_to_cpu(comp->len);
 
-       ionic_rx_copybreak(q, desc_info, cq_info, &skb);
+       if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
+               skb = ionic_rx_copybreak(q, desc_info, cq_info);
+       else
+               skb = ionic_rx_frags(q, desc_info, cq_info);
 
-       skb_put(skb, le16_to_cpu(comp->len));
-       skb->protocol = eth_type_trans(skb, netdev);
+       if (unlikely(!skb))
+               return;
 
        skb_record_rx_queue(skb, q->index);
 
-       if (netdev->features & NETIF_F_RXHASH) {
+       if (likely(netdev->features & NETIF_F_RXHASH)) {
                switch (comp->pkt_type_color & IONIC_RXQ_COMP_PKT_TYPE_MASK) {
                case IONIC_PKT_TYPE_IPV4:
                case IONIC_PKT_TYPE_IPV6:
@@ -132,7 +189,7 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
                }
        }
 
-       if (netdev->features & NETIF_F_RXCSUM) {
+       if (likely(netdev->features & NETIF_F_RXCSUM)) {
                if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
                        skb->ip_summed = CHECKSUM_COMPLETE;
                        skb->csum = (__wsum)le16_to_cpu(comp->csum);
@@ -142,18 +199,21 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
                stats->csum_none++;
        }
 
-       if ((comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_TCP_BAD) ||
-           (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_UDP_BAD) ||
-           (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD))
+       if (unlikely((comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_TCP_BAD) ||
+                    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_UDP_BAD) ||
+                    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD)))
                stats->csum_error++;
 
-       if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+       if (likely(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
                if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_VLAN)
                        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
                                               le16_to_cpu(comp->vlan_tci));
        }
 
-       napi_gro_receive(&qcq->napi, skb);
+       if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
+               napi_gro_receive(&qcq->napi, skb);
+       else
+               napi_gro_frags(&qcq->napi);
 }
 
 static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
@@ -213,66 +273,125 @@ void ionic_rx_flush(struct ionic_cq *cq)
                                   work_done, IONIC_INTR_CRED_RESET_COALESCE);
 }
 
-static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, unsigned int len,
-                                         dma_addr_t *dma_addr)
+static struct page *ionic_rx_page_alloc(struct ionic_queue *q,
+                                       dma_addr_t *dma_addr)
 {
        struct ionic_lif *lif = q->lif;
        struct ionic_rx_stats *stats;
        struct net_device *netdev;
-       struct sk_buff *skb;
        struct device *dev;
+       struct page *page;
 
        netdev = lif->netdev;
        dev = lif->ionic->dev;
        stats = q_to_rx_stats(q);
-       skb = netdev_alloc_skb_ip_align(netdev, len);
-       if (!skb) {
-               net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
-                                    netdev->name, q->name);
+       page = alloc_page(GFP_ATOMIC);
+       if (unlikely(!page)) {
+               net_err_ratelimited("%s: Page alloc failed on %s!\n",
+                                   netdev->name, q->name);
                stats->alloc_err++;
                return NULL;
        }
 
-       *dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
-       if (dma_mapping_error(dev, *dma_addr)) {
-               dev_kfree_skb(skb);
-               net_warn_ratelimited("%s: DMA single map failed on %s!\n",
-                                    netdev->name, q->name);
+       *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(dev, *dma_addr))) {
+               __free_page(page);
+               net_err_ratelimited("%s: DMA single map failed on %s!\n",
+                                   netdev->name, q->name);
                stats->dma_map_err++;
                return NULL;
        }
 
-       return skb;
+       return page;
+}
+
+static void ionic_rx_page_free(struct ionic_queue *q, struct page *page,
+                              dma_addr_t dma_addr)
+{
+       struct ionic_lif *lif = q->lif;
+       struct net_device *netdev;
+       struct device *dev;
+
+       netdev = lif->netdev;
+       dev = lif->ionic->dev;
+
+       if (unlikely(!page)) {
+               net_err_ratelimited("%s: Trying to free unallocated buffer on %s!\n",
+                                   netdev->name, q->name);
+               return;
+       }
+
+       dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+
+       __free_page(page);
 }
 
-#define IONIC_RX_RING_DOORBELL_STRIDE          ((1 << 2) - 1)
+#define IONIC_RX_RING_DOORBELL_STRIDE          ((1 << 5) - 1)
+#define IONIC_RX_RING_HEAD_BUF_SZ              2048
 
 void ionic_rx_fill(struct ionic_queue *q)
 {
        struct net_device *netdev = q->lif->netdev;
+       struct ionic_desc_info *desc_info;
+       struct ionic_page_info *page_info;
+       struct ionic_rxq_sg_desc *sg_desc;
+       struct ionic_rxq_sg_elem *sg_elem;
        struct ionic_rxq_desc *desc;
-       struct sk_buff *skb;
-       dma_addr_t dma_addr;
+       unsigned int nfrags;
        bool ring_doorbell;
+       unsigned int i, j;
        unsigned int len;
-       unsigned int i;
 
        len = netdev->mtu + ETH_HLEN;
+       nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
 
        for (i = ionic_q_space_avail(q); i; i--) {
-               skb = ionic_rx_skb_alloc(q, len, &dma_addr);
-               if (!skb)
-                       return;
+               desc_info = q->head;
+               desc = desc_info->desc;
+               sg_desc = desc_info->sg_desc;
+               page_info = &desc_info->pages[0];
+
+               if (page_info->page) { /* recycle the buffer */
+                       ring_doorbell = ((q->head->index + 1) &
+                                       IONIC_RX_RING_DOORBELL_STRIDE) == 0;
+                       ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, NULL);
+                       continue;
+               }
 
-               desc = q->head->desc;
-               desc->addr = cpu_to_le64(dma_addr);
-               desc->len = cpu_to_le16(len);
-               desc->opcode = IONIC_RXQ_DESC_OPCODE_SIMPLE;
+               /* fill main descriptor - pages[0] */
+               desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
+                                             IONIC_RXQ_DESC_OPCODE_SIMPLE;
+               desc_info->npages = nfrags;
+               page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr);
+               if (unlikely(!page_info->page)) {
+                       desc->addr = 0;
+                       desc->len = 0;
+                       return;
+               }
+               desc->addr = cpu_to_le64(page_info->dma_addr);
+               desc->len = cpu_to_le16(PAGE_SIZE);
+               page_info++;
+
+               /* fill sg descriptors - pages[1..n] */
+               for (j = 0; j < nfrags - 1; j++) {
+                       if (page_info->page) /* recycle the sg buffer */
+                               continue;
+
+                       sg_elem = &sg_desc->elems[j];
+                       page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr);
+                       if (unlikely(!page_info->page)) {
+                               sg_elem->addr = 0;
+                               sg_elem->len = 0;
+                               return;
+                       }
+                       sg_elem->addr = cpu_to_le64(page_info->dma_addr);
+                       sg_elem->len = cpu_to_le16(PAGE_SIZE);
+                       page_info++;
+               }
 
                ring_doorbell = ((q->head->index + 1) &
                                IONIC_RX_RING_DOORBELL_STRIDE) == 0;
-
-               ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, skb);
+               ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, NULL);
        }
 }
 
@@ -283,15 +402,24 @@ static void ionic_rx_fill_cb(void *arg)
 
 void ionic_rx_empty(struct ionic_queue *q)
 {
-       struct device *dev = q->lif->ionic->dev;
        struct ionic_desc_info *cur;
        struct ionic_rxq_desc *desc;
+       unsigned int i;
 
        for (cur = q->tail; cur != q->head; cur = cur->next) {
                desc = cur->desc;
-               dma_unmap_single(dev, le64_to_cpu(desc->addr),
-                                le16_to_cpu(desc->len), DMA_FROM_DEVICE);
-               dev_kfree_skb(cur->cb_arg);
+               desc->addr = 0;
+               desc->len = 0;
+
+               for (i = 0; i < cur->npages; i++) {
+                       if (likely(cur->pages[i].page)) {
+                               ionic_rx_page_free(q, cur->pages[i].page,
+                                                  cur->pages[i].dma_addr);
+                               cur->pages[i].page = NULL;
+                               cur->pages[i].dma_addr = 0;
+                       }
+               }
+
                cur->cb_arg = NULL;
        }
 }
index d473b52..9ad568d 100644 (file)
 #include <linux/slab.h>
 #include "qed.h"
 
-/* Fields of IGU PF CONFIGRATION REGISTER */
+/* Fields of IGU PF CONFIGURATION REGISTER */
 #define IGU_PF_CONF_FUNC_EN       (0x1 << 0)    /* function enable        */
 #define IGU_PF_CONF_MSI_MSIX_EN   (0x1 << 1)    /* MSI/MSIX enable        */
 #define IGU_PF_CONF_INT_LINE_EN   (0x1 << 2)    /* INT enable             */
 #define IGU_PF_CONF_ATTN_BIT_EN   (0x1 << 3)    /* attention enable       */
 #define IGU_PF_CONF_SINGLE_ISR_EN (0x1 << 4)    /* single ISR mode enable */
 #define IGU_PF_CONF_SIMD_MODE     (0x1 << 5)    /* simd all ones mode     */
-/* Fields of IGU VF CONFIGRATION REGISTER */
+/* Fields of IGU VF CONFIGURATION REGISTER */
 #define IGU_VF_CONF_FUNC_EN        (0x1 << 0)  /* function enable        */
 #define IGU_VF_CONF_MSI_MSIX_EN    (0x1 << 1)  /* MSI/MSIX enable        */
 #define IGU_VF_CONF_SINGLE_ISR_EN  (0x1 << 4)  /* single ISR mode enable */
index 9a8fd79..368e885 100644 (file)
@@ -305,7 +305,7 @@ void qed_iov_bulletin_set_udp_ports(struct qed_hwfn *p_hwfn,
 
 /**
  * @brief Read sriov related information and allocated resources
- *  reads from configuraiton space, shmem, etc.
+ *  reads from configuration space, shmem, etc.
  *
  * @param p_hwfn
  *
index 9a6a9a0..d6cfe4f 100644 (file)
@@ -1298,7 +1298,7 @@ void qede_config_rx_mode(struct net_device *ndev)
        rx_mode.type = QED_FILTER_TYPE_RX_MODE;
 
        /* Remove all previous unicast secondary macs and multicast macs
-        * (configrue / leave the primary mac)
+        * (configure / leave the primary mac)
         */
        rc = qede_set_ucast_rx_mac(edev, QED_FILTER_XCAST_TYPE_REPLACE,
                                   edev->ndev->dev_addr);
index 5064c29..0704f8b 100644 (file)
@@ -741,12 +741,6 @@ static void rtl_unlock_config_regs(struct rtl8169_private *tp)
        RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 }
 
-static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
-{
-       pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
-                                          PCI_EXP_DEVCTL_READRQ, force);
-}
-
 static bool rtl_is_8125(struct rtl8169_private *tp)
 {
        return tp->mac_version >= RTL_GIGA_MAC_VER_60;
@@ -1568,7 +1562,7 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
 
        if (dev->mtu > JUMBO_1K &&
            tp->mac_version > RTL_GIGA_MAC_VER_06)
-               features &= ~NETIF_F_IP_CSUM;
+               features &= ~(NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO);
 
        return features;
 }
@@ -4036,14 +4030,12 @@ static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
 {
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
 {
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 }
 
 static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -4061,7 +4053,6 @@ static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
        RTL_W8(tp, MaxTxPacketSize, 0x3f);
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
@@ -4069,32 +4060,15 @@ static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
        RTL_W8(tp, MaxTxPacketSize, 0x0c);
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-}
-
-static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
-{
-       rtl_tx_performance_tweak(tp,
-               PCI_EXP_DEVCTL_READRQ_512B | PCI_EXP_DEVCTL_NOSNOOP_EN);
-}
-
-static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
-{
-       rtl_tx_performance_tweak(tp,
-               PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN);
 }
 
 static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       r8168b_0_hw_jumbo_enable(tp);
-
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
 }
 
 static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       r8168b_0_hw_jumbo_disable(tp);
-
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
@@ -4102,9 +4076,6 @@ static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
        rtl_unlock_config_regs(tp);
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_11:
-               r8168b_0_hw_jumbo_enable(tp);
-               break;
        case RTL_GIGA_MAC_VER_12:
        case RTL_GIGA_MAC_VER_17:
                r8168b_1_hw_jumbo_enable(tp);
@@ -4128,9 +4099,6 @@ static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
 {
        rtl_unlock_config_regs(tp);
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_11:
-               r8168b_0_hw_jumbo_disable(tp);
-               break;
        case RTL_GIGA_MAC_VER_12:
        case RTL_GIGA_MAC_VER_17:
                r8168b_1_hw_jumbo_disable(tp);
@@ -4451,18 +4419,11 @@ static void rtl8168g_set_pause_thresholds(struct rtl8169_private *tp,
        rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, high);
 }
 
-static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
+static void rtl_hw_start_8168b(struct rtl8169_private *tp)
 {
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 }
 
-static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
-{
-       rtl_hw_start_8168bb(tp);
-
-       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
-}
-
 static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 {
        RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
@@ -4554,19 +4515,6 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
        rtl_set_def_aspm_entry_latency(tp);
 
        rtl_disable_clock_request(tp);
-
-       if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-}
-
-static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
-{
-       rtl_set_def_aspm_entry_latency(tp);
-
-       if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
-       rtl_disable_clock_request(tp);
 }
 
 static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
@@ -4580,8 +4528,6 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
        rtl_set_def_aspm_entry_latency(tp);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_ephy_init(tp, e_info_8168d_4);
 
        rtl_enable_clock_request(tp);
@@ -4656,8 +4602,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
        rtl_set_def_aspm_entry_latency(tp);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
        rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
@@ -4720,8 +4664,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
        rtl_set_def_aspm_entry_latency(tp);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_reset_packet_filter(tp);
        rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f);
 
@@ -4772,8 +4714,7 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_hw_aspm_clkreq_enable(tp, false);
        rtl_ephy_init(tp, e_info_8168g_2);
 }
 
@@ -4958,8 +4899,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
        rtl_set_def_aspm_entry_latency(tp);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_reset_packet_filter(tp);
 
        rtl_eri_set_bits(tp, 0xdc, ERIAR_MASK_1111, BIT(4));
@@ -5017,8 +4956,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
        rtl_set_def_aspm_entry_latency(tp);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_reset_packet_filter(tp);
 
        rtl_eri_set_bits(tp, 0xd4, ERIAR_MASK_1111, 0x1f80);
@@ -5121,8 +5058,6 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
        RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        RTL_W8(tp, Config1,
               LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -5138,8 +5073,6 @@ static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
        rtl_set_def_aspm_entry_latency(tp);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 }
@@ -5200,8 +5133,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
        rtl_ephy_init(tp, e_info_8402);
 
-       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_set_fifo_size(tp, 0x00, 0x00, 0x02, 0x06);
        rtl_reset_packet_filter(tp);
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
@@ -5355,13 +5286,13 @@ static void rtl_hw_config(struct rtl8169_private *tp)
                [RTL_GIGA_MAC_VER_08] = rtl_hw_start_8102e_3,
                [RTL_GIGA_MAC_VER_09] = rtl_hw_start_8102e_2,
                [RTL_GIGA_MAC_VER_10] = NULL,
-               [RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168bb,
-               [RTL_GIGA_MAC_VER_12] = rtl_hw_start_8168bef,
+               [RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168b,
+               [RTL_GIGA_MAC_VER_12] = rtl_hw_start_8168b,
                [RTL_GIGA_MAC_VER_13] = NULL,
                [RTL_GIGA_MAC_VER_14] = NULL,
                [RTL_GIGA_MAC_VER_15] = NULL,
                [RTL_GIGA_MAC_VER_16] = NULL,
-               [RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168bef,
+               [RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168b,
                [RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1,
                [RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1,
                [RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2,
@@ -5375,7 +5306,7 @@ static void rtl_hw_config(struct rtl8169_private *tp)
                [RTL_GIGA_MAC_VER_28] = rtl_hw_start_8168d_4,
                [RTL_GIGA_MAC_VER_29] = rtl_hw_start_8105e_1,
                [RTL_GIGA_MAC_VER_30] = rtl_hw_start_8105e_2,
-               [RTL_GIGA_MAC_VER_31] = rtl_hw_start_8168dp,
+               [RTL_GIGA_MAC_VER_31] = rtl_hw_start_8168d,
                [RTL_GIGA_MAC_VER_32] = rtl_hw_start_8168e_1,
                [RTL_GIGA_MAC_VER_33] = rtl_hw_start_8168e_1,
                [RTL_GIGA_MAC_VER_34] = rtl_hw_start_8168e_2,
@@ -5417,11 +5348,6 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168(struct rtl8169_private *tp)
 {
-       if (tp->mac_version == RTL_GIGA_MAC_VER_13 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_16)
-               pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
-                                        PCI_EXP_DEVCTL_NOSNOOP_EN);
-
        if (rtl_is_8168evl_up(tp))
                RTL_W8(tp, MaxTxPacketSize, EarlySize);
        else
@@ -5570,18 +5496,15 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
 
                data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
                if (!data) {
-                       rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
-                       goto err_out;
+                       rtl8169_rx_clear(tp);
+                       return -ENOMEM;
                }
                tp->Rx_databuff[i] = data;
        }
 
        rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1);
-       return 0;
 
-err_out:
-       rtl8169_rx_clear(tp);
-       return -ENOMEM;
+       return 0;
 }
 
 static int rtl8169_init_ring(struct rtl8169_private *tp)
index de9aa8c..5ea14b5 100644 (file)
@@ -2046,7 +2046,9 @@ static int ravb_probe(struct platform_device *pdev)
        spin_lock_init(&priv->lock);
        INIT_WORK(&priv->work, ravb_tx_timeout_work);
 
-       priv->phy_interface = of_get_phy_mode(np);
+       error = of_get_phy_mode(np, &priv->phy_interface);
+       if (error && error != -ENODEV)
+               goto out_release;
 
        priv->no_avb_link = of_property_read_bool(np, "renesas,no-ether-link");
        priv->avb_link_active_low =
index 7ba35a0..e19b49c 100644 (file)
@@ -3183,6 +3183,7 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
 {
        struct device_node *np = dev->of_node;
        struct sh_eth_plat_data *pdata;
+       phy_interface_t interface;
        const char *mac_addr;
        int ret;
 
@@ -3190,10 +3191,10 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
        if (!pdata)
                return NULL;
 
-       ret = of_get_phy_mode(np);
-       if (ret < 0)
+       ret = of_get_phy_mode(np, &interface);
+       if (ret)
                return NULL;
-       pdata->phy_interface = ret;
+       pdata->phy_interface = interface;
 
        mac_addr = of_get_mac_address(np);
        if (!IS_ERR(mac_addr))
index 786b158..bc4f951 100644 (file)
@@ -2189,9 +2189,6 @@ static int rocker_router_fib_event(struct notifier_block *nb,
        struct rocker_fib_event_work *fib_work;
        struct fib_notifier_info *info = ptr;
 
-       if (!net_eq(info->net, &init_net))
-               return NOTIFY_DONE;
-
        if (info->family != AF_INET)
                return NOTIFY_DONE;
 
@@ -2994,7 +2991,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
         * the device, so no need to pass a callback.
         */
        rocker->fib_nb.notifier_call = rocker_router_fib_event;
-       err = register_fib_notifier(&rocker->fib_nb, NULL);
+       err = register_fib_notifier(&init_net, &rocker->fib_nb, NULL, NULL);
        if (err)
                goto err_register_fib_notifier;
 
@@ -3021,7 +3018,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 err_register_switchdev_blocking_notifier:
        unregister_switchdev_notifier(&rocker_switchdev_notifier);
 err_register_switchdev_notifier:
-       unregister_fib_notifier(&rocker->fib_nb);
+       unregister_fib_notifier(&init_net, &rocker->fib_nb);
 err_register_fib_notifier:
        rocker_remove_ports(rocker);
 err_probe_ports:
@@ -3057,7 +3054,7 @@ static void rocker_remove(struct pci_dev *pdev)
        unregister_switchdev_blocking_notifier(nb);
 
        unregister_switchdev_notifier(&rocker_switchdev_notifier);
-       unregister_fib_notifier(&rocker->fib_nb);
+       unregister_fib_notifier(&init_net, &rocker->fib_nb);
        rocker_remove_ports(rocker);
        rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
        destroy_workqueue(rocker->rocker_owq);
index 2412c87..33f7940 100644 (file)
@@ -30,12 +30,15 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev,
 {
        struct device_node *np = pdev->dev.of_node;
        struct sxgbe_dma_cfg *dma_cfg;
+       int err;
 
        if (!np)
                return -ENODEV;
 
        *mac = of_get_mac_address(np);
-       plat->interface = of_get_phy_mode(np);
+       err = of_get_phy_mode(np, &plat->interface);
+       if (err && err != -ENODEV)
+               return err;
 
        plat->bus_id = of_alias_get_id(np, "ethernet");
        if (plat->bus_id < 0)
index 0ec13f5..ad68eb0 100644 (file)
@@ -946,8 +946,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx)
                /* Extra channels, even those with TXQs (PTP), do not require
                 * PIO resources.
                 */
-               if (!channel->type->want_pio)
+               if (!channel->type->want_pio ||
+                   channel->channel >= efx->xdp_channel_offset)
                        continue;
+
                efx_for_each_channel_tx_queue(tx_queue, channel) {
                        /* We assign the PIO buffers to queues in
                         * reverse order to allow for the following
@@ -1296,8 +1298,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
        int rc;
 
        channel_vis = max(efx->n_channels,
-                         (efx->n_tx_channels + efx->n_extra_tx_channels) *
-                         EFX_TXQ_TYPES);
+                         ((efx->n_tx_channels + efx->n_extra_tx_channels) *
+                          EFX_TXQ_TYPES) +
+                          efx->n_xdp_channels * efx->xdp_tx_per_channel);
 
 #ifdef EFX_USE_PIO
        /* Try to allocate PIO buffers if wanted and if the full
@@ -2434,11 +2437,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
        /* TSOv2 is a limited resource that can only be configured on a limited
         * number of queues. TSO without checksum offload is not really a thing,
         * so we only enable it for those queues.
-        * TSOv2 cannot be used with Hardware timestamping.
+        * TSOv2 cannot be used with Hardware timestamping, and is never needed
+        * for XDP tx.
         */
        if (csum_offload && (nic_data->datapath_caps2 &
                        (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
-           !tx_queue->timestamping) {
+           !tx_queue->timestamping && !tx_queue->xdp_tx) {
                tso_v2 = true;
                netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
                                channel->channel);
index 2fef740..0fa9972 100644 (file)
@@ -226,6 +226,10 @@ static void efx_fini_napi_channel(struct efx_channel *channel);
 static void efx_fini_struct(struct efx_nic *efx);
 static void efx_start_all(struct efx_nic *efx);
 static void efx_stop_all(struct efx_nic *efx);
+static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
+static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
+static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
+                       u32 flags);
 
 #define EFX_ASSERT_RESET_SERIALISED(efx)               \
        do {                                            \
@@ -340,6 +344,8 @@ static int efx_poll(struct napi_struct *napi, int budget)
 
        spent = efx_process_channel(channel, budget);
 
+       xdp_do_flush_map();
+
        if (spent < budget) {
                if (efx_channel_has_rx_queue(channel) &&
                    efx->irq_rx_adaptive &&
@@ -579,9 +585,14 @@ efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
        int number;
 
        number = channel->channel;
-       if (efx->tx_channel_offset == 0) {
+
+       if (number >= efx->xdp_channel_offset &&
+           !WARN_ON_ONCE(!efx->n_xdp_channels)) {
+               type = "-xdp";
+               number -= efx->xdp_channel_offset;
+       } else if (efx->tx_channel_offset == 0) {
                type = "";
-       } else if (channel->channel < efx->tx_channel_offset) {
+       } else if (number < efx->tx_channel_offset) {
                type = "-rx";
        } else {
                type = "-tx";
@@ -651,7 +662,7 @@ static void efx_start_datapath(struct efx_nic *efx)
        efx->rx_dma_len = (efx->rx_prefix_size +
                           EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
                           efx->type->rx_buffer_padding);
-       rx_buf_len = (sizeof(struct efx_rx_page_state) +
+       rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
                      efx->rx_ip_align + efx->rx_dma_len);
        if (rx_buf_len <= PAGE_SIZE) {
                efx->rx_scatter = efx->type->always_rx_scatter;
@@ -774,6 +785,7 @@ static void efx_stop_datapath(struct efx_nic *efx)
                efx_for_each_possible_channel_tx_queue(tx_queue, channel)
                        efx_fini_tx_queue(tx_queue);
        }
+       efx->xdp_rxq_info_failed = false;
 }
 
 static void efx_remove_channel(struct efx_channel *channel)
@@ -798,6 +810,8 @@ static void efx_remove_channels(struct efx_nic *efx)
 
        efx_for_each_channel(channel, efx)
                efx_remove_channel(channel);
+
+       kfree(efx->xdp_tx_queues);
 }
 
 int
@@ -1435,6 +1449,101 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
        return count;
 }
 
+static int efx_allocate_msix_channels(struct efx_nic *efx,
+                                     unsigned int max_channels,
+                                     unsigned int extra_channels,
+                                     unsigned int parallelism)
+{
+       unsigned int n_channels = parallelism;
+       int vec_count;
+       int n_xdp_tx;
+       int n_xdp_ev;
+
+       if (efx_separate_tx_channels)
+               n_channels *= 2;
+       n_channels += extra_channels;
+
+       /* To allow XDP transmit to happen from arbitrary NAPI contexts
+        * we allocate a TX queue per CPU. We share event queues across
+        * multiple tx queues, assuming tx and ev queues are both
+        * maximum size.
+        */
+
+       n_xdp_tx = num_possible_cpus();
+       n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
+
+       /* Check resources.
+        * We need a channel per event queue, plus a VI per tx queue.
+        * This may be more pessimistic than it needs to be.
+        */
+       if (n_channels + n_xdp_ev > max_channels) {
+               netif_err(efx, drv, efx->net_dev,
+                         "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
+                         n_xdp_ev, n_channels, max_channels);
+               efx->n_xdp_channels = 0;
+               efx->xdp_tx_per_channel = 0;
+               efx->xdp_tx_queue_count = 0;
+       } else {
+               efx->n_xdp_channels = n_xdp_ev;
+               efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
+               efx->xdp_tx_queue_count = n_xdp_tx;
+               n_channels += n_xdp_ev;
+               netif_dbg(efx, drv, efx->net_dev,
+                         "Allocating %d TX and %d event queues for XDP\n",
+                         n_xdp_tx, n_xdp_ev);
+       }
+
+       n_channels = min(n_channels, max_channels);
+
+       vec_count = pci_msix_vec_count(efx->pci_dev);
+       if (vec_count < 0)
+               return vec_count;
+       if (vec_count < n_channels) {
+               netif_err(efx, drv, efx->net_dev,
+                         "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
+                         vec_count, n_channels);
+               netif_err(efx, drv, efx->net_dev,
+                         "WARNING: Performance may be reduced.\n");
+               n_channels = vec_count;
+       }
+
+       efx->n_channels = n_channels;
+
+       /* Do not create the PTP TX queue(s) if PTP uses the MC directly. */
+       if (extra_channels && !efx_ptp_use_mac_tx_timestamps(efx))
+               n_channels--;
+
+       /* Ignore XDP tx channels when creating rx channels. */
+       n_channels -= efx->n_xdp_channels;
+
+       if (efx_separate_tx_channels) {
+               efx->n_tx_channels =
+                       min(max(n_channels / 2, 1U),
+                           efx->max_tx_channels);
+               efx->tx_channel_offset =
+                       n_channels - efx->n_tx_channels;
+               efx->n_rx_channels =
+                       max(n_channels -
+                           efx->n_tx_channels, 1U);
+       } else {
+               efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
+               efx->tx_channel_offset = 0;
+               efx->n_rx_channels = n_channels;
+       }
+
+       if (efx->n_xdp_channels)
+               efx->xdp_channel_offset = efx->tx_channel_offset +
+                                         efx->n_tx_channels;
+       else
+               efx->xdp_channel_offset = efx->n_channels;
+
+       netif_dbg(efx, drv, efx->net_dev,
+                 "Allocating %u RX channels\n",
+                 efx->n_rx_channels);
+
+       return efx->n_channels;
+}
+
 /* Probe the number and type of interrupts we are able to obtain, and
  * the resulting numbers of channels and RX queues.
  */
@@ -1449,19 +1558,19 @@ static int efx_probe_interrupts(struct efx_nic *efx)
                        ++extra_channels;
 
        if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+               unsigned int parallelism = efx_wanted_parallelism(efx);
                struct msix_entry xentries[EFX_MAX_CHANNELS];
                unsigned int n_channels;
 
-               n_channels = efx_wanted_parallelism(efx);
-               if (efx_separate_tx_channels)
-                       n_channels *= 2;
-               n_channels += extra_channels;
-               n_channels = min(n_channels, efx->max_channels);
-
-               for (i = 0; i < n_channels; i++)
-                       xentries[i].entry = i;
-               rc = pci_enable_msix_range(efx->pci_dev,
-                                          xentries, 1, n_channels);
+               rc = efx_allocate_msix_channels(efx, efx->max_channels,
+                                               extra_channels, parallelism);
+               if (rc >= 0) {
+                       n_channels = rc;
+                       for (i = 0; i < n_channels; i++)
+                               xentries[i].entry = i;
+                       rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
+                                                  n_channels);
+               }
                if (rc < 0) {
                        /* Fall back to single channel MSI */
                        netif_err(efx, drv, efx->net_dev,
@@ -1480,21 +1589,6 @@ static int efx_probe_interrupts(struct efx_nic *efx)
                }
 
                if (rc > 0) {
-                       efx->n_channels = n_channels;
-                       if (n_channels > extra_channels)
-                               n_channels -= extra_channels;
-                       if (efx_separate_tx_channels) {
-                               efx->n_tx_channels = min(max(n_channels / 2,
-                                                            1U),
-                                                        efx->max_tx_channels);
-                               efx->n_rx_channels = max(n_channels -
-                                                        efx->n_tx_channels,
-                                                        1U);
-                       } else {
-                               efx->n_tx_channels = min(n_channels,
-                                                        efx->max_tx_channels);
-                               efx->n_rx_channels = n_channels;
-                       }
                        for (i = 0; i < efx->n_channels; i++)
                                efx_get_channel(efx, i)->irq =
                                        xentries[i].vector;
@@ -1506,6 +1600,8 @@ static int efx_probe_interrupts(struct efx_nic *efx)
                efx->n_channels = 1;
                efx->n_rx_channels = 1;
                efx->n_tx_channels = 1;
+               efx->n_xdp_channels = 0;
+               efx->xdp_channel_offset = efx->n_channels;
                rc = pci_enable_msi(efx->pci_dev);
                if (rc == 0) {
                        efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
@@ -1524,12 +1620,14 @@ static int efx_probe_interrupts(struct efx_nic *efx)
                efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
                efx->n_rx_channels = 1;
                efx->n_tx_channels = 1;
+               efx->n_xdp_channels = 0;
+               efx->xdp_channel_offset = efx->n_channels;
                efx->legacy_irq = efx->pci_dev->irq;
        }
 
-       /* Assign extra channels if possible */
+       /* Assign extra channels if possible, before XDP channels */
        efx->n_extra_tx_channels = 0;
-       j = efx->n_channels;
+       j = efx->xdp_channel_offset;
        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
                if (!efx->extra_channel_type[i])
                        continue;
@@ -1724,29 +1822,50 @@ static void efx_remove_interrupts(struct efx_nic *efx)
        efx->legacy_irq = 0;
 }
 
-static void efx_set_channels(struct efx_nic *efx)
+static int efx_set_channels(struct efx_nic *efx)
 {
        struct efx_channel *channel;
        struct efx_tx_queue *tx_queue;
+       int xdp_queue_number;
 
        efx->tx_channel_offset =
                efx_separate_tx_channels ?
                efx->n_channels - efx->n_tx_channels : 0;
 
+       if (efx->xdp_tx_queue_count) {
+               EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
+
+               /* Allocate array for XDP TX queue lookup. */
+               efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
+                                            sizeof(*efx->xdp_tx_queues),
+                                            GFP_KERNEL);
+               if (!efx->xdp_tx_queues)
+                       return -ENOMEM;
+       }
+
        /* We need to mark which channels really have RX and TX
         * queues, and adjust the TX queue numbers if we have separate
         * RX-only and TX-only channels.
         */
+       xdp_queue_number = 0;
        efx_for_each_channel(channel, efx) {
                if (channel->channel < efx->n_rx_channels)
                        channel->rx_queue.core_index = channel->channel;
                else
                        channel->rx_queue.core_index = -1;
 
-               efx_for_each_channel_tx_queue(tx_queue, channel)
+               efx_for_each_channel_tx_queue(tx_queue, channel) {
                        tx_queue->queue -= (efx->tx_channel_offset *
                                            EFX_TXQ_TYPES);
+
+                       if (efx_channel_is_xdp_tx(channel) &&
+                           xdp_queue_number < efx->xdp_tx_queue_count) {
+                               efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
+                               xdp_queue_number++;
+                       }
+               }
        }
+       return 0;
 }
 
 static int efx_probe_nic(struct efx_nic *efx)
@@ -1776,7 +1895,9 @@ static int efx_probe_nic(struct efx_nic *efx)
                if (rc)
                        goto fail1;
 
-               efx_set_channels(efx);
+               rc = efx_set_channels(efx);
+               if (rc)
+                       goto fail1;
 
                /* dimension_resources can fail with EAGAIN */
                rc = efx->type->dimension_resources(efx);
@@ -2022,6 +2143,10 @@ static void efx_stop_all(struct efx_nic *efx)
 
 static void efx_remove_all(struct efx_nic *efx)
 {
+       rtnl_lock();
+       efx_xdp_setup_prog(efx, NULL);
+       rtnl_unlock();
+
        efx_remove_channels(efx);
        efx_remove_filters(efx);
 #ifdef CONFIG_SFC_SRIOV
@@ -2082,6 +2207,8 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
                        channel->irq_moderation_us = rx_usecs;
                else if (efx_channel_has_tx_queues(channel))
                        channel->irq_moderation_us = tx_usecs;
+               else if (efx_channel_is_xdp_tx(channel))
+                       channel->irq_moderation_us = tx_usecs;
        }
 
        return 0;
@@ -2277,6 +2404,17 @@ static void efx_watchdog(struct net_device *net_dev)
        efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
 }
 
+static unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
+{
+       /* The maximum MTU that we can fit in a single page, allowing for
+        * framing, overhead and XDP headroom.
+        */
+       int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
+                      efx->rx_prefix_size + efx->type->rx_buffer_padding +
+                      efx->rx_ip_align + XDP_PACKET_HEADROOM;
+
+       return PAGE_SIZE - overhead;
+}
 
 /* Context: process, rtnl_lock() held. */
 static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
@@ -2288,6 +2426,14 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
        if (rc)
                return rc;
 
+       if (rtnl_dereference(efx->xdp_prog) &&
+           new_mtu > efx_xdp_max_mtu(efx)) {
+               netif_err(efx, drv, efx->net_dev,
+                         "Requested MTU of %d too big for XDP (max: %d)\n",
+                         new_mtu, efx_xdp_max_mtu(efx));
+               return -EINVAL;
+       }
+
        netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
        efx_device_detach_sync(efx);
@@ -2489,8 +2635,65 @@ static const struct net_device_ops efx_netdev_ops = {
 #endif
        .ndo_udp_tunnel_add     = efx_udp_tunnel_add,
        .ndo_udp_tunnel_del     = efx_udp_tunnel_del,
+       .ndo_xdp_xmit           = efx_xdp_xmit,
+       .ndo_bpf                = efx_xdp
 };
 
+static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
+{
+       struct bpf_prog *old_prog;
+
+       if (efx->xdp_rxq_info_failed) {
+               netif_err(efx, drv, efx->net_dev,
+                         "Unable to bind XDP program due to previous failure of rxq_info\n");
+               return -EINVAL;
+       }
+
+       if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) {
+               netif_err(efx, drv, efx->net_dev,
+                         "Unable to configure XDP with MTU of %d (max: %d)\n",
+                         efx->net_dev->mtu, efx_xdp_max_mtu(efx));
+               return -EINVAL;
+       }
+
+       old_prog = rtnl_dereference(efx->xdp_prog);
+       rcu_assign_pointer(efx->xdp_prog, prog);
+       /* Release the reference that was originally passed by the caller. */
+       if (old_prog)
+               bpf_prog_put(old_prog);
+
+       return 0;
+}
+
+/* Context: process, rtnl_lock() held. */
+static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+       struct efx_nic *efx = netdev_priv(dev);
+       struct bpf_prog *xdp_prog;
+
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return efx_xdp_setup_prog(efx, xdp->prog);
+       case XDP_QUERY_PROG:
+               xdp_prog = rtnl_dereference(efx->xdp_prog);
+               xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
+static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
+                       u32 flags)
+{
+       struct efx_nic *efx = netdev_priv(dev);
+
+       if (!netif_running(dev))
+               return -EINVAL;
+
+       return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH);
+}
+
 static void efx_update_name(struct efx_nic *efx)
 {
        strcpy(efx->name, efx->net_dev->name);
index 04fed7c..45c7ae4 100644 (file)
@@ -322,4 +322,7 @@ static inline bool efx_rwsem_assert_write_locked(struct rw_semaphore *sem)
        return true;
 }
 
+int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
+                      bool flush);
+
 #endif /* EFX_EFX_H */
index 86b9658..8db593f 100644 (file)
@@ -83,6 +83,10 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
        EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
        EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
        EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
+       EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops),
+       EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops),
+       EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx),
+       EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect),
 };
 
 #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc)
@@ -399,6 +403,19 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
                        }
                }
        }
+       if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+               unsigned short xdp;
+
+               for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+                       n_stats++;
+                       if (strings) {
+                               snprintf(strings, ETH_GSTRING_LEN,
+                                        "tx-xdp-cpu-%hu.tx_packets", xdp);
+                               strings += ETH_GSTRING_LEN;
+                       }
+               }
+       }
+
        return n_stats;
 }
 
@@ -509,6 +526,14 @@ static void efx_ethtool_get_stats(struct net_device *net_dev,
                        data++;
                }
        }
+       if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+               int xdp;
+
+               for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+                       data[0] = efx->xdp_tx_queues[xdp]->tx_packets;
+                       data++;
+               }
+       }
 
        efx_ptp_update_stats(efx, data);
 }
index 284a1b0..04e49ea 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/i2c.h>
 #include <linux/mtd/mtd.h>
 #include <net/busy_poll.h>
+#include <net/xdp.h>
 
 #include "enum.h"
 #include "bitfield.h"
@@ -136,7 +137,8 @@ struct efx_special_buffer {
  * struct efx_tx_buffer - buffer state for a TX descriptor
  * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
  *     freed when descriptor completes
- * @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor.
+ * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data
+ *     member is the associated buffer to drop a page reference on.
  * @dma_addr: DMA address of the fragment.
  * @flags: Flags for allocation and DMA mapping type
  * @len: Length of this fragment.
@@ -146,7 +148,10 @@ struct efx_special_buffer {
  * Only valid if @unmap_len != 0.
  */
 struct efx_tx_buffer {
-       const struct sk_buff *skb;
+       union {
+               const struct sk_buff *skb;
+               struct xdp_frame *xdpf;
+       };
        union {
                efx_qword_t option;
                dma_addr_t dma_addr;
@@ -160,6 +165,7 @@ struct efx_tx_buffer {
 #define EFX_TX_BUF_SKB         2       /* buffer is last part of skb */
 #define EFX_TX_BUF_MAP_SINGLE  8       /* buffer was mapped with dma_map_single() */
 #define EFX_TX_BUF_OPTION      0x10    /* empty buffer for option descriptor */
+#define EFX_TX_BUF_XDP         0x20    /* buffer was sent with XDP */
 
 /**
  * struct efx_tx_queue - An Efx TX queue
@@ -189,6 +195,7 @@ struct efx_tx_buffer {
  * @piobuf_offset: Buffer offset to be specified in PIO descriptors
  * @initialised: Has hardware queue been initialised?
  * @timestamping: Is timestamping enabled for this channel?
+ * @xdp_tx: Is this an XDP tx queue?
  * @handle_tso: TSO xmit preparation handler.  Sets up the TSO metadata and
  *     may also map tx data, depending on the nature of the TSO implementation.
  * @read_count: Current read pointer.
@@ -250,6 +257,7 @@ struct efx_tx_queue {
        unsigned int piobuf_offset;
        bool initialised;
        bool timestamping;
+       bool xdp_tx;
 
        /* Function pointers used in the fast path. */
        int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
@@ -363,6 +371,8 @@ struct efx_rx_page_state {
  *     refill was triggered.
  * @recycle_count: RX buffer recycle counter.
  * @slow_fill: Timer used to defer efx_nic_generate_fill_event().
+ * @xdp_rxq_info: XDP specific RX queue information.
+ * @xdp_rxq_info_valid: Is xdp_rxq_info valid data?.
  */
 struct efx_rx_queue {
        struct efx_nic *efx;
@@ -394,6 +404,8 @@ struct efx_rx_queue {
        unsigned int slow_fill_count;
        /* Statistics to supplement MAC stats */
        unsigned long rx_packets;
+       struct xdp_rxq_info xdp_rxq_info;
+       bool xdp_rxq_info_valid;
 };
 
 enum efx_sync_events_state {
@@ -441,6 +453,10 @@ enum efx_sync_events_state {
  *     lack of descriptors
  * @n_rx_merge_events: Number of RX merged completion events
  * @n_rx_merge_packets: Number of RX packets completed by merged events
+ * @n_rx_xdp_drops: Count of RX packets intentionally dropped due to XDP
+ * @n_rx_xdp_bad_drops: Count of RX packets dropped due to XDP errors
+ * @n_rx_xdp_tx: Count of RX packets retransmitted due to XDP
+ * @n_rx_xdp_redirect: Count of RX packets redirected to a different NIC by XDP
  * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
  *     __efx_rx_packet(), or zero if there is none
  * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
@@ -494,6 +510,10 @@ struct efx_channel {
        unsigned int n_rx_nodesc_trunc;
        unsigned int n_rx_merge_events;
        unsigned int n_rx_merge_packets;
+       unsigned int n_rx_xdp_drops;
+       unsigned int n_rx_xdp_bad_drops;
+       unsigned int n_rx_xdp_tx;
+       unsigned int n_rx_xdp_redirect;
 
        unsigned int rx_pkt_n_frags;
        unsigned int rx_pkt_index;
@@ -818,6 +838,8 @@ struct efx_async_filter_insertion {
  * @msi_context: Context for each MSI
  * @extra_channel_types: Types of extra (non-traffic) channels that
  *     should be allocated for this NIC
+ * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues.
+ * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit.
  * @rxq_entries: Size of receive queues requested by user.
  * @txq_entries: Size of transmit queues requested by user.
  * @txq_stop_thresh: TX queue fill level at or above which we stop it.
@@ -830,6 +852,9 @@ struct efx_async_filter_insertion {
  * @n_rx_channels: Number of channels used for RX (= number of RX queues)
  * @n_tx_channels: Number of channels used for TX
  * @n_extra_tx_channels: Number of extra channels with TX queues
+ * @n_xdp_channels: Number of channels used for XDP TX
+ * @xdp_channel_offset: Offset of zeroth channel used for XPD TX.
+ * @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel.
  * @rx_ip_align: RX DMA address offset to have IP header aligned in
  *     in accordance with NET_IP_ALIGN
  * @rx_dma_len: Current maximum RX DMA length
@@ -894,6 +919,7 @@ struct efx_async_filter_insertion {
  * @loopback_mode: Loopback status
  * @loopback_modes: Supported loopback mode bitmask
  * @loopback_selftest: Offline self-test private state
+ * @xdp_prog: Current XDP programme for this interface
  * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
  * @filter_state: Architecture-dependent filter table state
  * @rps_mutex: Protects RPS state of all channels
@@ -919,6 +945,8 @@ struct efx_async_filter_insertion {
  * @ptp_data: PTP state data
  * @ptp_warned: has this NIC seen and warned about unexpected PTP events?
  * @vpd_sn: Serial number read from VPD
+ * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
+ *      xdp_rxq_info structures?
  * @monitor_work: Hardware monitor workitem
  * @biu_lock: BIU (bus interface unit) lock
  * @last_irq_cpu: Last CPU to handle a possible test interrupt.  This
@@ -966,6 +994,9 @@ struct efx_nic {
        const struct efx_channel_type *
        extra_channel_type[EFX_MAX_EXTRA_CHANNELS];
 
+       unsigned int xdp_tx_queue_count;
+       struct efx_tx_queue **xdp_tx_queues;
+
        unsigned rxq_entries;
        unsigned txq_entries;
        unsigned int txq_stop_thresh;
@@ -984,6 +1015,9 @@ struct efx_nic {
        unsigned tx_channel_offset;
        unsigned n_tx_channels;
        unsigned n_extra_tx_channels;
+       unsigned int n_xdp_channels;
+       unsigned int xdp_channel_offset;
+       unsigned int xdp_tx_per_channel;
        unsigned int rx_ip_align;
        unsigned int rx_dma_len;
        unsigned int rx_buffer_order;
@@ -1053,6 +1087,10 @@ struct efx_nic {
        u64 loopback_modes;
 
        void *loopback_selftest;
+       /* We access loopback_selftest immediately before running XDP,
+        * so we want them next to each other.
+        */
+       struct bpf_prog __rcu *xdp_prog;
 
        struct rw_semaphore filter_sem;
        void *filter_state;
@@ -1082,6 +1120,7 @@ struct efx_nic {
        bool ptp_warned;
 
        char *vpd_sn;
+       bool xdp_rxq_info_failed;
 
        /* The following fields may be written more often */
 
@@ -1473,10 +1512,24 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type)
        return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type];
 }
 
+static inline struct efx_channel *
+efx_get_xdp_channel(struct efx_nic *efx, unsigned int index)
+{
+       EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_xdp_channels);
+       return efx->channel[efx->xdp_channel_offset + index];
+}
+
+static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel)
+{
+       return channel->channel - channel->efx->xdp_channel_offset <
+              channel->efx->n_xdp_channels;
+}
+
 static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
 {
-       return channel->type && channel->type->want_txqs &&
-                               channel->type->want_txqs(channel);
+       return efx_channel_is_xdp_tx(channel) ||
+              (channel->type && channel->type->want_txqs &&
+               channel->type->want_txqs(channel));
 }
 
 static inline struct efx_tx_queue *
@@ -1500,7 +1553,8 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue)
        else                                                            \
                for (_tx_queue = (_channel)->tx_queue;                  \
                     _tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES && \
-                            efx_tx_queue_used(_tx_queue);              \
+                            (efx_tx_queue_used(_tx_queue) ||            \
+                             efx_channel_is_xdp_tx(_channel));         \
                     _tx_queue++)
 
 /* Iterate over all possible TX queues belonging to a channel */
index 85ec07f..a7d9841 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/iommu.h>
 #include <net/ip.h>
 #include <net/checksum.h>
+#include <net/xdp.h>
+#include <linux/bpf_trace.h>
 #include "net_driver.h"
 #include "efx.h"
 #include "filter.h"
@@ -27,6 +29,9 @@
 /* Preferred number of descriptors to fill at once */
 #define EFX_RX_PREFERRED_BATCH 8U
 
+/* Maximum rx prefix used by any architecture. */
+#define EFX_MAX_RX_PREFIX_SIZE 16
+
 /* Number of RX buffers to recycle pages for.  When creating the RX page recycle
  * ring, this number is divided by the number of buffers per page to calculate
  * the number of pages to store in the RX page recycle ring.
@@ -95,7 +100,7 @@ void efx_rx_config_page_split(struct efx_nic *efx)
                                      EFX_RX_BUF_ALIGNMENT);
        efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
                ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
-                efx->rx_page_buf_step);
+               (efx->rx_page_buf_step + XDP_PACKET_HEADROOM));
        efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
                efx->rx_bufs_per_page;
        efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
@@ -185,6 +190,9 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
                page_offset = sizeof(struct efx_rx_page_state);
 
                do {
+                       page_offset += XDP_PACKET_HEADROOM;
+                       dma_addr += XDP_PACKET_HEADROOM;
+
                        index = rx_queue->added_count & rx_queue->ptr_mask;
                        rx_buf = efx_rx_buffer(rx_queue, index);
                        rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
@@ -635,6 +643,123 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
                netif_receive_skb(skb);
 }
 
+/** efx_do_xdp - perform XDP processing on a received packet
+ *
+ * Returns true if packet should still be delivered.
+ */
+static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
+                      struct efx_rx_buffer *rx_buf, u8 **ehp)
+{
+       u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
+       struct efx_rx_queue *rx_queue;
+       struct bpf_prog *xdp_prog;
+       struct xdp_frame *xdpf;
+       struct xdp_buff xdp;
+       u32 xdp_act;
+       s16 offset;
+       int err;
+
+       rcu_read_lock();
+       xdp_prog = rcu_dereference(efx->xdp_prog);
+       if (!xdp_prog) {
+               rcu_read_unlock();
+               return true;
+       }
+
+       rx_queue = efx_channel_get_rx_queue(channel);
+
+       if (unlikely(channel->rx_pkt_n_frags > 1)) {
+               /* We can't do XDP on fragmented packets - drop. */
+               rcu_read_unlock();
+               efx_free_rx_buffers(rx_queue, rx_buf,
+                                   channel->rx_pkt_n_frags);
+               if (net_ratelimit())
+                       netif_err(efx, rx_err, efx->net_dev,
+                                 "XDP is not possible with multiple receive fragments (%d)\n",
+                                 channel->rx_pkt_n_frags);
+               channel->n_rx_xdp_bad_drops++;
+               return false;
+       }
+
+       dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
+                               rx_buf->len, DMA_FROM_DEVICE);
+
+       /* Save the rx prefix. */
+       EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
+       memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
+              efx->rx_prefix_size);
+
+       xdp.data = *ehp;
+       xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+
+       /* No support yet for XDP metadata */
+       xdp_set_data_meta_invalid(&xdp);
+       xdp.data_end = xdp.data + rx_buf->len;
+       xdp.rxq = &rx_queue->xdp_rxq_info;
+
+       xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
+       rcu_read_unlock();
+
+       offset = (u8 *)xdp.data - *ehp;
+
+       switch (xdp_act) {
+       case XDP_PASS:
+               /* Fix up rx prefix. */
+               if (offset) {
+                       *ehp += offset;
+                       rx_buf->page_offset += offset;
+                       rx_buf->len -= offset;
+                       memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
+                              efx->rx_prefix_size);
+               }
+               break;
+
+       case XDP_TX:
+               /* Buffer ownership passes to tx on success. */
+               xdpf = convert_to_xdp_frame(&xdp);
+               err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
+               if (unlikely(err != 1)) {
+                       efx_free_rx_buffers(rx_queue, rx_buf, 1);
+                       if (net_ratelimit())
+                               netif_err(efx, rx_err, efx->net_dev,
+                                         "XDP TX failed (%d)\n", err);
+                       channel->n_rx_xdp_bad_drops++;
+               } else {
+                       channel->n_rx_xdp_tx++;
+               }
+               break;
+
+       case XDP_REDIRECT:
+               err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
+               if (unlikely(err)) {
+                       efx_free_rx_buffers(rx_queue, rx_buf, 1);
+                       if (net_ratelimit())
+                               netif_err(efx, rx_err, efx->net_dev,
+                                         "XDP redirect failed (%d)\n", err);
+                       channel->n_rx_xdp_bad_drops++;
+               } else {
+                       channel->n_rx_xdp_redirect++;
+               }
+               break;
+
+       default:
+               bpf_warn_invalid_xdp_action(xdp_act);
+               efx_free_rx_buffers(rx_queue, rx_buf, 1);
+               channel->n_rx_xdp_bad_drops++;
+               break;
+
+       case XDP_ABORTED:
+               trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+               /* Fall through */
+       case XDP_DROP:
+               efx_free_rx_buffers(rx_queue, rx_buf, 1);
+               channel->n_rx_xdp_drops++;
+               break;
+       }
+
+       return xdp_act == XDP_PASS;
+}
+
 /* Handle a received packet.  Second half: Touches packet payload. */
 void __efx_rx_packet(struct efx_channel *channel)
 {
@@ -663,6 +788,9 @@ void __efx_rx_packet(struct efx_channel *channel)
                goto out;
        }
 
+       if (!efx_do_xdp(efx, channel, rx_buf, &eh))
+               goto out;
+
        if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
                rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
 
@@ -731,6 +859,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 {
        struct efx_nic *efx = rx_queue->efx;
        unsigned int max_fill, trigger, max_trigger;
+       int rc = 0;
 
        netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
                  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
@@ -764,6 +893,19 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
        rx_queue->fast_fill_trigger = trigger;
        rx_queue->refill_enabled = true;
 
+       /* Initialise XDP queue information */
+       rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
+                             rx_queue->core_index);
+
+       if (rc) {
+               netif_err(efx, rx_err, efx->net_dev,
+                         "Failure to initialise XDP queue information rc=%d\n",
+                         rc);
+               efx->xdp_rxq_info_failed = true;
+       } else {
+               rx_queue->xdp_rxq_info_valid = true;
+       }
+
        /* Set up RX descriptor ring */
        efx_nic_init_rx(rx_queue);
 }
@@ -805,6 +947,11 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
        }
        kfree(rx_queue->page_ring);
        rx_queue->page_ring = NULL;
+
+       if (rx_queue->xdp_rxq_info_valid)
+               xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
+
+       rx_queue->xdp_rxq_info_valid = false;
 }
 
 void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
index 65e81ec..00c1c44 100644 (file)
@@ -95,6 +95,8 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
                           "TX queue %d transmission id %x complete\n",
                           tx_queue->queue, tx_queue->read_count);
+       } else if (buffer->flags & EFX_TX_BUF_XDP) {
+               xdp_return_frame_rx_napi(buffer->xdpf);
        }
 
        buffer->len = 0;
@@ -597,6 +599,94 @@ err:
        return NETDEV_TX_OK;
 }
 
+static void efx_xdp_return_frames(int n,  struct xdp_frame **xdpfs)
+{
+       int i;
+
+       for (i = 0; i < n; i++)
+               xdp_return_frame_rx_napi(xdpfs[i]);
+}
+
+/* Transmit a packet from an XDP buffer
+ *
+ * Returns number of packets sent on success, error code otherwise.
+ * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC
+ * (for XDP redirect).
+ */
+int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
+                      bool flush)
+{
+       struct efx_tx_buffer *tx_buffer;
+       struct efx_tx_queue *tx_queue;
+       struct xdp_frame *xdpf;
+       dma_addr_t dma_addr;
+       unsigned int len;
+       int space;
+       int cpu;
+       int i;
+
+       cpu = raw_smp_processor_id();
+
+       if (!efx->xdp_tx_queue_count ||
+           unlikely(cpu >= efx->xdp_tx_queue_count))
+               return -EINVAL;
+
+       tx_queue = efx->xdp_tx_queues[cpu];
+       if (unlikely(!tx_queue))
+               return -EINVAL;
+
+       if (unlikely(n && !xdpfs))
+               return -EINVAL;
+
+       if (!n)
+               return 0;
+
+       /* Check for available space. We should never need multiple
+        * descriptors per frame.
+        */
+       space = efx->txq_entries +
+               tx_queue->read_count - tx_queue->insert_count;
+
+       for (i = 0; i < n; i++) {
+               xdpf = xdpfs[i];
+
+               if (i >= space)
+                       break;
+
+               /* We'll want a descriptor for this tx. */
+               prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue));
+
+               len = xdpf->len;
+
+               /* Map for DMA. */
+               dma_addr = dma_map_single(&efx->pci_dev->dev,
+                                         xdpf->data, len,
+                                         DMA_TO_DEVICE);
+               if (dma_mapping_error(&efx->pci_dev->dev, dma_addr))
+                       break;
+
+               /*  Create descriptor and set up for unmapping DMA. */
+               tx_buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+               tx_buffer->xdpf = xdpf;
+               tx_buffer->flags = EFX_TX_BUF_XDP |
+                                  EFX_TX_BUF_MAP_SINGLE;
+               tx_buffer->dma_offset = 0;
+               tx_buffer->unmap_len = len;
+               tx_queue->tx_packets++;
+       }
+
+       /* Pass mapped frames to hardware. */
+       if (flush && i > 0)
+               efx_nic_push_buffers(tx_queue);
+
+       if (i == 0)
+               return -EIO;
+
+       efx_xdp_return_frames(n - i, xdpfs + i);
+
+       return i;
+}
+
 /* Remove packets from the TX queue
  *
  * This removes packets from the TX queue, up to and including the
@@ -857,6 +947,8 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
        tx_queue->completed_timestamp_major = 0;
        tx_queue->completed_timestamp_minor = 0;
 
+       tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
+
        /* Set up default function pointers. These may get replaced by
         * efx_nic_init_tx() based off NIC/queue capabilities.
         */
index deb636d..d242906 100644 (file)
@@ -48,7 +48,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/skbuff.h>
-#include <linux/dma-direct.h>
+#include <linux/dma-mapping.h>
 
 #include <net/ip.h>
 
@@ -89,6 +89,7 @@ struct ioc3_private {
        struct device *dma_dev;
        u32 *ssram;
        unsigned long *rxr;             /* pointer to receiver ring */
+       void *tx_ring;
        struct ioc3_etxd *txr;
        dma_addr_t rxr_dma;
        dma_addr_t txr_dma;
@@ -1173,26 +1174,14 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct ioc3 *ioc3;
        unsigned long ioc3_base, ioc3_size;
        u32 vendor, model, rev;
-       int err, pci_using_dac;
+       int err;
 
        /* Configure DMA attributes. */
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (!err) {
-               pci_using_dac = 1;
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-               if (err < 0) {
-                       pr_err("%s: Unable to obtain 64 bit DMA for consistent allocations\n",
-                              pci_name(pdev));
-                       goto out;
-               }
-       } else {
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (err) {
-                       pr_err("%s: No usable DMA configuration, aborting.\n",
-                              pci_name(pdev));
-                       goto out;
-               }
-               pci_using_dac = 0;
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+       if (err) {
+               pr_err("%s: No usable DMA configuration, aborting.\n",
+                      pci_name(pdev));
+               goto out;
        }
 
        if (pci_enable_device(pdev))
@@ -1204,9 +1193,6 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto out_disable;
        }
 
-       if (pci_using_dac)
-               dev->features |= NETIF_F_HIGHDMA;
-
        err = pci_request_regions(pdev, "ioc3");
        if (err)
                goto out_free;
@@ -1242,8 +1228,8 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        ioc3_stop(ip);
 
        /* Allocate rx ring.  4kb = 512 entries, must be 4kb aligned */
-       ip->rxr = dma_direct_alloc_pages(ip->dma_dev, RX_RING_SIZE,
-                                        &ip->rxr_dma, GFP_ATOMIC, 0);
+       ip->rxr = dma_alloc_coherent(ip->dma_dev, RX_RING_SIZE, &ip->rxr_dma,
+                                    GFP_KERNEL);
        if (!ip->rxr) {
                pr_err("ioc3-eth: rx ring allocation failed\n");
                err = -ENOMEM;
@@ -1251,14 +1237,16 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
 
        /* Allocate tx rings.  16kb = 128 bufs, must be 16kb aligned  */
-       ip->txr = dma_direct_alloc_pages(ip->dma_dev, TX_RING_SIZE,
-                                        &ip->txr_dma,
-                                        GFP_KERNEL | __GFP_ZERO, 0);
-       if (!ip->txr) {
+       ip->tx_ring = dma_alloc_coherent(ip->dma_dev, TX_RING_SIZE + SZ_16K - 1,
+                                        &ip->txr_dma, GFP_KERNEL);
+       if (!ip->tx_ring) {
                pr_err("ioc3-eth: tx ring allocation failed\n");
                err = -ENOMEM;
                goto out_stop;
        }
+       /* Align TX ring */
+       ip->txr = PTR_ALIGN(ip->tx_ring, SZ_16K);
+       ip->txr_dma = ALIGN(ip->txr_dma, SZ_16K);
 
        ioc3_init(dev);
 
@@ -1288,7 +1276,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        dev->netdev_ops         = &ioc3_netdev_ops;
        dev->ethtool_ops        = &ioc3_ethtool_ops;
        dev->hw_features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-       dev->features           = NETIF_F_IP_CSUM;
+       dev->features           = NETIF_F_IP_CSUM | NETIF_F_HIGHDMA;
 
        sw_physid1 = ioc3_mdio_read(dev, ip->mii.phy_id, MII_PHYSID1);
        sw_physid2 = ioc3_mdio_read(dev, ip->mii.phy_id, MII_PHYSID2);
@@ -1313,11 +1301,11 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 out_stop:
        del_timer_sync(&ip->ioc3_timer);
        if (ip->rxr)
-               dma_direct_free_pages(ip->dma_dev, RX_RING_SIZE, ip->rxr,
-                                     ip->rxr_dma, 0);
-       if (ip->txr)
-               dma_direct_free_pages(ip->dma_dev, TX_RING_SIZE, ip->txr,
-                                     ip->txr_dma, 0);
+               dma_free_coherent(ip->dma_dev, RX_RING_SIZE, ip->rxr,
+                                 ip->rxr_dma);
+       if (ip->tx_ring)
+               dma_free_coherent(ip->dma_dev, TX_RING_SIZE, ip->tx_ring,
+                                 ip->txr_dma);
 out_res:
        pci_release_regions(pdev);
 out_free:
@@ -1335,10 +1323,8 @@ static void ioc3_remove_one(struct pci_dev *pdev)
        struct net_device *dev = pci_get_drvdata(pdev);
        struct ioc3_private *ip = netdev_priv(dev);
 
-       dma_direct_free_pages(ip->dma_dev, RX_RING_SIZE, ip->rxr,
-                             ip->rxr_dma, 0);
-       dma_direct_free_pages(ip->dma_dev, TX_RING_SIZE, ip->txr,
-                             ip->txr_dma, 0);
+       dma_free_coherent(ip->dma_dev, RX_RING_SIZE, ip->rxr, ip->rxr_dma);
+       dma_free_coherent(ip->dma_dev, TX_RING_SIZE, ip->tx_ring, ip->txr_dma);
 
        unregister_netdev(dev);
        del_timer_sync(&ip->ioc3_timer);
index f9e6744..869a498 100644 (file)
 #define NETSEC_XDP_CONSUMED      BIT(0)
 #define NETSEC_XDP_TX            BIT(1)
 #define NETSEC_XDP_REDIR         BIT(2)
-#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR)
 
 enum ring_id {
        NETSEC_RING_TX = 0,
@@ -661,6 +660,7 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv)
                        bytes += desc->skb->len;
                        dev_kfree_skb(desc->skb);
                } else {
+                       bytes += desc->xdpf->len;
                        xdp_return_frame(desc->xdpf);
                }
 next:
@@ -847,8 +847,8 @@ static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
                enum dma_data_direction dma_dir =
                        page_pool_get_dma_dir(rx_ring->page_pool);
 
-               dma_handle = page_pool_get_dma_addr(page) +
-                       NETSEC_RXBUF_HEADROOM;
+               dma_handle = page_pool_get_dma_addr(page) + xdpf->headroom +
+                       sizeof(*xdpf);
                dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len,
                                           dma_dir);
                tx_desc.buf_type = TYPE_NETSEC_XDP_TX;
@@ -858,6 +858,7 @@ static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
        tx_desc.addr = xdpf->data;
        tx_desc.len = xdpf->len;
 
+       netdev_sent_queue(priv->ndev, xdpf->len);
        netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf);
 
        return NETSEC_XDP_TX;
@@ -1030,7 +1031,7 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 
 next:
                if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) ||
-                   xdp_result & NETSEC_XDP_RX_OK) {
+                   xdp_result) {
                        ndev->stats.rx_packets++;
                        ndev->stats.rx_bytes += xdp.data_end - xdp.data;
                }
index 6e984d5..f7e927a 100644 (file)
@@ -1565,10 +1565,10 @@ static int ave_probe(struct platform_device *pdev)
                return -EINVAL;
 
        np = dev->of_node;
-       phy_mode = of_get_phy_mode(np);
-       if ((int)phy_mode < 0) {
+       ret = of_get_phy_mode(np, &phy_mode);
+       if (ret) {
                dev_err(dev, "phy-mode not found\n");
-               return -EINVAL;
+               return ret;
        }
 
        irq = platform_get_irq(pdev, 0);
index 527f933..d0d2d0f 100644 (file)
@@ -61,9 +61,10 @@ static void anarion_gmac_exit(struct platform_device *pdev, void *priv)
 
 static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
 {
-       int phy_mode;
-       void __iomem *ctl_block;
        struct anarion_gmac *gmac;
+       phy_interface_t phy_mode;
+       void __iomem *ctl_block;
+       int err;
 
        ctl_block = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(ctl_block)) {
@@ -78,7 +79,10 @@ static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
 
        gmac->ctl_block = (uintptr_t)ctl_block;
 
-       phy_mode = of_get_phy_mode(pdev->dev.of_node);
+       err = of_get_phy_mode(pdev->dev.of_node, &phy_mode);
+       if (err)
+               return ERR_PTR(err);
+
        switch (phy_mode) {
        case PHY_INTERFACE_MODE_RGMII:          /* Fall through */
        case PHY_INTERFACE_MODE_RGMII_ID        /* Fall through */:
index 0d21082..6ae13dc 100644 (file)
@@ -189,9 +189,10 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed)
 static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 {
        struct device *dev = &gmac->pdev->dev;
+       int ret;
 
-       gmac->phy_mode = of_get_phy_mode(dev->of_node);
-       if ((int)gmac->phy_mode < 0) {
+       ret = of_get_phy_mode(dev->of_node, &gmac->phy_mode);
+       if (ret) {
                dev_err(dev, "missing phy mode property\n");
                return -EINVAL;
        }
index 79f2ee3..bdb8042 100644 (file)
@@ -54,7 +54,7 @@ struct mediatek_dwmac_plat_data {
        struct device_node *np;
        struct regmap *peri_regmap;
        struct device *dev;
-       int phy_mode;
+       phy_interface_t phy_mode;
        bool rmii_rxc;
 };
 
@@ -130,6 +130,31 @@ static void mt2712_delay_ps2stage(struct mediatek_dwmac_plat_data *plat)
        }
 }
 
+static void mt2712_delay_stage2ps(struct mediatek_dwmac_plat_data *plat)
+{
+       struct mac_delay_struct *mac_delay = &plat->mac_delay;
+
+       switch (plat->phy_mode) {
+       case PHY_INTERFACE_MODE_MII:
+       case PHY_INTERFACE_MODE_RMII:
+               /* 550ps per stage for MII/RMII */
+               mac_delay->tx_delay *= 550;
+               mac_delay->rx_delay *= 550;
+               break;
+       case PHY_INTERFACE_MODE_RGMII:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_ID:
+               /* 170ps per stage for RGMII */
+               mac_delay->tx_delay *= 170;
+               mac_delay->rx_delay *= 170;
+               break;
+       default:
+               dev_err(plat->dev, "phy interface not supported\n");
+               break;
+       }
+}
+
 static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat)
 {
        struct mac_delay_struct *mac_delay = &plat->mac_delay;
@@ -199,6 +224,8 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat)
        regmap_write(plat->peri_regmap, PERI_ETH_DLY, delay_val);
        regmap_write(plat->peri_regmap, PERI_ETH_DLY_FINE, fine_val);
 
+       mt2712_delay_stage2ps(plat);
+
        return 0;
 }
 
@@ -216,6 +243,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat)
 {
        struct mac_delay_struct *mac_delay = &plat->mac_delay;
        u32 tx_delay_ps, rx_delay_ps;
+       int err;
 
        plat->peri_regmap = syscon_regmap_lookup_by_phandle(plat->np, "mediatek,pericfg");
        if (IS_ERR(plat->peri_regmap)) {
@@ -223,10 +251,10 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat)
                return PTR_ERR(plat->peri_regmap);
        }
 
-       plat->phy_mode = of_get_phy_mode(plat->np);
-       if (plat->phy_mode < 0) {
+       err = of_get_phy_mode(plat->np, &plat->phy_mode);
+       if (err) {
                dev_err(plat->dev, "not find phy-mode\n");
-               return -EINVAL;
+               return err;
        }
 
        if (!of_property_read_u32(plat->np, "mediatek,tx-delay-ps", &tx_delay_ps)) {
index 306da8f..bd6c010 100644 (file)
@@ -338,10 +338,9 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
        }
 
        dwmac->dev = &pdev->dev;
-       dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-       if ((int)dwmac->phy_mode < 0) {
+       ret = of_get_phy_mode(pdev->dev.of_node, &dwmac->phy_mode);
+       if (ret) {
                dev_err(&pdev->dev, "missing phy-mode property\n");
-               ret = -EINVAL;
                goto err_remove_config_dt;
        }
 
index e2e469c..dc50ba1 100644 (file)
@@ -37,7 +37,7 @@ struct rk_gmac_ops {
 
 struct rk_priv_data {
        struct platform_device *pdev;
-       int phy_iface;
+       phy_interface_t phy_iface;
        struct regulator *regulator;
        bool suspended;
        const struct rk_gmac_ops *ops;
@@ -1224,7 +1224,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
        if (!bsp_priv)
                return ERR_PTR(-ENOMEM);
 
-       bsp_priv->phy_iface = of_get_phy_mode(dev->of_node);
+       of_get_phy_mode(dev->of_node, &bsp_priv->phy_iface);
        bsp_priv->ops = ops;
 
        bsp_priv->regulator = devm_regulator_get_optional(dev, "phy");
index e9fd661..e1b63df 100644 (file)
 #define ETH_PHY_SEL_MII                0x0
 
 struct sti_dwmac {
-       int interface;          /* MII interface */
+       phy_interface_t interface;      /* MII interface */
        bool ext_phyclk;        /* Clock from external PHY */
        u32 tx_retime_src;      /* TXCLK Retiming*/
        struct clk *clk;        /* PHY clock */
@@ -269,7 +269,12 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
                return err;
        }
 
-       dwmac->interface = of_get_phy_mode(np);
+       err = of_get_phy_mode(np, &dwmac->interface);
+       if (err && err != -ENODEV) {
+               dev_err(dev, "Can't get phy-mode\n");
+               return err;
+       }
+
        dwmac->regmap = regmap;
        dwmac->gmac_en = of_property_read_bool(np, "st,gmac_en");
        dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk");
index 4ef041b..595af2e 100644 (file)
@@ -175,7 +175,7 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 {
        struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
        u32 reg = dwmac->mode_reg;
-       int val, ret;
+       int val;
 
        switch (plat_dat->interface) {
        case PHY_INTERFACE_MODE_MII:
@@ -211,8 +211,8 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
        }
 
        /* Need to update PMCCLRR (clear register) */
-       ret = regmap_write(dwmac->regmap, reg + SYSCFG_PMCCLRR_OFFSET,
-                          dwmac->ops->syscfg_eth_mask);
+       regmap_write(dwmac->regmap, reg + SYSCFG_PMCCLRR_OFFSET,
+                    dwmac->ops->syscfg_eth_mask);
 
        /* Update PMCSETR (set register) */
        return regmap_update_bits(dwmac->regmap, reg,
index ddcc191..eefb06d 100644 (file)
@@ -1105,6 +1105,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
        struct stmmac_resources stmmac_res;
        struct sunxi_priv_data *gmac;
        struct device *dev = &pdev->dev;
+       phy_interface_t interface;
        int ret;
        struct stmmac_priv *priv;
        struct net_device *ndev;
@@ -1178,10 +1179,10 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
                return ret;
        }
 
-       ret = of_get_phy_mode(dev->of_node);
-       if (ret < 0)
+       ret = of_get_phy_mode(dev->of_node, &interface);
+       if (ret)
                return -EINVAL;
-       plat_dat->interface = ret;
+       plat_dat->interface = interface;
 
        /* platform data specifying hardware features and callbacks.
         * hardware features were copied from Allwinner drivers.
index a299da3..26353ef 100644 (file)
@@ -18,7 +18,7 @@
 #include "stmmac_platform.h"
 
 struct sunxi_priv_data {
-       int interface;
+       phy_interface_t interface;
        int clk_enabled;
        struct clk *tx_clk;
        struct regulator *regulator;
@@ -118,7 +118,11 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
                goto err_remove_config_dt;
        }
 
-       gmac->interface = of_get_phy_mode(dev->of_node);
+       ret = of_get_phy_mode(dev->of_node, &gmac->interface);
+       if (ret && ret != -ENODEV) {
+               dev_err(dev, "Can't get phy-mode\n");
+               goto err_remove_config_dt;
+       }
 
        gmac->tx_clk = devm_clk_get(dev, "allwinner_gmac_tx");
        if (IS_ERR(gmac->tx_clk)) {
index 3d69da1..d0356fb 100644 (file)
@@ -130,7 +130,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits,
                writel(mcfilterbits[0], ioaddr + GMAC_HASH_LOW);
                writel(mcfilterbits[1], ioaddr + GMAC_HASH_HIGH);
                return;
-               break;
        case 7:
                numhashregs = 4;
                break;
@@ -140,7 +139,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits,
        default:
                pr_debug("STMMAC: err in setting multicast filter\n");
                return;
-               break;
        }
        for (regs = 0; regs < numhashregs; regs++)
                writel(mcfilterbits[regs],
index 89a3420..07e97f4 100644 (file)
 #define GMAC_ARP_ADDR                  0x00000210
 #define GMAC_ADDR_HIGH(reg)            (0x300 + reg * 8)
 #define GMAC_ADDR_LOW(reg)             (0x304 + reg * 8)
+#define GMAC_L3L4_CTRL(reg)            (0x900 + (reg) * 0x30)
+#define GMAC_L4_ADDR(reg)              (0x904 + (reg) * 0x30)
+#define GMAC_L3_ADDR0(reg)             (0x910 + (reg) * 0x30)
+#define GMAC_L3_ADDR1(reg)             (0x914 + (reg) * 0x30)
 
 /* RX Queues Routing */
 #define GMAC_RXQCTRL_AVCPQ_MASK                GENMASK(2, 0)
@@ -67,6 +71,7 @@
 #define GMAC_PACKET_FILTER_PCF         BIT(7)
 #define GMAC_PACKET_FILTER_HPF         BIT(10)
 #define GMAC_PACKET_FILTER_VTFE                BIT(16)
+#define GMAC_PACKET_FILTER_IPFE                BIT(20)
 
 #define GMAC_MAX_PERFECT_ADDRESSES     128
 
@@ -202,9 +207,11 @@ enum power_event {
 #define GMAC_HW_FEAT_MIISEL            BIT(0)
 
 /* MAC HW features1 bitmap */
+#define GMAC_HW_FEAT_L3L4FNUM          GENMASK(30, 27)
 #define GMAC_HW_HASH_TB_SZ             GENMASK(25, 24)
 #define GMAC_HW_FEAT_AVSEL             BIT(20)
 #define GMAC_HW_TSOEN                  BIT(18)
+#define GMAC_HW_ADDR64                 GENMASK(15, 14)
 #define GMAC_HW_TXFIFOSIZE             GENMASK(10, 6)
 #define GMAC_HW_RXFIFOSIZE             GENMASK(4, 0)
 
@@ -227,6 +234,21 @@ enum power_event {
 #define GMAC_HI_DCS_SHIFT              16
 #define GMAC_HI_REG_AE                 BIT(31)
 
+/* L3/L4 Filters regs */
+#define GMAC_L4DPIM0                   BIT(21)
+#define GMAC_L4DPM0                    BIT(20)
+#define GMAC_L4SPIM0                   BIT(19)
+#define GMAC_L4SPM0                    BIT(18)
+#define GMAC_L4PEN0                    BIT(16)
+#define GMAC_L3DAIM0                   BIT(5)
+#define GMAC_L3DAM0                    BIT(4)
+#define GMAC_L3SAIM0                   BIT(3)
+#define GMAC_L3SAM0                    BIT(2)
+#define GMAC_L3PEN0                    BIT(0)
+#define GMAC_L4DP0                     GENMASK(31, 16)
+#define GMAC_L4DP0_SHIFT               16
+#define GMAC_L4SP0                     GENMASK(15, 0)
+
 /*  MTL registers */
 #define MTL_OPERATION_MODE             0x00000c00
 #define MTL_FRPE                       BIT(15)
index 5a7b0ac..bec929d 100644 (file)
@@ -733,7 +733,7 @@ static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable)
 }
 
 static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash,
-                                   bool is_double)
+                                   u16 perfect_match, bool is_double)
 {
        void __iomem *ioaddr = hw->pcsr;
 
@@ -748,6 +748,16 @@ static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash,
                }
 
                writel(value, ioaddr + GMAC_VLAN_TAG);
+       } else if (perfect_match) {
+               u32 value = GMAC_VLAN_ETV;
+
+               if (is_double) {
+                       value |= GMAC_VLAN_EDVLP;
+                       value |= GMAC_VLAN_ESVL;
+                       value |= GMAC_VLAN_DOVLTC;
+               }
+
+               writel(value | perfect_match, ioaddr + GMAC_VLAN_TAG);
        } else {
                u32 value = readl(ioaddr + GMAC_VLAN_TAG);
 
@@ -799,6 +809,106 @@ static void dwmac4_set_arp_offload(struct mac_device_info *hw, bool en,
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
+static int dwmac4_config_l3_filter(struct mac_device_info *hw, u32 filter_no,
+                                  bool en, bool ipv6, bool sa, bool inv,
+                                  u32 match)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value;
+
+       value = readl(ioaddr + GMAC_PACKET_FILTER);
+       value |= GMAC_PACKET_FILTER_IPFE;
+       writel(value, ioaddr + GMAC_PACKET_FILTER);
+
+       value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+       /* For IPv6 not both SA/DA filters can be active */
+       if (ipv6) {
+               value |= GMAC_L3PEN0;
+               value &= ~(GMAC_L3SAM0 | GMAC_L3SAIM0);
+               value &= ~(GMAC_L3DAM0 | GMAC_L3DAIM0);
+               if (sa) {
+                       value |= GMAC_L3SAM0;
+                       if (inv)
+                               value |= GMAC_L3SAIM0;
+               } else {
+                       value |= GMAC_L3DAM0;
+                       if (inv)
+                               value |= GMAC_L3DAIM0;
+               }
+       } else {
+               value &= ~GMAC_L3PEN0;
+               if (sa) {
+                       value |= GMAC_L3SAM0;
+                       if (inv)
+                               value |= GMAC_L3SAIM0;
+               } else {
+                       value |= GMAC_L3DAM0;
+                       if (inv)
+                               value |= GMAC_L3DAIM0;
+               }
+       }
+
+       writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+       if (sa) {
+               writel(match, ioaddr + GMAC_L3_ADDR0(filter_no));
+       } else {
+               writel(match, ioaddr + GMAC_L3_ADDR1(filter_no));
+       }
+
+       if (!en)
+               writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+       return 0;
+}
+
+static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no,
+                                  bool en, bool udp, bool sa, bool inv,
+                                  u32 match)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value;
+
+       value = readl(ioaddr + GMAC_PACKET_FILTER);
+       value |= GMAC_PACKET_FILTER_IPFE;
+       writel(value, ioaddr + GMAC_PACKET_FILTER);
+
+       value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no));
+       if (udp) {
+               value |= GMAC_L4PEN0;
+       } else {
+               value &= ~GMAC_L4PEN0;
+       }
+
+       value &= ~(GMAC_L4SPM0 | GMAC_L4SPIM0);
+       value &= ~(GMAC_L4DPM0 | GMAC_L4DPIM0);
+       if (sa) {
+               value |= GMAC_L4SPM0;
+               if (inv)
+                       value |= GMAC_L4SPIM0;
+       } else {
+               value |= GMAC_L4DPM0;
+               if (inv)
+                       value |= GMAC_L4DPIM0;
+       }
+
+       writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+       if (sa) {
+               value = match & GMAC_L4SP0;
+       } else {
+               value = (match << GMAC_L4DP0_SHIFT) & GMAC_L4DP0;
+       }
+
+       writel(value, ioaddr + GMAC_L4_ADDR(filter_no));
+
+       if (!en)
+               writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+       return 0;
+}
+
 const struct stmmac_ops dwmac4_ops = {
        .core_init = dwmac4_core_init,
        .set_mac = stmmac_set_mac,
@@ -828,11 +938,14 @@ const struct stmmac_ops dwmac4_ops = {
        .pcs_get_adv_lp = dwmac4_get_adv_lp,
        .debug = dwmac4_debug,
        .set_filter = dwmac4_set_filter,
+       .flex_pps_config = dwmac5_flex_pps_config,
        .set_mac_loopback = dwmac4_set_mac_loopback,
        .update_vlan_hash = dwmac4_update_vlan_hash,
        .sarc_configure = dwmac4_sarc_configure,
        .enable_vlan = dwmac4_enable_vlan,
        .set_arp_offload = dwmac4_set_arp_offload,
+       .config_l3_filter = dwmac4_config_l3_filter,
+       .config_l4_filter = dwmac4_config_l4_filter,
 };
 
 const struct stmmac_ops dwmac410_ops = {
@@ -869,6 +982,8 @@ const struct stmmac_ops dwmac410_ops = {
        .sarc_configure = dwmac4_sarc_configure,
        .enable_vlan = dwmac4_enable_vlan,
        .set_arp_offload = dwmac4_set_arp_offload,
+       .config_l3_filter = dwmac4_config_l3_filter,
+       .config_l4_filter = dwmac4_config_l4_filter,
 };
 
 const struct stmmac_ops dwmac510_ops = {
@@ -910,6 +1025,8 @@ const struct stmmac_ops dwmac510_ops = {
        .sarc_configure = dwmac4_sarc_configure,
        .enable_vlan = dwmac4_enable_vlan,
        .set_arp_offload = dwmac4_set_arp_offload,
+       .config_l3_filter = dwmac4_config_l3_filter,
+       .config_l4_filter = dwmac4_config_l4_filter,
 };
 
 int dwmac4_setup(struct stmmac_priv *priv)
index 15eb1ab..707ab5e 100644 (file)
@@ -431,8 +431,8 @@ static void dwmac4_get_addr(struct dma_desc *p, unsigned int *addr)
 
 static void dwmac4_set_addr(struct dma_desc *p, dma_addr_t addr)
 {
-       p->des0 = cpu_to_le32(addr);
-       p->des1 = 0;
+       p->des0 = cpu_to_le32(lower_32_bits(addr));
+       p->des1 = cpu_to_le32(upper_32_bits(addr));
 }
 
 static void dwmac4_clear(struct dma_desc *p)
index 68c1579..b24c895 100644 (file)
@@ -79,6 +79,10 @@ static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr,
        value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
        writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
+       if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame))
+               writel(upper_32_bits(dma_rx_phy),
+                      ioaddr + DMA_CHAN_RX_BASE_ADDR_HI(chan));
+
        writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan));
 }
 
@@ -97,6 +101,10 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr,
 
        writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
+       if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame))
+               writel(upper_32_bits(dma_tx_phy),
+                      ioaddr + DMA_CHAN_TX_BASE_ADDR_HI(chan));
+
        writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
 }
 
@@ -132,6 +140,9 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
        if (dma_cfg->aal)
                value |= DMA_SYS_BUS_AAL;
 
+       if (dma_cfg->eame)
+               value |= DMA_SYS_BUS_EAME;
+
        writel(value, ioaddr + DMA_SYS_BUS_MODE);
 }
 
@@ -353,9 +364,27 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 
        /* MAC HW feature1 */
        hw_cap = readl(ioaddr + GMAC_HW_FEATURE1);
+       dma_cap->l3l4fnum = (hw_cap & GMAC_HW_FEAT_L3L4FNUM) >> 27;
        dma_cap->hash_tb_sz = (hw_cap & GMAC_HW_HASH_TB_SZ) >> 24;
        dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20;
        dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18;
+
+       dma_cap->addr64 = (hw_cap & GMAC_HW_ADDR64) >> 14;
+       switch (dma_cap->addr64) {
+       case 0:
+               dma_cap->addr64 = 32;
+               break;
+       case 1:
+               dma_cap->addr64 = 40;
+               break;
+       case 2:
+               dma_cap->addr64 = 48;
+               break;
+       default:
+               dma_cap->addr64 = 32;
+               break;
+       }
+
        /* RX and TX FIFO sizes are encoded as log2(n / 128). Undo that by
         * shifting and store the sizes in bytes.
         */
index b66da02..5299fa1 100644 (file)
@@ -65,6 +65,7 @@
 #define DMA_SYS_BUS_MB                 BIT(14)
 #define DMA_AXI_1KBBE                  BIT(13)
 #define DMA_SYS_BUS_AAL                        BIT(12)
+#define DMA_SYS_BUS_EAME               BIT(11)
 #define DMA_AXI_BLEN256                        BIT(7)
 #define DMA_AXI_BLEN128                        BIT(6)
 #define DMA_AXI_BLEN64                 BIT(5)
@@ -91,7 +92,9 @@
 #define DMA_CHAN_CONTROL(x)            DMA_CHANX_BASE_ADDR(x)
 #define DMA_CHAN_TX_CONTROL(x)         (DMA_CHANX_BASE_ADDR(x) + 0x4)
 #define DMA_CHAN_RX_CONTROL(x)         (DMA_CHANX_BASE_ADDR(x) + 0x8)
+#define DMA_CHAN_TX_BASE_ADDR_HI(x)    (DMA_CHANX_BASE_ADDR(x) + 0x10)
 #define DMA_CHAN_TX_BASE_ADDR(x)       (DMA_CHANX_BASE_ADDR(x) + 0x14)
+#define DMA_CHAN_RX_BASE_ADDR_HI(x)    (DMA_CHANX_BASE_ADDR(x) + 0x18)
 #define DMA_CHAN_RX_BASE_ADDR(x)       (DMA_CHANX_BASE_ADDR(x) + 0x1c)
 #define DMA_CHAN_TX_END_ADDR(x)                (DMA_CHANX_BASE_ADDR(x) + 0x20)
 #define DMA_CHAN_RX_END_ADDR(x)                (DMA_CHANX_BASE_ADDR(x) + 0x28)
index 5031398..5cda360 100644 (file)
@@ -555,7 +555,7 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw,
 }
 
 static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash,
-                                     bool is_double)
+                                     u16 perfect_match, bool is_double)
 {
        void __iomem *ioaddr = hw->pcsr;
 
@@ -576,6 +576,21 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash,
                }
 
                writel(value, ioaddr + XGMAC_VLAN_TAG);
+       } else if (perfect_match) {
+               u32 value = readl(ioaddr + XGMAC_PACKET_FILTER);
+
+               value |= XGMAC_FILTER_VTFE;
+
+               writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+               value = XGMAC_VLAN_ETV;
+               if (is_double) {
+                       value |= XGMAC_VLAN_EDVLP;
+                       value |= XGMAC_VLAN_ESVL;
+                       value |= XGMAC_VLAN_DOVLTC;
+               }
+
+               writel(value | perfect_match, ioaddr + XGMAC_VLAN_TAG);
        } else {
                u32 value = readl(ioaddr + XGMAC_PACKET_FILTER);
 
index 965cbe3..7cc3319 100644 (file)
@@ -27,7 +27,10 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr,
        if (dma_cfg->aal)
                value |= XGMAC_AAL;
 
-       writel(value | XGMAC_EAME, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+       if (dma_cfg->eame)
+               value |= XGMAC_EAME;
+
+       writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
 }
 
 static void dwxgmac2_dma_init_chan(void __iomem *ioaddr,
index ddb851d..1303d1e 100644 (file)
@@ -357,7 +357,7 @@ struct stmmac_ops {
                             struct stmmac_rss *cfg, u32 num_rxq);
        /* VLAN */
        void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash,
-                                bool is_double);
+                                u16 perfect_match, bool is_double);
        void (*enable_vlan)(struct mac_device_info *hw, u32 type);
        /* TX Timestamp */
        int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts);
index 4e9c848..654a2b7 100644 (file)
@@ -4208,15 +4208,25 @@ static u32 stmmac_vid_crc32_le(__le16 vid_le)
 static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double)
 {
        u32 crc, hash = 0;
-       u16 vid;
+       int count = 0;
+       u16 vid = 0;
 
        for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
                __le16 vid_le = cpu_to_le16(vid);
                crc = bitrev32(~stmmac_vid_crc32_le(vid_le)) >> 28;
                hash |= (1 << crc);
+               count++;
+       }
+
+       if (!priv->dma_cap.vlhash) {
+               if (count > 2) /* VID = 0 always passes filter */
+                       return -EOPNOTSUPP;
+
+               vid = cpu_to_le16(vid);
+               hash = 0;
        }
 
-       return stmmac_update_vlan_hash(priv, priv->hw, hash, is_double);
+       return stmmac_update_vlan_hash(priv, priv->hw, hash, vid, is_double);
 }
 
 static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid)
@@ -4225,8 +4235,6 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
        bool is_double = false;
        int ret;
 
-       if (!priv->dma_cap.vlhash)
-               return -EOPNOTSUPP;
        if (be16_to_cpu(proto) == ETH_P_8021AD)
                is_double = true;
 
@@ -4245,8 +4253,6 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
        struct stmmac_priv *priv = netdev_priv(ndev);
        bool is_double = false;
 
-       if (!priv->dma_cap.vlhash)
-               return -EOPNOTSUPP;
        if (be16_to_cpu(proto) == ETH_P_8021AD)
                is_double = true;
 
@@ -4516,6 +4522,13 @@ int stmmac_dvr_probe(struct device *device,
                if (!ret) {
                        dev_info(priv->device, "Using %d bits DMA width\n",
                                 priv->dma_cap.addr64);
+
+                       /*
+                        * If more than 32 bits can be addressed, make sure to
+                        * enable enhanced addressing mode.
+                        */
+                       if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+                               priv->plat->dma_cfg->eame = true;
                } else {
                        ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32));
                        if (ret) {
index 170c3a0..bedaff0 100644 (file)
@@ -412,9 +412,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
                *mac = NULL;
        }
 
-       plat->phy_interface = of_get_phy_mode(np);
-       if (plat->phy_interface < 0)
-               return ERR_PTR(plat->phy_interface);
+       rc = of_get_phy_mode(np, &plat->phy_interface);
+       if (rc)
+               return ERR_PTR(rc);
 
        plat->interface = stmmac_of_get_mac_mode(np);
        if (plat->interface < 0)
index e4ac3c4..0b5db52 100644 (file)
@@ -823,16 +823,13 @@ out:
        return 0;
 }
 
-static int stmmac_test_vlanfilt(struct stmmac_priv *priv)
+static int __stmmac_test_vlanfilt(struct stmmac_priv *priv)
 {
        struct stmmac_packet_attrs attr = { };
        struct stmmac_test_priv *tpriv;
        struct sk_buff *skb = NULL;
        int ret = 0, i;
 
-       if (!priv->dma_cap.vlhash)
-               return -EOPNOTSUPP;
-
        tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
        if (!tpriv)
                return -ENOMEM;
@@ -898,16 +895,32 @@ cleanup:
        return ret;
 }
 
-static int stmmac_test_dvlanfilt(struct stmmac_priv *priv)
+static int stmmac_test_vlanfilt(struct stmmac_priv *priv)
+{
+       if (!priv->dma_cap.vlhash)
+               return -EOPNOTSUPP;
+
+       return __stmmac_test_vlanfilt(priv);
+}
+
+static int stmmac_test_vlanfilt_perfect(struct stmmac_priv *priv)
+{
+       int ret, prev_cap = priv->dma_cap.vlhash;
+
+       priv->dma_cap.vlhash = 0;
+       ret = __stmmac_test_vlanfilt(priv);
+       priv->dma_cap.vlhash = prev_cap;
+
+       return ret;
+}
+
+static int __stmmac_test_dvlanfilt(struct stmmac_priv *priv)
 {
        struct stmmac_packet_attrs attr = { };
        struct stmmac_test_priv *tpriv;
        struct sk_buff *skb = NULL;
        int ret = 0, i;
 
-       if (!priv->dma_cap.vlhash)
-               return -EOPNOTSUPP;
-
        tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
        if (!tpriv)
                return -ENOMEM;
@@ -974,6 +987,25 @@ cleanup:
        return ret;
 }
 
+static int stmmac_test_dvlanfilt(struct stmmac_priv *priv)
+{
+       if (!priv->dma_cap.vlhash)
+               return -EOPNOTSUPP;
+
+       return __stmmac_test_dvlanfilt(priv);
+}
+
+static int stmmac_test_dvlanfilt_perfect(struct stmmac_priv *priv)
+{
+       int ret, prev_cap = priv->dma_cap.vlhash;
+
+       priv->dma_cap.vlhash = 0;
+       ret = __stmmac_test_dvlanfilt(priv);
+       priv->dma_cap.vlhash = prev_cap;
+
+       return ret;
+}
+
 #ifdef CONFIG_NET_CLS_ACT
 static int stmmac_test_rxp(struct stmmac_priv *priv)
 {
@@ -1648,119 +1680,127 @@ static const struct stmmac_test {
        int (*fn)(struct stmmac_priv *priv);
 } stmmac_selftests[] = {
        {
-               .name = "MAC Loopback         ",
+               .name = "MAC Loopback               ",
                .lb = STMMAC_LOOPBACK_MAC,
                .fn = stmmac_test_mac_loopback,
        }, {
-               .name = "PHY Loopback         ",
+               .name = "PHY Loopback               ",
                .lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */
                .fn = stmmac_test_phy_loopback,
        }, {
-               .name = "MMC Counters         ",
+               .name = "MMC Counters               ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_mmc,
        }, {
-               .name = "EEE                  ",
+               .name = "EEE                        ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_eee,
        }, {
-               .name = "Hash Filter MC       ",
+               .name = "Hash Filter MC             ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_hfilt,
        }, {
-               .name = "Perfect Filter UC    ",
+               .name = "Perfect Filter UC          ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_pfilt,
        }, {
-               .name = "MC Filter            ",
+               .name = "MC Filter                  ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_mcfilt,
        }, {
-               .name = "UC Filter            ",
+               .name = "UC Filter                  ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_ucfilt,
        }, {
-               .name = "Flow Control         ",
+               .name = "Flow Control               ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_flowctrl,
        }, {
-               .name = "RSS                  ",
+               .name = "RSS                        ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_rss,
        }, {
-               .name = "VLAN Filtering       ",
+               .name = "VLAN Filtering             ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_vlanfilt,
        }, {
-               .name = "Double VLAN Filtering",
+               .name = "VLAN Filtering (perf)      ",
+               .lb = STMMAC_LOOPBACK_PHY,
+               .fn = stmmac_test_vlanfilt_perfect,
+       }, {
+               .name = "Double VLAN Filter         ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_dvlanfilt,
        }, {
-               .name = "Flexible RX Parser   ",
+               .name = "Double VLAN Filter (perf)  ",
+               .lb = STMMAC_LOOPBACK_PHY,
+               .fn = stmmac_test_dvlanfilt_perfect,
+       }, {
+               .name = "Flexible RX Parser         ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_rxp,
        }, {
-               .name = "SA Insertion (desc)  ",
+               .name = "SA Insertion (desc)        ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_desc_sai,
        }, {
-               .name = "SA Replacement (desc)",
+               .name = "SA Replacement (desc)      ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_desc_sar,
        }, {
-               .name = "SA Insertion (reg)  ",
+               .name = "SA Insertion (reg)         ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_reg_sai,
        }, {
-               .name = "SA Replacement (reg)",
+               .name = "SA Replacement (reg)       ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_reg_sar,
        }, {
-               .name = "VLAN TX Insertion   ",
+               .name = "VLAN TX Insertion          ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_vlanoff,
        }, {
-               .name = "SVLAN TX Insertion  ",
+               .name = "SVLAN TX Insertion         ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_svlanoff,
        }, {
-               .name = "L3 DA Filtering     ",
+               .name = "L3 DA Filtering            ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_l3filt_da,
        }, {
-               .name = "L3 SA Filtering     ",
+               .name = "L3 SA Filtering            ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_l3filt_sa,
        }, {
-               .name = "L4 DA TCP Filtering ",
+               .name = "L4 DA TCP Filtering        ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_l4filt_da_tcp,
        }, {
-               .name = "L4 SA TCP Filtering ",
+               .name = "L4 SA TCP Filtering        ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_l4filt_sa_tcp,
        }, {
-               .name = "L4 DA UDP Filtering ",
+               .name = "L4 DA UDP Filtering        ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_l4filt_da_udp,
        }, {
-               .name = "L4 SA UDP Filtering ",
+               .name = "L4 SA UDP Filtering        ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_l4filt_sa_udp,
        }, {
-               .name = "ARP Offload         ",
+               .name = "ARP Offload                ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_arpoffload,
        }, {
-               .name = "Jumbo Frame         ",
+               .name = "Jumbo Frame                ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_jumbo,
        }, {
-               .name = "Multichannel Jumbo  ",
+               .name = "Multichannel Jumbo         ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_mjumbo,
        }, {
-               .name = "Split Header        ",
+               .name = "Split Header               ",
                .lb = STMMAC_LOOPBACK_PHY,
                .fn = stmmac_test_sph,
        },
index f298d71..329671e 100644 (file)
@@ -2619,11 +2619,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
                                i);
                        goto no_phy_slave;
                }
-               slave_data->phy_if = of_get_phy_mode(slave_node);
-               if (slave_data->phy_if < 0) {
+               ret = of_get_phy_mode(slave_node, &slave_data->phy_if);
+               if (ret) {
                        dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
                                i);
-                       ret = slave_data->phy_if;
                        goto err_node_put;
                }
 
index 362c5a9..8bfa761 100644 (file)
@@ -275,7 +275,7 @@ struct cpsw_slave_data {
        struct device_node *slave_node;
        struct device_node *phy_node;
        char            phy_id[MII_BUS_ID_SIZE];
-       int             phy_if;
+       phy_interface_t phy_if;
        u8              mac_addr[ETH_ALEN];
        u16             dual_emac_res_vlan;     /* Reserved VLAN for DualEMAC */
        struct phy      *ifphy;
index 2c1fac3..86a3f42 100644 (file)
@@ -2291,6 +2291,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
        struct gbe_slave *slave = gbe_intf->slave;
        phy_interface_t phy_mode;
        bool has_phy = false;
+       int err;
 
        void (*hndlr)(struct net_device *) = gbe_adjust_link;
 
@@ -2320,11 +2321,11 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
                slave->phy_port_t = PORT_MII;
        } else if (slave->link_interface == RGMII_LINK_MAC_PHY) {
                has_phy = true;
-               phy_mode = of_get_phy_mode(slave->node);
+               err = of_get_phy_mode(slave->node, &phy_mode);
                /* if phy-mode is not present, default to
                 * PHY_INTERFACE_MODE_RGMII
                 */
-               if (phy_mode < 0)
+               if (err)
                        phy_mode = PHY_INTERFACE_MODE_RGMII;
 
                if (!phy_interface_mode_is_rgmii(phy_mode)) {
index 676006f..867726d 100644 (file)
@@ -1761,11 +1761,9 @@ static int axienet_probe(struct platform_device *pdev)
                        goto free_netdev;
                }
        } else {
-               lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-               if ((int)lp->phy_mode < 0) {
-                       ret = -EINVAL;
+               ret = of_get_phy_mode(pdev->dev.of_node, &lp->phy_mode);
+               if (ret)
                        goto free_netdev;
-               }
        }
 
        /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
index 670ef68..4209d1c 100644 (file)
@@ -853,6 +853,7 @@ struct multi_recv_comp {
 struct nvsc_rsc {
        const struct ndis_pkt_8021q_info *vlan;
        const struct ndis_tcp_ip_checksum_info *csum_info;
+       const u32 *hash_info;
        u8 is_last; /* last RNDIS msg in a vmtransfer_page */
        u32 cnt; /* #fragments in an RSC packet */
        u32 pktlen; /* Full packet length */
index 963509a..5fa5c49 100644 (file)
@@ -285,9 +285,9 @@ static inline u32 netvsc_get_hash(
                else if (flow.basic.n_proto == htons(ETH_P_IPV6))
                        hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
                else
-                       hash = 0;
+                       return 0;
 
-               skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
+               __skb_set_sw_hash(skb, hash, false);
        }
 
        return hash;
@@ -766,6 +766,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
        const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
        const struct ndis_tcp_ip_checksum_info *csum_info =
                                                nvchan->rsc.csum_info;
+       const u32 *hash_info = nvchan->rsc.hash_info;
        struct sk_buff *skb;
        int i;
 
@@ -795,14 +796,16 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
            skb->protocol == htons(ETH_P_IP))
                netvsc_comp_ipcsum(skb);
 
-       /* Do L4 checksum offload if enabled and present.
-        */
+       /* Do L4 checksum offload if enabled and present. */
        if (csum_info && (net->features & NETIF_F_RXCSUM)) {
                if (csum_info->receive.tcp_checksum_succeeded ||
                    csum_info->receive.udp_checksum_succeeded)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
 
+       if (hash_info)
+               skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
+
        if (vlan) {
                u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) |
                        (vlan->cfi ? VLAN_CFI_MASK : 0);
index abaf815..c061783 100644 (file)
@@ -358,6 +358,7 @@ static inline
 void rsc_add_data(struct netvsc_channel *nvchan,
                  const struct ndis_pkt_8021q_info *vlan,
                  const struct ndis_tcp_ip_checksum_info *csum_info,
+                 const u32 *hash_info,
                  void *data, u32 len)
 {
        u32 cnt = nvchan->rsc.cnt;
@@ -368,6 +369,7 @@ void rsc_add_data(struct netvsc_channel *nvchan,
                nvchan->rsc.vlan = vlan;
                nvchan->rsc.csum_info = csum_info;
                nvchan->rsc.pktlen = len;
+               nvchan->rsc.hash_info = hash_info;
        }
 
        nvchan->rsc.data[cnt] = data;
@@ -385,6 +387,7 @@ static int rndis_filter_receive_data(struct net_device *ndev,
        const struct ndis_tcp_ip_checksum_info *csum_info;
        const struct ndis_pkt_8021q_info *vlan;
        const struct rndis_pktinfo_id *pktinfo_id;
+       const u32 *hash_info;
        u32 data_offset;
        void *data;
        bool rsc_more = false;
@@ -411,6 +414,8 @@ static int rndis_filter_receive_data(struct net_device *ndev,
 
        csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO, 0);
 
+       hash_info = rndis_get_ppi(rndis_pkt, NBL_HASH_VALUE, 0);
+
        pktinfo_id = rndis_get_ppi(rndis_pkt, RNDIS_PKTINFO_ID, 1);
 
        data = (void *)msg + data_offset;
@@ -441,7 +446,8 @@ static int rndis_filter_receive_data(struct net_device *ndev,
         * rndis_pkt->data_len tell us the real data length, we only copy
         * the data packet to the stack, without the rndis trailer padding
         */
-       rsc_add_data(nvchan, vlan, csum_info, data, rndis_pkt->data_len);
+       rsc_add_data(nvchan, vlan, csum_info, hash_info,
+                    data, rndis_pkt->data_len);
 
        if (rsc_more)
                return NVSP_STAT_SUCCESS;
index ba3dfac..a706622 100644 (file)
@@ -108,8 +108,8 @@ static void ipvlan_port_destroy(struct net_device *dev)
 
 #define IPVLAN_FEATURES \
        (NETIF_F_SG | NETIF_F_CSUM_MASK | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
-        NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \
-        NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
+        NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_GSO_ROBUST | \
+        NETIF_F_GRO | NETIF_F_RXCSUM | \
         NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 
 #define IPVLAN_STATE_MASK \
index 09f1315..f4d8f62 100644 (file)
@@ -3,7 +3,7 @@
 obj-$(CONFIG_NETDEVSIM) += netdevsim.o
 
 netdevsim-objs := \
-       netdev.o dev.o fib.o bus.o
+       netdev.o dev.o fib.o bus.o health.o
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
 netdevsim-objs += \
index 1a0ff3d..6aeed0c 100644 (file)
@@ -283,6 +283,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
        nsim_bus_dev->dev.bus = &nsim_bus;
        nsim_bus_dev->dev.type = &nsim_bus_dev_type;
        nsim_bus_dev->port_count = port_count;
+       nsim_bus_dev->initial_net = current->nsproxy->net_ns;
 
        err = device_register(&nsim_bus_dev->dev);
        if (err)
index 54ca668..e59a882 100644 (file)
@@ -90,6 +90,10 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
                            &nsim_dev->test1);
        debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev,
                            &nsim_dev_take_snapshot_fops);
+       debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir,
+                           &nsim_dev->dont_allow_reload);
+       debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir,
+                           &nsim_dev->fail_reload);
        return 0;
 }
 
@@ -123,39 +127,6 @@ static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port)
        debugfs_remove_recursive(nsim_dev_port->ddir);
 }
 
-static struct net *nsim_devlink_net(struct devlink *devlink)
-{
-       return &init_net;
-}
-
-static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv)
-{
-       struct net *net = priv;
-
-       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
-}
-
-static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv)
-{
-       struct net *net = priv;
-
-       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
-}
-
-static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv)
-{
-       struct net *net = priv;
-
-       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
-}
-
-static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv)
-{
-       struct net *net = priv;
-
-       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
-}
-
 static int nsim_dev_resources_register(struct devlink *devlink)
 {
        struct devlink_resource_size_params params = {
@@ -163,9 +134,7 @@ static int nsim_dev_resources_register(struct devlink *devlink)
                .size_granularity = 1,
                .unit = DEVLINK_RESOURCE_UNIT_ENTRY
        };
-       struct net *net = nsim_devlink_net(devlink);
        int err;
-       u64 n;
 
        /* Resources for IPv4 */
        err = devlink_resource_register(devlink, "IPv4", (u64)-1,
@@ -177,8 +146,7 @@ static int nsim_dev_resources_register(struct devlink *devlink)
                goto out;
        }
 
-       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
-       err = devlink_resource_register(devlink, "fib", n,
+       err = devlink_resource_register(devlink, "fib", (u64)-1,
                                        NSIM_RESOURCE_IPV4_FIB,
                                        NSIM_RESOURCE_IPV4, &params);
        if (err) {
@@ -186,8 +154,7 @@ static int nsim_dev_resources_register(struct devlink *devlink)
                return err;
        }
 
-       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
-       err = devlink_resource_register(devlink, "fib-rules", n,
+       err = devlink_resource_register(devlink, "fib-rules", (u64)-1,
                                        NSIM_RESOURCE_IPV4_FIB_RULES,
                                        NSIM_RESOURCE_IPV4, &params);
        if (err) {
@@ -205,8 +172,7 @@ static int nsim_dev_resources_register(struct devlink *devlink)
                goto out;
        }
 
-       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
-       err = devlink_resource_register(devlink, "fib", n,
+       err = devlink_resource_register(devlink, "fib", (u64)-1,
                                        NSIM_RESOURCE_IPV6_FIB,
                                        NSIM_RESOURCE_IPV6, &params);
        if (err) {
@@ -214,8 +180,7 @@ static int nsim_dev_resources_register(struct devlink *devlink)
                return err;
        }
 
-       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
-       err = devlink_resource_register(devlink, "fib-rules", n,
+       err = devlink_resource_register(devlink, "fib-rules", (u64)-1,
                                        NSIM_RESOURCE_IPV6_FIB_RULES,
                                        NSIM_RESOURCE_IPV6, &params);
        if (err) {
@@ -223,22 +188,6 @@ static int nsim_dev_resources_register(struct devlink *devlink)
                return err;
        }
 
-       devlink_resource_occ_get_register(devlink,
-                                         NSIM_RESOURCE_IPV4_FIB,
-                                         nsim_dev_ipv4_fib_resource_occ_get,
-                                         net);
-       devlink_resource_occ_get_register(devlink,
-                                         NSIM_RESOURCE_IPV4_FIB_RULES,
-                                         nsim_dev_ipv4_fib_rules_res_occ_get,
-                                         net);
-       devlink_resource_occ_get_register(devlink,
-                                         NSIM_RESOURCE_IPV6_FIB,
-                                         nsim_dev_ipv6_fib_resource_occ_get,
-                                         net);
-       devlink_resource_occ_get_register(devlink,
-                                         NSIM_RESOURCE_IPV6_FIB_RULES,
-                                         nsim_dev_ipv6_fib_rules_res_occ_get,
-                                         net);
 out:
        return err;
 }
@@ -524,36 +473,48 @@ static void nsim_dev_traps_exit(struct devlink *devlink)
        kfree(nsim_dev->trap_data);
 }
 
-static int nsim_dev_reload_down(struct devlink *devlink,
+static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
+                                 struct netlink_ext_ack *extack);
+static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev);
+
+static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
                                struct netlink_ext_ack *extack)
 {
+       struct nsim_dev *nsim_dev = devlink_priv(devlink);
+
+       if (nsim_dev->dont_allow_reload) {
+               /* For testing purposes, user set debugfs dont_allow_reload
+                * value to true. So forbid it.
+                */
+               NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes");
+               return -EOPNOTSUPP;
+       }
+
+       nsim_dev_reload_destroy(nsim_dev);
        return 0;
 }
 
 static int nsim_dev_reload_up(struct devlink *devlink,
                              struct netlink_ext_ack *extack)
 {
-       enum nsim_resource_id res_ids[] = {
-               NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
-               NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
-       };
-       struct net *net = nsim_devlink_net(devlink);
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
-               int err;
-               u64 val;
+       struct nsim_dev *nsim_dev = devlink_priv(devlink);
 
-               err = devlink_resource_size_get(devlink, res_ids[i], &val);
-               if (!err) {
-                       err = nsim_fib_set_max(net, res_ids[i], val, extack);
-                       if (err)
-                               return err;
-               }
+       if (nsim_dev->fail_reload) {
+               /* For testing purposes, user set debugfs fail_reload
+                * value to true. Fail right away.
+                */
+               NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes");
+               return -EINVAL;
        }
-       nsim_devlink_param_load_driverinit_values(devlink);
 
-       return 0;
+       return nsim_dev_reload_create(nsim_dev, extack);
+}
+
+static int nsim_dev_info_get(struct devlink *devlink,
+                            struct devlink_info_req *req,
+                            struct netlink_ext_ack *extack)
+{
+       return devlink_info_driver_name_put(req, DRV_NAME);
 }
 
 #define NSIM_DEV_FLASH_SIZE 500000
@@ -649,6 +610,7 @@ nsim_dev_devlink_trap_action_set(struct devlink *devlink,
 static const struct devlink_ops nsim_dev_devlink_ops = {
        .reload_down = nsim_dev_reload_down,
        .reload_up = nsim_dev_reload_up,
+       .info_get = nsim_dev_info_get,
        .flash_update = nsim_dev_flash_update,
        .trap_init = nsim_dev_devlink_trap_init,
        .trap_action_set = nsim_dev_devlink_trap_action_set,
@@ -657,8 +619,141 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 #define NSIM_DEV_MAX_MACS_DEFAULT 32
 #define NSIM_DEV_TEST1_DEFAULT true
 
-static struct nsim_dev *
-nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
+static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
+                              unsigned int port_index)
+{
+       struct nsim_dev_port *nsim_dev_port;
+       struct devlink_port *devlink_port;
+       int err;
+
+       nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL);
+       if (!nsim_dev_port)
+               return -ENOMEM;
+       nsim_dev_port->port_index = port_index;
+
+       devlink_port = &nsim_dev_port->devlink_port;
+       devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+                              port_index + 1, 0, 0,
+                              nsim_dev->switch_id.id,
+                              nsim_dev->switch_id.id_len);
+       err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port,
+                                   port_index);
+       if (err)
+               goto err_port_free;
+
+       err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port);
+       if (err)
+               goto err_dl_port_unregister;
+
+       nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port);
+       if (IS_ERR(nsim_dev_port->ns)) {
+               err = PTR_ERR(nsim_dev_port->ns);
+               goto err_port_debugfs_exit;
+       }
+
+       devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev);
+       list_add(&nsim_dev_port->list, &nsim_dev->port_list);
+
+       return 0;
+
+err_port_debugfs_exit:
+       nsim_dev_port_debugfs_exit(nsim_dev_port);
+err_dl_port_unregister:
+       devlink_port_unregister(devlink_port);
+err_port_free:
+       kfree(nsim_dev_port);
+       return err;
+}
+
+static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port)
+{
+       struct devlink_port *devlink_port = &nsim_dev_port->devlink_port;
+
+       list_del(&nsim_dev_port->list);
+       devlink_port_type_clear(devlink_port);
+       nsim_destroy(nsim_dev_port->ns);
+       nsim_dev_port_debugfs_exit(nsim_dev_port);
+       devlink_port_unregister(devlink_port);
+       kfree(nsim_dev_port);
+}
+
+static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev)
+{
+       struct nsim_dev_port *nsim_dev_port, *tmp;
+
+       mutex_lock(&nsim_dev->port_list_lock);
+       list_for_each_entry_safe(nsim_dev_port, tmp,
+                                &nsim_dev->port_list, list)
+               __nsim_dev_port_del(nsim_dev_port);
+       mutex_unlock(&nsim_dev->port_list_lock);
+}
+
+static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev,
+                                unsigned int port_count)
+{
+       int i, err;
+
+       for (i = 0; i < port_count; i++) {
+               err = __nsim_dev_port_add(nsim_dev, i);
+               if (err)
+                       goto err_port_del_all;
+       }
+       return 0;
+
+err_port_del_all:
+       nsim_dev_port_del_all(nsim_dev);
+       return err;
+}
+
+static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
+                                 struct netlink_ext_ack *extack)
+{
+       struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev;
+       struct devlink *devlink;
+       int err;
+
+       devlink = priv_to_devlink(nsim_dev);
+       nsim_dev = devlink_priv(devlink);
+       INIT_LIST_HEAD(&nsim_dev->port_list);
+       mutex_init(&nsim_dev->port_list_lock);
+       nsim_dev->fw_update_status = true;
+
+       nsim_dev->fib_data = nsim_fib_create(devlink, extack);
+       if (IS_ERR(nsim_dev->fib_data))
+               return PTR_ERR(nsim_dev->fib_data);
+
+       nsim_devlink_param_load_driverinit_values(devlink);
+
+       err = nsim_dev_dummy_region_init(nsim_dev, devlink);
+       if (err)
+               goto err_fib_destroy;
+
+       err = nsim_dev_traps_init(devlink);
+       if (err)
+               goto err_dummy_region_exit;
+
+       err = nsim_dev_health_init(nsim_dev, devlink);
+       if (err)
+               goto err_traps_exit;
+
+       err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+       if (err)
+               goto err_health_exit;
+
+       return 0;
+
+err_health_exit:
+       nsim_dev_health_exit(nsim_dev);
+err_traps_exit:
+       nsim_dev_traps_exit(devlink);
+err_dummy_region_exit:
+       nsim_dev_dummy_region_exit(nsim_dev);
+err_fib_destroy:
+       nsim_fib_destroy(devlink, nsim_dev->fib_data);
+       return err;
+}
+
+static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev)
 {
        struct nsim_dev *nsim_dev;
        struct devlink *devlink;
@@ -667,6 +762,7 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
        devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev));
        if (!devlink)
                return ERR_PTR(-ENOMEM);
+       devlink_net_set(devlink, nsim_bus_dev->initial_net);
        nsim_dev = devlink_priv(devlink);
        nsim_dev->nsim_bus_dev = nsim_bus_dev;
        nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id);
@@ -681,9 +777,15 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
        if (err)
                goto err_devlink_free;
 
+       nsim_dev->fib_data = nsim_fib_create(devlink, NULL);
+       if (IS_ERR(nsim_dev->fib_data)) {
+               err = PTR_ERR(nsim_dev->fib_data);
+               goto err_resources_unregister;
+       }
+
        err = devlink_register(devlink, &nsim_bus_dev->dev);
        if (err)
-               goto err_resources_unregister;
+               goto err_fib_destroy;
 
        err = devlink_params_register(devlink, nsim_devlink_params,
                                      ARRAY_SIZE(nsim_devlink_params));
@@ -703,13 +805,25 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
        if (err)
                goto err_traps_exit;
 
-       err = nsim_bpf_dev_init(nsim_dev);
+       err = nsim_dev_health_init(nsim_dev, devlink);
        if (err)
                goto err_debugfs_exit;
 
+       err = nsim_bpf_dev_init(nsim_dev);
+       if (err)
+               goto err_health_exit;
+
+       err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+       if (err)
+               goto err_bpf_dev_exit;
+
        devlink_params_publish(devlink);
        return nsim_dev;
 
+err_bpf_dev_exit:
+       nsim_bpf_dev_exit(nsim_dev);
+err_health_exit:
+       nsim_dev_health_exit(nsim_dev);
 err_debugfs_exit:
        nsim_dev_debugfs_exit(nsim_dev);
 err_traps_exit:
@@ -721,6 +835,8 @@ err_params_unregister:
                                  ARRAY_SIZE(nsim_devlink_params));
 err_dl_unregister:
        devlink_unregister(devlink);
+err_fib_destroy:
+       nsim_fib_destroy(devlink, nsim_dev->fib_data);
 err_resources_unregister:
        devlink_resources_unregister(devlink, NULL);
 err_devlink_free:
@@ -728,89 +844,33 @@ err_devlink_free:
        return ERR_PTR(err);
 }
 
-static void nsim_dev_destroy(struct nsim_dev *nsim_dev)
+static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev)
 {
        struct devlink *devlink = priv_to_devlink(nsim_dev);
 
-       nsim_bpf_dev_exit(nsim_dev);
-       nsim_dev_debugfs_exit(nsim_dev);
+       if (devlink_is_reload_failed(devlink))
+               return;
+       nsim_dev_port_del_all(nsim_dev);
+       nsim_dev_health_exit(nsim_dev);
        nsim_dev_traps_exit(devlink);
        nsim_dev_dummy_region_exit(nsim_dev);
-       devlink_params_unregister(devlink, nsim_devlink_params,
-                                 ARRAY_SIZE(nsim_devlink_params));
-       devlink_unregister(devlink);
-       devlink_resources_unregister(devlink, NULL);
        mutex_destroy(&nsim_dev->port_list_lock);
-       devlink_free(devlink);
-}
-
-static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
-                              unsigned int port_index)
-{
-       struct nsim_dev_port *nsim_dev_port;
-       struct devlink_port *devlink_port;
-       int err;
-
-       nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL);
-       if (!nsim_dev_port)
-               return -ENOMEM;
-       nsim_dev_port->port_index = port_index;
-
-       devlink_port = &nsim_dev_port->devlink_port;
-       devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
-                              port_index + 1, 0, 0,
-                              nsim_dev->switch_id.id,
-                              nsim_dev->switch_id.id_len);
-       err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port,
-                                   port_index);
-       if (err)
-               goto err_port_free;
-
-       err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port);
-       if (err)
-               goto err_dl_port_unregister;
-
-       nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port);
-       if (IS_ERR(nsim_dev_port->ns)) {
-               err = PTR_ERR(nsim_dev_port->ns);
-               goto err_port_debugfs_exit;
-       }
-
-       devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev);
-       list_add(&nsim_dev_port->list, &nsim_dev->port_list);
-
-       return 0;
-
-err_port_debugfs_exit:
-       nsim_dev_port_debugfs_exit(nsim_dev_port);
-err_dl_port_unregister:
-       devlink_port_unregister(devlink_port);
-err_port_free:
-       kfree(nsim_dev_port);
-       return err;
+       nsim_fib_destroy(devlink, nsim_dev->fib_data);
 }
 
-static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port)
+static void nsim_dev_destroy(struct nsim_dev *nsim_dev)
 {
-       struct devlink_port *devlink_port = &nsim_dev_port->devlink_port;
-
-       list_del(&nsim_dev_port->list);
-       devlink_port_type_clear(devlink_port);
-       nsim_destroy(nsim_dev_port->ns);
-       nsim_dev_port_debugfs_exit(nsim_dev_port);
-       devlink_port_unregister(devlink_port);
-       kfree(nsim_dev_port);
-}
+       struct devlink *devlink = priv_to_devlink(nsim_dev);
 
-static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev)
-{
-       struct nsim_dev_port *nsim_dev_port, *tmp;
+       nsim_dev_reload_destroy(nsim_dev);
 
-       mutex_lock(&nsim_dev->port_list_lock);
-       list_for_each_entry_safe(nsim_dev_port, tmp,
-                                &nsim_dev->port_list, list)
-               __nsim_dev_port_del(nsim_dev_port);
-       mutex_unlock(&nsim_dev->port_list_lock);
+       nsim_bpf_dev_exit(nsim_dev);
+       nsim_dev_debugfs_exit(nsim_dev);
+       devlink_params_unregister(devlink, nsim_devlink_params,
+                                 ARRAY_SIZE(nsim_devlink_params));
+       devlink_unregister(devlink);
+       devlink_resources_unregister(devlink, NULL);
+       devlink_free(devlink);
 }
 
 int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
@@ -819,7 +879,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
        int i;
        int err;
 
-       nsim_dev = nsim_dev_create(nsim_bus_dev, nsim_bus_dev->port_count);
+       nsim_dev = nsim_dev_create(nsim_bus_dev);
        if (IS_ERR(nsim_dev))
                return PTR_ERR(nsim_dev);
        dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev);
@@ -844,7 +904,6 @@ void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev)
 {
        struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
 
-       nsim_dev_port_del_all(nsim_dev);
        nsim_dev_destroy(nsim_dev);
 }
 
index 1a251f7..13540de 100644 (file)
@@ -18,7 +18,7 @@
 #include <net/ip_fib.h>
 #include <net/ip6_fib.h>
 #include <net/fib_rules.h>
-#include <net/netns/generic.h>
+#include <net/net_namespace.h>
 
 #include "netdevsim.h"
 
@@ -33,15 +33,14 @@ struct nsim_per_fib_data {
 };
 
 struct nsim_fib_data {
+       struct notifier_block fib_nb;
        struct nsim_per_fib_data ipv4;
        struct nsim_per_fib_data ipv6;
 };
 
-static unsigned int nsim_fib_net_id;
-
-u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max)
+u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
+                    enum nsim_resource_id res_id, bool max)
 {
-       struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id);
        struct nsim_fib_entry *entry;
 
        switch (res_id) {
@@ -64,12 +63,10 @@ u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max)
        return max ? entry->max : entry->num;
 }
 
-int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
-                    struct netlink_ext_ack *extack)
+static void nsim_fib_set_max(struct nsim_fib_data *fib_data,
+                            enum nsim_resource_id res_id, u64 val)
 {
-       struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id);
        struct nsim_fib_entry *entry;
-       int err = 0;
 
        switch (res_id) {
        case NSIM_RESOURCE_IPV4_FIB:
@@ -85,20 +82,10 @@ int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
                entry = &fib_data->ipv6.rules;
                break;
        default:
-               return 0;
-       }
-
-       /* not allowing a new max to be less than curren occupancy
-        * --> no means of evicting entries
-        */
-       if (val < entry->num) {
-               NL_SET_ERR_MSG_MOD(extack, "New size is less than current occupancy");
-               err = -EINVAL;
-       } else {
-               entry->max = val;
+               WARN_ON(1);
+               return;
        }
-
-       return err;
+       entry->max = val;
 }
 
 static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add,
@@ -120,9 +107,9 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add,
        return err;
 }
 
-static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add)
+static int nsim_fib_rule_event(struct nsim_fib_data *data,
+                              struct fib_notifier_info *info, bool add)
 {
-       struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id);
        struct netlink_ext_ack *extack = info->extack;
        int err = 0;
 
@@ -157,9 +144,9 @@ static int nsim_fib_account(struct nsim_fib_entry *entry, bool add,
        return err;
 }
 
-static int nsim_fib_event(struct fib_notifier_info *info, bool add)
+static int nsim_fib_event(struct nsim_fib_data *data,
+                         struct fib_notifier_info *info, bool add)
 {
-       struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id);
        struct netlink_ext_ack *extack = info->extack;
        int err = 0;
 
@@ -178,18 +165,22 @@ static int nsim_fib_event(struct fib_notifier_info *info, bool add)
 static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
                             void *ptr)
 {
+       struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data,
+                                                 fib_nb);
        struct fib_notifier_info *info = ptr;
        int err = 0;
 
        switch (event) {
        case FIB_EVENT_RULE_ADD: /* fall through */
        case FIB_EVENT_RULE_DEL:
-               err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD);
+               err = nsim_fib_rule_event(data, info,
+                                         event == FIB_EVENT_RULE_ADD);
                break;
 
        case FIB_EVENT_ENTRY_ADD:  /* fall through */
        case FIB_EVENT_ENTRY_DEL:
-               err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD);
+               err = nsim_fib_event(data, info,
+                                    event == FIB_EVENT_ENTRY_ADD);
                break;
        }
 
@@ -199,69 +190,116 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
 /* inconsistent dump, trying again */
 static void nsim_fib_dump_inconsistent(struct notifier_block *nb)
 {
-       struct nsim_fib_data *data;
-       struct net *net;
+       struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data,
+                                                 fib_nb);
 
-       rcu_read_lock();
-       for_each_net_rcu(net) {
-               data = net_generic(net, nsim_fib_net_id);
+       data->ipv4.fib.num = 0ULL;
+       data->ipv4.rules.num = 0ULL;
+       data->ipv6.fib.num = 0ULL;
+       data->ipv6.rules.num = 0ULL;
+}
 
-               data->ipv4.fib.num = 0ULL;
-               data->ipv4.rules.num = 0ULL;
+static u64 nsim_fib_ipv4_resource_occ_get(void *priv)
+{
+       struct nsim_fib_data *data = priv;
 
-               data->ipv6.fib.num = 0ULL;
-               data->ipv6.rules.num = 0ULL;
-       }
-       rcu_read_unlock();
+       return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB, false);
 }
 
-static struct notifier_block nsim_fib_nb = {
-       .notifier_call = nsim_fib_event_nb,
-};
-
-/* Initialize per network namespace state */
-static int __net_init nsim_fib_netns_init(struct net *net)
+static u64 nsim_fib_ipv4_rules_res_occ_get(void *priv)
 {
-       struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id);
+       struct nsim_fib_data *data = priv;
 
-       data->ipv4.fib.max = (u64)-1;
-       data->ipv4.rules.max = (u64)-1;
+       return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB_RULES, false);
+}
 
-       data->ipv6.fib.max = (u64)-1;
-       data->ipv6.rules.max = (u64)-1;
+static u64 nsim_fib_ipv6_resource_occ_get(void *priv)
+{
+       struct nsim_fib_data *data = priv;
 
-       return 0;
+       return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB, false);
 }
 
-static struct pernet_operations nsim_fib_net_ops = {
-       .init = nsim_fib_netns_init,
-       .id   = &nsim_fib_net_id,
-       .size = sizeof(struct nsim_fib_data),
-};
+static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv)
+{
+       struct nsim_fib_data *data = priv;
+
+       return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false);
+}
 
-void nsim_fib_exit(void)
+static void nsim_fib_set_max_all(struct nsim_fib_data *data,
+                                struct devlink *devlink)
 {
-       unregister_fib_notifier(&nsim_fib_nb);
-       unregister_pernet_subsys(&nsim_fib_net_ops);
+       enum nsim_resource_id res_ids[] = {
+               NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
+               NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
+       };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(res_ids); i++) {
+               int err;
+               u64 val;
+
+               err = devlink_resource_size_get(devlink, res_ids[i], &val);
+               if (err)
+                       val = (u64) -1;
+               nsim_fib_set_max(data, res_ids[i], val);
+       }
 }
 
-int nsim_fib_init(void)
+struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
+                                     struct netlink_ext_ack *extack)
 {
+       struct nsim_fib_data *data;
        int err;
 
-       err = register_pernet_subsys(&nsim_fib_net_ops);
-       if (err < 0) {
-               pr_err("Failed to register pernet subsystem\n");
-               goto err_out;
-       }
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return ERR_PTR(-ENOMEM);
+
+       nsim_fib_set_max_all(data, devlink);
 
-       err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent);
-       if (err < 0) {
+       data->fib_nb.notifier_call = nsim_fib_event_nb;
+       err = register_fib_notifier(devlink_net(devlink), &data->fib_nb,
+                                   nsim_fib_dump_inconsistent, extack);
+       if (err) {
                pr_err("Failed to register fib notifier\n");
-               unregister_pernet_subsys(&nsim_fib_net_ops);
                goto err_out;
        }
 
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV4_FIB,
+                                         nsim_fib_ipv4_resource_occ_get,
+                                         data);
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV4_FIB_RULES,
+                                         nsim_fib_ipv4_rules_res_occ_get,
+                                         data);
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV6_FIB,
+                                         nsim_fib_ipv6_resource_occ_get,
+                                         data);
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV6_FIB_RULES,
+                                         nsim_fib_ipv6_rules_res_occ_get,
+                                         data);
+       return data;
+
 err_out:
-       return err;
+       kfree(data);
+       return ERR_PTR(err);
+}
+
+void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
+{
+       devlink_resource_occ_get_unregister(devlink,
+                                           NSIM_RESOURCE_IPV6_FIB_RULES);
+       devlink_resource_occ_get_unregister(devlink,
+                                           NSIM_RESOURCE_IPV6_FIB);
+       devlink_resource_occ_get_unregister(devlink,
+                                           NSIM_RESOURCE_IPV4_FIB_RULES);
+       devlink_resource_occ_get_unregister(devlink,
+                                           NSIM_RESOURCE_IPV4_FIB);
+       unregister_fib_notifier(devlink_net(devlink), &data->fib_nb);
+       kfree(data);
 }
diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c
new file mode 100644 (file)
index 0000000..2716235
--- /dev/null
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "netdevsim.h"
+
+static int
+nsim_dev_empty_reporter_dump(struct devlink_health_reporter *reporter,
+                            struct devlink_fmsg *fmsg, void *priv_ctx,
+                            struct netlink_ext_ack *extack)
+{
+       return 0;
+}
+
+static int
+nsim_dev_empty_reporter_diagnose(struct devlink_health_reporter *reporter,
+                                struct devlink_fmsg *fmsg,
+                                struct netlink_ext_ack *extack)
+{
+       return 0;
+}
+
+static const
+struct devlink_health_reporter_ops nsim_dev_empty_reporter_ops = {
+       .name = "empty",
+       .dump = nsim_dev_empty_reporter_dump,
+       .diagnose = nsim_dev_empty_reporter_diagnose,
+};
+
+struct nsim_dev_dummy_reporter_ctx {
+       char *break_msg;
+};
+
+static int
+nsim_dev_dummy_reporter_recover(struct devlink_health_reporter *reporter,
+                               void *priv_ctx,
+                               struct netlink_ext_ack *extack)
+{
+       struct nsim_dev_health *health = devlink_health_reporter_priv(reporter);
+       struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx;
+
+       if (health->fail_recover) {
+               /* For testing purposes, user set debugfs fail_recover
+                * value to true. Fail right away.
+                */
+               NL_SET_ERR_MSG_MOD(extack, "User setup the recover to fail for testing purposes");
+               return -EINVAL;
+       }
+       if (ctx) {
+               kfree(health->recovered_break_msg);
+               health->recovered_break_msg = kstrdup(ctx->break_msg,
+                                                     GFP_KERNEL);
+               if (!health->recovered_break_msg)
+                       return -ENOMEM;
+       }
+       return 0;
+}
+
+static int nsim_dev_dummy_fmsg_put(struct devlink_fmsg *fmsg, u32 binary_len)
+{
+       char *binary;
+       int err;
+       int i;
+
+       err = devlink_fmsg_bool_pair_put(fmsg, "test_bool", true);
+       if (err)
+               return err;
+       err = devlink_fmsg_u8_pair_put(fmsg, "test_u8", 1);
+       if (err)
+               return err;
+       err = devlink_fmsg_u32_pair_put(fmsg, "test_u32", 3);
+       if (err)
+               return err;
+       err = devlink_fmsg_u64_pair_put(fmsg, "test_u64", 4);
+       if (err)
+               return err;
+       err = devlink_fmsg_string_pair_put(fmsg, "test_string", "somestring");
+       if (err)
+               return err;
+
+       err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_binary");
+       if (err)
+               return err;
+       binary = kmalloc(binary_len, GFP_KERNEL);
+       if (!binary)
+               return -ENOMEM;
+       get_random_bytes(binary, binary_len);
+       err = devlink_fmsg_binary_put(fmsg, binary, binary_len);
+       kfree(binary);
+       if (err)
+               return err;
+       err = devlink_fmsg_arr_pair_nest_end(fmsg);
+       if (err)
+               return err;
+
+       err = devlink_fmsg_pair_nest_start(fmsg, "test_nest");
+       if (err)
+               return err;
+       err = devlink_fmsg_obj_nest_start(fmsg);
+       if (err)
+               return err;
+       err = devlink_fmsg_bool_pair_put(fmsg, "nested_test_bool", false);
+       if (err)
+               return err;
+       err = devlink_fmsg_u8_pair_put(fmsg, "nested_test_u8", false);
+       if (err)
+               return err;
+       err = devlink_fmsg_obj_nest_end(fmsg);
+       if (err)
+               return err;
+       err = devlink_fmsg_pair_nest_end(fmsg);
+       if (err)
+               return err;
+
+       err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_bool_array");
+       if (err)
+               return err;
+       for (i = 0; i < 10; i++) {
+               err = devlink_fmsg_bool_put(fmsg, true);
+               if (err)
+                       return err;
+       }
+       err = devlink_fmsg_arr_pair_nest_end(fmsg);
+       if (err)
+               return err;
+
+       err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u8_array");
+       if (err)
+               return err;
+       for (i = 0; i < 10; i++) {
+               err = devlink_fmsg_u8_put(fmsg, i);
+               if (err)
+                       return err;
+       }
+       err = devlink_fmsg_arr_pair_nest_end(fmsg);
+       if (err)
+               return err;
+
+       err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u32_array");
+       if (err)
+               return err;
+       for (i = 0; i < 10; i++) {
+               err = devlink_fmsg_u32_put(fmsg, i);
+               if (err)
+                       return err;
+       }
+       err = devlink_fmsg_arr_pair_nest_end(fmsg);
+       if (err)
+               return err;
+
+       err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u64_array");
+       if (err)
+               return err;
+       for (i = 0; i < 10; i++) {
+               err = devlink_fmsg_u64_put(fmsg, i);
+               if (err)
+                       return err;
+       }
+       err = devlink_fmsg_arr_pair_nest_end(fmsg);
+       if (err)
+               return err;
+
+       err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_array_of_objects");
+       if (err)
+               return err;
+       for (i = 0; i < 10; i++) {
+               err = devlink_fmsg_obj_nest_start(fmsg);
+               if (err)
+                       return err;
+               err = devlink_fmsg_bool_pair_put(fmsg,
+                                                "in_array_nested_test_bool",
+                                                false);
+               if (err)
+                       return err;
+               err = devlink_fmsg_u8_pair_put(fmsg,
+                                              "in_array_nested_test_u8",
+                                              i);
+               if (err)
+                       return err;
+               err = devlink_fmsg_obj_nest_end(fmsg);
+               if (err)
+                       return err;
+       }
+       return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int
+nsim_dev_dummy_reporter_dump(struct devlink_health_reporter *reporter,
+                            struct devlink_fmsg *fmsg, void *priv_ctx,
+                            struct netlink_ext_ack *extack)
+{
+       struct nsim_dev_health *health = devlink_health_reporter_priv(reporter);
+       struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx;
+       int err;
+
+       if (ctx) {
+               err = devlink_fmsg_string_pair_put(fmsg, "break_message",
+                                                  ctx->break_msg);
+               if (err)
+                       return err;
+       }
+       return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len);
+}
+
+static int
+nsim_dev_dummy_reporter_diagnose(struct devlink_health_reporter *reporter,
+                                struct devlink_fmsg *fmsg,
+                                struct netlink_ext_ack *extack)
+{
+       struct nsim_dev_health *health = devlink_health_reporter_priv(reporter);
+       int err;
+
+       if (health->recovered_break_msg) {
+               err = devlink_fmsg_string_pair_put(fmsg,
+                                                  "recovered_break_message",
+                                                  health->recovered_break_msg);
+               if (err)
+                       return err;
+       }
+       return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len);
+}
+
+static const
+struct devlink_health_reporter_ops nsim_dev_dummy_reporter_ops = {
+       .name = "dummy",
+       .recover = nsim_dev_dummy_reporter_recover,
+       .dump = nsim_dev_dummy_reporter_dump,
+       .diagnose = nsim_dev_dummy_reporter_diagnose,
+};
+
+static ssize_t nsim_dev_health_break_write(struct file *file,
+                                          const char __user *data,
+                                          size_t count, loff_t *ppos)
+{
+       struct nsim_dev_health *health = file->private_data;
+       struct nsim_dev_dummy_reporter_ctx ctx;
+       char *break_msg;
+       int err;
+
+       break_msg = kmalloc(count + 1, GFP_KERNEL);
+       if (!break_msg)
+               return -ENOMEM;
+
+       if (copy_from_user(break_msg, data, count)) {
+               err = -EFAULT;
+               goto out;
+       }
+       break_msg[count] = '\0';
+       if (count > 0 && break_msg[count - 1] == '\n')
+               break_msg[count - 1] = '\0';
+
+       ctx.break_msg = break_msg;
+       err = devlink_health_report(health->dummy_reporter, break_msg, &ctx);
+       if (err)
+               goto out;
+
+out:
+       kfree(break_msg);
+       return err ?: count;
+}
+
+static const struct file_operations nsim_dev_health_break_fops = {
+       .open = simple_open,
+       .write = nsim_dev_health_break_write,
+       .llseek = generic_file_llseek,
+};
+
+int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink)
+{
+       struct nsim_dev_health *health = &nsim_dev->health;
+       int err;
+
+       health->empty_reporter =
+               devlink_health_reporter_create(devlink,
+                                              &nsim_dev_empty_reporter_ops,
+                                              0, false, health);
+       if (IS_ERR(health->empty_reporter))
+               return PTR_ERR(health->empty_reporter);
+
+       health->dummy_reporter =
+               devlink_health_reporter_create(devlink,
+                                              &nsim_dev_dummy_reporter_ops,
+                                              0, false, health);
+       if (IS_ERR(health->dummy_reporter)) {
+               err = PTR_ERR(health->dummy_reporter);
+               goto err_empty_reporter_destroy;
+       }
+
+       health->ddir = debugfs_create_dir("health", nsim_dev->ddir);
+       if (IS_ERR_OR_NULL(health->ddir)) {
+               err = PTR_ERR_OR_ZERO(health->ddir) ?: -EINVAL;
+               goto err_dummy_reporter_destroy;
+       }
+
+       health->recovered_break_msg = NULL;
+       debugfs_create_file("break_health", 0200, health->ddir, health,
+                           &nsim_dev_health_break_fops);
+       health->binary_len = 16;
+       debugfs_create_u32("binary_len", 0600, health->ddir,
+                          &health->binary_len);
+       health->fail_recover = false;
+       debugfs_create_bool("fail_recover", 0600, health->ddir,
+                           &health->fail_recover);
+       return 0;
+
+err_dummy_reporter_destroy:
+       devlink_health_reporter_destroy(health->dummy_reporter);
+err_empty_reporter_destroy:
+       devlink_health_reporter_destroy(health->empty_reporter);
+       return err;
+}
+
+void nsim_dev_health_exit(struct nsim_dev *nsim_dev)
+{
+       struct nsim_dev_health *health = &nsim_dev->health;
+
+       debugfs_remove_recursive(health->ddir);
+       kfree(health->recovered_break_msg);
+       devlink_health_reporter_destroy(health->dummy_reporter);
+       devlink_health_reporter_destroy(health->empty_reporter);
+}
index 55f57f7..2908e0a 100644 (file)
@@ -290,6 +290,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
+       dev_net_set(dev, nsim_dev_net(nsim_dev));
        ns = netdev_priv(dev);
        ns->netdev = dev;
        ns->nsim_dev = nsim_dev;
@@ -357,18 +358,12 @@ static int __init nsim_module_init(void)
        if (err)
                goto err_dev_exit;
 
-       err = nsim_fib_init();
-       if (err)
-               goto err_bus_exit;
-
        err = rtnl_link_register(&nsim_link_ops);
        if (err)
-               goto err_fib_exit;
+               goto err_bus_exit;
 
        return 0;
 
-err_fib_exit:
-       nsim_fib_exit();
 err_bus_exit:
        nsim_bus_exit();
 err_dev_exit:
@@ -379,7 +374,6 @@ err_dev_exit:
 static void __exit nsim_module_exit(void)
 {
        rtnl_link_unregister(&nsim_link_ops);
-       nsim_fib_exit();
        nsim_bus_exit();
        nsim_dev_exit();
 }
index 66bf137..94df795 100644 (file)
@@ -134,6 +134,18 @@ enum nsim_resource_id {
        NSIM_RESOURCE_IPV6_FIB_RULES,
 };
 
+struct nsim_dev_health {
+       struct devlink_health_reporter *empty_reporter;
+       struct devlink_health_reporter *dummy_reporter;
+       struct dentry *ddir;
+       char *recovered_break_msg;
+       u32 binary_len;
+       bool fail_recover;
+};
+
+int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink);
+void nsim_dev_health_exit(struct nsim_dev *nsim_dev);
+
 struct nsim_dev_port {
        struct list_head list;
        struct devlink_port devlink_port;
@@ -161,9 +173,17 @@ struct nsim_dev {
        bool fw_update_status;
        u32 max_macs;
        bool test1;
+       bool dont_allow_reload;
+       bool fail_reload;
        struct devlink_region *dummy_region;
+       struct nsim_dev_health health;
 };
 
+static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev)
+{
+       return devlink_net(priv_to_devlink(nsim_dev));
+}
+
 int nsim_dev_init(void);
 void nsim_dev_exit(void);
 int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev);
@@ -173,11 +193,11 @@ int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev,
 int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
                      unsigned int port_index);
 
-int nsim_fib_init(void);
-void nsim_fib_exit(void);
-u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
-int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
-                    struct netlink_ext_ack *extack);
+struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
+                                     struct netlink_ext_ack *extack);
+void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data);
+u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
+                    enum nsim_resource_id res_id, bool max);
 
 #if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
 void nsim_ipsec_init(struct netdevsim *ns);
@@ -215,6 +235,9 @@ struct nsim_bus_dev {
        struct device dev;
        struct list_head list;
        unsigned int port_count;
+       struct net *initial_net; /* Purpose of this is to carry net pointer
+                                 * during the probe time only.
+                                 */
        unsigned int num_vfs;
        struct nsim_vf_config *vfconfigs;
 };
index 1eb5d4f..8e30db2 100644 (file)
@@ -62,6 +62,7 @@
 #define AT803X_DEBUG_REG_5                     0x05
 #define AT803X_DEBUG_TX_CLK_DLY_EN             BIT(8)
 
+#define ATH9331_PHY_ID 0x004dd041
 #define ATH8030_PHY_ID 0x004dd076
 #define ATH8031_PHY_ID 0x004dd074
 #define ATH8035_PHY_ID 0x004dd072
@@ -71,10 +72,6 @@ MODULE_DESCRIPTION("Atheros 803x PHY driver");
 MODULE_AUTHOR("Matus Ujhelyi");
 MODULE_LICENSE("GPL");
 
-struct at803x_priv {
-       bool phy_reset:1;
-};
-
 struct at803x_context {
        u16 bmcr;
        u16 advertise;
@@ -240,20 +237,6 @@ static int at803x_resume(struct phy_device *phydev)
        return phy_modify(phydev, MII_BMCR, BMCR_PDOWN | BMCR_ISOLATE, 0);
 }
 
-static int at803x_probe(struct phy_device *phydev)
-{
-       struct device *dev = &phydev->mdio.dev;
-       struct at803x_priv *priv;
-
-       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return -ENOMEM;
-
-       phydev->priv = priv;
-
-       return 0;
-}
-
 static int at803x_config_init(struct phy_device *phydev)
 {
        int ret;
@@ -430,7 +413,6 @@ static struct phy_driver at803x_driver[] = {
        .phy_id                 = ATH8035_PHY_ID,
        .name                   = "Atheros 8035 ethernet",
        .phy_id_mask            = AT803X_PHY_ID_MASK,
-       .probe                  = at803x_probe,
        .config_init            = at803x_config_init,
        .set_wol                = at803x_set_wol,
        .get_wol                = at803x_get_wol,
@@ -445,7 +427,6 @@ static struct phy_driver at803x_driver[] = {
        .phy_id                 = ATH8030_PHY_ID,
        .name                   = "Atheros 8030 ethernet",
        .phy_id_mask            = AT803X_PHY_ID_MASK,
-       .probe                  = at803x_probe,
        .config_init            = at803x_config_init,
        .link_change_notify     = at803x_link_change_notify,
        .set_wol                = at803x_set_wol,
@@ -460,7 +441,6 @@ static struct phy_driver at803x_driver[] = {
        .phy_id                 = ATH8031_PHY_ID,
        .name                   = "Atheros 8031 ethernet",
        .phy_id_mask            = AT803X_PHY_ID_MASK,
-       .probe                  = at803x_probe,
        .config_init            = at803x_config_init,
        .set_wol                = at803x_set_wol,
        .get_wol                = at803x_get_wol,
@@ -471,6 +451,16 @@ static struct phy_driver at803x_driver[] = {
        .aneg_done              = at803x_aneg_done,
        .ack_interrupt          = &at803x_ack_interrupt,
        .config_intr            = &at803x_config_intr,
+}, {
+       /* ATHEROS AR9331 */
+       PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
+       .name                   = "Atheros AR9331 built-in PHY",
+       .config_init            = at803x_config_init,
+       .suspend                = at803x_suspend,
+       .resume                 = at803x_resume,
+       /* PHY_BASIC_FEATURES */
+       .ack_interrupt          = &at803x_ack_interrupt,
+       .config_intr            = &at803x_config_intr,
 } };
 
 module_phy_driver(at803x_driver);
@@ -479,6 +469,7 @@ static struct mdio_device_id __maybe_unused atheros_tbl[] = {
        { ATH8030_PHY_ID, AT803X_PHY_ID_MASK },
        { ATH8031_PHY_ID, AT803X_PHY_ID_MASK },
        { ATH8035_PHY_ID, AT803X_PHY_ID_MASK },
+       { PHY_ID_MATCH_EXACT(ATH9331_PHY_ID) },
        { }
 };
 
index 937d005..7d68b28 100644 (file)
@@ -26,18 +26,13 @@ MODULE_DESCRIPTION("Broadcom PHY driver");
 MODULE_AUTHOR("Maciej W. Rozycki");
 MODULE_LICENSE("GPL");
 
+static int bcm54xx_config_clock_delay(struct phy_device *phydev);
+
 static int bcm54210e_config_init(struct phy_device *phydev)
 {
        int val;
 
-       val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
-       val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN;
-       val |= MII_BCM54XX_AUXCTL_MISC_WREN;
-       bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, val);
-
-       val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL);
-       val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN;
-       bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val);
+       bcm54xx_config_clock_delay(phydev);
 
        if (phydev->dev_flags & PHY_BRCM_EN_MASTER_MODE) {
                val = phy_read(phydev, MII_CTRL1000);
@@ -52,26 +47,7 @@ static int bcm54612e_config_init(struct phy_device *phydev)
 {
        int reg;
 
-       /* Clear TX internal delay unless requested. */
-       if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) &&
-           (phydev->interface != PHY_INTERFACE_MODE_RGMII_TXID)) {
-               /* Disable TXD to GTXCLK clock delay (default set) */
-               /* Bit 9 is the only field in shadow register 00011 */
-               bcm_phy_write_shadow(phydev, 0x03, 0);
-       }
-
-       /* Clear RX internal delay unless requested. */
-       if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) &&
-           (phydev->interface != PHY_INTERFACE_MODE_RGMII_RXID)) {
-               reg = bcm54xx_auxctl_read(phydev,
-                                         MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
-               /* Disable RXD to RXC delay (default set) */
-               reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN;
-               /* Clear shadow selector field */
-               reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MASK;
-               bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
-                                    MII_BCM54XX_AUXCTL_MISC_WREN | reg);
-       }
+       bcm54xx_config_clock_delay(phydev);
 
        /* Enable CLK125 MUX on LED4 if ref clock is enabled. */
        if (!(phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)) {
@@ -383,9 +359,9 @@ static int bcm5482_config_init(struct phy_device *phydev)
                /*
                 * Select 1000BASE-X register set (primary SerDes)
                 */
-               reg = bcm_phy_read_shadow(phydev, BCM5482_SHD_MODE);
-               bcm_phy_write_shadow(phydev, BCM5482_SHD_MODE,
-                                    reg | BCM5482_SHD_MODE_1000BX);
+               reg = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE);
+               bcm_phy_write_shadow(phydev, BCM54XX_SHD_MODE,
+                                    reg | BCM54XX_SHD_MODE_1000BX);
 
                /*
                 * LED1=ACTIVITYLED, LED3=LINKSPD[2]
@@ -451,12 +427,47 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
        return ret;
 }
 
+static int bcm54616s_probe(struct phy_device *phydev)
+{
+       int val, intf_sel;
+
+       val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE);
+       if (val < 0)
+               return val;
+
+       /* The PHY is strapped in RGMII-fiber mode when INTERF_SEL[1:0]
+        * is 01b, and the link between PHY and its link partner can be
+        * either 1000Base-X or 100Base-FX.
+        * RGMII-1000Base-X is properly supported, but RGMII-100Base-FX
+        * support is still missing as of now.
+        */
+       intf_sel = (val & BCM54XX_SHD_INTF_SEL_MASK) >> 1;
+       if (intf_sel == 1) {
+               val = bcm_phy_read_shadow(phydev, BCM54616S_SHD_100FX_CTRL);
+               if (val < 0)
+                       return val;
+
+               /* Bit 0 of the SerDes 100-FX Control register, when set
+                * to 1, sets the MII/RGMII -> 100BASE-FX configuration.
+                * When this bit is set to 0, it sets the GMII/RGMII ->
+                * 1000BASE-X configuration.
+                */
+               if (!(val & BCM54616S_100FX_MODE))
+                       phydev->dev_flags |= PHY_BCM_FLAGS_MODE_1000BX;
+       }
+
+       return 0;
+}
+
 static int bcm54616s_config_aneg(struct phy_device *phydev)
 {
        int ret;
 
        /* Aneg firsly. */
-       ret = genphy_config_aneg(phydev);
+       if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX)
+               ret = genphy_c37_config_aneg(phydev);
+       else
+               ret = genphy_config_aneg(phydev);
 
        /* Then we can set up the delay. */
        bcm54xx_config_clock_delay(phydev);
@@ -464,6 +475,18 @@ static int bcm54616s_config_aneg(struct phy_device *phydev)
        return ret;
 }
 
+static int bcm54616s_read_status(struct phy_device *phydev)
+{
+       int err;
+
+       if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX)
+               err = genphy_c37_read_status(phydev);
+       else
+               err = genphy_read_status(phydev);
+
+       return err;
+}
+
 static int brcm_phy_setbits(struct phy_device *phydev, int reg, int set)
 {
        int val;
@@ -655,6 +678,8 @@ static struct phy_driver broadcom_drivers[] = {
        .config_aneg    = bcm54616s_config_aneg,
        .ack_interrupt  = bcm_phy_ack_intr,
        .config_intr    = bcm_phy_config_intr,
+       .read_status    = bcm54616s_read_status,
+       .probe          = bcm54616s_probe,
 }, {
        .phy_id         = PHY_ID_BCM5464,
        .phy_id_mask    = 0xfffffff0,
index 37fceaf..0b95e7a 100644 (file)
@@ -12,6 +12,8 @@
 #include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 
 #include <dt-bindings/net/ti-dp83867.h>
 
@@ -21,8 +23,9 @@
 #define MII_DP83867_PHYCTRL    0x10
 #define MII_DP83867_MICR       0x12
 #define MII_DP83867_ISR                0x13
-#define DP83867_CTRL           0x1f
+#define DP83867_CFG2           0x14
 #define DP83867_CFG3           0x1e
+#define DP83867_CTRL           0x1f
 
 /* Extended Registers */
 #define DP83867_CFG4            0x0031
 #define DP83867_STRAP_STS1     0x006E
 #define DP83867_STRAP_STS2     0x006f
 #define DP83867_RGMIIDCTL      0x0086
+#define DP83867_RXFCFG         0x0134
+#define DP83867_RXFPMD1        0x0136
+#define DP83867_RXFPMD2        0x0137
+#define DP83867_RXFPMD3        0x0138
+#define DP83867_RXFSOP1        0x0139
+#define DP83867_RXFSOP2        0x013A
+#define DP83867_RXFSOP3        0x013B
 #define DP83867_IO_MUX_CFG     0x0170
 #define DP83867_SGMIICTL       0x00D3
 #define DP83867_10M_SGMII_CFG   0x016F
 /* SGMIICTL bits */
 #define DP83867_SGMII_TYPE             BIT(14)
 
+/* RXFCFG bits */
+#define DP83867_WOL_MAGIC_EN           BIT(0)
+#define DP83867_WOL_BCAST_EN           BIT(2)
+#define DP83867_WOL_UCAST_EN           BIT(4)
+#define DP83867_WOL_SEC_EN             BIT(5)
+#define DP83867_WOL_ENH_MAC            BIT(7)
+
 /* STRAP_STS1 bits */
 #define DP83867_STRAP_STS1_RESERVED            BIT(11)
 
 #define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK      (0x1f << 8)
 #define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT     8
 
+/* CFG3 bits */
+#define DP83867_CFG3_INT_OE                    BIT(7)
+#define DP83867_CFG3_ROBUST_AUTO_MDIX          BIT(9)
+
 /* CFG4 bits */
 #define DP83867_CFG4_PORT_MIRROR_EN              BIT(0)
 
@@ -126,6 +147,115 @@ static int dp83867_ack_interrupt(struct phy_device *phydev)
        return 0;
 }
 
+static int dp83867_set_wol(struct phy_device *phydev,
+                          struct ethtool_wolinfo *wol)
+{
+       struct net_device *ndev = phydev->attached_dev;
+       u16 val_rxcfg, val_micr;
+       u8 *mac;
+
+       val_rxcfg = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_RXFCFG);
+       val_micr = phy_read(phydev, MII_DP83867_MICR);
+
+       if (wol->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_UCAST |
+                           WAKE_BCAST)) {
+               val_rxcfg |= DP83867_WOL_ENH_MAC;
+               val_micr |= MII_DP83867_MICR_WOL_INT_EN;
+
+               if (wol->wolopts & WAKE_MAGIC) {
+                       mac = (u8 *)ndev->dev_addr;
+
+                       if (!is_valid_ether_addr(mac))
+                               return -EINVAL;
+
+                       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RXFPMD1,
+                                     (mac[1] << 8 | mac[0]));
+                       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RXFPMD2,
+                                     (mac[3] << 8 | mac[2]));
+                       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RXFPMD3,
+                                     (mac[5] << 8 | mac[4]));
+
+                       val_rxcfg |= DP83867_WOL_MAGIC_EN;
+               } else {
+                       val_rxcfg &= ~DP83867_WOL_MAGIC_EN;
+               }
+
+               if (wol->wolopts & WAKE_MAGICSECURE) {
+                       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RXFSOP1,
+                                     (wol->sopass[1] << 8) | wol->sopass[0]);
+                       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RXFSOP2,
+                                     (wol->sopass[3] << 8) | wol->sopass[2]);
+                       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RXFSOP3,
+                                     (wol->sopass[5] << 8) | wol->sopass[4]);
+
+                       val_rxcfg |= DP83867_WOL_SEC_EN;
+               } else {
+                       val_rxcfg &= ~DP83867_WOL_SEC_EN;
+               }
+
+               if (wol->wolopts & WAKE_UCAST)
+                       val_rxcfg |= DP83867_WOL_UCAST_EN;
+               else
+                       val_rxcfg &= ~DP83867_WOL_UCAST_EN;
+
+               if (wol->wolopts & WAKE_BCAST)
+                       val_rxcfg |= DP83867_WOL_BCAST_EN;
+               else
+                       val_rxcfg &= ~DP83867_WOL_BCAST_EN;
+       } else {
+               val_rxcfg &= ~DP83867_WOL_ENH_MAC;
+               val_micr &= ~MII_DP83867_MICR_WOL_INT_EN;
+       }
+
+       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RXFCFG, val_rxcfg);
+       phy_write(phydev, MII_DP83867_MICR, val_micr);
+
+       return 0;
+}
+
+static void dp83867_get_wol(struct phy_device *phydev,
+                           struct ethtool_wolinfo *wol)
+{
+       u16 value, sopass_val;
+
+       wol->supported = (WAKE_UCAST | WAKE_BCAST | WAKE_MAGIC |
+                       WAKE_MAGICSECURE);
+       wol->wolopts = 0;
+
+       value = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_RXFCFG);
+
+       if (value & DP83867_WOL_UCAST_EN)
+               wol->wolopts |= WAKE_UCAST;
+
+       if (value & DP83867_WOL_BCAST_EN)
+               wol->wolopts |= WAKE_BCAST;
+
+       if (value & DP83867_WOL_MAGIC_EN)
+               wol->wolopts |= WAKE_MAGIC;
+
+       if (value & DP83867_WOL_SEC_EN) {
+               sopass_val = phy_read_mmd(phydev, DP83867_DEVADDR,
+                                         DP83867_RXFSOP1);
+               wol->sopass[0] = (sopass_val & 0xff);
+               wol->sopass[1] = (sopass_val >> 8);
+
+               sopass_val = phy_read_mmd(phydev, DP83867_DEVADDR,
+                                         DP83867_RXFSOP2);
+               wol->sopass[2] = (sopass_val & 0xff);
+               wol->sopass[3] = (sopass_val >> 8);
+
+               sopass_val = phy_read_mmd(phydev, DP83867_DEVADDR,
+                                         DP83867_RXFSOP3);
+               wol->sopass[4] = (sopass_val & 0xff);
+               wol->sopass[5] = (sopass_val >> 8);
+
+               wol->wolopts |= WAKE_MAGICSECURE;
+       }
+
+       if (!(value & DP83867_WOL_ENH_MAC))
+               wol->wolopts = 0;
+}
+
 static int dp83867_config_intr(struct phy_device *phydev)
 {
        int micr_status;
@@ -295,7 +425,7 @@ static int dp83867_probe(struct phy_device *phydev)
 
        phydev->priv = dp83867;
 
-       return 0;
+       return dp83867_of_init(phydev);
 }
 
 static int dp83867_config_init(struct phy_device *phydev)
@@ -304,10 +434,6 @@ static int dp83867_config_init(struct phy_device *phydev)
        int ret, val, bs;
        u16 delay;
 
-       ret = dp83867_of_init(phydev);
-       if (ret)
-               return ret;
-
        /* RX_DV/RX_CTRL strapped in mode 1 or mode 2 workaround */
        if (dp83867->rxctrl_strap_quirk)
                phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4,
@@ -410,12 +536,13 @@ static int dp83867_config_init(struct phy_device *phydev)
                phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_SGMIICTL, val);
        }
 
+       val = phy_read(phydev, DP83867_CFG3);
        /* Enable Interrupt output INT_OE in CFG3 register */
-       if (phy_interrupt_is_valid(phydev)) {
-               val = phy_read(phydev, DP83867_CFG3);
-               val |= BIT(7);
-               phy_write(phydev, DP83867_CFG3, val);
-       }
+       if (phy_interrupt_is_valid(phydev))
+               val |= DP83867_CFG3_INT_OE;
+
+       val |= DP83867_CFG3_ROBUST_AUTO_MDIX;
+       phy_write(phydev, DP83867_CFG3, val);
 
        if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP)
                dp83867_config_port_mirroring(phydev);
@@ -463,6 +590,9 @@ static struct phy_driver dp83867_driver[] = {
                .config_init    = dp83867_config_init,
                .soft_reset     = dp83867_phy_reset,
 
+               .get_wol        = dp83867_get_wol,
+               .set_wol        = dp83867_set_wol,
+
                /* IRQ related */
                .ack_interrupt  = dp83867_ack_interrupt,
                .config_intr    = dp83867_config_intr,
index a779613..b1fbd19 100644 (file)
 
 #define MII_M1011_PHY_SCR                      0x10
 #define MII_M1011_PHY_SCR_DOWNSHIFT_EN         BIT(11)
-#define MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT      12
-#define MII_M1011_PHY_SRC_DOWNSHIFT_MASK       0x7800
+#define MII_M1011_PHY_SCR_DOWNSHIFT_MASK       GENMASK(14, 12)
+#define MII_M1011_PHY_SCR_DOWNSHIFT_MAX                8
 #define MII_M1011_PHY_SCR_MDI                  (0x0 << 5)
 #define MII_M1011_PHY_SCR_MDI_X                        (0x1 << 5)
 #define MII_M1011_PHY_SCR_AUTO_CROSS           (0x3 << 5)
 
+#define MII_M1011_PHY_SSR                      0x11
+#define MII_M1011_PHY_SSR_DOWNSHIFT            BIT(5)
+
 #define MII_M1111_PHY_LED_CONTROL      0x18
 #define MII_M1111_PHY_LED_DIRECT       0x4100
 #define MII_M1111_PHY_LED_COMBINE      0x411c
 #define MII_M1111_PHY_EXT_CR           0x14
+#define MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK    GENMASK(11, 9)
+#define MII_M1111_PHY_EXT_CR_DOWNSHIFT_MAX     8
+#define MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN      BIT(8)
 #define MII_M1111_RGMII_RX_DELAY       BIT(7)
 #define MII_M1111_RGMII_TX_DELAY       BIT(1)
 #define MII_M1111_PHY_EXT_SR           0x1b
@@ -273,23 +279,6 @@ static int marvell_set_polarity(struct phy_device *phydev, int polarity)
        return val != reg;
 }
 
-static int marvell_set_downshift(struct phy_device *phydev, bool enable,
-                                u8 retries)
-{
-       int reg;
-
-       reg = phy_read(phydev, MII_M1011_PHY_SCR);
-       if (reg < 0)
-               return reg;
-
-       reg &= MII_M1011_PHY_SRC_DOWNSHIFT_MASK;
-       reg |= ((retries - 1) << MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT);
-       if (enable)
-               reg |= MII_M1011_PHY_SCR_DOWNSHIFT_EN;
-
-       return phy_write(phydev, MII_M1011_PHY_SCR, reg);
-}
-
 static int marvell_config_aneg(struct phy_device *phydev)
 {
        int changed = 0;
@@ -658,41 +647,6 @@ static int marvell_config_init(struct phy_device *phydev)
        return marvell_of_reg_init(phydev);
 }
 
-static int m88e1116r_config_init(struct phy_device *phydev)
-{
-       int err;
-
-       err = genphy_soft_reset(phydev);
-       if (err < 0)
-               return err;
-
-       msleep(500);
-
-       err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
-       if (err < 0)
-               return err;
-
-       err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
-       if (err < 0)
-               return err;
-
-       err = marvell_set_downshift(phydev, true, 8);
-       if (err < 0)
-               return err;
-
-       if (phy_interface_is_rgmii(phydev)) {
-               err = m88e1121_config_aneg_rgmii_delays(phydev);
-               if (err < 0)
-                       return err;
-       }
-
-       err = genphy_soft_reset(phydev);
-       if (err < 0)
-               return err;
-
-       return marvell_config_init(phydev);
-}
-
 static int m88e3016_config_init(struct phy_device *phydev)
 {
        int ret;
@@ -833,6 +787,172 @@ static int m88e1111_config_init(struct phy_device *phydev)
        return genphy_soft_reset(phydev);
 }
 
+static int m88e1111_get_downshift(struct phy_device *phydev, u8 *data)
+{
+       int val, cnt, enable;
+
+       val = phy_read(phydev, MII_M1111_PHY_EXT_CR);
+       if (val < 0)
+               return val;
+
+       enable = FIELD_GET(MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN, val);
+       cnt = FIELD_GET(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, val) + 1;
+
+       *data = enable ? cnt : DOWNSHIFT_DEV_DISABLE;
+
+       return 0;
+}
+
+static int m88e1111_set_downshift(struct phy_device *phydev, u8 cnt)
+{
+       int val;
+
+       if (cnt > MII_M1111_PHY_EXT_CR_DOWNSHIFT_MAX)
+               return -E2BIG;
+
+       if (!cnt)
+               return phy_clear_bits(phydev, MII_M1111_PHY_EXT_CR,
+                                     MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN);
+
+       val = MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN;
+       val |= FIELD_PREP(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, cnt - 1);
+
+       return phy_modify(phydev, MII_M1111_PHY_EXT_CR,
+                         MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN |
+                         MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK,
+                         val);
+}
+
+static int m88e1111_get_tunable(struct phy_device *phydev,
+                               struct ethtool_tunable *tuna, void *data)
+{
+       switch (tuna->id) {
+       case ETHTOOL_PHY_DOWNSHIFT:
+               return m88e1111_get_downshift(phydev, data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int m88e1111_set_tunable(struct phy_device *phydev,
+                               struct ethtool_tunable *tuna, const void *data)
+{
+       switch (tuna->id) {
+       case ETHTOOL_PHY_DOWNSHIFT:
+               return m88e1111_set_downshift(phydev, *(const u8 *)data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int m88e1011_get_downshift(struct phy_device *phydev, u8 *data)
+{
+       int val, cnt, enable;
+
+       val = phy_read(phydev, MII_M1011_PHY_SCR);
+       if (val < 0)
+               return val;
+
+       enable = FIELD_GET(MII_M1011_PHY_SCR_DOWNSHIFT_EN, val);
+       cnt = FIELD_GET(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, val) + 1;
+
+       *data = enable ? cnt : DOWNSHIFT_DEV_DISABLE;
+
+       return 0;
+}
+
+static int m88e1011_set_downshift(struct phy_device *phydev, u8 cnt)
+{
+       int val;
+
+       if (cnt > MII_M1011_PHY_SCR_DOWNSHIFT_MAX)
+               return -E2BIG;
+
+       if (!cnt)
+               return phy_clear_bits(phydev, MII_M1011_PHY_SCR,
+                                     MII_M1011_PHY_SCR_DOWNSHIFT_EN);
+
+       val = MII_M1011_PHY_SCR_DOWNSHIFT_EN;
+       val |= FIELD_PREP(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, cnt - 1);
+
+       return phy_modify(phydev, MII_M1011_PHY_SCR,
+                         MII_M1011_PHY_SCR_DOWNSHIFT_EN |
+                         MII_M1011_PHY_SCR_DOWNSHIFT_MASK,
+                         val);
+}
+
+static int m88e1011_get_tunable(struct phy_device *phydev,
+                               struct ethtool_tunable *tuna, void *data)
+{
+       switch (tuna->id) {
+       case ETHTOOL_PHY_DOWNSHIFT:
+               return m88e1011_get_downshift(phydev, data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int m88e1011_set_tunable(struct phy_device *phydev,
+                               struct ethtool_tunable *tuna, const void *data)
+{
+       switch (tuna->id) {
+       case ETHTOOL_PHY_DOWNSHIFT:
+               return m88e1011_set_downshift(phydev, *(const u8 *)data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static void m88e1011_link_change_notify(struct phy_device *phydev)
+{
+       int status;
+
+       if (phydev->state != PHY_RUNNING)
+               return;
+
+       /* we may be on fiber page currently */
+       status = phy_read_paged(phydev, MII_MARVELL_COPPER_PAGE,
+                               MII_M1011_PHY_SSR);
+
+       if (status > 0 && status & MII_M1011_PHY_SSR_DOWNSHIFT)
+               phydev_warn(phydev, "Downshift occurred! Cabling may be defective.\n");
+}
+
+static int m88e1116r_config_init(struct phy_device *phydev)
+{
+       int err;
+
+       err = genphy_soft_reset(phydev);
+       if (err < 0)
+               return err;
+
+       msleep(500);
+
+       err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
+       if (err < 0)
+               return err;
+
+       err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
+       if (err < 0)
+               return err;
+
+       err = m88e1011_set_downshift(phydev, 8);
+       if (err < 0)
+               return err;
+
+       if (phy_interface_is_rgmii(phydev)) {
+               err = m88e1121_config_aneg_rgmii_delays(phydev);
+               if (err < 0)
+                       return err;
+       }
+
+       err = genphy_soft_reset(phydev);
+       if (err < 0)
+               return err;
+
+       return marvell_config_init(phydev);
+}
+
 static int m88e1318_config_init(struct phy_device *phydev)
 {
        if (phy_interrupt_is_valid(phydev)) {
@@ -1117,6 +1237,8 @@ static int m88e1540_get_tunable(struct phy_device *phydev,
        switch (tuna->id) {
        case ETHTOOL_PHY_FAST_LINK_DOWN:
                return m88e1540_get_fld(phydev, data);
+       case ETHTOOL_PHY_DOWNSHIFT:
+               return m88e1011_get_downshift(phydev, data);
        default:
                return -EOPNOTSUPP;
        }
@@ -1128,6 +1250,8 @@ static int m88e1540_set_tunable(struct phy_device *phydev,
        switch (tuna->id) {
        case ETHTOOL_PHY_FAST_LINK_DOWN:
                return m88e1540_set_fld(phydev, data);
+       case ETHTOOL_PHY_DOWNSHIFT:
+               return m88e1011_set_downshift(phydev, *(const u8 *)data);
        default:
                return -EOPNOTSUPP;
        }
@@ -2163,6 +2287,9 @@ static struct phy_driver marvell_drivers[] = {
                .get_sset_count = marvell_get_sset_count,
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
+               .get_tunable = m88e1011_get_tunable,
+               .set_tunable = m88e1011_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E1111,
@@ -2182,6 +2309,9 @@ static struct phy_driver marvell_drivers[] = {
                .get_sset_count = marvell_get_sset_count,
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
+               .get_tunable = m88e1111_get_tunable,
+               .set_tunable = m88e1111_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E1118,
@@ -2220,6 +2350,9 @@ static struct phy_driver marvell_drivers[] = {
                .get_sset_count = marvell_get_sset_count,
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
+               .get_tunable = m88e1011_get_tunable,
+               .set_tunable = m88e1011_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E1318S,
@@ -2261,6 +2394,9 @@ static struct phy_driver marvell_drivers[] = {
                .get_sset_count = marvell_get_sset_count,
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
+               .get_tunable = m88e1111_get_tunable,
+               .set_tunable = m88e1111_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E1149R,
@@ -2314,6 +2450,9 @@ static struct phy_driver marvell_drivers[] = {
                .get_sset_count = marvell_get_sset_count,
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
+               .get_tunable = m88e1011_get_tunable,
+               .set_tunable = m88e1011_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E1510,
@@ -2337,6 +2476,9 @@ static struct phy_driver marvell_drivers[] = {
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
                .set_loopback = genphy_loopback,
+               .get_tunable = m88e1011_get_tunable,
+               .set_tunable = m88e1011_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E1540,
@@ -2359,6 +2501,7 @@ static struct phy_driver marvell_drivers[] = {
                .get_stats = marvell_get_stats,
                .get_tunable = m88e1540_get_tunable,
                .set_tunable = m88e1540_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E1545,
@@ -2379,6 +2522,9 @@ static struct phy_driver marvell_drivers[] = {
                .get_sset_count = marvell_get_sset_count,
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
+               .get_tunable = m88e1540_get_tunable,
+               .set_tunable = m88e1540_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
        {
                .phy_id = MARVELL_PHY_ID_88E3016,
@@ -2421,6 +2567,7 @@ static struct phy_driver marvell_drivers[] = {
                .get_stats = marvell_get_stats,
                .get_tunable = m88e1540_get_tunable,
                .set_tunable = m88e1540_set_tunable,
+               .link_change_notify = m88e1011_link_change_notify,
        },
 };
 
index 7ada1fd..805cda3 100644 (file)
@@ -895,7 +895,7 @@ static void vsc85xx_tr_write(struct phy_device *phydev, u16 addr, u32 val)
 static int vsc8531_pre_init_seq_set(struct phy_device *phydev)
 {
        int rc;
-       const struct reg_val init_seq[] = {
+       static const struct reg_val init_seq[] = {
                {0x0f90, 0x00688980},
                {0x0696, 0x00000003},
                {0x07fa, 0x0050100f},
@@ -939,7 +939,7 @@ out_unlock:
 
 static int vsc85xx_eee_init_seq_set(struct phy_device *phydev)
 {
-       const struct reg_val init_eee[] = {
+       static const struct reg_val init_eee[] = {
                {0x0f82, 0x0012b00a},
                {0x1686, 0x00000004},
                {0x168c, 0x00d2c46f},
@@ -1224,7 +1224,7 @@ out:
 /* bus->mdio_lock should be locked when using this function */
 static int vsc8574_config_pre_init(struct phy_device *phydev)
 {
-       const struct reg_val pre_init1[] = {
+       static const struct reg_val pre_init1[] = {
                {0x0fae, 0x000401bd},
                {0x0fac, 0x000f000f},
                {0x17a0, 0x00a0f147},
@@ -1272,7 +1272,7 @@ static int vsc8574_config_pre_init(struct phy_device *phydev)
                {0x0fee, 0x0004a6a1},
                {0x0ffe, 0x00b01807},
        };
-       const struct reg_val pre_init2[] = {
+       static const struct reg_val pre_init2[] = {
                {0x0486, 0x0008a518},
                {0x0488, 0x006dc696},
                {0x048a, 0x00000912},
@@ -1427,7 +1427,7 @@ out:
 /* bus->mdio_lock should be locked when using this function */
 static int vsc8584_config_pre_init(struct phy_device *phydev)
 {
-       const struct reg_val pre_init1[] = {
+       static const struct reg_val pre_init1[] = {
                {0x07fa, 0x0050100f},
                {0x1688, 0x00049f81},
                {0x0f90, 0x00688980},
@@ -1451,7 +1451,7 @@ static int vsc8584_config_pre_init(struct phy_device *phydev)
                {0x16b2, 0x00007000},
                {0x16b4, 0x00000814},
        };
-       const struct reg_val pre_init2[] = {
+       static const struct reg_val pre_init2[] = {
                {0x0486, 0x0008a518},
                {0x0488, 0x006dc696},
                {0x048a, 0x00000912},
@@ -1786,7 +1786,7 @@ static int vsc8514_config_pre_init(struct phy_device *phydev)
         * values to handle hardware performance of PHY. They
         * are set at Power-On state and remain until PHY Reset.
         */
-       const struct reg_val pre_init1[] = {
+       static const struct reg_val pre_init1[] = {
                {0x0f90, 0x00688980},
                {0x0786, 0x00000003},
                {0x07fa, 0x0050100f},
index 9412669..5458ed1 100644 (file)
@@ -8,7 +8,7 @@
 
 const char *phy_speed_to_str(int speed)
 {
-       BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 69,
+       BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 74,
                "Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
                "If a speed or mode has been added please update phy_speed_to_str "
                "and the PHY settings array.\n");
@@ -42,6 +42,8 @@ const char *phy_speed_to_str(int speed)
                return "100Gbps";
        case SPEED_200000:
                return "200Gbps";
+       case SPEED_400000:
+               return "400Gbps";
        case SPEED_UNKNOWN:
                return "Unknown";
        default:
@@ -70,6 +72,12 @@ EXPORT_SYMBOL_GPL(phy_duplex_to_str);
                               .bit = ETHTOOL_LINK_MODE_ ## b ## _BIT}
 
 static const struct phy_setting settings[] = {
+       /* 400G */
+       PHY_SETTING( 400000, FULL, 400000baseCR8_Full           ),
+       PHY_SETTING( 400000, FULL, 400000baseKR8_Full           ),
+       PHY_SETTING( 400000, FULL, 400000baseLR8_ER8_FR8_Full   ),
+       PHY_SETTING( 400000, FULL, 400000baseDR8_Full           ),
+       PHY_SETTING( 400000, FULL, 400000baseSR8_Full           ),
        /* 200G */
        PHY_SETTING( 200000, FULL, 200000baseCR4_Full           ),
        PHY_SETTING( 200000, FULL, 200000baseKR4_Full           ),
@@ -689,11 +697,17 @@ EXPORT_SYMBOL_GPL(phy_modify_mmd);
 
 static int __phy_read_page(struct phy_device *phydev)
 {
+       if (WARN_ONCE(!phydev->drv->read_page, "read_page callback not available, PHY driver not loaded?\n"))
+               return -EOPNOTSUPP;
+
        return phydev->drv->read_page(phydev);
 }
 
 static int __phy_write_page(struct phy_device *phydev, int page)
 {
+       if (WARN_ONCE(!phydev->drv->write_page, "write_page callback not available, PHY driver not loaded?\n"))
+               return -EOPNOTSUPP;
+
        return phydev->drv->write_page(phydev, page);
 }
 
index adb66a2..fa71998 100644 (file)
@@ -1270,7 +1270,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
                        phydev_err(phydev, "error creating 'phy_standalone' sysfs entry\n");
        }
 
-       phydev->dev_flags = flags;
+       phydev->dev_flags |= flags;
 
        phydev->interface = interface;
 
@@ -1607,6 +1607,40 @@ static int genphy_config_advert(struct phy_device *phydev)
        return changed;
 }
 
+/**
+ * genphy_c37_config_advert - sanitize and advertise auto-negotiation parameters
+ * @phydev: target phy_device struct
+ *
+ * Description: Writes MII_ADVERTISE with the appropriate values,
+ *   after sanitizing the values to make sure we only advertise
+ *   what is supported.  Returns < 0 on error, 0 if the PHY's advertisement
+ *   hasn't changed, and > 0 if it has changed. This function is intended
+ *   for Clause 37 1000Base-X mode.
+ */
+static int genphy_c37_config_advert(struct phy_device *phydev)
+{
+       u16 adv = 0;
+
+       /* Only allow advertising what this PHY supports */
+       linkmode_and(phydev->advertising, phydev->advertising,
+                    phydev->supported);
+
+       if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+                             phydev->advertising))
+               adv |= ADVERTISE_1000XFULL;
+       if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+                             phydev->advertising))
+               adv |= ADVERTISE_1000XPAUSE;
+       if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+                             phydev->advertising))
+               adv |= ADVERTISE_1000XPSE_ASYM;
+
+       return phy_modify_changed(phydev, MII_ADVERTISE,
+                                 ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE |
+                                 ADVERTISE_1000XHALF | ADVERTISE_1000XPSE_ASYM,
+                                 adv);
+}
+
 /**
  * genphy_config_eee_advert - disable unwanted eee mode advertisement
  * @phydev: target phy_device struct
@@ -1715,6 +1749,54 @@ int __genphy_config_aneg(struct phy_device *phydev, bool changed)
 }
 EXPORT_SYMBOL(__genphy_config_aneg);
 
+/**
+ * genphy_c37_config_aneg - restart auto-negotiation or write BMCR
+ * @phydev: target phy_device struct
+ *
+ * Description: If auto-negotiation is enabled, we configure the
+ *   advertising, and then restart auto-negotiation.  If it is not
+ *   enabled, then we write the BMCR. This function is intended
+ *   for use with Clause 37 1000Base-X mode.
+ */
+int genphy_c37_config_aneg(struct phy_device *phydev)
+{
+       int err, changed;
+
+       if (phydev->autoneg != AUTONEG_ENABLE)
+               return genphy_setup_forced(phydev);
+
+       err = phy_modify(phydev, MII_BMCR, BMCR_SPEED1000 | BMCR_SPEED100,
+                        BMCR_SPEED1000);
+       if (err)
+               return err;
+
+       changed = genphy_c37_config_advert(phydev);
+       if (changed < 0) /* error */
+               return changed;
+
+       if (!changed) {
+               /* Advertisement hasn't changed, but maybe aneg was never on to
+                * begin with?  Or maybe phy was isolated?
+                */
+               int ctl = phy_read(phydev, MII_BMCR);
+
+               if (ctl < 0)
+                       return ctl;
+
+               if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
+                       changed = 1; /* do restart aneg */
+       }
+
+       /* Only restart aneg if we are advertising something different
+        * than we were before.
+        */
+       if (changed > 0)
+               return genphy_restart_aneg(phydev);
+
+       return 0;
+}
+EXPORT_SYMBOL(genphy_c37_config_aneg);
+
 /**
  * genphy_aneg_done - return auto-negotiation status
  * @phydev: target phy_device struct
@@ -1886,6 +1968,63 @@ int genphy_read_status(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_read_status);
 
+/**
+ * genphy_c37_read_status - check the link status and update current link state
+ * @phydev: target phy_device struct
+ *
+ * Description: Check the link, then figure out the current state
+ *   by comparing what we advertise with what the link partner
+ *   advertises. This function is for Clause 37 1000Base-X mode.
+ */
+int genphy_c37_read_status(struct phy_device *phydev)
+{
+       int lpa, err, old_link = phydev->link;
+
+       /* Update the link, but return if there was an error */
+       err = genphy_update_link(phydev);
+       if (err)
+               return err;
+
+       /* why bother the PHY if nothing can have changed */
+       if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link)
+               return 0;
+
+       phydev->duplex = DUPLEX_UNKNOWN;
+       phydev->pause = 0;
+       phydev->asym_pause = 0;
+
+       if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
+               lpa = phy_read(phydev, MII_LPA);
+               if (lpa < 0)
+                       return lpa;
+
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+                                phydev->lp_advertising, lpa & LPA_LPACK);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+                                phydev->lp_advertising, lpa & LPA_1000XFULL);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+                                phydev->lp_advertising, lpa & LPA_1000XPAUSE);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+                                phydev->lp_advertising,
+                                lpa & LPA_1000XPAUSE_ASYM);
+
+               phy_resolve_aneg_linkmode(phydev);
+       } else if (phydev->autoneg == AUTONEG_DISABLE) {
+               int bmcr = phy_read(phydev, MII_BMCR);
+
+               if (bmcr < 0)
+                       return bmcr;
+
+               if (bmcr & BMCR_FULLDPLX)
+                       phydev->duplex = DUPLEX_FULL;
+               else
+                       phydev->duplex = DUPLEX_HALF;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(genphy_c37_read_status);
+
 /**
  * genphy_soft_reset - software reset the PHY via BMCR_RESET bit
  * @phydev: target phy_device struct
index a578f7e..f16d9e9 100644 (file)
@@ -133,9 +133,7 @@ static int phylink_is_empty_linkmode(const unsigned long *linkmode)
        phylink_set(tmp, Pause);
        phylink_set(tmp, Asym_Pause);
 
-       bitmap_andnot(tmp, linkmode, tmp, __ETHTOOL_LINK_MODE_MASK_NBITS);
-
-       return linkmode_empty(tmp);
+       return linkmode_subset(linkmode, tmp);
 }
 
 static const char *phylink_an_mode_str(unsigned int mode)
@@ -566,26 +564,17 @@ static const struct sfp_upstream_ops sfp_phylink_ops;
 static int phylink_register_sfp(struct phylink *pl,
                                struct fwnode_handle *fwnode)
 {
-       struct fwnode_reference_args ref;
+       struct sfp_bus *bus;
        int ret;
 
-       if (!fwnode)
-               return 0;
-
-       ret = fwnode_property_get_reference_args(fwnode, "sfp", NULL,
-                                                0, 0, &ref);
-       if (ret < 0) {
-               if (ret == -ENOENT)
-                       return 0;
-
-               phylink_err(pl, "unable to parse \"sfp\" node: %d\n",
-                           ret);
+       bus = sfp_register_upstream_node(fwnode, pl, &sfp_phylink_ops);
+       if (IS_ERR(bus)) {
+               ret = PTR_ERR(bus);
+               phylink_err(pl, "unable to attach SFP bus: %d\n", ret);
                return ret;
        }
 
-       pl->sfp_bus = sfp_register_upstream(ref.fwnode, pl, &sfp_phylink_ops);
-       if (!pl->sfp_bus)
-               return -ENOMEM;
+       pl->sfp_bus = bus;
 
        return 0;
 }
@@ -1744,8 +1733,7 @@ static int phylink_sfp_module_insert(void *upstream,
        if (phy_interface_mode_is_8023z(iface) && pl->phydev)
                return -EINVAL;
 
-       changed = !bitmap_equal(pl->supported, support,
-                               __ETHTOOL_LINK_MODE_MASK_NBITS);
+       changed = !linkmode_equal(pl->supported, support);
        if (changed) {
                linkmode_copy(pl->supported, support);
                linkmode_copy(pl->link_config.advertising, config.advertising);
index b23fc41..d037aab 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/phylink.h>
+#include <linux/property.h>
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
@@ -445,45 +446,63 @@ static void sfp_upstream_clear(struct sfp_bus *bus)
 }
 
 /**
- * sfp_register_upstream() - Register the neighbouring device
- * @fwnode: firmware node for the SFP bus
+ * sfp_register_upstream_node() - parse and register the neighbouring device
+ * @fwnode: firmware node for the parent device (MAC or PHY)
  * @upstream: the upstream private data
  * @ops: the upstream's &struct sfp_upstream_ops
  *
- * Register the upstream device (eg, PHY) with the SFP bus. MAC drivers
- * should use phylink, which will call this function for them. Returns
- * a pointer to the allocated &struct sfp_bus.
+ * Parse the parent device's firmware node for a SFP bus, and register the
+ * SFP bus using sfp_register_upstream().
  *
- * On error, returns %NULL.
+ * Returns: on success, a pointer to the sfp_bus structure,
+ *         %NULL if no SFP is specified,
+ *         on failure, an error pointer value:
+ *             corresponding to the errors detailed for
+ *             fwnode_property_get_reference_args().
+ *             %-ENOMEM if we failed to allocate the bus.
+ *             an error from the upstream's connect_phy() method.
  */
-struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode,
-                                     void *upstream,
-                                     const struct sfp_upstream_ops *ops)
+struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode,
+                                          void *upstream,
+                                          const struct sfp_upstream_ops *ops)
 {
-       struct sfp_bus *bus = sfp_bus_get(fwnode);
-       int ret = 0;
+       struct fwnode_reference_args ref;
+       struct sfp_bus *bus;
+       int ret;
 
-       if (bus) {
-               rtnl_lock();
-               bus->upstream_ops = ops;
-               bus->upstream = upstream;
+       ret = fwnode_property_get_reference_args(fwnode, "sfp", NULL,
+                                                0, 0, &ref);
+       if (ret == -ENOENT)
+               return NULL;
+       else if (ret < 0)
+               return ERR_PTR(ret);
 
-               if (bus->sfp) {
-                       ret = sfp_register_bus(bus);
-                       if (ret)
-                               sfp_upstream_clear(bus);
-               }
-               rtnl_unlock();
+       bus = sfp_bus_get(ref.fwnode);
+       fwnode_handle_put(ref.fwnode);
+       if (!bus)
+               return ERR_PTR(-ENOMEM);
+
+       rtnl_lock();
+       bus->upstream_ops = ops;
+       bus->upstream = upstream;
+
+       if (bus->sfp) {
+               ret = sfp_register_bus(bus);
+               if (ret)
+                       sfp_upstream_clear(bus);
+       } else {
+               ret = 0;
        }
+       rtnl_unlock();
 
        if (ret) {
                sfp_bus_put(bus);
-               bus = NULL;
+               bus = ERR_PTR(ret);
        }
 
        return bus;
 }
-EXPORT_SYMBOL_GPL(sfp_register_upstream);
+EXPORT_SYMBOL_GPL(sfp_register_upstream_node);
 
 /**
  * sfp_unregister_upstream() - Unregister sfp bus
index 8156b33..ca70a1d 100644 (file)
@@ -2074,7 +2074,8 @@ static int team_ethtool_get_link_ksettings(struct net_device *dev,
        cmd->base.duplex = DUPLEX_UNKNOWN;
        cmd->base.port = PORT_OTHER;
 
-       list_for_each_entry(port, &team->port_list, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(port, &team->port_list, list) {
                if (team_port_txable(port)) {
                        if (port->state.speed != SPEED_UNKNOWN)
                                speed += port->state.speed;
@@ -2083,6 +2084,8 @@ static int team_ethtool_get_link_ksettings(struct net_device *dev,
                                cmd->base.duplex = port->state.duplex;
                }
        }
+       rcu_read_unlock();
+
        cmd->base.speed = speed ? : SPEED_UNKNOWN;
 
        return 0;
index a8d3141..dab6ccc 100644 (file)
@@ -2290,7 +2290,13 @@ static void tun_free_netdev(struct net_device *dev)
        struct tun_struct *tun = netdev_priv(dev);
 
        BUG_ON(!(list_empty(&tun->disabled)));
+
        free_percpu(tun->pcpu_stats);
+       /* We clear pcpu_stats so that tun_set_iff() can tell if
+        * tun_free_netdev() has been called from register_netdevice().
+        */
+       tun->pcpu_stats = NULL;
+
        tun_flow_uninit(tun);
        security_tun_dev_free_security(tun->security);
        __tun_set_ebpf(tun, &tun->steering_prog, NULL);
@@ -2782,9 +2788,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
                if (!dev)
                        return -ENOMEM;
-               err = dev_get_valid_name(net, dev, name);
-               if (err < 0)
-                       goto err_free_dev;
 
                dev_net_set(dev, net);
                dev->rtnl_link_ops = &tun_link_ops;
@@ -2859,8 +2862,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 err_detach:
        tun_detach_all(dev);
-       /* register_netdevice() already called tun_free_netdev() */
-       goto err_free_dev;
+       /* We are here because register_netdevice() has failed.
+        * If register_netdevice() already called tun_free_netdev()
+        * while dealing with the error, tun->pcpu_stats has been cleared.
+        */
+       if (!tun->pcpu_stats)
+               goto err_free_dev;
 
 err_free_flow:
        tun_flow_uninit(tun);
index daa5448..c5a6e75 100644 (file)
@@ -1214,6 +1214,32 @@ static int ax88179_led_setting(struct usbnet *dev)
        return 0;
 }
 
+static void ax88179_get_mac_addr(struct usbnet *dev)
+{
+       u8 mac[ETH_ALEN];
+
+       /* Maybe the boot loader passed the MAC address via device tree */
+       if (!eth_platform_get_mac_address(&dev->udev->dev, mac)) {
+               netif_dbg(dev, ifup, dev->net,
+                         "MAC address read from device tree");
+       } else {
+               ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
+                                ETH_ALEN, mac);
+               netif_dbg(dev, ifup, dev->net,
+                         "MAC address read from ASIX chip");
+       }
+
+       if (is_valid_ether_addr(mac)) {
+               memcpy(dev->net->dev_addr, mac, ETH_ALEN);
+       } else {
+               netdev_info(dev->net, "invalid MAC address, using random\n");
+               eth_hw_addr_random(dev->net);
+       }
+
+       ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN,
+                         dev->net->dev_addr);
+}
+
 static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 {
        u8 buf[5];
@@ -1240,8 +1266,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
        ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
        msleep(100);
 
-       ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
-                        ETH_ALEN, dev->net->dev_addr);
+       /* Read MAC address from DTB or asix chip */
+       ax88179_get_mac_addr(dev);
        memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
 
        /* RX bulk configuration */
@@ -1541,8 +1567,8 @@ static int ax88179_reset(struct usbnet *dev)
        /* Ethernet PHY Auto Detach*/
        ax88179_auto_detach(dev, 0);
 
-       ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN,
-                        dev->net->dev_addr);
+       /* Read MAC address from DTB or asix chip */
+       ax88179_get_mac_addr(dev);
 
        /* RX bulk configuration */
        memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
index fe63043..0cdb2ce 100644 (file)
@@ -766,6 +766,13 @@ static const struct usb_device_id  products[] = {
        .driver_info = 0,
 },
 
+/* ThinkPad Thunderbolt 3 Dock Gen 2 (based on Realtek RTL8153) */
+{
+       USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3082, USB_CLASS_COMM,
+                       USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+       .driver_info = 0,
+},
+
 /* Lenovo Thinkpad USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */
 {
        USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7205, USB_CLASS_COMM,
index f24a1b0..cf1f3f0 100644 (file)
@@ -3995,9 +3995,6 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
        struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
        u32 buf;
        int ret;
-       int event;
-
-       event = message.event;
 
        if (!dev->suspend_count++) {
                spin_lock_irq(&dev->txq.lock);
index d4a95b5..ac07939 100644 (file)
 #include <linux/suspend.h>
 #include <linux/atomic.h>
 #include <linux/acpi.h>
+#include <linux/firmware.h>
+#include <crypto/hash.h>
 
 /* Information for net-next */
-#define NETNEXT_VERSION                "10"
+#define NETNEXT_VERSION                "11"
 
 /* Information for net */
 #define NET_VERSION            "10"
 #define PLA_BDC_CR             0xd1a0
 #define PLA_TEREDO_TIMER       0xd2cc
 #define PLA_REALWOW_TIMER      0xd2e8
+#define PLA_UPHY_TIMER         0xd388
 #define PLA_SUSPEND_FLAG       0xd38a
 #define PLA_INDICATE_FALG      0xd38c
+#define PLA_MACDBG_PRE         0xd38c  /* RTL_VER_04 only */
+#define PLA_MACDBG_POST                0xd38e  /* RTL_VER_04 only */
 #define PLA_EXTRA_STATUS       0xd398
 #define PLA_EFUSE_DATA         0xdd00
 #define PLA_EFUSE_CMD          0xdd02
 #define USB_CONNECT_TIMER      0xcbf8
 #define USB_MSC_TIMER          0xcbfc
 #define USB_BURST_SIZE         0xcfc0
+#define USB_FW_FIX_EN0         0xcfca
+#define USB_FW_FIX_EN1         0xcfcc
 #define USB_LPM_CONFIG         0xcfd8
+#define USB_CSTMR              0xcfef  /* RTL8153A */
+#define USB_FW_CTRL            0xd334  /* RTL8153B */
+#define USB_FC_TIMER           0xd340
 #define USB_USB_CTRL           0xd406
 #define USB_PHY_CTRL           0xd408
 #define USB_TX_AGG             0xd40a
 #define USB_LPM_CTRL           0xd41a
 #define USB_BMU_RESET          0xd4b0
 #define USB_U1U2_TIMER         0xd4da
+#define USB_FW_TASK            0xd4e8  /* RTL8153B */
 #define USB_UPS_CTRL           0xd800
 #define USB_POWER_CUT          0xd80a
 #define USB_MISC_0             0xd81a
 #define USB_AFE_CTRL2          0xd824
 #define USB_UPS_CFG            0xd842
 #define USB_UPS_FLAGS          0xd848
+#define USB_WDT1_CTRL          0xe404
 #define USB_WDT11_CTRL         0xe43c
-#define USB_BP_BA              0xfc26
-#define USB_BP_0               0xfc28
-#define USB_BP_1               0xfc2a
-#define USB_BP_2               0xfc2c
-#define USB_BP_3               0xfc2e
-#define USB_BP_4               0xfc30
-#define USB_BP_5               0xfc32
-#define USB_BP_6               0xfc34
-#define USB_BP_7               0xfc36
-#define USB_BP_EN              0xfc38
-#define USB_BP_8               0xfc38
+#define USB_BP_BA              PLA_BP_BA
+#define USB_BP_0               PLA_BP_0
+#define USB_BP_1               PLA_BP_1
+#define USB_BP_2               PLA_BP_2
+#define USB_BP_3               PLA_BP_3
+#define USB_BP_4               PLA_BP_4
+#define USB_BP_5               PLA_BP_5
+#define USB_BP_6               PLA_BP_6
+#define USB_BP_7               PLA_BP_7
+#define USB_BP_EN              PLA_BP_EN       /* RTL8153A */
+#define USB_BP_8               0xfc38          /* RTL8153B */
 #define USB_BP_9               0xfc3a
 #define USB_BP_10              0xfc3c
 #define USB_BP_11              0xfc3e
 #define OCP_PHY_STATE          0xa708          /* nway state for 8153 */
 #define OCP_PHY_PATCH_STAT     0xb800
 #define OCP_PHY_PATCH_CMD      0xb820
+#define OCP_PHY_LOCK           0xb82e
 #define OCP_ADC_IOFFSET                0xbcfc
 #define OCP_ADC_CFG            0xbc06
 #define OCP_SYSCLK_CFG         0xc416
 #define SRAM_10M_AMP1          0x8080
 #define SRAM_10M_AMP2          0x8082
 #define SRAM_IMPEDANCE         0x8084
+#define SRAM_PHY_LOCK          0xb82e
 
 /* PLA_RCR */
 #define RCR_AAP                        0x00000001
 /* PLA_INDICATE_FALG */
 #define UPCOMING_RUNTIME_D3    BIT(0)
 
+/* PLA_MACDBG_PRE and PLA_MACDBG_POST */
+#define DEBUG_OE               BIT(0)
+#define DEBUG_LTSSM            0x0082
+
 /* PLA_EXTRA_STATUS */
+#define U3P3_CHECK_EN          BIT(7)  /* RTL_VER_05 only */
 #define LINK_CHANGE_FLAG       BIT(8)
 
 /* USB_USB2PHY */
 #define STAT_SPEED_HIGH                0x0000
 #define STAT_SPEED_FULL                0x0002
 
+/* USB_FW_FIX_EN0 */
+#define FW_FIX_SUSPEND         BIT(14)
+
+/* USB_FW_FIX_EN1 */
+#define FW_IP_RESET_EN         BIT(9)
+
 /* USB_LPM_CONFIG */
 #define LPM_U1U2_EN            BIT(0)
 
 #define OWN_UPDATE             BIT(0)
 #define OWN_CLEAR              BIT(1)
 
+/* USB_FW_TASK */
+#define FC_PATCH_TASK          BIT(1)
+
 /* USB_UPS_CTRL */
 #define POWER_CUT              0x0100
 
 /* USB_PM_CTRL_STATUS */
 #define RESUME_INDICATE                0x0001
 
+/* USB_CSTMR */
+#define FORCE_SUPER            BIT(0)
+
+/* USB_FW_CTRL */
+#define FLOW_CTRL_PATCH_OPT    BIT(1)
+
+/* USB_FC_TIMER */
+#define CTRL_TIMER_EN          BIT(15)
+
 /* USB_USB_CTRL */
 #define RX_AGG_DISABLE         0x0010
 #define RX_ZERO_EN             0x0080
 #define COALESCE_HIGH          250000U
 #define COALESCE_SLOW          524280U
 
+/* USB_WDT1_CTRL */
+#define WTD1_EN                        BIT(0)
+
 /* USB_WDT11_CTRL */
 #define TIMER11_EN             0x0001
 
@@ -539,6 +579,9 @@ enum spd_duplex {
 /* OCP_PHY_PATCH_CMD */
 #define PATCH_REQUEST          BIT(4)
 
+/* OCP_PHY_LOCK */
+#define PATCH_LOCK             BIT(0)
+
 /* OCP_ADC_CFG */
 #define CKADSEL_L              0x0100
 #define ADC_EN                 0x0080
@@ -563,6 +606,9 @@ enum spd_duplex {
 /* SRAM_IMPEDANCE */
 #define RX_DRIVING_MASK                0x6000
 
+/* SRAM_PHY_LOCK */
+#define PHY_PATCH_LOCK         0x0001
+
 /* MAC PASSTHRU */
 #define AD_MASK                        0xfee0
 #define BND_MASK               0x0004
@@ -570,6 +616,8 @@ enum spd_duplex {
 #define EFUSE                  0xcfdb
 #define PASS_THRU_MASK         0x1
 
+#define BP4_SUPER_ONLY         0x1578  /* RTL_VER_04 only */
+
 enum rtl_register_content {
        _1000bps        = 0x10,
        _100bps         = 0x08,
@@ -622,6 +670,7 @@ enum rtl8152_flags {
        SCHEDULE_TASKLET,
        GREEN_ETHERNET,
        DELL_TB_RX_AGG_BUG,
+       LENOVO_MACPASSTHRU,
 };
 
 /* Define these values to match your device */
@@ -736,16 +785,16 @@ struct r8152 {
        struct tasklet_struct tx_tl;
 
        struct rtl_ops {
-               void (*init)(struct r8152 *);
-               int (*enable)(struct r8152 *);
-               void (*disable)(struct r8152 *);
-               void (*up)(struct r8152 *);
-               void (*down)(struct r8152 *);
-               void (*unload)(struct r8152 *);
-               int (*eee_get)(struct r8152 *, struct ethtool_eee *);
-               int (*eee_set)(struct r8152 *, struct ethtool_eee *);
-               bool (*in_nway)(struct r8152 *);
-               void (*hw_phy_cfg)(struct r8152 *);
+               void (*init)(struct r8152 *tp);
+               int (*enable)(struct r8152 *tp);
+               void (*disable)(struct r8152 *tp);
+               void (*up)(struct r8152 *tp);
+               void (*down)(struct r8152 *tp);
+               void (*unload)(struct r8152 *tp);
+               int (*eee_get)(struct r8152 *tp, struct ethtool_eee *eee);
+               int (*eee_set)(struct r8152 *tp, struct ethtool_eee *eee);
+               bool (*in_nway)(struct r8152 *tp);
+               void (*hw_phy_cfg)(struct r8152 *tp);
                void (*autosuspend_en)(struct r8152 *tp, bool enable);
        } rtl_ops;
 
@@ -766,6 +815,19 @@ struct r8152 {
                u32 ctap_short_off:1;
        } ups_info;
 
+#define RTL_VER_SIZE           32
+
+       struct rtl_fw {
+               const char *fw_name;
+               const struct firmware *fw;
+
+               char version[RTL_VER_SIZE];
+               int (*pre_fw)(struct r8152 *tp);
+               int (*post_fw)(struct r8152 *tp);
+
+               bool retry;
+       } rtl_fw;
+
        atomic_t rx_count;
 
        bool eee_en;
@@ -788,6 +850,131 @@ struct r8152 {
        u8 autoneg;
 };
 
+/**
+ * struct fw_block - block type and total length
+ * @type: type of the current block, such as RTL_FW_END, RTL_FW_PLA,
+ *     RTL_FW_USB and so on.
+ * @length: total length of the current block.
+ */
+struct fw_block {
+       __le32 type;
+       __le32 length;
+} __packed;
+
+/**
+ * struct fw_header - header of the firmware file
+ * @checksum: checksum of sha256 which is calculated from the whole file
+ *     except the checksum field of the file. That is, calculate sha256
+ *     from the version field to the end of the file.
+ * @version: version of this firmware.
+ * @blocks: the first firmware block of the file
+ */
+struct fw_header {
+       u8 checksum[32];
+       char version[RTL_VER_SIZE];
+       struct fw_block blocks[0];
+} __packed;
+
+/**
+ * struct fw_mac - a firmware block used by RTL_FW_PLA and RTL_FW_USB.
+ *     The layout of the firmware block is:
+ *     <struct fw_mac> + <info> + <firmware data>.
+ * @fw_offset: offset of the firmware binary data. The start address of
+ *     the data would be the address of struct fw_mac + @fw_offset.
+ * @fw_reg: the register to load the firmware. Depends on chip.
+ * @bp_ba_addr: the register to write break point base address. Depends on
+ *     chip.
+ * @bp_ba_value: break point base address. Depends on chip.
+ * @bp_en_addr: the register to write break point enabled mask. Depends
+ *     on chip.
+ * @bp_en_value: break point enabled mask. Depends on the firmware.
+ * @bp_start: the start register of break points. Depends on chip.
+ * @bp_num: the break point number which needs to be set for this firmware.
+ *     Depends on the firmware.
+ * @bp: break points. Depends on firmware.
+ * @fw_ver_reg: the register to store the fw version.
+ * @fw_ver_data: the firmware version of the current type.
+ * @info: additional information for debugging, and is followed by the
+ *     binary data of firmware.
+ */
+struct fw_mac {
+       struct fw_block blk_hdr;
+       __le16 fw_offset;
+       __le16 fw_reg;
+       __le16 bp_ba_addr;
+       __le16 bp_ba_value;
+       __le16 bp_en_addr;
+       __le16 bp_en_value;
+       __le16 bp_start;
+       __le16 bp_num;
+       __le16 bp[16]; /* any value determined by firmware */
+       __le32 reserved;
+       __le16 fw_ver_reg;
+       u8 fw_ver_data;
+       char info[0];
+} __packed;
+
+/**
+ * struct fw_phy_patch_key - a firmware block used by RTL_FW_PHY_START.
+ *     This is used to set patch key when loading the firmware of PHY.
+ * @key_reg: the register to write the patch key.
+ * @key_data: patch key.
+ */
+struct fw_phy_patch_key {
+       struct fw_block blk_hdr;
+       __le16 key_reg;
+       __le16 key_data;
+       __le32 reserved;
+} __packed;
+
+/**
+ * struct fw_phy_nc - a firmware block used by RTL_FW_PHY_NC.
+ *     The layout of the firmware block is:
+ *     <struct fw_phy_nc> + <info> + <firmware data>.
+ * @fw_offset: offset of the firmware binary data. The start address of
+ *     the data would be the address of struct fw_phy_nc + @fw_offset.
+ * @fw_reg: the register to load the firmware. Depends on chip.
+ * @ba_reg: the register to write the base address. Depends on chip.
+ * @ba_data: base address. Depends on chip.
+ * @patch_en_addr: the register of enabling patch mode. Depends on chip.
+ * @patch_en_value: patch mode enabled mask. Depends on the firmware.
+ * @mode_reg: the register of switching the mode.
+ * @mode_pre: the mode to be set before loading the firmware.
+ * @mode_post: the mode to be set after loading the firmware.
+ * @bp_start: the start register of break points. Depends on chip.
+ * @bp_num: the break point number which needs to be set for this firmware.
+ *     Depends on the firmware.
+ * @bp: break points. Depends on firmware.
+ * @info: additional information for debugging, and is followed by the
+ *     binary data of firmware.
+ */
+struct fw_phy_nc {
+       struct fw_block blk_hdr;
+       __le16 fw_offset;
+       __le16 fw_reg;
+       __le16 ba_reg;
+       __le16 ba_data;
+       __le16 patch_en_addr;
+       __le16 patch_en_value;
+       __le16 mode_reg;
+       __le16 mode_pre;
+       __le16 mode_post;
+       __le16 reserved;
+       __le16 bp_start;
+       __le16 bp_num;
+       __le16 bp[4];
+       char info[0];
+} __packed;
+
+enum rtl_fw_type {
+       RTL_FW_END = 0,
+       RTL_FW_PLA,
+       RTL_FW_USB,
+       RTL_FW_PHY_START,
+       RTL_FW_PHY_STOP,
+       RTL_FW_PHY_NC,
+};
+
 enum rtl_version {
        RTL_VER_UNKNOWN = 0,
        RTL_VER_01,
@@ -1222,38 +1409,52 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
        int ret = -EINVAL;
        u32 ocp_data;
        unsigned char buf[6];
-
-       /* test for -AD variant of RTL8153 */
-       ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0);
-       if ((ocp_data & AD_MASK) == 0x1000) {
-               /* test for MAC address pass-through bit */
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, EFUSE);
-               if ((ocp_data & PASS_THRU_MASK) != 1) {
-                       netif_dbg(tp, probe, tp->netdev,
-                                 "No efuse for RTL8153-AD MAC pass through\n");
-                       return -ENODEV;
-               }
+       char *mac_obj_name;
+       acpi_object_type mac_obj_type;
+       int mac_strlen;
+
+       if (test_bit(LENOVO_MACPASSTHRU, &tp->flags)) {
+               mac_obj_name = "\\MACA";
+               mac_obj_type = ACPI_TYPE_STRING;
+               mac_strlen = 0x16;
        } else {
-               /* test for RTL8153-BND and RTL8153-BD */
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1);
-               if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK) == 0) {
-                       netif_dbg(tp, probe, tp->netdev,
-                                 "Invalid variant for MAC pass through\n");
-                       return -ENODEV;
+               /* test for -AD variant of RTL8153 */
+               ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0);
+               if ((ocp_data & AD_MASK) == 0x1000) {
+                       /* test for MAC address pass-through bit */
+                       ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, EFUSE);
+                       if ((ocp_data & PASS_THRU_MASK) != 1) {
+                               netif_dbg(tp, probe, tp->netdev,
+                                               "No efuse for RTL8153-AD MAC pass through\n");
+                               return -ENODEV;
+                       }
+               } else {
+                       /* test for RTL8153-BND and RTL8153-BD */
+                       ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1);
+                       if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK) == 0) {
+                               netif_dbg(tp, probe, tp->netdev,
+                                               "Invalid variant for MAC pass through\n");
+                               return -ENODEV;
+                       }
                }
+
+               mac_obj_name = "\\_SB.AMAC";
+               mac_obj_type = ACPI_TYPE_BUFFER;
+               mac_strlen = 0x17;
        }
 
        /* returns _AUXMAC_#AABBCCDDEEFF# */
-       status = acpi_evaluate_object(NULL, "\\_SB.AMAC", NULL, &buffer);
+       status = acpi_evaluate_object(NULL, mac_obj_name, NULL, &buffer);
        obj = (union acpi_object *)buffer.pointer;
        if (!ACPI_SUCCESS(status))
                return -ENODEV;
-       if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) {
+       if (obj->type != mac_obj_type || obj->string.length != mac_strlen) {
                netif_warn(tp, probe, tp->netdev,
                           "Invalid buffer for pass-thru MAC addr: (%d, %d)\n",
                           obj->type, obj->string.length);
                goto amacout;
        }
+
        if (strncmp(obj->string.pointer, "_AUXMAC_#", 9) != 0 ||
            strncmp(obj->string.pointer + 0x15, "#", 1) != 0) {
                netif_warn(tp, probe, tp->netdev,
@@ -1688,7 +1889,7 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
 }
 
 /* r8152_csum_workaround()
- * The hw limites the value the transport offset. When the offset is out of the
+ * The hw limits the value of the transport offset. When the offset is out of
  * range, calculate the checksum by sw.
  */
 static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
@@ -2178,6 +2379,7 @@ static void tx_bottom(struct r8152 *tp)
        int res;
 
        do {
+               struct net_device *netdev = tp->netdev;
                struct tx_agg *agg;
 
                if (skb_queue_empty(&tp->tx_queue))
@@ -2188,24 +2390,23 @@ static void tx_bottom(struct r8152 *tp)
                        break;
 
                res = r8152_tx_agg_fill(tp, agg);
-               if (res) {
-                       struct net_device *netdev = tp->netdev;
+               if (!res)
+                       continue;
 
-                       if (res == -ENODEV) {
-                               rtl_set_unplug(tp);
-                               netif_device_detach(netdev);
-                       } else {
-                               struct net_device_stats *stats = &netdev->stats;
-                               unsigned long flags;
+               if (res == -ENODEV) {
+                       rtl_set_unplug(tp);
+                       netif_device_detach(netdev);
+               } else {
+                       struct net_device_stats *stats = &netdev->stats;
+                       unsigned long flags;
 
-                               netif_warn(tp, tx_err, netdev,
-                                          "failed tx_urb %d\n", res);
-                               stats->tx_dropped += agg->skb_num;
+                       netif_warn(tp, tx_err, netdev,
+                                  "failed tx_urb %d\n", res);
+                       stats->tx_dropped += agg->skb_num;
 
-                               spin_lock_irqsave(&tp->tx_lock, flags);
-                               list_add_tail(&agg->list, &tp->tx_free);
-                               spin_unlock_irqrestore(&tp->tx_lock, flags);
-                       }
+                       spin_lock_irqsave(&tp->tx_lock, flags);
+                       list_add_tail(&agg->list, &tp->tx_free);
+                       spin_unlock_irqrestore(&tp->tx_lock, flags);
                }
        } while (res == 0);
 }
@@ -3226,6 +3427,688 @@ static void rtl_reset_bmu(struct r8152 *tp)
        ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
 }
 
+/* Clear the bp to stop the firmware before loading a new one */
+static void rtl_clear_bp(struct r8152 *tp, u16 type)
+{
+       switch (tp->version) {
+       case RTL_VER_01:
+       case RTL_VER_02:
+       case RTL_VER_07:
+               break;
+       case RTL_VER_03:
+       case RTL_VER_04:
+       case RTL_VER_05:
+       case RTL_VER_06:
+               ocp_write_byte(tp, type, PLA_BP_EN, 0);
+               break;
+       case RTL_VER_08:
+       case RTL_VER_09:
+       default:
+               if (type == MCU_TYPE_USB) {
+                       ocp_write_byte(tp, MCU_TYPE_USB, USB_BP2_EN, 0);
+
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_8, 0);
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_9, 0);
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_10, 0);
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_11, 0);
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_12, 0);
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_13, 0);
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_14, 0);
+                       ocp_write_word(tp, MCU_TYPE_USB, USB_BP_15, 0);
+               } else {
+                       ocp_write_byte(tp, MCU_TYPE_PLA, PLA_BP_EN, 0);
+               }
+               break;
+       }
+
+       ocp_write_word(tp, type, PLA_BP_0, 0);
+       ocp_write_word(tp, type, PLA_BP_1, 0);
+       ocp_write_word(tp, type, PLA_BP_2, 0);
+       ocp_write_word(tp, type, PLA_BP_3, 0);
+       ocp_write_word(tp, type, PLA_BP_4, 0);
+       ocp_write_word(tp, type, PLA_BP_5, 0);
+       ocp_write_word(tp, type, PLA_BP_6, 0);
+       ocp_write_word(tp, type, PLA_BP_7, 0);
+
+       /* wait 3 ms to make sure the firmware is stopped */
+       usleep_range(3000, 6000);
+       ocp_write_word(tp, type, PLA_BP_BA, 0);
+}
+
+static int r8153_patch_request(struct r8152 *tp, bool request)
+{
+       u16 data;
+       int i;
+
+       data = ocp_reg_read(tp, OCP_PHY_PATCH_CMD);
+       if (request)
+               data |= PATCH_REQUEST;
+       else
+               data &= ~PATCH_REQUEST;
+       ocp_reg_write(tp, OCP_PHY_PATCH_CMD, data);
+
+       for (i = 0; request && i < 5000; i++) {
+               usleep_range(1000, 2000);
+               if (ocp_reg_read(tp, OCP_PHY_PATCH_STAT) & PATCH_READY)
+                       break;
+       }
+
+       if (request && !(ocp_reg_read(tp, OCP_PHY_PATCH_STAT) & PATCH_READY)) {
+               netif_err(tp, drv, tp->netdev, "patch request fail\n");
+               r8153_patch_request(tp, false);
+               return -ETIME;
+       } else {
+               return 0;
+       }
+}
+
+static int r8153_pre_ram_code(struct r8152 *tp, u16 key_addr, u16 patch_key)
+{
+       if (r8153_patch_request(tp, true)) {
+               dev_err(&tp->intf->dev, "patch request fail\n");
+               return -ETIME;
+       }
+
+       sram_write(tp, key_addr, patch_key);
+       sram_write(tp, SRAM_PHY_LOCK, PHY_PATCH_LOCK);
+
+       return 0;
+}
+
+static int r8153_post_ram_code(struct r8152 *tp, u16 key_addr)
+{
+       u16 data;
+
+       sram_write(tp, 0x0000, 0x0000);
+
+       data = ocp_reg_read(tp, OCP_PHY_LOCK);
+       data &= ~PATCH_LOCK;
+       ocp_reg_write(tp, OCP_PHY_LOCK, data);
+
+       sram_write(tp, key_addr, 0x0000);
+
+       r8153_patch_request(tp, false);
+
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base);
+
+       return 0;
+}
+
+static bool rtl8152_is_fw_phy_nc_ok(struct r8152 *tp, struct fw_phy_nc *phy)
+{
+       u32 length;
+       u16 fw_offset, fw_reg, ba_reg, patch_en_addr, mode_reg, bp_start;
+       bool rc = false;
+
+       switch (tp->version) {
+       case RTL_VER_04:
+       case RTL_VER_05:
+       case RTL_VER_06:
+               fw_reg = 0xa014;
+               ba_reg = 0xa012;
+               patch_en_addr = 0xa01a;
+               mode_reg = 0xb820;
+               bp_start = 0xa000;
+               break;
+       default:
+               goto out;
+       }
+
+       fw_offset = __le16_to_cpu(phy->fw_offset);
+       if (fw_offset < sizeof(*phy)) {
+               dev_err(&tp->intf->dev, "fw_offset too small\n");
+               goto out;
+       }
+
+       length = __le32_to_cpu(phy->blk_hdr.length);
+       if (length < fw_offset) {
+               dev_err(&tp->intf->dev, "invalid fw_offset\n");
+               goto out;
+       }
+
+       length -= __le16_to_cpu(phy->fw_offset);
+       if (!length || (length & 1)) {
+               dev_err(&tp->intf->dev, "invalid block length\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(phy->fw_reg) != fw_reg) {
+               dev_err(&tp->intf->dev, "invalid register to load firmware\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(phy->ba_reg) != ba_reg) {
+               dev_err(&tp->intf->dev, "invalid base address register\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(phy->patch_en_addr) != patch_en_addr) {
+               dev_err(&tp->intf->dev,
+                       "invalid patch mode enabled register\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(phy->mode_reg) != mode_reg) {
+               dev_err(&tp->intf->dev,
+                       "invalid register to switch the mode\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(phy->bp_start) != bp_start) {
+               dev_err(&tp->intf->dev,
+                       "invalid start register of break point\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(phy->bp_num) > 4) {
+               dev_err(&tp->intf->dev, "invalid break point number\n");
+               goto out;
+       }
+
+       rc = true;
+out:
+       return rc;
+}
+
+static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac)
+{
+       u16 fw_reg, bp_ba_addr, bp_en_addr, bp_start, fw_offset;
+       bool rc = false;
+       u32 length, type;
+       int i, max_bp;
+
+       type = __le32_to_cpu(mac->blk_hdr.type);
+       if (type == RTL_FW_PLA) {
+               switch (tp->version) {
+               case RTL_VER_01:
+               case RTL_VER_02:
+               case RTL_VER_07:
+                       fw_reg = 0xf800;
+                       bp_ba_addr = PLA_BP_BA;
+                       bp_en_addr = 0;
+                       bp_start = PLA_BP_0;
+                       max_bp = 8;
+                       break;
+               case RTL_VER_03:
+               case RTL_VER_04:
+               case RTL_VER_05:
+               case RTL_VER_06:
+               case RTL_VER_08:
+               case RTL_VER_09:
+                       fw_reg = 0xf800;
+                       bp_ba_addr = PLA_BP_BA;
+                       bp_en_addr = PLA_BP_EN;
+                       bp_start = PLA_BP_0;
+                       max_bp = 8;
+                       break;
+               default:
+                       goto out;
+               }
+       } else if (type == RTL_FW_USB) {
+               switch (tp->version) {
+               case RTL_VER_03:
+               case RTL_VER_04:
+               case RTL_VER_05:
+               case RTL_VER_06:
+                       fw_reg = 0xf800;
+                       bp_ba_addr = USB_BP_BA;
+                       bp_en_addr = USB_BP_EN;
+                       bp_start = USB_BP_0;
+                       max_bp = 8;
+                       break;
+               case RTL_VER_08:
+               case RTL_VER_09:
+                       fw_reg = 0xe600;
+                       bp_ba_addr = USB_BP_BA;
+                       bp_en_addr = USB_BP2_EN;
+                       bp_start = USB_BP_0;
+                       max_bp = 16;
+                       break;
+               case RTL_VER_01:
+               case RTL_VER_02:
+               case RTL_VER_07:
+               default:
+                       goto out;
+               }
+       } else {
+               goto out;
+       }
+
+       fw_offset = __le16_to_cpu(mac->fw_offset);
+       if (fw_offset < sizeof(*mac)) {
+               dev_err(&tp->intf->dev, "fw_offset too small\n");
+               goto out;
+       }
+
+       length = __le32_to_cpu(mac->blk_hdr.length);
+       if (length < fw_offset) {
+               dev_err(&tp->intf->dev, "invalid fw_offset\n");
+               goto out;
+       }
+
+       length -= fw_offset;
+       if (length < 4 || (length & 3)) {
+               dev_err(&tp->intf->dev, "invalid block length\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(mac->fw_reg) != fw_reg) {
+               dev_err(&tp->intf->dev, "invalid register to load firmware\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(mac->bp_ba_addr) != bp_ba_addr) {
+               dev_err(&tp->intf->dev, "invalid base address register\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(mac->bp_en_addr) != bp_en_addr) {
+               dev_err(&tp->intf->dev, "invalid enabled mask register\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(mac->bp_start) != bp_start) {
+               dev_err(&tp->intf->dev,
+                       "invalid start register of break point\n");
+               goto out;
+       }
+
+       if (__le16_to_cpu(mac->bp_num) > max_bp) {
+               dev_err(&tp->intf->dev, "invalid break point number\n");
+               goto out;
+       }
+
+       for (i = __le16_to_cpu(mac->bp_num); i < max_bp; i++) {
+               if (mac->bp[i]) {
+                       dev_err(&tp->intf->dev, "unused bp%u is not zero\n", i);
+                       goto out;
+               }
+       }
+
+       rc = true;
+out:
+       return rc;
+}
+
+/* Verify the checksum for the firmware file. It is calculated from the version
+ * field to the end of the file. Compare the result with the checksum field to
+ * make sure the file is correct.
+ */
+static long rtl8152_fw_verify_checksum(struct r8152 *tp,
+                                      struct fw_header *fw_hdr, size_t size)
+{
+       unsigned char checksum[sizeof(fw_hdr->checksum)];
+       struct crypto_shash *alg;
+       struct shash_desc *sdesc;
+       size_t len;
+       long rc;
+
+       alg = crypto_alloc_shash("sha256", 0, 0);
+       if (IS_ERR(alg)) {
+               rc = PTR_ERR(alg);
+               goto out;
+       }
+
+       if (crypto_shash_digestsize(alg) != sizeof(fw_hdr->checksum)) {
+               rc = -EFAULT;
+               dev_err(&tp->intf->dev, "digestsize incorrect (%u)\n",
+                       crypto_shash_digestsize(alg));
+               goto free_shash;
+       }
+
+       len = sizeof(*sdesc) + crypto_shash_descsize(alg);
+       sdesc = kmalloc(len, GFP_KERNEL);
+       if (!sdesc) {
+               rc = -ENOMEM;
+               goto free_shash;
+       }
+       sdesc->tfm = alg;
+
+       len = size - sizeof(fw_hdr->checksum);
+       rc = crypto_shash_digest(sdesc, fw_hdr->version, len, checksum);
+       kfree(sdesc);
+       if (rc)
+               goto free_shash;
+
+       if (memcmp(fw_hdr->checksum, checksum, sizeof(fw_hdr->checksum))) {
+               dev_err(&tp->intf->dev, "checksum fail\n");
+               rc = -EFAULT;
+       }
+
+free_shash:
+       crypto_free_shash(alg);
+out:
+       return rc;
+}
+
+/* Validate a firmware image before any part of it is applied.
+ *
+ * After the whole-file checksum passes, walk the variable-length block
+ * list that follows the header: the PLA and USB MAC blocks and the PHY
+ * NC block may each appear at most once, PHY blocks must be bracketed
+ * by a PHY_START/PHY_STOP pair, and each fixed-size block must declare
+ * the expected length.  Returns 0 on success or a negative errno
+ * (-EFAULT for a malformed image).
+ */
+static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw)
+{
+       const struct firmware *fw = rtl_fw->fw;
+       struct fw_header *fw_hdr = (struct fw_header *)fw->data;
+       struct fw_mac *pla = NULL, *usb = NULL;
+       struct fw_phy_patch_key *start = NULL;
+       struct fw_phy_nc *phy_nc = NULL;
+       struct fw_block *stop = NULL;
+       long ret = -EFAULT;
+       int i;
+
+       if (fw->size < sizeof(*fw_hdr)) {
+               dev_err(&tp->intf->dev, "file too small\n");
+               goto fail;
+       }
+
+       ret = rtl8152_fw_verify_checksum(tp, fw_hdr, fw->size);
+       if (ret)
+               goto fail;
+
+       ret = -EFAULT;
+
+       /* NOTE(review): i is a signed int compared against fw->size
+        * (size_t); fine for the small images expected here, but worth
+        * confirming images can never approach INT_MAX.
+        */
+       for (i = sizeof(*fw_hdr); i < fw->size;) {
+               struct fw_block *block = (struct fw_block *)&fw->data[i];
+               u32 type;
+
+               /* Reject a block header that runs past end of file. */
+               if ((i + sizeof(*block)) > fw->size)
+                       goto fail;
+
+               type = __le32_to_cpu(block->type);
+               switch (type) {
+               case RTL_FW_END:
+                       /* Terminator: a bare header with nothing after it. */
+                       if (__le32_to_cpu(block->length) != sizeof(*block))
+                               goto fail;
+                       goto fw_end;
+               case RTL_FW_PLA:
+                       if (pla) {
+                               /* NOTE(review): message lacks a trailing \n,
+                                * unlike the other dev_err strings here.
+                                */
+                               dev_err(&tp->intf->dev,
+                                       "multiple PLA firmware encountered");
+                               goto fail;
+                       }
+
+                       pla = (struct fw_mac *)block;
+                       if (!rtl8152_is_fw_mac_ok(tp, pla)) {
+                               dev_err(&tp->intf->dev,
+                                       "check PLA firmware failed\n");
+                               goto fail;
+                       }
+                       break;
+               case RTL_FW_USB:
+                       if (usb) {
+                               /* NOTE(review): missing trailing \n as well. */
+                               dev_err(&tp->intf->dev,
+                                       "multiple USB firmware encountered");
+                               goto fail;
+                       }
+
+                       usb = (struct fw_mac *)block;
+                       if (!rtl8152_is_fw_mac_ok(tp, usb)) {
+                               dev_err(&tp->intf->dev,
+                                       "check USB firmware failed\n");
+                               goto fail;
+                       }
+                       break;
+               case RTL_FW_PHY_START:
+                       /* Must precede all other PHY blocks and be unique. */
+                       if (start || phy_nc || stop) {
+                               dev_err(&tp->intf->dev,
+                                       "check PHY_START fail\n");
+                               goto fail;
+                       }
+
+                       if (__le32_to_cpu(block->length) != sizeof(*start)) {
+                               dev_err(&tp->intf->dev,
+                                       "Invalid length for PHY_START\n");
+                               goto fail;
+                       }
+
+                       start = (struct fw_phy_patch_key *)block;
+                       break;
+               case RTL_FW_PHY_STOP:
+                       /* Closes an open PHY_START section, at most once. */
+                       if (stop || !start) {
+                               dev_err(&tp->intf->dev,
+                                       "Check PHY_STOP fail\n");
+                               goto fail;
+                       }
+
+                       if (__le32_to_cpu(block->length) != sizeof(*block)) {
+                               dev_err(&tp->intf->dev,
+                                       "Invalid length for PHY_STOP\n");
+                               goto fail;
+                       }
+
+                       stop = block;
+                       break;
+               case RTL_FW_PHY_NC:
+                       /* PHY NC data is only valid inside START..STOP. */
+                       if (!start || stop) {
+                               dev_err(&tp->intf->dev,
+                                       "check PHY_NC fail\n");
+                               goto fail;
+                       }
+
+                       if (phy_nc) {
+                               dev_err(&tp->intf->dev,
+                                       "multiple PHY NC encountered\n");
+                               goto fail;
+                       }
+
+                       phy_nc = (struct fw_phy_nc *)block;
+                       if (!rtl8152_is_fw_phy_nc_ok(tp, phy_nc)) {
+                               dev_err(&tp->intf->dev,
+                                       "check PHY NC firmware failed\n");
+                               goto fail;
+                       }
+
+                       break;
+               default:
+                       /* Unknown blocks are skipped, not rejected, so new
+                        * block types stay forward compatible.
+                        */
+                       dev_warn(&tp->intf->dev, "Unknown type %u is found\n",
+                                type);
+                       break;
+               }
+
+               /* next block; blocks are padded to 8-byte boundaries */
+               i += ALIGN(__le32_to_cpu(block->length), 8);
+       }
+
+fw_end:
+       /* A PHY section that was opened must have been closed. */
+       if ((phy_nc || start) && !stop) {
+               dev_err(&tp->intf->dev, "without PHY_STOP\n");
+               goto fail;
+       }
+
+       return 0;
+fail:
+       return ret;
+}
+
+/* Apply a validated PHY NC firmware block to the device.
+ *
+ * Puts the PHY into its pre-patch mode, streams the patch words into
+ * SRAM via OCP_SRAM_ADDR/OCP_SRAM_DATA, enables the patch, programs the
+ * break points, then restores the post-patch mode.  The block must have
+ * passed rtl8152_is_fw_phy_nc_ok() first (see rtl8152_check_firmware),
+ * so lengths and offsets are trusted here.
+ */
+static void rtl8152_fw_phy_nc_apply(struct r8152 *tp, struct fw_phy_nc *phy)
+{
+       u16 mode_reg, bp_index;
+       u32 length, i, num;
+       __le16 *data;
+
+       /* enter patch mode before touching SRAM */
+       mode_reg = __le16_to_cpu(phy->mode_reg);
+       sram_write(tp, mode_reg, __le16_to_cpu(phy->mode_pre));
+       sram_write(tp, __le16_to_cpu(phy->ba_reg),
+                  __le16_to_cpu(phy->ba_data));
+
+       /* payload starts at fw_offset inside the block; 2 bytes per word */
+       length = __le32_to_cpu(phy->blk_hdr.length);
+       length -= __le16_to_cpu(phy->fw_offset);
+       num = length / 2;
+       data = (__le16 *)((u8 *)phy + __le16_to_cpu(phy->fw_offset));
+
+       /* auto-incrementing SRAM write: set address once, push all words */
+       ocp_reg_write(tp, OCP_SRAM_ADDR, __le16_to_cpu(phy->fw_reg));
+       for (i = 0; i < num; i++)
+               ocp_reg_write(tp, OCP_SRAM_DATA, __le16_to_cpu(data[i]));
+
+       sram_write(tp, __le16_to_cpu(phy->patch_en_addr),
+                  __le16_to_cpu(phy->patch_en_value));
+
+       /* program break points at consecutive word addresses */
+       bp_index = __le16_to_cpu(phy->bp_start);
+       num = __le16_to_cpu(phy->bp_num);
+       for (i = 0; i < num; i++) {
+               sram_write(tp, bp_index, __le16_to_cpu(phy->bp[i]));
+               bp_index += 2;
+       }
+
+       /* leave patch mode */
+       sram_write(tp, mode_reg, __le16_to_cpu(phy->mode_post));
+
+       dev_dbg(&tp->intf->dev, "successfully applied %s\n", phy->info);
+}
+
+/* Apply a validated PLA or USB MAC firmware block.
+ *
+ * Selects the target MCU from the block type, clears the old break
+ * points, downloads the payload, then programs the new break points and
+ * the optional break-point-enable and firmware-version registers.  The
+ * block must already have passed rtl8152_is_fw_mac_ok().
+ */
+static void rtl8152_fw_mac_apply(struct r8152 *tp, struct fw_mac *mac)
+{
+       u16 bp_en_addr, bp_index, type, bp_num, fw_ver_reg;
+       u32 length;
+       u8 *data;
+       int i;
+
+       switch (__le32_to_cpu(mac->blk_hdr.type)) {
+       case RTL_FW_PLA:
+               type = MCU_TYPE_PLA;
+               break;
+       case RTL_FW_USB:
+               type = MCU_TYPE_USB;
+               break;
+       default:
+               /* silently ignore anything but a MAC block */
+               return;
+       }
+
+       rtl_clear_bp(tp, type);
+
+       /* Enable backup/restore of MACDBG. This is required after clearing PLA
+        * break points and before applying the PLA firmware.
+        */
+       if (tp->version == RTL_VER_04 && type == MCU_TYPE_PLA &&
+           !(ocp_read_word(tp, MCU_TYPE_PLA, PLA_MACDBG_POST) & DEBUG_OE)) {
+               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MACDBG_PRE, DEBUG_LTSSM);
+               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MACDBG_POST, DEBUG_LTSSM);
+       }
+
+       /* payload starts at fw_offset inside the block */
+       length = __le32_to_cpu(mac->blk_hdr.length);
+       length -= __le16_to_cpu(mac->fw_offset);
+
+       data = (u8 *)mac;
+       data += __le16_to_cpu(mac->fw_offset);
+
+       generic_ocp_write(tp, __le16_to_cpu(mac->fw_reg), 0xff, length, data,
+                         type);
+
+       ocp_write_word(tp, type, __le16_to_cpu(mac->bp_ba_addr),
+                      __le16_to_cpu(mac->bp_ba_value));
+
+       /* program break points at consecutive word addresses */
+       bp_index = __le16_to_cpu(mac->bp_start);
+       bp_num = __le16_to_cpu(mac->bp_num);
+       for (i = 0; i < bp_num; i++) {
+               ocp_write_word(tp, type, bp_index, __le16_to_cpu(mac->bp[i]));
+               bp_index += 2;
+       }
+
+       /* both registers below are optional; 0 means "not present" */
+       bp_en_addr = __le16_to_cpu(mac->bp_en_addr);
+       if (bp_en_addr)
+               ocp_write_word(tp, type, bp_en_addr,
+                              __le16_to_cpu(mac->bp_en_value));
+
+       fw_ver_reg = __le16_to_cpu(mac->fw_ver_reg);
+       if (fw_ver_reg)
+               ocp_write_byte(tp, MCU_TYPE_USB, fw_ver_reg,
+                              mac->fw_ver_data);
+
+       dev_dbg(&tp->intf->dev, "successfully applied %s\n", mac->info);
+}
+
+/* Walk an already-validated firmware image and apply each block in
+ * order, running the per-chip pre_fw/post_fw hooks around the whole
+ * sequence.  Does nothing if no firmware was loaded (fw is NULL or an
+ * ERR_PTR from a failed request).  On success the image's version
+ * string is cached for ethtool reporting.
+ */
+static void rtl8152_apply_firmware(struct r8152 *tp)
+{
+       struct rtl_fw *rtl_fw = &tp->rtl_fw;
+       const struct firmware *fw;
+       struct fw_header *fw_hdr;
+       struct fw_phy_patch_key *key;
+       u16 key_addr = 0;
+       int i;
+
+       if (IS_ERR_OR_NULL(rtl_fw->fw))
+               return;
+
+       fw = rtl_fw->fw;
+       fw_hdr = (struct fw_header *)fw->data;
+
+       if (rtl_fw->pre_fw)
+               rtl_fw->pre_fw(tp);
+
+       /* rtl8152_check_firmware() already vetted block order and sizes */
+       for (i = offsetof(struct fw_header, blocks); i < fw->size;) {
+               struct fw_block *block = (struct fw_block *)&fw->data[i];
+
+               switch (__le32_to_cpu(block->type)) {
+               case RTL_FW_END:
+                       goto post_fw;
+               case RTL_FW_PLA:
+               case RTL_FW_USB:
+                       rtl8152_fw_mac_apply(tp, (struct fw_mac *)block);
+                       break;
+               case RTL_FW_PHY_START:
+                       /* remember the key register so PHY_STOP can close it */
+                       key = (struct fw_phy_patch_key *)block;
+                       key_addr = __le16_to_cpu(key->key_reg);
+                       r8153_pre_ram_code(tp, key_addr,
+                                          __le16_to_cpu(key->key_data));
+                       break;
+               case RTL_FW_PHY_STOP:
+                       /* validation guarantees a prior PHY_START set key_addr */
+                       WARN_ON(!key_addr);
+                       r8153_post_ram_code(tp, key_addr);
+                       break;
+               case RTL_FW_PHY_NC:
+                       rtl8152_fw_phy_nc_apply(tp, (struct fw_phy_nc *)block);
+                       break;
+               default:
+                       break;
+               }
+
+               /* blocks are padded to 8-byte boundaries */
+               i += ALIGN(__le32_to_cpu(block->length), 8);
+       }
+
+post_fw:
+       if (rtl_fw->post_fw)
+               rtl_fw->post_fw(tp);
+
+       strscpy(rtl_fw->version, fw_hdr->version, RTL_VER_SIZE);
+       dev_info(&tp->intf->dev, "load %s successfully\n", rtl_fw->version);
+}
+
+/* Drop the cached firmware image, if one was successfully loaded.
+ * An ERR_PTR left by a failed request is deliberately not released
+ * (and not cleared, so later calls keep skipping the request).
+ */
+static void rtl8152_release_firmware(struct r8152 *tp)
+{
+       struct rtl_fw *rtl_fw = &tp->rtl_fw;
+
+       if (!IS_ERR_OR_NULL(rtl_fw->fw)) {
+               release_firmware(rtl_fw->fw);
+               rtl_fw->fw = NULL;
+       }
+}
+
+/* Load and validate the per-chip firmware patch, once.
+ *
+ * Skips the request when firmware is already cached or the chip has no
+ * patch file.  On any failure the error is stored in rtl_fw->fw as an
+ * ERR_PTR so subsequent calls (and rtl8152_apply_firmware) treat the
+ * firmware as unavailable instead of retrying forever.  Returns 0 on
+ * success or when no firmware is needed, negative errno otherwise.
+ */
+static int rtl8152_request_firmware(struct r8152 *tp)
+{
+       struct rtl_fw *rtl_fw = &tp->rtl_fw;
+       long rc;
+
+       if (rtl_fw->fw || !rtl_fw->fw_name) {
+               dev_info(&tp->intf->dev, "skip request firmware\n");
+               rc = 0;
+               goto result;
+       }
+
+       rc = request_firmware(&rtl_fw->fw, rtl_fw->fw_name, &tp->intf->dev);
+       if (rc < 0)
+               goto result;
+
+       rc = rtl8152_check_firmware(tp, rtl_fw);
+       if (rc < 0)
+               release_firmware(rtl_fw->fw);
+
+result:
+       if (rc) {
+               /* poison the pointer so the failure is remembered */
+               rtl_fw->fw = ERR_PTR(rc);
+
+               dev_warn(&tp->intf->dev,
+                        "unable to load firmware patch %s (%ld)\n",
+                        rtl_fw->fw_name, rc);
+       }
+
+       return rc;
+}
+
 static void r8152_aldps_en(struct r8152 *tp, bool enable)
 {
        if (enable) {
@@ -3370,6 +4253,7 @@ static void rtl8152_disable(struct r8152 *tp)
 
 static void r8152b_hw_phy_cfg(struct r8152 *tp)
 {
+       rtl8152_apply_firmware(tp);
        rtl_eee_enable(tp, tp->eee_en);
        r8152_aldps_en(tp, true);
        r8152b_enable_fc(tp);
@@ -3377,11 +4261,23 @@ static void r8152b_hw_phy_cfg(struct r8152 *tp)
        set_bit(PHY_RESET, &tp->flags);
 }
 
-static void r8152b_exit_oob(struct r8152 *tp)
+/* Poll PLA_OOB_CTRL until LINK_LIST_READY is set (up to ~1-2 s).
+ * Factored out of the several OOB enter/exit paths that previously
+ * open-coded this loop.
+ * NOTE(review): gives up silently after 1000 polls; callers never learn
+ * whether the link list actually became ready.
+ */
+static void wait_oob_link_list_ready(struct r8152 *tp)
 {
        u32 ocp_data;
        int i;
 
+       for (i = 0; i < 1000; i++) {
+               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
+               if (ocp_data & LINK_LIST_READY)
+                       break;
+               usleep_range(1000, 2000);
+       }
+}
+
+static void r8152b_exit_oob(struct r8152 *tp)
+{
+       u32 ocp_data;
+
        ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
        ocp_data &= ~RCR_ACPT_ALL;
        ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
@@ -3399,23 +4295,13 @@ static void r8152b_exit_oob(struct r8152 *tp)
        ocp_data &= ~MCU_BORW_EN;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
        ocp_data |= RE_INIT_LL;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        rtl8152_nic_reset(tp);
 
@@ -3457,7 +4343,6 @@ static void r8152b_exit_oob(struct r8152 *tp)
 static void r8152b_enter_oob(struct r8152 *tp)
 {
        u32 ocp_data;
-       int i;
 
        ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
        ocp_data &= ~NOW_IS_OOB;
@@ -3469,23 +4354,13 @@ static void r8152b_enter_oob(struct r8152 *tp)
 
        rtl_disable(tp);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
        ocp_data |= RE_INIT_LL;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8152_RMS);
 
@@ -3506,31 +4381,124 @@ static void r8152b_enter_oob(struct r8152 *tp)
        ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 }
 
-static int r8153_patch_request(struct r8152 *tp, bool request)
+/* pre_fw hook for RTL8153A-2 (RTL_VER_04): wait for the watchdog timer
+ * to go idle before the MAC firmware is downloaded.  Always returns 0;
+ * if WTD1_EN never clears within ~104 polls the download proceeds
+ * anyway.
+ */
+static int r8153_pre_firmware_1(struct r8152 *tp)
 {
-       u16 data;
        int i;
 
-       data = ocp_reg_read(tp, OCP_PHY_PATCH_CMD);
-       if (request)
-               data |= PATCH_REQUEST;
-       else
-               data &= ~PATCH_REQUEST;
-       ocp_reg_write(tp, OCP_PHY_PATCH_CMD, data);
+       /* Wait till the WTD timer is ready. It would take at most 104 ms. */
+       for (i = 0; i < 104; i++) {
+               u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_WDT1_CTRL);
 
-       for (i = 0; request && i < 5000; i++) {
-               usleep_range(1000, 2000);
-               if (ocp_reg_read(tp, OCP_PHY_PATCH_STAT) & PATCH_READY)
+               if (!(ocp_data & WTD1_EN))
                        break;
+               usleep_range(1000, 2000);
        }
 
-       if (request && !(ocp_reg_read(tp, OCP_PHY_PATCH_STAT) & PATCH_READY)) {
-               netif_err(tp, drv, tp->netdev, "patch request fail\n");
-               r8153_patch_request(tp, false);
-               return -ETIME;
-       } else {
-               return 0;
+       return 0;
+}
+
+/* post_fw hook for RTL8153A-2 (RTL_VER_04): adjust break point 4 when
+ * the device is forced to SuperSpeed only, and shorten the UPHY timer.
+ * Always returns 0.
+ */
+static int r8153_post_firmware_1(struct r8152 *tp)
+{
+       /* set USB_BP_4 to support USB_SPEED_SUPER only */
+       if (ocp_read_byte(tp, MCU_TYPE_USB, USB_CSTMR) & FORCE_SUPER)
+               ocp_write_word(tp, MCU_TYPE_USB, USB_BP_4, BP4_SUPER_ONLY);
+
+       /* reset UPHY timer to 36 ms */
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_UPHY_TIMER, 36000 / 16);
+
+       return 0;
+}
+
+/* pre_fw hook for RTL8153A-3 (RTL_VER_05): same watchdog wait as
+ * r8153_pre_firmware_1, plus disabling the suspend fix while the
+ * firmware is replaced (re-enabled in r8153_post_firmware_2).
+ * Always returns 0.
+ */
+static int r8153_pre_firmware_2(struct r8152 *tp)
+{
+       u32 ocp_data;
+
+       r8153_pre_firmware_1(tp);
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0);
+       ocp_data &= ~FW_FIX_SUSPEND;
+       ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, ocp_data);
+
+       return 0;
+}
+
+/* post_fw hook for RTL8153A-3 (RTL_VER_05): re-arm the patch features
+ * that the firmware download disturbed — break point 0 for
+ * SuperSpeed-only mode, UPHY timer, U3P3 check, the suspend fix
+ * (cleared in r8153_pre_firmware_2), and USB2 PHY L1/suspend power
+ * saving.  Always returns 0.
+ */
+static int r8153_post_firmware_2(struct r8152 *tp)
+{
+       u32 ocp_data;
+
+       /* enable bp0 if support USB_SPEED_SUPER only */
+       if (ocp_read_byte(tp, MCU_TYPE_USB, USB_CSTMR) & FORCE_SUPER) {
+               ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BP_EN);
+               ocp_data |= BIT(0);
+               ocp_write_word(tp, MCU_TYPE_PLA, PLA_BP_EN, ocp_data);
+       }
+
+       /* reset UPHY timer to 36 ms */
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_UPHY_TIMER, 36000 / 16);
+
+       /* enable U3P3 check, set the counter to 4 */
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, U3P3_CHECK_EN | 4);
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0);
+       ocp_data |= FW_FIX_SUSPEND;
+       ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, ocp_data);
+
+       ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY);
+       ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND;
+       ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data);
+
+       return 0;
+}
+
+/* post_fw hook for RTL8153A-4 (RTL_VER_06): enable USB2 PHY L1/suspend
+ * power saving and the firmware IP-reset fix.  Always returns 0.
+ */
+static int r8153_post_firmware_3(struct r8152 *tp)
+{
+       u32 ocp_data;
+
+       ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY);
+       ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND;
+       ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data);
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1);
+       ocp_data |= FW_IP_RESET_EN;
+       ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data);
+
+       return 0;
+}
+
+/* pre_fw hook for RTL8153B-2 (RTL_VER_09): arm the flow-control timer
+ * before the firmware download.  Always returns 0.
+ */
+static int r8153b_pre_firmware_1(struct r8152 *tp)
+{
+       /* enable fc timer and set timer to 1 second. */
+       ocp_write_word(tp, MCU_TYPE_USB, USB_FC_TIMER,
+                      CTRL_TIMER_EN | (1000 / 8));
+
+       return 0;
+}
+
+/* post_fw hook for RTL8153B-2 (RTL_VER_09): enable break point 0 on the
+ * -BND variant, then turn on the flow-control patch, its task, and the
+ * firmware IP-reset fix.  Always returns 0.
+ */
+static int r8153b_post_firmware_1(struct r8152 *tp)
+{
+       u32 ocp_data;
+
+       /* enable bp0 for RTL8153-BND */
+       ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1);
+       if (ocp_data & BND_MASK) {
+               ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BP_EN);
+               ocp_data |= BIT(0);
+               ocp_write_word(tp, MCU_TYPE_PLA, PLA_BP_EN, ocp_data);
+       }
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL);
+       ocp_data |= FLOW_CTRL_PATCH_OPT;
+       ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data);
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK);
+       ocp_data |= FC_PATCH_TASK;
+       ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1);
+       ocp_data |= FW_IP_RESET_EN;
+       ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data);
+
+       return 0;
 }
 
 static void r8153_aldps_en(struct r8152 *tp, bool enable)
@@ -3567,6 +4535,8 @@ static void r8153_hw_phy_cfg(struct r8152 *tp)
        /* disable EEE before updating the PHY parameters */
        rtl_eee_enable(tp, false);
 
+       rtl8152_apply_firmware(tp);
+
        if (tp->version == RTL_VER_03) {
                data = ocp_reg_read(tp, OCP_EEE_CFG);
                data &= ~CTAP_SHORT_EN;
@@ -3639,6 +4609,8 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp)
        /* disable EEE before updating the PHY parameters */
        rtl_eee_enable(tp, false);
 
+       rtl8152_apply_firmware(tp);
+
        r8153b_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags));
 
        data = sram_read(tp, SRAM_GREEN_CFG);
@@ -3711,7 +4683,6 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp)
 static void r8153_first_init(struct r8152 *tp)
 {
        u32 ocp_data;
-       int i;
 
        r8153_mac_clk_spd(tp, false);
        rxdy_gated_en(tp, true);
@@ -3732,23 +4703,13 @@ static void r8153_first_init(struct r8152 *tp)
        ocp_data &= ~MCU_BORW_EN;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
        ocp_data |= RE_INIT_LL;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
 
@@ -3773,7 +4734,6 @@ static void r8153_first_init(struct r8152 *tp)
 static void r8153_enter_oob(struct r8152 *tp)
 {
        u32 ocp_data;
-       int i;
 
        r8153_mac_clk_spd(tp, true);
 
@@ -3784,23 +4744,13 @@ static void r8153_enter_oob(struct r8152 *tp)
        rtl_disable(tp);
        rtl_reset_bmu(tp);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
        ocp_data |= RE_INIT_LL;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-       for (i = 0; i < 1000; i++) {
-               ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
-               if (ocp_data & LINK_LIST_READY)
-                       break;
-               usleep_range(1000, 2000);
-       }
+       wait_oob_link_list_ready(tp);
 
        ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
@@ -4187,11 +5137,22 @@ static void rtl_hw_phy_work_func_t(struct work_struct *work)
 
        mutex_lock(&tp->control);
 
+       if (rtl8152_request_firmware(tp) == -ENODEV && tp->rtl_fw.retry) {
+               tp->rtl_fw.retry = false;
+               tp->rtl_fw.fw = NULL;
+
+               /* Delay execution in case request_firmware() is not ready yet.
+                */
+               queue_delayed_work(system_long_wq, &tp->hw_phy_work, HZ * 10);
+               goto ignore_once;
+       }
+
        tp->rtl_ops.hw_phy_cfg(tp);
 
        rtl8152_set_speed(tp, tp->autoneg, tp->speed, tp->duplex,
                          tp->advertising);
 
+ignore_once:
        mutex_unlock(&tp->control);
 
        usb_autopm_put_interface(tp->intf);
@@ -4229,6 +5190,11 @@ static int rtl8152_open(struct net_device *netdev)
        struct r8152 *tp = netdev_priv(netdev);
        int res = 0;
 
+       if (work_busy(&tp->hw_phy_work.work) & WORK_BUSY_PENDING) {
+               cancel_delayed_work_sync(&tp->hw_phy_work);
+               rtl_hw_phy_work_func_t(&tp->hw_phy_work.work);
+       }
+
        res = alloc_all_mem(tp);
        if (res)
                goto out;
@@ -4875,6 +5841,9 @@ static void rtl8152_get_drvinfo(struct net_device *netdev,
        strlcpy(info->driver, MODULENAME, sizeof(info->driver));
        strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
        usb_make_path(tp->udev, info->bus_info, sizeof(info->bus_info));
+       if (!IS_ERR_OR_NULL(tp->rtl_fw.fw))
+               strlcpy(info->fw_version, tp->rtl_fw.version,
+                       sizeof(info->fw_version));
 }
 
 static
@@ -5499,6 +6468,47 @@ static int rtl_ops_init(struct r8152 *tp)
        return ret;
 }
 
+#define FIRMWARE_8153A_2       "rtl_nic/rtl8153a-2.fw"
+#define FIRMWARE_8153A_3       "rtl_nic/rtl8153a-3.fw"
+#define FIRMWARE_8153A_4       "rtl_nic/rtl8153a-4.fw"
+#define FIRMWARE_8153B_2       "rtl_nic/rtl8153b-2.fw"
+
+MODULE_FIRMWARE(FIRMWARE_8153A_2);
+MODULE_FIRMWARE(FIRMWARE_8153A_3);
+MODULE_FIRMWARE(FIRMWARE_8153A_4);
+MODULE_FIRMWARE(FIRMWARE_8153B_2);
+
+/* Select the firmware file and the pre_fw/post_fw hooks for the
+ * detected chip revision.  Versions without a patch file keep fw_name
+ * NULL, which makes rtl8152_request_firmware() a no-op.  Always
+ * returns 0.
+ */
+static int rtl_fw_init(struct r8152 *tp)
+{
+       struct rtl_fw *rtl_fw = &tp->rtl_fw;
+
+       switch (tp->version) {
+       case RTL_VER_04:
+               rtl_fw->fw_name         = FIRMWARE_8153A_2;
+               rtl_fw->pre_fw          = r8153_pre_firmware_1;
+               rtl_fw->post_fw         = r8153_post_firmware_1;
+               break;
+       case RTL_VER_05:
+               rtl_fw->fw_name         = FIRMWARE_8153A_3;
+               rtl_fw->pre_fw          = r8153_pre_firmware_2;
+               rtl_fw->post_fw         = r8153_post_firmware_2;
+               break;
+       case RTL_VER_06:
+               /* no pre_fw hook is needed for this revision */
+               rtl_fw->fw_name         = FIRMWARE_8153A_4;
+               rtl_fw->post_fw         = r8153_post_firmware_3;
+               break;
+       case RTL_VER_09:
+               rtl_fw->fw_name         = FIRMWARE_8153B_2;
+               rtl_fw->pre_fw          = r8153b_pre_firmware_1;
+               rtl_fw->post_fw         = r8153b_post_firmware_1;
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
 static u8 rtl_get_version(struct usb_interface *intf)
 {
        struct usb_device *udev = interface_to_usbdev(intf);
@@ -5606,6 +6616,8 @@ static int rtl8152_probe(struct usb_interface *intf,
        if (ret)
                goto out;
 
+       rtl_fw_init(tp);
+
        mutex_init(&tp->control);
        INIT_DELAYED_WORK(&tp->schedule, rtl_work_func_t);
        INIT_DELAYED_WORK(&tp->hw_phy_work, rtl_hw_phy_work_func_t);
@@ -5632,8 +6644,13 @@ static int rtl8152_probe(struct usb_interface *intf,
                netdev->hw_features &= ~NETIF_F_RXCSUM;
        }
 
+       if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO &&
+           le16_to_cpu(udev->descriptor.idProduct) == 0x3082)
+               set_bit(LENOVO_MACPASSTHRU, &tp->flags);
+
        if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial &&
-           (!strcmp(udev->serial, "000001000000") || !strcmp(udev->serial, "000002000000"))) {
+           (!strcmp(udev->serial, "000001000000") ||
+            !strcmp(udev->serial, "000002000000"))) {
                dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation");
                set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
        }
@@ -5676,6 +6693,10 @@ static int rtl8152_probe(struct usb_interface *intf,
        intf->needs_remote_wakeup = 1;
 
        tp->rtl_ops.init(tp);
+#if IS_BUILTIN(CONFIG_USB_RTL8152)
+       /* Retry in case request_firmware() is not ready yet. */
+       tp->rtl_fw.retry = true;
+#endif
        queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0);
        set_ethernet_addr(tp);
 
@@ -5721,6 +6742,7 @@ static void rtl8152_disconnect(struct usb_interface *intf)
                tasklet_kill(&tp->tx_tl);
                cancel_delayed_work_sync(&tp->hw_phy_work);
                tp->rtl_ops.unload(tp);
+               rtl8152_release_firmware(tp);
                free_netdev(tp->netdev);
        }
 }
@@ -5752,6 +6774,7 @@ static const struct usb_device_id rtl8152_table[] = {
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x304f)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3062)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3069)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3082)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x720c)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7214)},
index 8869154..11f5776 100644 (file)
@@ -793,8 +793,7 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
        return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
-static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
-                                        const u8 *mac, __u16 state,
+static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state,
                                         __be32 src_vni, __u16 ndm_flags)
 {
        struct vxlan_fdb *f;
@@ -835,7 +834,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
                return -ENOSPC;
 
        netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
-       f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+       f = vxlan_fdb_alloc(mac, state, src_vni, ndm_flags);
        if (!f)
                return -ENOMEM;
 
index 73f5892..1c640b4 100644 (file)
@@ -26,7 +26,7 @@ int debugfs_netdev_queue_stopped_get(void *data, u64 *val)
        *val = netif_queue_stopped(i2400m->wimax_dev.net_dev);
        return 0;
 }
-DEFINE_SIMPLE_ATTRIBUTE(fops_netdev_queue_stopped,
+DEFINE_DEBUGFS_ATTRIBUTE(fops_netdev_queue_stopped,
                        debugfs_netdev_queue_stopped_get,
                        NULL, "%llu\n");
 
@@ -154,7 +154,7 @@ int debugfs_i2400m_suspend_set(void *data, u64 val)
                result = 0;
        return result;
 }
-DEFINE_SIMPLE_ATTRIBUTE(fops_i2400m_suspend,
+DEFINE_DEBUGFS_ATTRIBUTE(fops_i2400m_suspend,
                        NULL, debugfs_i2400m_suspend_set,
                        "%llu\n");
 
@@ -183,7 +183,7 @@ int debugfs_i2400m_reset_set(void *data, u64 val)
        }
        return result;
 }
-DEFINE_SIMPLE_ATTRIBUTE(fops_i2400m_reset,
+DEFINE_DEBUGFS_ATTRIBUTE(fops_i2400m_reset,
                        NULL, debugfs_i2400m_reset_set,
                        "%llu\n");
 
index 6953f90..9659f9e 100644 (file)
@@ -511,7 +511,7 @@ error_alloc_netdev:
 
 
 /*
- * Disconect a i2400m from the system.
+ * Disconnect a i2400m from the system.
  *
  * i2400m_stop() has been called before, so al the rx and tx contexts
  * have been taken down already. Make sure the queue is stopped,
index 2fe12b0..42f00a2 100644 (file)
@@ -1037,7 +1037,7 @@ static void ar9003_hw_configpcipowersave(struct ath_hw *ah,
        }
 
        /*
-        * Configire PCIE after Ini init. SERDES values now come from ini file
+        * Configure PCIE after Ini init. SERDES values now come from ini file
         * This enables PCIe low power mode.
         */
        array = power_off ? &ah->iniPcieSerdes :
index a82ad73..791f663 100644 (file)
@@ -1674,7 +1674,7 @@ static int ath9k_htc_ampdu_action(struct ieee80211_hw *hw,
        case IEEE80211_AMPDU_TX_START:
                ret = ath9k_htc_tx_aggr_oper(priv, vif, sta, action, tid);
                if (!ret)
-                       ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+                       ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
                break;
        case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
index 34121fb..0548aa3 100644 (file)
@@ -1921,7 +1921,7 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw,
                ath9k_ps_wakeup(sc);
                ret = ath_tx_aggr_start(sc, sta, tid, ssn);
                if (!ret)
-                       ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+                       ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
                ath9k_ps_restore(sc);
                break;
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
index 40a8054..5914926 100644 (file)
@@ -1449,8 +1449,7 @@ static int carl9170_op_ampdu_action(struct ieee80211_hw *hw,
                rcu_assign_pointer(sta_info->agg[tid], tid_info);
                spin_unlock_bh(&ar->tx_ampdu_list_lock);
 
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
 
        case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
index 79998a3..a276dae 100644 (file)
@@ -1084,6 +1084,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw,
        enum ieee80211_ampdu_mlme_action action = params->action;
        u16 tid = params->tid;
        u16 *ssn = &params->ssn;
+       int ret = 0;
 
        wcn36xx_dbg(WCN36XX_DBG_MAC, "mac ampdu action action %d tid %d\n",
                    action, tid);
@@ -1106,7 +1107,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw,
                sta_priv->ampdu_state[tid] = WCN36XX_AMPDU_START;
                spin_unlock_bh(&sta_priv->ampdu_lock);
 
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
                break;
        case IEEE80211_AMPDU_TX_OPERATIONAL:
                spin_lock_bh(&sta_priv->ampdu_lock);
@@ -1131,7 +1132,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw,
 
        mutex_unlock(&wcn->conf_mutex);
 
-       return 0;
+       return ret;
 }
 
 static const struct ieee80211_ops wcn36xx_ops = {
index 6188275..8e8b685 100644 (file)
@@ -850,8 +850,7 @@ brcms_ops_ampdu_action(struct ieee80211_hw *hw,
                                     "START: tid %d is not agg\'able\n", tid);
                        return -EINVAL;
                }
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
 
        case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
index ffb705b..51fdd7c 100644 (file)
@@ -2265,7 +2265,7 @@ il4965_tx_agg_start(struct il_priv *il, struct ieee80211_vif *vif,
        if (tid_data->tfds_in_queue == 0) {
                D_HT("HW queue is empty\n");
                tid_data->agg.state = IL_AGG_ON;
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
        } else {
                D_HT("HW queue is NOT empty: %d packets in HW queue\n",
                     tid_data->tfds_in_queue);
index 3029e3f..cd73fc5 100644 (file)
@@ -621,7 +621,7 @@ int iwlagn_tx_agg_start(struct iwl_priv *priv, struct ieee80211_vif *vif,
                IWL_DEBUG_TX_QUEUES(priv, "Can proceed: ssn = next_recl = %d\n",
                                    tid_data->agg.ssn);
                tid_data->agg.state = IWL_AGG_STARTING;
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
        } else {
                IWL_DEBUG_TX_QUEUES(priv, "Can't proceed: ssn %d, "
                                    "next_reclaimed = %d\n",
index 0c12df5..05c1c77 100644 (file)
@@ -148,7 +148,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans,
  *
  * Bits 3:0:
  * Define the maximum number of pending read requests.
- * Maximum configration value allowed is 0xC
+ * Maximum configuration value allowed is 0xC
  * Bits 9:8:
  * Define the maximum transfer size. (64 / 128 / 256)
  * Bit 10:
index b3768d5..7b35f41 100644 (file)
@@ -2844,13 +2844,12 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 
        if (normalized_ssn == tid_data->next_reclaimed) {
                tid_data->state = IWL_AGG_STARTING;
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
        } else {
                tid_data->state = IWL_EMPTYING_HW_QUEUE_ADDBA;
+               ret = 0;
        }
 
-       ret = 0;
-
 out:
        spin_unlock_bh(&mvmsta->lock);
 
index 14f562c..31ae6c4 100644 (file)
@@ -148,23 +148,25 @@ static const char *hwsim_alpha2s[] = {
 };
 
 static const struct ieee80211_regdomain hwsim_world_regdom_custom_01 = {
-       .n_reg_rules = 4,
+       .n_reg_rules = 5,
        .alpha2 =  "99",
        .reg_rules = {
                REG_RULE(2412-10, 2462+10, 40, 0, 20, 0),
                REG_RULE(2484-10, 2484+10, 40, 0, 20, 0),
                REG_RULE(5150-10, 5240+10, 40, 0, 30, 0),
                REG_RULE(5745-10, 5825+10, 40, 0, 30, 0),
+               REG_RULE(5855-10, 5925+10, 40, 0, 33, 0),
        }
 };
 
 static const struct ieee80211_regdomain hwsim_world_regdom_custom_02 = {
-       .n_reg_rules = 2,
+       .n_reg_rules = 3,
        .alpha2 =  "99",
        .reg_rules = {
                REG_RULE(2412-10, 2462+10, 40, 0, 20, 0),
                REG_RULE(5725-10, 5850+10, 40, 0, 30,
                         NL80211_RRF_NO_IR),
+               REG_RULE(5855-10, 5925+10, 40, 0, 33, 0),
        }
 };
 
@@ -354,6 +356,24 @@ static const struct ieee80211_channel hwsim_channels_5ghz[] = {
        CHAN5G(5805), /* Channel 161 */
        CHAN5G(5825), /* Channel 165 */
        CHAN5G(5845), /* Channel 169 */
+
+       CHAN5G(5855), /* Channel 171 */
+       CHAN5G(5860), /* Channel 172 */
+       CHAN5G(5865), /* Channel 173 */
+       CHAN5G(5870), /* Channel 174 */
+
+       CHAN5G(5875), /* Channel 175 */
+       CHAN5G(5880), /* Channel 176 */
+       CHAN5G(5885), /* Channel 177 */
+       CHAN5G(5890), /* Channel 178 */
+       CHAN5G(5895), /* Channel 179 */
+       CHAN5G(5900), /* Channel 180 */
+       CHAN5G(5905), /* Channel 181 */
+
+       CHAN5G(5910), /* Channel 182 */
+       CHAN5G(5915), /* Channel 183 */
+       CHAN5G(5920), /* Channel 184 */
+       CHAN5G(5925), /* Channel 185 */
 };
 
 static const struct ieee80211_rate hwsim_rates[] = {
@@ -1550,7 +1570,8 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac,
 
        if (vif->type != NL80211_IFTYPE_AP &&
            vif->type != NL80211_IFTYPE_MESH_POINT &&
-           vif->type != NL80211_IFTYPE_ADHOC)
+           vif->type != NL80211_IFTYPE_ADHOC &&
+           vif->type != NL80211_IFTYPE_OCB)
                return;
 
        skb = ieee80211_beacon_get(hw, vif);
@@ -1604,6 +1625,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer)
 }
 
 static const char * const hwsim_chanwidths[] = {
+       [NL80211_CHAN_WIDTH_5] = "ht5",
+       [NL80211_CHAN_WIDTH_10] = "ht10",
        [NL80211_CHAN_WIDTH_20_NOHT] = "noht",
        [NL80211_CHAN_WIDTH_20] = "ht20",
        [NL80211_CHAN_WIDTH_40] = "ht40",
@@ -1979,8 +2002,7 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw,
 
        switch (action) {
        case IEEE80211_AMPDU_TX_START:
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
        case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
        case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
@@ -2723,7 +2745,8 @@ static void mac80211_hwsim_he_capab(struct ieee80211_supported_band *sband)
         BIT(NL80211_IFTYPE_P2P_CLIENT) | \
         BIT(NL80211_IFTYPE_P2P_GO) | \
         BIT(NL80211_IFTYPE_ADHOC) | \
-        BIT(NL80211_IFTYPE_MESH_POINT))
+        BIT(NL80211_IFTYPE_MESH_POINT) | \
+        BIT(NL80211_IFTYPE_OCB))
 
 static int mac80211_hwsim_new_radio(struct genl_info *info,
                                    struct hwsim_new_radio_params *param)
@@ -2847,6 +2870,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
        } else {
                data->if_combination.num_different_channels = 1;
                data->if_combination.radar_detect_widths =
+                                       BIT(NL80211_CHAN_WIDTH_5) |
+                                       BIT(NL80211_CHAN_WIDTH_10) |
                                        BIT(NL80211_CHAN_WIDTH_20_NOHT) |
                                        BIT(NL80211_CHAN_WIDTH_20) |
                                        BIT(NL80211_CHAN_WIDTH_40) |
index c4db641..d55f229 100644 (file)
@@ -5520,7 +5520,7 @@ mwl8k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                        rc = -EBUSY;
                        break;
                }
-               ieee80211_start_tx_ba_cb_irqsafe(vif, addr, tid);
+               rc = IEEE80211_AMPDU_TX_START_IMMEDIATE;
                break;
        case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
index 25d5b16..4b3217b 100644 (file)
@@ -582,8 +582,7 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                break;
        case IEEE80211_AMPDU_TX_START:
                mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn);
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
        case IEEE80211_AMPDU_TX_STOP_CONT:
                mtxq->aggr = false;
                mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1);
index 87c7487..b6d7821 100644 (file)
@@ -477,8 +477,7 @@ mt7615_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                break;
        case IEEE80211_AMPDU_TX_START:
                mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn);
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
        case IEEE80211_AMPDU_TX_STOP_CONT:
                mtxq->aggr = false;
                mt7615_mcu_set_tx_ba(dev, params, 0);
index aec73a0..414b223 100644 (file)
@@ -393,8 +393,7 @@ int mt76x02_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                break;
        case IEEE80211_AMPDU_TX_START:
                mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn);
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
        case IEEE80211_AMPDU_TX_STOP_CONT:
                mtxq->aggr = false;
                ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
index 72e608c..671d889 100644 (file)
@@ -372,8 +372,7 @@ mt76_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                break;
        case IEEE80211_AMPDU_TX_START:
                msta->agg_ssn[tid] = ssn << 4;
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
        case IEEE80211_AMPDU_TX_STOP_CONT:
                ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
                break;
index f1cdcd6..2546645 100644 (file)
@@ -10476,7 +10476,7 @@ int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
         * when the hw reorders frames due to aggregation.
         */
        if (sta_priv->wcid > WCID_END)
-               return 1;
+               return -ENOSPC;
 
        switch (action) {
        case IEEE80211_AMPDU_RX_START:
@@ -10489,7 +10489,7 @@ int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                 */
                break;
        case IEEE80211_AMPDU_TX_START:
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
                break;
        case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
index ac746c3..c75192c 100644 (file)
@@ -1776,8 +1776,7 @@ int rtl_tx_agg_start(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 
        tid_data->agg.agg_state = RTL_AGG_START;
 
-       ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-       return 0;
+       return IEEE80211_AMPDU_TX_START_IMMEDIATE;
 }
 
 int rtl_tx_agg_stop(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
index 6d6e899..81313e0 100644 (file)
@@ -1352,9 +1352,9 @@ static void _rtl92s_phy_set_rfhalt(struct ieee80211_hw *hw)
        /* SW/HW radio off or halt adapter!! For example S3/S4 */
        } else {
                /* LED function disable. Power range is about 8mA now. */
-               /* if write 0xF1 disconnet_pci power
+               /* if write 0xF1 disconnect_pci power
                 *       ifconfig wlan0 down power are both high 35:70 */
-               /* if write oxF9 disconnet_pci power
+               /* if write oxF9 disconnect_pci power
                 * ifconfig wlan0 down power are both low  12:45*/
                rtl_write_byte(rtlpriv, 0x03, 0xF9);
        }
index e5e3605..a203b47 100644 (file)
@@ -437,8 +437,7 @@ static int rtw_ops_ampdu_action(struct ieee80211_hw *hw,
 
        switch (params->action) {
        case IEEE80211_AMPDU_TX_START:
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
        case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
        case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
index ce5e92d..4400882 100644 (file)
@@ -1140,8 +1140,7 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
                else if ((vif->type == NL80211_IFTYPE_AP) ||
                         (vif->type == NL80211_IFTYPE_P2P_GO))
                        rsta->seq_start[tid] = seq_no;
-               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
-               status = 0;
+               status = IEEE80211_AMPDU_TX_START_IMMEDIATE;
                break;
 
        case IEEE80211_AMPDU_TX_STOP_CONT:
index d4c09e5..18c4d99 100644 (file)
@@ -186,7 +186,7 @@ static void wl12xx_spi_init(struct device *child)
 
        spi_sync(to_spi_device(glue->dev), &m);
 
-       /* Restore chip select configration to normal */
+       /* Restore chip select configuration to normal */
        spi->mode ^= SPI_CS_HIGH;
        kfree(cmd);
 }
index 103ed00..68dd7bb 100644 (file)
@@ -626,6 +626,38 @@ err:
        return err;
 }
 
+static void xenvif_disconnect_queue(struct xenvif_queue *queue)
+{
+       if (queue->tx_irq) {
+               unbind_from_irqhandler(queue->tx_irq, queue);
+               if (queue->tx_irq == queue->rx_irq)
+                       queue->rx_irq = 0;
+               queue->tx_irq = 0;
+       }
+
+       if (queue->rx_irq) {
+               unbind_from_irqhandler(queue->rx_irq, queue);
+               queue->rx_irq = 0;
+       }
+
+       if (queue->task) {
+               kthread_stop(queue->task);
+               queue->task = NULL;
+       }
+
+       if (queue->dealloc_task) {
+               kthread_stop(queue->dealloc_task);
+               queue->dealloc_task = NULL;
+       }
+
+       if (queue->napi.poll) {
+               netif_napi_del(&queue->napi);
+               queue->napi.poll = NULL;
+       }
+
+       xenvif_unmap_frontend_data_rings(queue);
+}
+
 int xenvif_connect_data(struct xenvif_queue *queue,
                        unsigned long tx_ring_ref,
                        unsigned long rx_ring_ref,
@@ -651,13 +683,27 @@ int xenvif_connect_data(struct xenvif_queue *queue,
        netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
                        XENVIF_NAPI_WEIGHT);
 
+       queue->stalled = true;
+
+       task = kthread_run(xenvif_kthread_guest_rx, queue,
+                          "%s-guest-rx", queue->name);
+       if (IS_ERR(task))
+               goto kthread_err;
+       queue->task = task;
+
+       task = kthread_run(xenvif_dealloc_kthread, queue,
+                          "%s-dealloc", queue->name);
+       if (IS_ERR(task))
+               goto kthread_err;
+       queue->dealloc_task = task;
+
        if (tx_evtchn == rx_evtchn) {
                /* feature-split-event-channels == 0 */
                err = bind_interdomain_evtchn_to_irqhandler(
                        queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
                        queue->name, queue);
                if (err < 0)
-                       goto err_unmap;
+                       goto err;
                queue->tx_irq = queue->rx_irq = err;
                disable_irq(queue->tx_irq);
        } else {
@@ -668,7 +714,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
                        queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
                        queue->tx_irq_name, queue);
                if (err < 0)
-                       goto err_unmap;
+                       goto err;
                queue->tx_irq = err;
                disable_irq(queue->tx_irq);
 
@@ -678,47 +724,18 @@ int xenvif_connect_data(struct xenvif_queue *queue,
                        queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
                        queue->rx_irq_name, queue);
                if (err < 0)
-                       goto err_tx_unbind;
+                       goto err;
                queue->rx_irq = err;
                disable_irq(queue->rx_irq);
        }
 
-       queue->stalled = true;
-
-       task = kthread_create(xenvif_kthread_guest_rx,
-                             (void *)queue, "%s-guest-rx", queue->name);
-       if (IS_ERR(task)) {
-               pr_warn("Could not allocate kthread for %s\n", queue->name);
-               err = PTR_ERR(task);
-               goto err_rx_unbind;
-       }
-       queue->task = task;
-       get_task_struct(task);
-
-       task = kthread_create(xenvif_dealloc_kthread,
-                             (void *)queue, "%s-dealloc", queue->name);
-       if (IS_ERR(task)) {
-               pr_warn("Could not allocate kthread for %s\n", queue->name);
-               err = PTR_ERR(task);
-               goto err_rx_unbind;
-       }
-       queue->dealloc_task = task;
-
-       wake_up_process(queue->task);
-       wake_up_process(queue->dealloc_task);
-
        return 0;
 
-err_rx_unbind:
-       unbind_from_irqhandler(queue->rx_irq, queue);
-       queue->rx_irq = 0;
-err_tx_unbind:
-       unbind_from_irqhandler(queue->tx_irq, queue);
-       queue->tx_irq = 0;
-err_unmap:
-       xenvif_unmap_frontend_data_rings(queue);
-       netif_napi_del(&queue->napi);
+kthread_err:
+       pr_warn("Could not allocate kthread for %s\n", queue->name);
+       err = PTR_ERR(task);
 err:
+       xenvif_disconnect_queue(queue);
        return err;
 }
 
@@ -746,30 +763,7 @@ void xenvif_disconnect_data(struct xenvif *vif)
        for (queue_index = 0; queue_index < num_queues; ++queue_index) {
                queue = &vif->queues[queue_index];
 
-               netif_napi_del(&queue->napi);
-
-               if (queue->task) {
-                       kthread_stop(queue->task);
-                       put_task_struct(queue->task);
-                       queue->task = NULL;
-               }
-
-               if (queue->dealloc_task) {
-                       kthread_stop(queue->dealloc_task);
-                       queue->dealloc_task = NULL;
-               }
-
-               if (queue->tx_irq) {
-                       if (queue->tx_irq == queue->rx_irq)
-                               unbind_from_irqhandler(queue->tx_irq, queue);
-                       else {
-                               unbind_from_irqhandler(queue->tx_irq, queue);
-                               unbind_from_irqhandler(queue->rx_irq, queue);
-                       }
-                       queue->tx_irq = 0;
-               }
-
-               xenvif_unmap_frontend_data_rings(queue);
+               xenvif_disconnect_queue(queue);
        }
 
        xenvif_mcast_addr_list_free(vif);
index 0f22379..18cd962 100644 (file)
@@ -278,7 +278,6 @@ static struct i2c_driver nfcmrvl_i2c_driver = {
        .remove = nfcmrvl_i2c_remove,
        .driver = {
                .name           = "nfcmrvl_i2c",
-               .owner          = THIS_MODULE,
                .of_match_table = of_match_ptr(of_nfcmrvl_i2c_match),
        },
 };
index f6d6b34..7fe1bbe 100644 (file)
@@ -26,3 +26,14 @@ config NFC_PN533_I2C
 
          If you choose to build a module, it'll be called pn533_i2c.
          Say N if unsure.
+
+config NFC_PN532_UART
+       tristate "NFC PN532 device support (UART)"
+       depends on SERIAL_DEV_BUS
+       select NFC_PN533
+       ---help---
+         This module adds support for the NXP pn532 UART interface.
+         Select this if your platform is using the UART bus.
+
+         If you choose to build a module, it'll be called pn532_uart.
+         Say N if unsure.
index 43c25b4..b964833 100644 (file)
@@ -4,7 +4,9 @@
 #
 pn533_usb-objs  = usb.o
 pn533_i2c-objs  = i2c.o
+pn532_uart-objs  = uart.o
 
 obj-$(CONFIG_NFC_PN533)     += pn533.o
 obj-$(CONFIG_NFC_PN533_USB) += pn533_usb.o
 obj-$(CONFIG_NFC_PN533_I2C) += pn533_i2c.o
+obj-$(CONFIG_NFC_PN532_UART) += pn532_uart.o
index 1832cd9..7507176 100644 (file)
@@ -193,12 +193,10 @@ static int pn533_i2c_probe(struct i2c_client *client,
        phy->i2c_dev = client;
        i2c_set_clientdata(client, phy);
 
-       priv = pn533_register_device(PN533_DEVICE_PN532,
-                                    PN533_NO_TYPE_B_PROTOCOLS,
-                                    PN533_PROTO_REQ_ACK_RESP,
-                                    phy, &i2c_phy_ops, NULL,
-                                    &phy->i2c_dev->dev,
-                                    &client->dev);
+       priv = pn53x_common_init(PN533_DEVICE_PN532,
+                               PN533_PROTO_REQ_ACK_RESP,
+                               phy, &i2c_phy_ops, NULL,
+                               &phy->i2c_dev->dev);
 
        if (IS_ERR(priv)) {
                r = PTR_ERR(priv);
@@ -206,6 +204,9 @@ static int pn533_i2c_probe(struct i2c_client *client,
        }
 
        phy->priv = priv;
+       r = pn532_i2c_nfc_alloc(priv, PN533_NO_TYPE_B_PROTOCOLS, &client->dev);
+       if (r)
+               goto nfc_alloc_err;
 
        r = request_threaded_irq(client->irq, NULL, pn533_i2c_irq_thread_fn,
                                IRQF_TRIGGER_FALLING |
@@ -220,13 +221,20 @@ static int pn533_i2c_probe(struct i2c_client *client,
        if (r)
                goto fn_setup_err;
 
-       return 0;
+       r = nfc_register_device(priv->nfc_dev);
+       if (r)
+               goto fn_setup_err;
+
+       return r;
 
 fn_setup_err:
        free_irq(client->irq, phy);
 
 irq_rqst_err:
-       pn533_unregister_device(phy->priv);
+       nfc_free_device(priv->nfc_dev);
+
+nfc_alloc_err:
+       pn53x_common_clean(phy->priv);
 
        return r;
 }
@@ -239,12 +247,18 @@ static int pn533_i2c_remove(struct i2c_client *client)
 
        free_irq(client->irq, phy);
 
-       pn533_unregister_device(phy->priv);
+       pn53x_unregister_nfc(phy->priv);
+       pn53x_common_clean(phy->priv);
 
        return 0;
 }
 
 static const struct of_device_id of_pn533_i2c_match[] = {
+       { .compatible = "nxp,pn532", },
+       /*
+        * NOTE: The use of the compatibles with the trailing "...-i2c" is
+        * deprecated and will be removed.
+        */
        { .compatible = "nxp,pn533-i2c", },
        { .compatible = "nxp,pn532-i2c", },
        {},
index a172a32..aa766e7 100644 (file)
@@ -185,6 +185,32 @@ struct pn533_cmd_jump_dep_response {
        u8 gt[];
 } __packed;
 
+struct pn532_autopoll_resp {
+       u8 type;
+       u8 ln;
+       u8 tg;
+       u8 tgdata[];
+};
+
+/* PN532_CMD_IN_AUTOPOLL */
+#define PN532_AUTOPOLL_POLLNR_INFINITE 0xff
+#define PN532_AUTOPOLL_PERIOD          0x03 /* in units of 150 ms */
+
+#define PN532_AUTOPOLL_TYPE_GENERIC_106                0x00
+#define PN532_AUTOPOLL_TYPE_GENERIC_212                0x01
+#define PN532_AUTOPOLL_TYPE_GENERIC_424                0x02
+#define PN532_AUTOPOLL_TYPE_JEWEL              0x04
+#define PN532_AUTOPOLL_TYPE_MIFARE             0x10
+#define PN532_AUTOPOLL_TYPE_FELICA212          0x11
+#define PN532_AUTOPOLL_TYPE_FELICA424          0x12
+#define PN532_AUTOPOLL_TYPE_ISOA               0x20
+#define PN532_AUTOPOLL_TYPE_ISOB               0x23
+#define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_106    0x40
+#define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_212    0x41
+#define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_424    0x42
+#define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_106     0x80
+#define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_212     0x81
+#define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_424     0x82
 
 /* PN533_TG_INIT_AS_TARGET */
 #define PN533_INIT_TARGET_PASSIVE 0x1
@@ -1389,6 +1415,101 @@ static int pn533_poll_dep(struct nfc_dev *nfc_dev)
        return rc;
 }
 
+static int pn533_autopoll_complete(struct pn533 *dev, void *arg,
+                              struct sk_buff *resp)
+{
+       struct pn532_autopoll_resp *apr;
+       struct nfc_target nfc_tgt;
+       u8 nbtg;
+       int rc;
+
+       if (IS_ERR(resp)) {
+               rc = PTR_ERR(resp);
+
+               nfc_err(dev->dev, "%s  autopoll complete error %d\n",
+                       __func__, rc);
+
+               if (rc == -ENOENT) {
+                       if (dev->poll_mod_count != 0)
+                               return rc;
+                       goto stop_poll;
+               } else if (rc < 0) {
+                       nfc_err(dev->dev,
+                               "Error %d when running autopoll\n", rc);
+                       goto stop_poll;
+               }
+       }
+
+       nbtg = resp->data[0];
+       if ((nbtg > 2) || (nbtg <= 0))
+               return -EAGAIN;
+
+       apr = (struct pn532_autopoll_resp *)&resp->data[1];
+       while (nbtg--) {
+               memset(&nfc_tgt, 0, sizeof(struct nfc_target));
+               switch (apr->type) {
+               case PN532_AUTOPOLL_TYPE_ISOA:
+                       dev_dbg(dev->dev, "ISOA\n");
+                       rc = pn533_target_found_type_a(&nfc_tgt, apr->tgdata,
+                                                      apr->ln - 1);
+                       break;
+               case PN532_AUTOPOLL_TYPE_FELICA212:
+               case PN532_AUTOPOLL_TYPE_FELICA424:
+                       dev_dbg(dev->dev, "FELICA\n");
+                       rc = pn533_target_found_felica(&nfc_tgt, apr->tgdata,
+                                                      apr->ln - 1);
+                       break;
+               case PN532_AUTOPOLL_TYPE_JEWEL:
+                       dev_dbg(dev->dev, "JEWEL\n");
+                       rc = pn533_target_found_jewel(&nfc_tgt, apr->tgdata,
+                                                     apr->ln - 1);
+                       break;
+               case PN532_AUTOPOLL_TYPE_ISOB:
+                       dev_dbg(dev->dev, "ISOB\n");
+                       rc = pn533_target_found_type_b(&nfc_tgt, apr->tgdata,
+                                                      apr->ln - 1);
+                       break;
+               case PN532_AUTOPOLL_TYPE_MIFARE:
+                       dev_dbg(dev->dev, "Mifare\n");
+                       rc = pn533_target_found_type_a(&nfc_tgt, apr->tgdata,
+                                                      apr->ln - 1);
+                       break;
+               default:
+                       nfc_err(dev->dev,
+                                   "Unknown current poll modulation\n");
+                       rc = -EPROTO;
+               }
+
+               if (rc)
+                       goto done;
+
+               if (!(nfc_tgt.supported_protocols & dev->poll_protocols)) {
+                       nfc_err(dev->dev,
+                                   "The Tg found doesn't have the desired protocol\n");
+                       rc = -EAGAIN;
+                       goto done;
+               }
+
+               dev->tgt_available_prots = nfc_tgt.supported_protocols;
+               apr = (struct pn532_autopoll_resp *)
+                       (apr->tgdata + (apr->ln - 1));
+       }
+
+       pn533_poll_reset_mod_list(dev);
+       nfc_targets_found(dev->nfc_dev, &nfc_tgt, 1);
+
+done:
+       dev_kfree_skb(resp);
+       return rc;
+
+stop_poll:
+       nfc_err(dev->dev, "autopoll operation has been stopped\n");
+
+       pn533_poll_reset_mod_list(dev);
+       dev->poll_protocols = 0;
+       return rc;
+}
+
 static int pn533_poll_complete(struct pn533 *dev, void *arg,
                               struct sk_buff *resp)
 {
@@ -1532,6 +1653,7 @@ static int pn533_start_poll(struct nfc_dev *nfc_dev,
 {
        struct pn533 *dev = nfc_get_drvdata(nfc_dev);
        struct pn533_poll_modulations *cur_mod;
+       struct sk_buff *skb;
        u8 rand_mod;
        int rc;
 
@@ -1557,9 +1679,73 @@ static int pn533_start_poll(struct nfc_dev *nfc_dev,
                        tm_protocols = 0;
        }
 
-       pn533_poll_create_mod_list(dev, im_protocols, tm_protocols);
        dev->poll_protocols = im_protocols;
        dev->listen_protocols = tm_protocols;
+       if (dev->device_type == PN533_DEVICE_PN532_AUTOPOLL) {
+               skb = pn533_alloc_skb(dev, 4 + 6);
+               if (!skb)
+                       return -ENOMEM;
+
+               *((u8 *)skb_put(skb, sizeof(u8))) =
+                       PN532_AUTOPOLL_POLLNR_INFINITE;
+               *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_PERIOD;
+
+               if ((im_protocols & NFC_PROTO_MIFARE_MASK) &&
+                               (im_protocols & NFC_PROTO_ISO14443_MASK) &&
+                               (im_protocols & NFC_PROTO_NFC_DEP_MASK))
+                       *((u8 *)skb_put(skb, sizeof(u8))) =
+                               PN532_AUTOPOLL_TYPE_GENERIC_106;
+               else {
+                       if (im_protocols & NFC_PROTO_MIFARE_MASK)
+                               *((u8 *)skb_put(skb, sizeof(u8))) =
+                                       PN532_AUTOPOLL_TYPE_MIFARE;
+
+                       if (im_protocols & NFC_PROTO_ISO14443_MASK)
+                               *((u8 *)skb_put(skb, sizeof(u8))) =
+                                       PN532_AUTOPOLL_TYPE_ISOA;
+
+                       if (im_protocols & NFC_PROTO_NFC_DEP_MASK) {
+                               *((u8 *)skb_put(skb, sizeof(u8))) =
+                                       PN532_AUTOPOLL_TYPE_DEP_PASSIVE_106;
+                               *((u8 *)skb_put(skb, sizeof(u8))) =
+                                       PN532_AUTOPOLL_TYPE_DEP_PASSIVE_212;
+                               *((u8 *)skb_put(skb, sizeof(u8))) =
+                                       PN532_AUTOPOLL_TYPE_DEP_PASSIVE_424;
+                       }
+               }
+
+               if (im_protocols & NFC_PROTO_FELICA_MASK ||
+                               im_protocols & NFC_PROTO_NFC_DEP_MASK) {
+                       *((u8 *)skb_put(skb, sizeof(u8))) =
+                               PN532_AUTOPOLL_TYPE_FELICA212;
+                       *((u8 *)skb_put(skb, sizeof(u8))) =
+                               PN532_AUTOPOLL_TYPE_FELICA424;
+               }
+
+               if (im_protocols & NFC_PROTO_JEWEL_MASK)
+                       *((u8 *)skb_put(skb, sizeof(u8))) =
+                               PN532_AUTOPOLL_TYPE_JEWEL;
+
+               if (im_protocols & NFC_PROTO_ISO14443_B_MASK)
+                       *((u8 *)skb_put(skb, sizeof(u8))) =
+                               PN532_AUTOPOLL_TYPE_ISOB;
+
+               if (tm_protocols)
+                       *((u8 *)skb_put(skb, sizeof(u8))) =
+                               PN532_AUTOPOLL_TYPE_DEP_ACTIVE_106;
+
+               rc = pn533_send_cmd_async(dev, PN533_CMD_IN_AUTOPOLL, skb,
+                               pn533_autopoll_complete, NULL);
+
+               if (rc < 0)
+                       dev_kfree_skb(skb);
+               else
+                       dev->poll_mod_count++;
+
+               return rc;
+       }
+
+       pn533_poll_create_mod_list(dev, im_protocols, tm_protocols);
 
        /* Do not always start polling from the same modulation */
        get_random_bytes(&rand_mod, sizeof(rand_mod));
@@ -2458,7 +2644,11 @@ static int pn533_dev_up(struct nfc_dev *nfc_dev)
 {
        struct pn533 *dev = nfc_get_drvdata(nfc_dev);
 
-       if (dev->device_type == PN533_DEVICE_PN532) {
+       if (dev->phy_ops->dev_up)
+               dev->phy_ops->dev_up(dev);
+
+       if ((dev->device_type == PN533_DEVICE_PN532) ||
+               (dev->device_type == PN533_DEVICE_PN532_AUTOPOLL)) {
                int rc = pn532_sam_configuration(nfc_dev);
 
                if (rc)
@@ -2470,7 +2660,14 @@ static int pn533_dev_up(struct nfc_dev *nfc_dev)
 
 static int pn533_dev_down(struct nfc_dev *nfc_dev)
 {
-       return pn533_rf_field(nfc_dev, 0);
+       struct pn533 *dev = nfc_get_drvdata(nfc_dev);
+       int ret;
+
+       ret = pn533_rf_field(nfc_dev, 0);
+       if (dev->phy_ops->dev_down && !ret)
+               dev->phy_ops->dev_down(dev);
+
+       return ret;
 }
 
 static struct nfc_ops pn533_nfc_ops = {
@@ -2498,6 +2695,7 @@ static int pn533_setup(struct pn533 *dev)
        case PN533_DEVICE_PASORI:
        case PN533_DEVICE_ACR122U:
        case PN533_DEVICE_PN532:
+       case PN533_DEVICE_PN532_AUTOPOLL:
                max_retries.mx_rty_atr = 0x2;
                max_retries.mx_rty_psl = 0x1;
                max_retries.mx_rty_passive_act =
@@ -2534,6 +2732,7 @@ static int pn533_setup(struct pn533 *dev)
        switch (dev->device_type) {
        case PN533_DEVICE_STD:
        case PN533_DEVICE_PN532:
+       case PN533_DEVICE_PN532_AUTOPOLL:
                break;
 
        case PN533_DEVICE_PASORI:
@@ -2580,14 +2779,12 @@ int pn533_finalize_setup(struct pn533 *dev)
 }
 EXPORT_SYMBOL_GPL(pn533_finalize_setup);
 
-struct pn533 *pn533_register_device(u32 device_type,
-                               u32 protocols,
+struct pn533 *pn53x_common_init(u32 device_type,
                                enum pn533_protocol_type protocol_type,
                                void *phy,
                                struct pn533_phy_ops *phy_ops,
                                struct pn533_frame_ops *fops,
-                               struct device *dev,
-                               struct device *parent)
+                               struct device *dev)
 {
        struct pn533 *priv;
        int rc = -ENOMEM;
@@ -2628,43 +2825,18 @@ struct pn533 *pn533_register_device(u32 device_type,
        skb_queue_head_init(&priv->fragment_skb);
 
        INIT_LIST_HEAD(&priv->cmd_queue);
-
-       priv->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols,
-                                          priv->ops->tx_header_len +
-                                          PN533_CMD_DATAEXCH_HEAD_LEN,
-                                          priv->ops->tx_tail_len);
-       if (!priv->nfc_dev) {
-               rc = -ENOMEM;
-               goto destroy_wq;
-       }
-
-       nfc_set_parent_dev(priv->nfc_dev, parent);
-       nfc_set_drvdata(priv->nfc_dev, priv);
-
-       rc = nfc_register_device(priv->nfc_dev);
-       if (rc)
-               goto free_nfc_dev;
-
        return priv;
 
-free_nfc_dev:
-       nfc_free_device(priv->nfc_dev);
-
-destroy_wq:
-       destroy_workqueue(priv->wq);
 error:
        kfree(priv);
        return ERR_PTR(rc);
 }
-EXPORT_SYMBOL_GPL(pn533_register_device);
+EXPORT_SYMBOL_GPL(pn53x_common_init);
 
-void pn533_unregister_device(struct pn533 *priv)
+void pn53x_common_clean(struct pn533 *priv)
 {
        struct pn533_cmd *cmd, *n;
 
-       nfc_unregister_device(priv->nfc_dev);
-       nfc_free_device(priv->nfc_dev);
-
        flush_delayed_work(&priv->poll_work);
        destroy_workqueue(priv->wq);
 
@@ -2679,8 +2851,47 @@ void pn533_unregister_device(struct pn533 *priv)
 
        kfree(priv);
 }
-EXPORT_SYMBOL_GPL(pn533_unregister_device);
+EXPORT_SYMBOL_GPL(pn53x_common_clean);
+
+int pn532_i2c_nfc_alloc(struct pn533 *priv, u32 protocols,
+                       struct device *parent)
+{
+       priv->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols,
+                                          priv->ops->tx_header_len +
+                                          PN533_CMD_DATAEXCH_HEAD_LEN,
+                                          priv->ops->tx_tail_len);
+       if (!priv->nfc_dev)
+               return -ENOMEM;
+
+       nfc_set_parent_dev(priv->nfc_dev, parent);
+       nfc_set_drvdata(priv->nfc_dev, priv);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pn532_i2c_nfc_alloc);
 
+int pn53x_register_nfc(struct pn533 *priv, u32 protocols,
+                       struct device *parent)
+{
+       int rc;
+
+       rc = pn532_i2c_nfc_alloc(priv, protocols, parent);
+       if (rc)
+               return rc;
+
+       rc = nfc_register_device(priv->nfc_dev);
+       if (rc)
+               nfc_free_device(priv->nfc_dev);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(pn53x_register_nfc);
+
+void pn53x_unregister_nfc(struct pn533 *priv)
+{
+       nfc_unregister_device(priv->nfc_dev);
+       nfc_free_device(priv->nfc_dev);
+}
+EXPORT_SYMBOL_GPL(pn53x_unregister_nfc);
 
 MODULE_AUTHOR("Lauro Ramos Venancio <lauro.venancio@openbossa.org>");
 MODULE_AUTHOR("Aloisio Almeida Jr <aloisio.almeida@openbossa.org>");
index 8bf9d6e..b66f02a 100644 (file)
@@ -6,10 +6,11 @@
  * Copyright (C) 2012-2013 Tieto Poland
  */
 
-#define PN533_DEVICE_STD     0x1
-#define PN533_DEVICE_PASORI  0x2
-#define PN533_DEVICE_ACR122U 0x3
-#define PN533_DEVICE_PN532   0x4
+#define PN533_DEVICE_STD               0x1
+#define PN533_DEVICE_PASORI            0x2
+#define PN533_DEVICE_ACR122U           0x3
+#define PN533_DEVICE_PN532             0x4
+#define PN533_DEVICE_PN532_AUTOPOLL    0x5
 
 #define PN533_ALL_PROTOCOLS (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK |\
                             NFC_PROTO_FELICA_MASK | NFC_PROTO_ISO14443_MASK |\
 
 /* Preamble (1), SoPC (2), ACK Code (2), Postamble (1) */
 #define PN533_STD_FRAME_ACK_SIZE 6
+/*
+ * Preamble (1), SoPC (2), Packet Length (1), Packet Length Checksum (1),
+ * Specific Application Level Error Code (1), Postamble (1)
+ */
+#define PN533_STD_ERROR_FRAME_SIZE 8
 #define PN533_STD_FRAME_CHECKSUM(f) (f->data[f->datalen])
 #define PN533_STD_FRAME_POSTAMBLE(f) (f->data[f->datalen + 1])
 /* Half start code (3), LEN (4) should be 0xffff for extended frame */
@@ -70,6 +76,7 @@
 #define PN533_CMD_IN_ATR 0x50
 #define PN533_CMD_IN_RELEASE 0x52
 #define PN533_CMD_IN_JUMP_FOR_DEP 0x56
+#define PN533_CMD_IN_AUTOPOLL 0x60
 
 #define PN533_CMD_TG_INIT_AS_TARGET 0x8c
 #define PN533_CMD_TG_GET_DATA 0x86
@@ -84,6 +91,9 @@
 #define PN533_CMD_MI_MASK 0x40
 #define PN533_CMD_RET_SUCCESS 0x00
 
+#define PN533_FRAME_DATALEN_ACK 0x00
+#define PN533_FRAME_DATALEN_ERROR 0x01
+#define PN533_FRAME_DATALEN_EXTENDED 0xFF
 
 enum  pn533_protocol_type {
        PN533_PROTO_REQ_ACK_RESP = 0,
@@ -207,21 +217,33 @@ struct pn533_phy_ops {
                          struct sk_buff *out);
        int (*send_ack)(struct pn533 *dev, gfp_t flags);
        void (*abort_cmd)(struct pn533 *priv, gfp_t flags);
+       /*
+        * dev_up and dev_down are optional.
+        * They are used to inform the phy layer that the nfc chip
+        * is going to be really used very soon. The phy layer can then
+        * bring up its interface to the chip and have it suspended for power
+        * saving reasons otherwise.
+        */
+       void (*dev_up)(struct pn533 *priv);
+       void (*dev_down)(struct pn533 *priv);
 };
 
 
-struct pn533 *pn533_register_device(u32 device_type,
-                               u32 protocols,
+struct pn533 *pn53x_common_init(u32 device_type,
                                enum pn533_protocol_type protocol_type,
                                void *phy,
                                struct pn533_phy_ops *phy_ops,
                                struct pn533_frame_ops *fops,
-                               struct device *dev,
-                               struct device *parent);
+                               struct device *dev);
 
 int pn533_finalize_setup(struct pn533 *dev);
-void pn533_unregister_device(struct pn533 *priv);
+void pn53x_common_clean(struct pn533 *priv);
 void pn533_recv_frame(struct pn533 *dev, struct sk_buff *skb, int status);
+int pn532_i2c_nfc_alloc(struct pn533 *priv, u32 protocols,
+                       struct device *parent);
+int pn53x_register_nfc(struct pn533 *priv, u32 protocols,
+                       struct device *parent);
+void pn53x_unregister_nfc(struct pn533 *priv);
 
 bool pn533_rx_frame_is_cmd_response(struct pn533 *dev, void *frame);
 bool pn533_rx_frame_is_ack(void *_frame);
diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c
new file mode 100644 (file)
index 0000000..46e5ff1
--- /dev/null
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Driver for NXP PN532 NFC Chip - UART transport layer
+ *
+ * Copyright (C) 2018 Lemonage Software GmbH
+ * Author: Lars Pöschel <poeschel@lemonage.de>
+ * All rights reserved.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nfc.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/serdev.h>
+#include "pn533.h"
+
+#define PN532_UART_SKB_BUFF_LEN        (PN533_CMD_DATAEXCH_DATA_MAXLEN * 2)
+
+enum send_wakeup {
+       PN532_SEND_NO_WAKEUP = 0,
+       PN532_SEND_WAKEUP,
+       PN532_SEND_LAST_WAKEUP,
+};
+
+
+struct pn532_uart_phy {
+       struct serdev_device *serdev;
+       struct sk_buff *recv_skb;
+       struct pn533 *priv;
+       /*
+        * send_wakeup variable is used to control if we need to send a wakeup
+        * request to the pn532 chip prior to our actual command. There is a
+        * little propability of a race condition. We decided to not mutex the
+        * variable as the worst that could happen is, that we send a wakeup
+        * to the chip that is already awake. This does not hurt. It is a
+        * no-op to the chip.
+        */
+       enum send_wakeup send_wakeup;
+       struct timer_list cmd_timeout;
+       struct sk_buff *cur_out_buf;
+};
+
+static int pn532_uart_send_frame(struct pn533 *dev,
+                               struct sk_buff *out)
+{
+       /* wakeup sequence and dummy bytes for waiting time */
+       static const u8 wakeup[] = {
+               0x55, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+       struct pn532_uart_phy *pn532 = dev->phy;
+       int err;
+
+       print_hex_dump_debug("PN532_uart TX: ", DUMP_PREFIX_NONE, 16, 1,
+                            out->data, out->len, false);
+
+       pn532->cur_out_buf = out;
+       if (pn532->send_wakeup) {
+               err = serdev_device_write(pn532->serdev,
+                               wakeup, sizeof(wakeup),
+                               MAX_SCHEDULE_TIMEOUT);
+               if (err < 0)
+                       return err;
+       }
+
+       if (pn532->send_wakeup == PN532_SEND_LAST_WAKEUP)
+               pn532->send_wakeup = PN532_SEND_NO_WAKEUP;
+
+       err = serdev_device_write(pn532->serdev, out->data, out->len,
+                       MAX_SCHEDULE_TIMEOUT);
+       if (err < 0)
+               return err;
+
+       mod_timer(&pn532->cmd_timeout, HZ / 40 + jiffies);
+       return 0;
+}
+
+static int pn532_uart_send_ack(struct pn533 *dev, gfp_t flags)
+{
+       /* spec 7.1.1.3:  Preamble, SoPC (2), ACK Code (2), Postamble */
+       static const u8 ack[PN533_STD_FRAME_ACK_SIZE] = {
+                       0x00, 0x00, 0xff, 0x00, 0xff, 0x00};
+       struct pn532_uart_phy *pn532 = dev->phy;
+       int err;
+
+       err = serdev_device_write(pn532->serdev, ack, sizeof(ack),
+                       MAX_SCHEDULE_TIMEOUT);
+       if (err < 0)
+               return err;
+
+       return 0;
+}
+
+static void pn532_uart_abort_cmd(struct pn533 *dev, gfp_t flags)
+{
+       /* An ack will cancel the last issued command */
+       pn532_uart_send_ack(dev, flags);
+       /* schedule cmd_complete_work to finish current command execution */
+       pn533_recv_frame(dev, NULL, -ENOENT);
+}
+
+static void pn532_dev_up(struct pn533 *dev)
+{
+       struct pn532_uart_phy *pn532 = dev->phy;
+
+       serdev_device_open(pn532->serdev);
+       pn532->send_wakeup = PN532_SEND_LAST_WAKEUP;
+}
+
+static void pn532_dev_down(struct pn533 *dev)
+{
+       struct pn532_uart_phy *pn532 = dev->phy;
+
+       serdev_device_close(pn532->serdev);
+       pn532->send_wakeup = PN532_SEND_WAKEUP;
+}
+
+static struct pn533_phy_ops uart_phy_ops = {
+       .send_frame = pn532_uart_send_frame,
+       .send_ack = pn532_uart_send_ack,
+       .abort_cmd = pn532_uart_abort_cmd,
+       .dev_up = pn532_dev_up,
+       .dev_down = pn532_dev_down,
+};
+
+static void pn532_cmd_timeout(struct timer_list *t)
+{
+       struct pn532_uart_phy *dev = from_timer(dev, t, cmd_timeout);
+
+       pn532_uart_send_frame(dev->priv, dev->cur_out_buf);
+}
+
+/*
+ * Scans the buffer for a pn532 frame. It is not checked whether the
+ * frame is really valid. This is done later with pn533_rx_frame_is_valid.
+ * This is useful for malformed or erroneously transmitted frames. Adjusts the
+ * buffer position where the frame starts, since pn533_recv_frame expects a
+ * well formed frame.
+ */
+static int pn532_uart_rx_is_frame(struct sk_buff *skb)
+{
+       struct pn533_std_frame *std;
+       struct pn533_ext_frame *ext;
+       u16 frame_len;
+       int i;
+
+       for (i = 0; i + PN533_STD_FRAME_ACK_SIZE <= skb->len; i++) {
+               std = (struct pn533_std_frame *)&skb->data[i];
+               /* search start code */
+               if (std->start_frame != cpu_to_be16(PN533_STD_FRAME_SOF))
+                       continue;
+
+               /* frame type */
+               switch (std->datalen) {
+               case PN533_FRAME_DATALEN_ACK:
+                       if (std->datalen_checksum == 0xff) {
+                               skb_pull(skb, i);
+                               return 1;
+                       }
+
+                       break;
+               case PN533_FRAME_DATALEN_ERROR:
+                       if ((std->datalen_checksum == 0xff) &&
+                                       (skb->len >=
+                                        PN533_STD_ERROR_FRAME_SIZE)) {
+                               skb_pull(skb, i);
+                               return 1;
+                       }
+
+                       break;
+               case PN533_FRAME_DATALEN_EXTENDED:
+                       ext = (struct pn533_ext_frame *)&skb->data[i];
+                       frame_len = be16_to_cpu(ext->datalen);
+                       if (skb->len >= frame_len +
+                                       sizeof(struct pn533_ext_frame) +
+                                       2 /* CKS + Postamble */) {
+                               skb_pull(skb, i);
+                               return 1;
+                       }
+
+                       break;
+               default: /* normal information frame */
+                       frame_len = std->datalen;
+                       if (skb->len >= frame_len +
+                                       sizeof(struct pn533_std_frame) +
+                                       2 /* CKS + Postamble */) {
+                               skb_pull(skb, i);
+                               return 1;
+                       }
+
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+static int pn532_receive_buf(struct serdev_device *serdev,
+               const unsigned char *data, size_t count)
+{
+       struct pn532_uart_phy *dev = serdev_device_get_drvdata(serdev);
+       size_t i;
+
+       del_timer(&dev->cmd_timeout);
+       for (i = 0; i < count; i++) {
+               skb_put_u8(dev->recv_skb, *data++);
+               if (!pn532_uart_rx_is_frame(dev->recv_skb))
+                       continue;
+
+               pn533_recv_frame(dev->priv, dev->recv_skb, 0);
+               dev->recv_skb = alloc_skb(PN532_UART_SKB_BUFF_LEN, GFP_KERNEL);
+               if (!dev->recv_skb)
+                       return 0;
+       }
+
+       return i;
+}
+
+static struct serdev_device_ops pn532_serdev_ops = {
+       .receive_buf = pn532_receive_buf,
+       .write_wakeup = serdev_device_write_wakeup,
+};
+
+static const struct of_device_id pn532_uart_of_match[] = {
+       { .compatible = "nxp,pn532", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, pn532_uart_of_match);
+
+static int pn532_uart_probe(struct serdev_device *serdev)
+{
+       struct pn532_uart_phy *pn532;
+       struct pn533 *priv;
+       int err;
+
+       err = -ENOMEM;
+       pn532 = kzalloc(sizeof(*pn532), GFP_KERNEL);
+       if (!pn532)
+               goto err_exit;
+
+       pn532->recv_skb = alloc_skb(PN532_UART_SKB_BUFF_LEN, GFP_KERNEL);
+       if (!pn532->recv_skb)
+               goto err_free;
+
+       pn532->serdev = serdev;
+       serdev_device_set_drvdata(serdev, pn532);
+       serdev_device_set_client_ops(serdev, &pn532_serdev_ops);
+       err = serdev_device_open(serdev);
+       if (err) {
+               dev_err(&serdev->dev, "Unable to open device\n");
+               goto err_skb;
+       }
+
+       err = serdev_device_set_baudrate(serdev, 115200);
+       if (err != 115200) {
+               err = -EINVAL;
+               goto err_serdev;
+       }
+
+       serdev_device_set_flow_control(serdev, false);
+       pn532->send_wakeup = PN532_SEND_WAKEUP;
+       timer_setup(&pn532->cmd_timeout, pn532_cmd_timeout, 0);
+       priv = pn53x_common_init(PN533_DEVICE_PN532_AUTOPOLL,
+                                    PN533_PROTO_REQ_ACK_RESP,
+                                    pn532, &uart_phy_ops, NULL,
+                                    &pn532->serdev->dev);
+       if (IS_ERR(priv)) {
+               err = PTR_ERR(priv);
+               goto err_serdev;
+       }
+
+       pn532->priv = priv;
+       err = pn533_finalize_setup(pn532->priv);
+       if (err)
+               goto err_clean;
+
+       serdev_device_close(serdev);
+       err = pn53x_register_nfc(priv, PN533_NO_TYPE_B_PROTOCOLS, &serdev->dev);
+       if (err) {
+               pn53x_common_clean(pn532->priv);
+               goto err_skb;
+       }
+
+       return err;
+
+err_clean:
+       pn53x_common_clean(pn532->priv);
+err_serdev:
+       serdev_device_close(serdev);
+err_skb:
+       kfree_skb(pn532->recv_skb);
+err_free:
+       kfree(pn532);
+err_exit:
+       return err;
+}
+
+static void pn532_uart_remove(struct serdev_device *serdev)
+{
+       struct pn532_uart_phy *pn532 = serdev_device_get_drvdata(serdev);
+
+       pn53x_unregister_nfc(pn532->priv);
+       serdev_device_close(serdev);
+       pn53x_common_clean(pn532->priv);
+       kfree_skb(pn532->recv_skb);
+       kfree(pn532);
+}
+
+static struct serdev_device_driver pn532_uart_driver = {
+       .probe = pn532_uart_probe,
+       .remove = pn532_uart_remove,
+       .driver = {
+               .name = "pn532_uart",
+               .of_match_table = of_match_ptr(pn532_uart_of_match),
+       },
+};
+
+module_serdev_device_driver(pn532_uart_driver);
+
+MODULE_AUTHOR("Lars Pöschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("PN532 UART driver");
+MODULE_LICENSE("GPL");
index e897e4d..4590fbf 100644 (file)
@@ -534,9 +534,9 @@ static int pn533_usb_probe(struct usb_interface *interface,
                goto error;
        }
 
-       priv = pn533_register_device(id->driver_info, protocols, protocol_type,
+       priv = pn53x_common_init(id->driver_info, protocol_type,
                                        phy, &usb_phy_ops, fops,
-                                       &phy->udev->dev, &interface->dev);
+                                       &phy->udev->dev);
 
        if (IS_ERR(priv)) {
                rc = PTR_ERR(priv);
@@ -547,14 +547,17 @@ static int pn533_usb_probe(struct usb_interface *interface,
 
        rc = pn533_finalize_setup(priv);
        if (rc)
-               goto err_deregister;
+               goto err_clean;
 
        usb_set_intfdata(interface, phy);
+       rc = pn53x_register_nfc(priv, protocols, &interface->dev);
+       if (rc)
+               goto err_clean;
 
        return 0;
 
-err_deregister:
-       pn533_unregister_device(phy->priv);
+err_clean:
+       pn53x_common_clean(priv);
 error:
        usb_kill_urb(phy->in_urb);
        usb_kill_urb(phy->out_urb);
@@ -577,7 +580,8 @@ static void pn533_usb_disconnect(struct usb_interface *interface)
        if (!phy)
                return;
 
-       pn533_unregister_device(phy->priv);
+       pn53x_unregister_nfc(phy->priv);
+       pn53x_common_clean(phy->priv);
 
        usb_set_intfdata(interface, NULL);
 
index e4f7fa0..b4eb926 100644 (file)
@@ -279,7 +279,6 @@ MODULE_DEVICE_TABLE(of, of_s3fwrn5_i2c_match);
 
 static struct i2c_driver s3fwrn5_i2c_driver = {
        .driver = {
-               .owner = THIS_MODULE,
                .name = S3FWRN5_I2C_DRIVER_NAME,
                .of_match_table = of_match_ptr(of_s3fwrn5_i2c_match),
        },
index bd6129d..c6b87ce 100644 (file)
@@ -361,8 +361,8 @@ struct phy_device *of_phy_get_and_connect(struct net_device *dev,
        struct phy_device *phy;
        int ret;
 
-       iface = of_get_phy_mode(np);
-       if ((int)iface < 0)
+       ret = of_get_phy_mode(np, &iface);
+       if (ret)
                return NULL;
        if (of_phy_is_fixed_link(np)) {
                ret = of_phy_register_fixed_link(np);
index b02734a..6e41182 100644 (file)
 /**
  * of_get_phy_mode - Get phy mode for given device_node
  * @np:        Pointer to the given device_node
+ * @interface: Pointer to the result
  *
  * The function gets phy interface string from property 'phy-mode' or
- * 'phy-connection-type', and return its index in phy_modes table, or errno in
- * error case.
+ * 'phy-connection-type'. The index in phy_modes table is set in
+ * interface and 0 returned. In case of error interface is set to
+ * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
  */
-int of_get_phy_mode(struct device_node *np)
+int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
 {
        const char *pm;
        int err, i;
 
+       *interface = PHY_INTERFACE_MODE_NA;
+
        err = of_property_read_string(np, "phy-mode", &pm);
        if (err < 0)
                err = of_property_read_string(np, "phy-connection-type", &pm);
@@ -32,8 +36,10 @@ int of_get_phy_mode(struct device_node *np)
                return err;
 
        for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
-               if (!strcasecmp(pm, phy_modes(i)))
-                       return i;
+               if (!strcasecmp(pm, phy_modes(i))) {
+                       *interface = i;
+                       return 0;
+               }
 
        return -ENODEV;
 }
index 0517272..c48ad23 100644 (file)
@@ -119,4 +119,16 @@ config PTP_1588_CLOCK_KVM
          To compile this driver as a module, choose M here: the module
          will be called ptp_kvm.
 
+config PTP_1588_CLOCK_IDTCM
+       tristate "IDT CLOCKMATRIX as PTP clock"
+       select PTP_1588_CLOCK
+       default n
+       help
+         This driver adds support for using IDT CLOCKMATRIX(TM) as a PTP
+         clock. This clock is only useful if your time stamping MAC
+         is connected to the IDT chip.
+
+         To compile this driver as a module, choose M here: the module
+         will be called ptp_clockmatrix.
+
 endmenu
index 677d1d1..69a06f8 100644 (file)
@@ -12,3 +12,4 @@ obj-$(CONFIG_PTP_1588_CLOCK_KVM)      += ptp_kvm.o
 obj-$(CONFIG_PTP_1588_CLOCK_QORIQ)     += ptp-qoriq.o
 ptp-qoriq-y                            += ptp_qoriq.o
 ptp-qoriq-$(CONFIG_DEBUG_FS)           += ptp_qoriq_debugfs.o
+obj-$(CONFIG_PTP_1588_CLOCK_IDTCM)     += ptp_clockmatrix.o
\ No newline at end of file
diff --git a/drivers/ptp/idt8a340_reg.h b/drivers/ptp/idt8a340_reg.h
new file mode 100644 (file)
index 0000000..9263bc3
--- /dev/null
@@ -0,0 +1,659 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* idt8a340_reg.h
+ *
+ * Originally generated by regen.tcl on Thu Feb 14 19:23:44 PST 2019
+ * https://github.com/richardcochran/regen
+ *
+ * Hand modified to include some HW registers.
+ * Based on 4.8.0, SCSR rev C commit a03c7ae5
+ */
+#ifndef HAVE_IDT8A340_REG
+#define HAVE_IDT8A340_REG
+
+#define PAGE_ADDR_BASE                    0x0000
+#define PAGE_ADDR                         0x00fc
+
+#define HW_REVISION                       0x8180
+#define REV_ID                            0x007a
+
+#define HW_DPLL_0                         (0x8a00)
+#define HW_DPLL_1                         (0x8b00)
+#define HW_DPLL_2                         (0x8c00)
+#define HW_DPLL_3                         (0x8d00)
+
+#define HW_DPLL_TOD_SW_TRIG_ADDR__0       (0x080)
+#define HW_DPLL_TOD_CTRL_1                (0x089)
+#define HW_DPLL_TOD_CTRL_2                (0x08A)
+#define HW_DPLL_TOD_OVR__0                (0x098)
+#define HW_DPLL_TOD_OUT_0__0              (0x0B0)
+
+#define HW_Q0_Q1_CH_SYNC_CTRL_0           (0xa740)
+#define HW_Q0_Q1_CH_SYNC_CTRL_1           (0xa741)
+#define HW_Q2_Q3_CH_SYNC_CTRL_0           (0xa742)
+#define HW_Q2_Q3_CH_SYNC_CTRL_1           (0xa743)
+#define HW_Q4_Q5_CH_SYNC_CTRL_0           (0xa744)
+#define HW_Q4_Q5_CH_SYNC_CTRL_1           (0xa745)
+#define HW_Q6_Q7_CH_SYNC_CTRL_0           (0xa746)
+#define HW_Q6_Q7_CH_SYNC_CTRL_1           (0xa747)
+#define HW_Q8_CH_SYNC_CTRL_0              (0xa748)
+#define HW_Q8_CH_SYNC_CTRL_1              (0xa749)
+#define HW_Q9_CH_SYNC_CTRL_0              (0xa74a)
+#define HW_Q9_CH_SYNC_CTRL_1              (0xa74b)
+#define HW_Q10_CH_SYNC_CTRL_0             (0xa74c)
+#define HW_Q10_CH_SYNC_CTRL_1             (0xa74d)
+#define HW_Q11_CH_SYNC_CTRL_0             (0xa74e)
+#define HW_Q11_CH_SYNC_CTRL_1             (0xa74f)
+
+#define SYNC_SOURCE_DPLL0_TOD_PPS      0x14
+#define SYNC_SOURCE_DPLL1_TOD_PPS      0x15
+#define SYNC_SOURCE_DPLL2_TOD_PPS      0x16
+#define SYNC_SOURCE_DPLL3_TOD_PPS      0x17
+
+#define SYNCTRL1_MASTER_SYNC_RST       BIT(7)
+#define SYNCTRL1_MASTER_SYNC_TRIG      BIT(5)
+#define SYNCTRL1_TOD_SYNC_TRIG         BIT(4)
+#define SYNCTRL1_FBDIV_FRAME_SYNC_TRIG BIT(3)
+#define SYNCTRL1_FBDIV_SYNC_TRIG       BIT(2)
+#define SYNCTRL1_Q1_DIV_SYNC_TRIG      BIT(1)
+#define SYNCTRL1_Q0_DIV_SYNC_TRIG      BIT(0)
+
+#define RESET_CTRL                        0xc000
+#define SM_RESET                          0x0012
+#define SM_RESET_CMD                      0x5A
+
+#define GENERAL_STATUS                    0xc014
+#define HW_REV_ID                         0x000A
+#define BOND_ID                           0x000B
+#define HW_CSR_ID                         0x000C
+#define HW_IRQ_ID                         0x000E
+
+#define MAJ_REL                           0x0010
+#define MIN_REL                           0x0011
+#define HOTFIX_REL                        0x0012
+
+#define PIPELINE_ID                       0x0014
+#define BUILD_ID                          0x0018
+
+#define JTAG_DEVICE_ID                    0x001c
+#define PRODUCT_ID                        0x001e
+
+#define STATUS                            0xc03c
+#define USER_GPIO0_TO_7_STATUS            0x008a
+#define USER_GPIO8_TO_15_STATUS           0x008b
+
+#define GPIO_USER_CONTROL                 0xc160
+#define GPIO0_TO_7_OUT                    0x0000
+#define GPIO8_TO_15_OUT                   0x0001
+
+#define STICKY_STATUS_CLEAR               0xc164
+
+#define GPIO_TOD_NOTIFICATION_CLEAR       0xc16c
+
+#define ALERT_CFG                         0xc188
+
+#define SYS_DPLL_XO                       0xc194
+
+#define SYS_APLL                          0xc19c
+
+#define INPUT_0                           0xc1b0
+
+#define INPUT_1                           0xc1c0
+
+#define INPUT_2                           0xc1d0
+
+#define INPUT_3                           0xc200
+
+#define INPUT_4                           0xc210
+
+#define INPUT_5                           0xc220
+
+#define INPUT_6                           0xc230
+
+#define INPUT_7                           0xc240
+
+#define INPUT_8                           0xc250
+
+#define INPUT_9                           0xc260
+
+#define INPUT_10                          0xc280
+
+#define INPUT_11                          0xc290
+
+#define INPUT_12                          0xc2a0
+
+#define INPUT_13                          0xc2b0
+
+#define INPUT_14                          0xc2c0
+
+#define INPUT_15                          0xc2d0
+
+#define REF_MON_0                         0xc2e0
+
+#define REF_MON_1                         0xc2ec
+
+#define REF_MON_2                         0xc300
+
+#define REF_MON_3                         0xc30c
+
+#define REF_MON_4                         0xc318
+
+#define REF_MON_5                         0xc324
+
+#define REF_MON_6                         0xc330
+
+#define REF_MON_7                         0xc33c
+
+#define REF_MON_8                         0xc348
+
+#define REF_MON_9                         0xc354
+
+#define REF_MON_10                        0xc360
+
+#define REF_MON_11                        0xc36c
+
+#define REF_MON_12                        0xc380
+
+#define REF_MON_13                        0xc38c
+
+#define REF_MON_14                        0xc398
+
+#define REF_MON_15                        0xc3a4
+
+#define DPLL_0                            0xc3b0
+#define DPLL_CTRL_REG_0                   0x0002
+#define DPLL_CTRL_REG_1                   0x0003
+#define DPLL_CTRL_REG_2                   0x0004
+#define DPLL_TOD_SYNC_CFG                 0x0031
+#define DPLL_COMBO_SLAVE_CFG_0            0x0032
+#define DPLL_COMBO_SLAVE_CFG_1            0x0033
+#define DPLL_SLAVE_REF_CFG                0x0034
+#define DPLL_REF_MODE                     0x0035
+#define DPLL_PHASE_MEASUREMENT_CFG        0x0036
+#define DPLL_MODE                         0x0037
+
+#define DPLL_1                            0xc400
+
+#define DPLL_2                            0xc438
+
+#define DPLL_3                            0xc480
+
+#define DPLL_4                            0xc4b8
+
+#define DPLL_5                            0xc500
+
+#define DPLL_6                            0xc538
+
+#define DPLL_7                            0xc580
+
+#define SYS_DPLL                          0xc5b8
+
+#define DPLL_CTRL_0                       0xc600
+#define DPLL_CTRL_DPLL_MANU_REF_CFG       0x0001
+
+#define DPLL_CTRL_1                       0xc63c
+
+#define DPLL_CTRL_2                       0xc680
+
+#define DPLL_CTRL_3                       0xc6bc
+
+#define DPLL_CTRL_4                       0xc700
+
+#define DPLL_CTRL_5                       0xc73c
+
+#define DPLL_CTRL_6                       0xc780
+
+#define DPLL_CTRL_7                       0xc7bc
+
+#define SYS_DPLL_CTRL                     0xc800
+
+#define DPLL_PHASE_0                      0xc818
+
+/* Signed 42-bit FFO in units of 2^(-53) */
+#define DPLL_WR_PHASE                     0x0000
+
+#define DPLL_PHASE_1                      0xc81c
+
+#define DPLL_PHASE_2                      0xc820
+
+#define DPLL_PHASE_3                      0xc824
+
+#define DPLL_PHASE_4                      0xc828
+
+#define DPLL_PHASE_5                      0xc82c
+
+#define DPLL_PHASE_6                      0xc830
+
+#define DPLL_PHASE_7                      0xc834
+
+#define DPLL_FREQ_0                       0xc838
+
+/* Signed 42-bit FFO in units of 2^(-53) */
+#define DPLL_WR_FREQ                      0x0000
+
+#define DPLL_FREQ_1                       0xc840
+
+#define DPLL_FREQ_2                       0xc848
+
+#define DPLL_FREQ_3                       0xc850
+
+#define DPLL_FREQ_4                       0xc858
+
+#define DPLL_FREQ_5                       0xc860
+
+#define DPLL_FREQ_6                       0xc868
+
+#define DPLL_FREQ_7                       0xc870
+
+#define DPLL_PHASE_PULL_IN_0              0xc880
+#define PULL_IN_OFFSET                    0x0000 /* Signed 32 bit */
+#define PULL_IN_SLOPE_LIMIT               0x0004 /* Unsigned 24 bit */
+#define PULL_IN_CTRL                      0x0007
+
+#define DPLL_PHASE_PULL_IN_1              0xc888
+
+#define DPLL_PHASE_PULL_IN_2              0xc890
+
+#define DPLL_PHASE_PULL_IN_3              0xc898
+
+#define DPLL_PHASE_PULL_IN_4              0xc8a0
+
+#define DPLL_PHASE_PULL_IN_5              0xc8a8
+
+#define DPLL_PHASE_PULL_IN_6              0xc8b0
+
+#define DPLL_PHASE_PULL_IN_7              0xc8b8
+
+#define GPIO_CFG                          0xc8c0
+#define GPIO_CFG_GBL                      0x0000
+
+#define GPIO_0                            0xc8c2
+#define GPIO_DCO_INC_DEC                  0x0000
+#define GPIO_OUT_CTRL_0                   0x0001
+#define GPIO_OUT_CTRL_1                   0x0002
+#define GPIO_TOD_TRIG                     0x0003
+#define GPIO_DPLL_INDICATOR               0x0004
+#define GPIO_LOS_INDICATOR                0x0005
+#define GPIO_REF_INPUT_DSQ_0              0x0006
+#define GPIO_REF_INPUT_DSQ_1              0x0007
+#define GPIO_REF_INPUT_DSQ_2              0x0008
+#define GPIO_REF_INPUT_DSQ_3              0x0009
+#define GPIO_MAN_CLK_SEL_0                0x000a
+#define GPIO_MAN_CLK_SEL_1                0x000b
+#define GPIO_MAN_CLK_SEL_2                0x000c
+#define GPIO_SLAVE                        0x000d
+#define GPIO_ALERT_OUT_CFG                0x000e
+#define GPIO_TOD_NOTIFICATION_CFG         0x000f
+#define GPIO_CTRL                         0x0010
+
+#define GPIO_1                            0xc8d4
+
+#define GPIO_2                            0xc8e6
+
+#define GPIO_3                            0xc900
+
+#define GPIO_4                            0xc912
+
+#define GPIO_5                            0xc924
+
+#define GPIO_6                            0xc936
+
+#define GPIO_7                            0xc948
+
+#define GPIO_8                            0xc95a
+
+#define GPIO_9                            0xc980
+
+#define GPIO_10                           0xc992
+
+#define GPIO_11                           0xc9a4
+
+#define GPIO_12                           0xc9b6
+
+#define GPIO_13                           0xc9c8
+
+#define GPIO_14                           0xc9da
+
+#define GPIO_15                           0xca00
+
+#define OUT_DIV_MUX                       0xca12
+
+#define OUTPUT_0                          0xca14
+/* FOD frequency output divider value */
+#define OUT_DIV                           0x0000
+#define OUT_DUTY_CYCLE_HIGH               0x0004
+#define OUT_CTRL_0                        0x0008
+#define OUT_CTRL_1                        0x0009
+/* Phase adjustment in FOD cycles */
+#define OUT_PHASE_ADJ                     0x000c
+
+#define OUTPUT_1                          0xca24
+
+#define OUTPUT_2                          0xca34
+
+#define OUTPUT_3                          0xca44
+
+#define OUTPUT_4                          0xca54
+
+#define OUTPUT_5                          0xca64
+
+#define OUTPUT_6                          0xca80
+
+#define OUTPUT_7                          0xca90
+
+#define OUTPUT_8                          0xcaa0
+
+#define OUTPUT_9                          0xcab0
+
+#define OUTPUT_10                         0xcac0
+
+#define OUTPUT_11                         0xcad0
+
+#define SERIAL                            0xcae0
+
+#define PWM_ENCODER_0                     0xcb00
+
+#define PWM_ENCODER_1                     0xcb08
+
+#define PWM_ENCODER_2                     0xcb10
+
+#define PWM_ENCODER_3                     0xcb18
+
+#define PWM_ENCODER_4                     0xcb20
+
+#define PWM_ENCODER_5                     0xcb28
+
+#define PWM_ENCODER_6                     0xcb30
+
+#define PWM_ENCODER_7                     0xcb38
+
+#define PWM_DECODER_0                     0xcb40
+
+#define PWM_DECODER_1                     0xcb48
+
+#define PWM_DECODER_2                     0xcb50
+
+#define PWM_DECODER_3                     0xcb58
+
+#define PWM_DECODER_4                     0xcb60
+
+#define PWM_DECODER_5                     0xcb68
+
+#define PWM_DECODER_6                     0xcb70
+
+#define PWM_DECODER_7                     0xcb80
+
+#define PWM_DECODER_8                     0xcb88
+
+#define PWM_DECODER_9                     0xcb90
+
+#define PWM_DECODER_10                    0xcb98
+
+#define PWM_DECODER_11                    0xcba0
+
+#define PWM_DECODER_12                    0xcba8
+
+#define PWM_DECODER_13                    0xcbb0
+
+#define PWM_DECODER_14                    0xcbb8
+
+#define PWM_DECODER_15                    0xcbc0
+
+#define PWM_USER_DATA                     0xcbc8
+
+#define TOD_0                             0xcbcc
+
+/* Enable TOD counter, output channel sync and even-PPS mode */
+#define TOD_CFG                           0x0000
+
+#define TOD_1                             0xcbce
+
+#define TOD_2                             0xcbd0
+
+#define TOD_3                             0xcbd2
+
+
+#define TOD_WRITE_0                       0xcc00
+/* 8-bit subns, 32-bit ns, 48-bit seconds */
+#define TOD_WRITE                         0x0000
+/* Counter increments after TOD write is completed */
+#define TOD_WRITE_COUNTER                 0x000c
+/* TOD write trigger configuration */
+#define TOD_WRITE_SELECT_CFG_0            0x000d
+/* TOD write trigger selection */
+#define TOD_WRITE_CMD                     0x000f
+
+#define TOD_WRITE_1                       0xcc10
+
+#define TOD_WRITE_2                       0xcc20
+
+#define TOD_WRITE_3                       0xcc30
+
+#define TOD_READ_PRIMARY_0                0xcc40
+/* 8-bit subns, 32-bit ns, 48-bit seconds */
+#define TOD_READ_PRIMARY                  0x0000
+/* Counter increments after TOD write is completed */
+#define TOD_READ_PRIMARY_COUNTER          0x000b
+/* Read trigger configuration */
+#define TOD_READ_PRIMARY_SEL_CFG_0        0x000c
+/* Read trigger selection */
+#define TOD_READ_PRIMARY_CMD              0x000e
+
+#define TOD_READ_PRIMARY_1                0xcc50
+
+#define TOD_READ_PRIMARY_2                0xcc60
+
+#define TOD_READ_PRIMARY_3                0xcc80
+
+#define TOD_READ_SECONDARY_0              0xcc90
+
+#define TOD_READ_SECONDARY_1              0xcca0
+
+#define TOD_READ_SECONDARY_2              0xccb0
+
+#define TOD_READ_SECONDARY_3              0xccc0
+
+#define OUTPUT_TDC_CFG                    0xccd0
+
+#define OUTPUT_TDC_0                      0xcd00
+
+#define OUTPUT_TDC_1                      0xcd08
+
+#define OUTPUT_TDC_2                      0xcd10
+
+#define OUTPUT_TDC_3                      0xcd18
+
+#define INPUT_TDC                         0xcd20
+
+#define SCRATCH                           0xcf50
+
+#define EEPROM                            0xcf68
+
+#define OTP                               0xcf70
+
+#define BYTE                              0xcf80
+
+/* Bit definitions for the MAJ_REL register */
+#define MAJOR_SHIFT                       (1)
+#define MAJOR_MASK                        (0x7f)
+#define PR_BUILD                          BIT(0)
+
+/* Bit definitions for the USER_GPIO0_TO_7_STATUS register */
+#define GPIO0_LEVEL                       BIT(0)
+#define GPIO1_LEVEL                       BIT(1)
+#define GPIO2_LEVEL                       BIT(2)
+#define GPIO3_LEVEL                       BIT(3)
+#define GPIO4_LEVEL                       BIT(4)
+#define GPIO5_LEVEL                       BIT(5)
+#define GPIO6_LEVEL                       BIT(6)
+#define GPIO7_LEVEL                       BIT(7)
+
+/* Bit definitions for the USER_GPIO8_TO_15_STATUS register */
+#define GPIO8_LEVEL                       BIT(0)
+#define GPIO9_LEVEL                       BIT(1)
+#define GPIO10_LEVEL                      BIT(2)
+#define GPIO11_LEVEL                      BIT(3)
+#define GPIO12_LEVEL                      BIT(4)
+#define GPIO13_LEVEL                      BIT(5)
+#define GPIO14_LEVEL                      BIT(6)
+#define GPIO15_LEVEL                      BIT(7)
+
+/* Bit definitions for the GPIO0_TO_7_OUT register */
+#define GPIO0_DRIVE_LEVEL                 BIT(0)
+#define GPIO1_DRIVE_LEVEL                 BIT(1)
+#define GPIO2_DRIVE_LEVEL                 BIT(2)
+#define GPIO3_DRIVE_LEVEL                 BIT(3)
+#define GPIO4_DRIVE_LEVEL                 BIT(4)
+#define GPIO5_DRIVE_LEVEL                 BIT(5)
+#define GPIO6_DRIVE_LEVEL                 BIT(6)
+#define GPIO7_DRIVE_LEVEL                 BIT(7)
+
+/* Bit definitions for the GPIO8_TO_15_OUT register */
+#define GPIO8_DRIVE_LEVEL                 BIT(0)
+#define GPIO9_DRIVE_LEVEL                 BIT(1)
+#define GPIO10_DRIVE_LEVEL                BIT(2)
+#define GPIO11_DRIVE_LEVEL                BIT(3)
+#define GPIO12_DRIVE_LEVEL                BIT(4)
+#define GPIO13_DRIVE_LEVEL                BIT(5)
+#define GPIO14_DRIVE_LEVEL                BIT(6)
+#define GPIO15_DRIVE_LEVEL                BIT(7)
+
+/* Bit definitions for the DPLL_TOD_SYNC_CFG register */
+#define TOD_SYNC_SOURCE_SHIFT             (1)
+#define TOD_SYNC_SOURCE_MASK              (0x3)
+#define TOD_SYNC_EN                       BIT(0)
+
+/* Bit definitions for the DPLL_MODE register */
+#define WRITE_TIMER_MODE                  BIT(6)
+#define PLL_MODE_SHIFT                    (3)
+#define PLL_MODE_MASK                     (0x7)
+#define STATE_MODE_SHIFT                  (0)
+#define STATE_MODE_MASK                   (0x7)
+
+/* Bit definitions for the GPIO_CFG_GBL register */
+#define SUPPLY_MODE_SHIFT                 (0)
+#define SUPPLY_MODE_MASK                  (0x3)
+
+/* Bit definitions for the GPIO_DCO_INC_DEC register */
+#define INCDEC_DPLL_INDEX_SHIFT           (0)
+#define INCDEC_DPLL_INDEX_MASK            (0x7)
+
+/* Bit definitions for the GPIO_OUT_CTRL_0 register */
+#define CTRL_OUT_0                        BIT(0)
+#define CTRL_OUT_1                        BIT(1)
+#define CTRL_OUT_2                        BIT(2)
+#define CTRL_OUT_3                        BIT(3)
+#define CTRL_OUT_4                        BIT(4)
+#define CTRL_OUT_5                        BIT(5)
+#define CTRL_OUT_6                        BIT(6)
+#define CTRL_OUT_7                        BIT(7)
+
+/* Bit definitions for the GPIO_OUT_CTRL_1 register */
+#define CTRL_OUT_8                        BIT(0)
+#define CTRL_OUT_9                        BIT(1)
+#define CTRL_OUT_10                       BIT(2)
+#define CTRL_OUT_11                       BIT(3)
+#define CTRL_OUT_12                       BIT(4)
+#define CTRL_OUT_13                       BIT(5)
+#define CTRL_OUT_14                       BIT(6)
+#define CTRL_OUT_15                       BIT(7)
+
+/* Bit definitions for the GPIO_TOD_TRIG register */
+#define TOD_TRIG_0                        BIT(0)
+#define TOD_TRIG_1                        BIT(1)
+#define TOD_TRIG_2                        BIT(2)
+#define TOD_TRIG_3                        BIT(3)
+
+/* Bit definitions for the GPIO_DPLL_INDICATOR register */
+#define IND_DPLL_INDEX_SHIFT              (0)
+#define IND_DPLL_INDEX_MASK               (0x7)
+
+/* Bit definitions for the GPIO_LOS_INDICATOR register */
+#define REFMON_INDEX_SHIFT                (0)
+#define REFMON_INDEX_MASK                 (0xf)
+/* Active level of LOS indicator, 0=low 1=high */
+#define ACTIVE_LEVEL                      BIT(4)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_0 register */
+#define DSQ_INP_0                         BIT(0)
+#define DSQ_INP_1                         BIT(1)
+#define DSQ_INP_2                         BIT(2)
+#define DSQ_INP_3                         BIT(3)
+#define DSQ_INP_4                         BIT(4)
+#define DSQ_INP_5                         BIT(5)
+#define DSQ_INP_6                         BIT(6)
+#define DSQ_INP_7                         BIT(7)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_1 register */
+#define DSQ_INP_8                         BIT(0)
+#define DSQ_INP_9                         BIT(1)
+#define DSQ_INP_10                        BIT(2)
+#define DSQ_INP_11                        BIT(3)
+#define DSQ_INP_12                        BIT(4)
+#define DSQ_INP_13                        BIT(5)
+#define DSQ_INP_14                        BIT(6)
+#define DSQ_INP_15                        BIT(7)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_2 register */
+#define DSQ_DPLL_0                        BIT(0)
+#define DSQ_DPLL_1                        BIT(1)
+#define DSQ_DPLL_2                        BIT(2)
+#define DSQ_DPLL_3                        BIT(3)
+#define DSQ_DPLL_4                        BIT(4)
+#define DSQ_DPLL_5                        BIT(5)
+#define DSQ_DPLL_6                        BIT(6)
+#define DSQ_DPLL_7                        BIT(7)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_3 register */
+#define DSQ_DPLL_SYS                      BIT(0)
+#define GPIO_DSQ_LEVEL                    BIT(1)
+
+/* Bit definitions for the GPIO_TOD_NOTIFICATION_CFG register */
+#define DPLL_TOD_SHIFT                    (0)
+#define DPLL_TOD_MASK                     (0x3)
+#define TOD_READ_SECONDARY                BIT(2)
+#define GPIO_ASSERT_LEVEL                 BIT(3)
+
+/* Bit definitions for the GPIO_CTRL register */
+#define GPIO_FUNCTION_EN                  BIT(0)
+#define GPIO_CMOS_OD_MODE                 BIT(1)
+#define GPIO_CONTROL_DIR                  BIT(2)
+#define GPIO_PU_PD_MODE                   BIT(3)
+#define GPIO_FUNCTION_SHIFT               (4)
+#define GPIO_FUNCTION_MASK                (0xf)
+
+/* Bit definitions for the OUT_CTRL_1 register */
+#define OUT_SYNC_DISABLE                  BIT(7)
+#define SQUELCH_VALUE                     BIT(6)
+#define SQUELCH_DISABLE                   BIT(5)
+#define PAD_VDDO_SHIFT                    (2)
+#define PAD_VDDO_MASK                     (0x7)
+#define PAD_CMOSDRV_SHIFT                 (0)
+#define PAD_CMOSDRV_MASK                  (0x3)
+
+/* Bit definitions for the TOD_CFG register */
+#define TOD_EVEN_PPS_MODE                 BIT(2)
+#define TOD_OUT_SYNC_ENABLE               BIT(1)
+#define TOD_ENABLE                        BIT(0)
+
+/* Bit definitions for the TOD_WRITE_SELECT_CFG_0 register */
+#define WR_PWM_DECODER_INDEX_SHIFT        (4)
+#define WR_PWM_DECODER_INDEX_MASK         (0xf)
+#define WR_REF_INDEX_SHIFT                (0)
+#define WR_REF_INDEX_MASK                 (0xf)
+
+/* Bit definitions for the TOD_WRITE_CMD register */
+#define TOD_WRITE_SELECTION_SHIFT         (0)
+#define TOD_WRITE_SELECTION_MASK          (0xf)
+
+/* Bit definitions for the TOD_READ_PRIMARY_SEL_CFG_0 register */
+#define RD_PWM_DECODER_INDEX_SHIFT        (4)
+#define RD_PWM_DECODER_INDEX_MASK         (0xf)
+#define RD_REF_INDEX_SHIFT                (0)
+#define RD_REF_INDEX_MASK                 (0xf)
+
+/* Bit definitions for the TOD_READ_PRIMARY_CMD register */
+#define TOD_READ_TRIGGER_MODE             BIT(4)
+#define TOD_READ_TRIGGER_SHIFT            (0)
+#define TOD_READ_TRIGGER_MASK             (0xf)
+
+#endif
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
new file mode 100644 (file)
index 0000000..cf5889b
--- /dev/null
@@ -0,0 +1,1425 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PTP hardware clock driver for the IDT ClockMatrix(TM) family of timing and
+ * synchronization devices.
+ *
+ * Copyright (C) 2019 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#include <linux/firmware.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/timekeeping.h>
+
+#include "ptp_private.h"
+#include "ptp_clockmatrix.h"
+
+MODULE_DESCRIPTION("Driver for IDT ClockMatrix(TM) family");
+MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
+MODULE_AUTHOR("IDT support-1588 <IDT-support-1588@lm.renesas.com>");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL");
+
+#define SETTIME_CORRECTION (0)
+
+/* Decode a TOD register image into a timespec64.
+ *
+ * Byte layout (little endian): buf[0] sub-nanoseconds (ignored here),
+ * buf[1..4] 32-bit nanoseconds, buf[5..10] 48-bit seconds.
+ *
+ * Returns 0 on success, 1 if @count is smaller than TOD_BYTE_COUNT.
+ */
+static int char_array_to_timespec(u8 *buf,
+                                 u8 count,
+                                 struct timespec64 *ts)
+{
+       u8 i;
+       u64 nsec;
+       time64_t sec;
+
+       if (count < TOD_BYTE_COUNT)
+               return 1;
+
+       /* Sub-nanoseconds are in buf[0]. */
+       nsec = buf[4];
+       for (i = 0; i < 3; i++) {
+               nsec <<= 8;
+               nsec |= buf[3 - i];
+       }
+
+       /* Assemble the 48-bit seconds field from buf[5..10]. */
+       sec = buf[10];
+       for (i = 0; i < 5; i++) {
+               sec <<= 8;
+               sec |= buf[9 - i];
+       }
+
+       ts->tv_sec = sec;
+       ts->tv_nsec = nsec;
+
+       return 0;
+}
+
+/* Encode a timespec64 into the TOD register image format.
+ *
+ * Inverse of char_array_to_timespec(): buf[0] sub-nanoseconds (always
+ * written as 0), buf[1..4] nanoseconds, buf[5..TOD_BYTE_COUNT-1] seconds,
+ * all little endian.
+ *
+ * Returns 0 on success, 1 if @count is smaller than TOD_BYTE_COUNT.
+ */
+static int timespec_to_char_array(struct timespec64 const *ts,
+                                 u8 *buf,
+                                 u8 count)
+{
+       u8 i;
+       s32 nsec;
+       time64_t sec;
+
+       if (count < TOD_BYTE_COUNT)
+               return 1;
+
+       nsec = ts->tv_nsec;
+       sec = ts->tv_sec;
+
+       /* Sub-nanoseconds are in buf[0]. */
+       buf[0] = 0;
+       for (i = 1; i < 5; i++) {
+               buf[i] = nsec & 0xff;
+               nsec >>= 8;
+       }
+
+       for (i = 5; i < TOD_BYTE_COUNT; i++) {
+
+               buf[i] = sec & 0xff;
+               sec >>= 8;
+       }
+
+       return 0;
+}
+
+/* Perform a combined i2c transaction: a one-byte write of @regaddr
+ * followed by a @count-byte read or write of @buf, depending on @write.
+ *
+ * Returns 0 on success, the negative value from i2c_transfer() on bus
+ * error, or -EIO if fewer than both messages completed.
+ */
+static int idtcm_xfer(struct idtcm *idtcm,
+                     u8 regaddr,
+                     u8 *buf,
+                     u16 count,
+                     bool write)
+{
+       struct i2c_client *client = idtcm->client;
+       struct i2c_msg msg[2];
+       int cnt;
+
+       /* Message 0: select the register within the current page. */
+       msg[0].addr = client->addr;
+       msg[0].flags = 0;
+       msg[0].len = 1;
+       msg[0].buf = &regaddr;
+
+       /* Message 1: transfer the payload in the requested direction. */
+       msg[1].addr = client->addr;
+       msg[1].flags = write ? 0 : I2C_M_RD;
+       msg[1].len = count;
+       msg[1].buf = buf;
+
+       cnt = i2c_transfer(client->adapter, msg, 2);
+
+       if (cnt < 0) {
+               dev_err(&client->dev, "i2c_transfer returned %d\n", cnt);
+               return cnt;
+       } else if (cnt != 2) {
+               dev_err(&client->dev,
+                       "i2c_transfer sent only %d of %d messages\n", cnt, 2);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/* Select the register page (high byte of the 16-bit address) by writing
+ * the PAGE_ADDR register block.  The last-programmed page is cached in
+ * idtcm->page_offset so repeated accesses to the same page skip the bus
+ * transaction.
+ *
+ * NOTE(review): buf[2]=0x10 / buf[3]=0x20 are presumably fixed values
+ * required by the page register — confirm against the device datasheet.
+ */
+static int idtcm_page_offset(struct idtcm *idtcm, u8 val)
+{
+       u8 buf[4];
+       int err;
+
+       if (idtcm->page_offset == val)
+               return 0;
+
+       buf[0] = 0x0;
+       buf[1] = val;
+       buf[2] = 0x10;
+       buf[3] = 0x20;
+
+       err = idtcm_xfer(idtcm, PAGE_ADDR, buf, sizeof(buf), 1);
+
+       if (err)
+               dev_err(&idtcm->client->dev, "failed to set page offset\n");
+       else
+               idtcm->page_offset = val;
+
+       return err;
+}
+
+/* Access a 16-bit register address: program the page from the high byte,
+ * then transfer @count bytes at the low-byte offset within that page.
+ * Returns 0 on success or a negative error from the page set / transfer.
+ */
+static int _idtcm_rdwr(struct idtcm *idtcm,
+                      u16 regaddr,
+                      u8 *buf,
+                      u16 count,
+                      bool write)
+{
+       u8 hi;
+       u8 lo;
+       int err;
+
+       hi = (regaddr >> 8) & 0xff;
+       lo = regaddr & 0xff;
+
+       err = idtcm_page_offset(idtcm, hi);
+
+       if (err)
+               goto out;
+
+       err = idtcm_xfer(idtcm, lo, buf, count, write);
+out:
+       return err;
+}
+
+/* Read @count bytes from register @module + @regaddr into @buf. */
+static int idtcm_read(struct idtcm *idtcm,
+                     u16 module,
+                     u16 regaddr,
+                     u8 *buf,
+                     u16 count)
+{
+       return _idtcm_rdwr(idtcm, module + regaddr, buf, count, false);
+}
+
+/* Write @count bytes from @buf to register @module + @regaddr. */
+static int idtcm_write(struct idtcm *idtcm,
+                      u16 module,
+                      u16 regaddr,
+                      u8 *buf,
+                      u16 count)
+{
+       return _idtcm_rdwr(idtcm, module + regaddr, buf, count, true);
+}
+
+/* Read the current time of day from the channel's primary TOD block.
+ *
+ * Fires a read trigger via TOD_READ_PRIMARY_CMD, then reads and decodes
+ * the latched TOD_READ_PRIMARY bytes into @ts.  When overhead
+ * calculation is enabled, the raw monotonic start time is recorded so a
+ * later settime can compensate for bus latency.
+ */
+static int _idtcm_gettime(struct idtcm_channel *channel,
+                         struct timespec64 *ts)
+{
+       struct idtcm *idtcm = channel->idtcm;
+       u8 buf[TOD_BYTE_COUNT];
+       u8 trigger;
+       int err;
+
+       err = idtcm_read(idtcm, channel->tod_read_primary,
+                        TOD_READ_PRIMARY_CMD, &trigger, sizeof(trigger));
+       if (err)
+               return err;
+
+       /* Select trigger 1 in single-shot mode (TOD_READ_TRIGGER_MODE). */
+       trigger &= ~(TOD_READ_TRIGGER_MASK << TOD_READ_TRIGGER_SHIFT);
+       trigger |= (1 << TOD_READ_TRIGGER_SHIFT);
+       trigger |= TOD_READ_TRIGGER_MODE;
+
+       err = idtcm_write(idtcm, channel->tod_read_primary,
+                         TOD_READ_PRIMARY_CMD, &trigger, sizeof(trigger));
+
+       if (err)
+               return err;
+
+       if (idtcm->calculate_overhead_flag)
+               idtcm->start_time = ktime_get_raw();
+
+       err = idtcm_read(idtcm, channel->tod_read_primary,
+                        TOD_READ_PRIMARY, buf, sizeof(buf));
+
+       if (err)
+               return err;
+
+       err = char_array_to_timespec(buf, sizeof(buf), ts);
+
+       return err;
+}
+
+/* Resynchronize the Qn/Qn+1 output dividers of one PLL to @sync_src.
+ *
+ * Sequence: hold master sync in reset, program the sync source, set the
+ * divider sync trigger mask for whichever of the two outputs is in use,
+ * then release the reset.  A no-op when neither output is enabled.
+ * Returns 0 on success, -EINVAL for an unknown @pll index, or a
+ * negative error from the register writes.
+ */
+static int _sync_pll_output(struct idtcm *idtcm,
+                           u8 pll,
+                           u8 sync_src,
+                           u8 qn,
+                           u8 qn_plus_1)
+{
+       int err;
+       u8 val;
+       u16 sync_ctrl0;
+       u16 sync_ctrl1;
+
+       if ((qn == 0) && (qn_plus_1 == 0))
+               return 0;
+
+       /* Map the PLL index to its hardware sync control register pair. */
+       switch (pll) {
+       case 0:
+               sync_ctrl0 = HW_Q0_Q1_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q0_Q1_CH_SYNC_CTRL_1;
+               break;
+       case 1:
+               sync_ctrl0 = HW_Q2_Q3_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q2_Q3_CH_SYNC_CTRL_1;
+               break;
+       case 2:
+               sync_ctrl0 = HW_Q4_Q5_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q4_Q5_CH_SYNC_CTRL_1;
+               break;
+       case 3:
+               sync_ctrl0 = HW_Q6_Q7_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q6_Q7_CH_SYNC_CTRL_1;
+               break;
+       case 4:
+               sync_ctrl0 = HW_Q8_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q8_CH_SYNC_CTRL_1;
+               break;
+       case 5:
+               sync_ctrl0 = HW_Q9_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q9_CH_SYNC_CTRL_1;
+               break;
+       case 6:
+               sync_ctrl0 = HW_Q10_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q10_CH_SYNC_CTRL_1;
+               break;
+       case 7:
+               sync_ctrl0 = HW_Q11_CH_SYNC_CTRL_0;
+               sync_ctrl1 = HW_Q11_CH_SYNC_CTRL_1;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       val = SYNCTRL1_MASTER_SYNC_RST;
+
+       /* Place master sync in reset */
+       err = idtcm_write(idtcm, 0, sync_ctrl1, &val, sizeof(val));
+       if (err)
+               return err;
+
+       err = idtcm_write(idtcm, 0, sync_ctrl0, &sync_src, sizeof(sync_src));
+       if (err)
+               return err;
+
+       /* Set sync trigger mask */
+       val |= SYNCTRL1_FBDIV_FRAME_SYNC_TRIG | SYNCTRL1_FBDIV_SYNC_TRIG;
+
+       if (qn)
+               val |= SYNCTRL1_Q0_DIV_SYNC_TRIG;
+
+       if (qn_plus_1)
+               val |= SYNCTRL1_Q1_DIV_SYNC_TRIG;
+
+       err = idtcm_write(idtcm, 0, sync_ctrl1, &val, sizeof(val));
+       if (err)
+               return err;
+
+       /* Place master sync out of reset */
+       val &= ~(SYNCTRL1_MASTER_SYNC_RST);
+       err = idtcm_write(idtcm, 0, sync_ctrl1, &val, sizeof(val));
+
+       return err;
+}
+
+/* Resynchronize all enabled outputs of the channel to its DPLL's TOD PPS.
+ *
+ * channel->output_mask is consumed one bit per output: PLLs 0-3 drive two
+ * outputs each (Qn and Qn+1), PLLs 4-7 drive one.  Each PLL with at least
+ * one enabled output is synced via _sync_pll_output().
+ * Returns 0 on success, -EINVAL for an unknown DPLL index, or the first
+ * error from _sync_pll_output().
+ */
+static int idtcm_sync_pps_output(struct idtcm_channel *channel)
+{
+       struct idtcm *idtcm = channel->idtcm;
+
+       u8 pll;
+       u8 sync_src;
+       u8 qn;
+       u8 qn_plus_1;
+       int err = 0;
+
+       u16 output_mask = channel->output_mask;
+
+       switch (channel->dpll_n) {
+       case DPLL_0:
+               sync_src = SYNC_SOURCE_DPLL0_TOD_PPS;
+               break;
+       case DPLL_1:
+               sync_src = SYNC_SOURCE_DPLL1_TOD_PPS;
+               break;
+       case DPLL_2:
+               sync_src = SYNC_SOURCE_DPLL2_TOD_PPS;
+               break;
+       case DPLL_3:
+               sync_src = SYNC_SOURCE_DPLL3_TOD_PPS;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       for (pll = 0; pll < 8; pll++) {
+
+               qn = output_mask & 0x1;
+               output_mask = output_mask >> 1;
+
+               if (pll < 4) {
+                       /* First 4 pll has 2 outputs */
+                       qn_plus_1 = output_mask & 0x1;
+                       output_mask = output_mask >> 1;
+               } else {
+                       qn_plus_1 = 0;
+               }
+
+               if ((qn != 0) || (qn_plus_1 != 0))
+                       err = _sync_pll_output(idtcm, pll, sync_src, qn,
+                                              qn_plus_1);
+
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+
+/* Program the DPLL's hardware TOD via the HW TOD write-trigger mechanism.
+ *
+ * The trigger selection is written to the low nibble of
+ * HW_DPLL_TOD_CTRL_1; bit 0x08 appears to act as the disarm/arm bit —
+ * it is set while configuring and cleared to arm (NOTE(review): confirm
+ * the 0x08 semantics against the datasheet).  For all triggers except
+ * HW_TOD_WR_TRIG_SEL_MSB the TOD bytes are loaded before arming; for the
+ * MSB trigger they are loaded after arming, optionally compensated with
+ * the measured gettime/settime overhead captured in idtcm->start_time.
+ */
+static int _idtcm_set_dpll_tod(struct idtcm_channel *channel,
+                              struct timespec64 const *ts,
+                              enum hw_tod_write_trig_sel wr_trig)
+{
+       struct idtcm *idtcm = channel->idtcm;
+
+       u8 buf[TOD_BYTE_COUNT];
+       u8 cmd;
+       int err;
+       struct timespec64 local_ts = *ts;
+       s64 total_overhead_ns;
+
+       /* Configure HW TOD write trigger. */
+       err = idtcm_read(idtcm, channel->hw_dpll_n, HW_DPLL_TOD_CTRL_1,
+                        &cmd, sizeof(cmd));
+
+       if (err)
+               return err;
+
+       cmd &= ~(0x0f);
+       cmd |= wr_trig | 0x08;
+
+       err = idtcm_write(idtcm, channel->hw_dpll_n, HW_DPLL_TOD_CTRL_1,
+                         &cmd, sizeof(cmd));
+
+       if (err)
+               return err;
+
+       if (wr_trig  != HW_TOD_WR_TRIG_SEL_MSB) {
+
+               err = timespec_to_char_array(&local_ts, buf, sizeof(buf));
+
+               if (err)
+                       return err;
+
+               err = idtcm_write(idtcm, channel->hw_dpll_n,
+                                 HW_DPLL_TOD_OVR__0, buf, sizeof(buf));
+
+               if (err)
+                       return err;
+       }
+
+       /* ARM HW TOD write trigger. */
+       cmd &= ~(0x08);
+
+       err = idtcm_write(idtcm, channel->hw_dpll_n, HW_DPLL_TOD_CTRL_1,
+                         &cmd, sizeof(cmd));
+
+       if (wr_trig == HW_TOD_WR_TRIG_SEL_MSB) {
+
+               if (idtcm->calculate_overhead_flag) {
+                       /* Compensate for the elapsed time since the paired
+                        * _idtcm_gettime() plus the fixed write overhead.
+                        */
+                       total_overhead_ns =  ktime_to_ns(ktime_get_raw()
+                                                        - idtcm->start_time)
+                                            + idtcm->tod_write_overhead_ns
+                                            + SETTIME_CORRECTION;
+
+                       timespec64_add_ns(&local_ts, total_overhead_ns);
+
+                       idtcm->calculate_overhead_flag = 0;
+               }
+
+               err = timespec_to_char_array(&local_ts, buf, sizeof(buf));
+
+               if (err)
+                       return err;
+
+               err = idtcm_write(idtcm, channel->hw_dpll_n,
+                                 HW_DPLL_TOD_OVR__0, buf, sizeof(buf));
+       }
+
+       return err;
+}
+
+/* Set the channel's TOD and resynchronize its PPS outputs.
+ *
+ * Starts a HW TOD write via _idtcm_set_dpll_tod(), then polls
+ * HW_DPLL_TOD_CTRL_1 (up to 10000 reads, no delay between polls) until
+ * the register reads back 0x4a, indicating completion —
+ * NOTE(review): 0x4a is a magic value; confirm against the datasheet.
+ * NOTE(review): on timeout this returns the bare sentinel 1 rather than
+ * a -E errno value; callers only test for non-zero.
+ */
+static int _idtcm_settime(struct idtcm_channel *channel,
+                         struct timespec64 const *ts,
+                         enum hw_tod_write_trig_sel wr_trig)
+{
+       struct idtcm *idtcm = channel->idtcm;
+       s32 retval;
+       int err;
+       int i;
+       u8 trig_sel;
+
+       err = _idtcm_set_dpll_tod(channel, ts, wr_trig);
+
+       if (err)
+               return err;
+
+       /* Wait for the operation to complete. */
+       for (i = 0; i < 10000; i++) {
+               err = idtcm_read(idtcm, channel->hw_dpll_n,
+                                HW_DPLL_TOD_CTRL_1, &trig_sel,
+                                sizeof(trig_sel));
+
+               if (err)
+                       return err;
+
+               if (trig_sel == 0x4a)
+                       break;
+
+               err = 1;
+       }
+
+       if (err)
+               return err;
+
+       retval = idtcm_sync_pps_output(channel);
+
+       return retval;
+}
+
+/* Write the signed 32-bit phase pull-in offset (nanoseconds) to the
+ * channel's PULL_IN_OFFSET register, little endian.
+ */
+static int idtcm_set_phase_pull_in_offset(struct idtcm_channel *channel,
+                                         s32 offset_ns)
+{
+       int err;
+       int i;
+       struct idtcm *idtcm = channel->idtcm;
+
+       u8 buf[4];
+
+       for (i = 0; i < 4; i++) {
+               buf[i] = 0xff & (offset_ns);
+               offset_ns >>= 8;
+       }
+
+       err = idtcm_write(idtcm, channel->dpll_phase_pull_in, PULL_IN_OFFSET,
+                         buf, sizeof(buf));
+
+       return err;
+}
+
+/* Write the unsigned 24-bit pull-in slope limit (PULL_IN_SLOPE_LIMIT),
+ * little endian.  Values that do not fit in 24 bits are replaced by 0 —
+ * NOTE(review): presumably 0 means "no limit"; confirm against the
+ * datasheet.
+ */
+static int idtcm_set_phase_pull_in_slope_limit(struct idtcm_channel *channel,
+                                              u32 max_ffo_ppb)
+{
+       int err;
+       u8 i;
+       struct idtcm *idtcm = channel->idtcm;
+
+       u8 buf[3];
+
+       if (max_ffo_ppb & 0xff000000)
+               max_ffo_ppb = 0;
+
+       for (i = 0; i < 3; i++) {
+               buf[i] = 0xff & (max_ffo_ppb);
+               max_ffo_ppb >>= 8;
+       }
+
+       err = idtcm_write(idtcm, channel->dpll_phase_pull_in,
+                         PULL_IN_SLOPE_LIMIT, buf, sizeof(buf));
+
+       return err;
+}
+
+/* Kick off a phase pull-in by writing 1 to PULL_IN_CTRL, but only when
+ * the register currently reads 0 (idle).  Returns -EBUSY if a pull-in is
+ * already in progress, otherwise 0 or a bus error.
+ */
+static int idtcm_start_phase_pull_in(struct idtcm_channel *channel)
+{
+       int err;
+       struct idtcm *idtcm = channel->idtcm;
+
+       u8 buf;
+
+       err = idtcm_read(idtcm, channel->dpll_phase_pull_in, PULL_IN_CTRL,
+                        &buf, sizeof(buf));
+
+       if (err)
+               return err;
+
+       if (buf == 0) {
+               buf = 0x01;
+               err = idtcm_write(idtcm, channel->dpll_phase_pull_in,
+                                 PULL_IN_CTRL, &buf, sizeof(buf));
+       } else {
+               err = -EBUSY;
+       }
+
+       return err;
+}
+
+/* Perform a complete phase pull-in: program the (negated) offset and the
+ * slope limit, then start the pull-in.
+ *
+ * NOTE(review): the negation -offset_ns overflows (UB) if offset_ns is
+ * S32_MIN; the caller currently limits |delta| to
+ * PHASE_PULL_IN_THRESHOLD_NS so this may be unreachable — verify.
+ */
+static int idtcm_do_phase_pull_in(struct idtcm_channel *channel,
+                                 s32 offset_ns,
+                                 u32 max_ffo_ppb)
+{
+       int err;
+
+       err = idtcm_set_phase_pull_in_offset(channel, -offset_ns);
+
+       if (err)
+               return err;
+
+       err = idtcm_set_phase_pull_in_slope_limit(channel, max_ffo_ppb);
+
+       if (err)
+               return err;
+
+       err = idtcm_start_phase_pull_in(channel);
+
+       return err;
+}
+
+/* Adjust the channel's TOD by @delta nanoseconds.
+ *
+ * Small corrections (|delta| < PHASE_PULL_IN_THRESHOLD_NS) are applied
+ * smoothly via hardware phase pull-in; larger ones do a read-modify-write
+ * of the TOD with bus-overhead compensation enabled.
+ */
+static int _idtcm_adjtime(struct idtcm_channel *channel, s64 delta)
+{
+       int err;
+       struct idtcm *idtcm = channel->idtcm;
+       struct timespec64 ts;
+       s64 now;
+
+       if (abs(delta) < PHASE_PULL_IN_THRESHOLD_NS) {
+               err = idtcm_do_phase_pull_in(channel, delta, 0);
+       } else {
+               /* Enable latency compensation for the gettime/settime pair. */
+               idtcm->calculate_overhead_flag = 1;
+
+               err = _idtcm_gettime(channel, &ts);
+
+               if (err)
+                       return err;
+
+               now = timespec64_to_ns(&ts);
+               now += delta;
+
+               ts = ns_to_timespec64(now);
+
+               err = _idtcm_settime(channel, &ts, HW_TOD_WR_TRIG_SEL_MSB);
+       }
+
+       return err;
+}
+
+/* Issue a state-machine reset (SM_RESET_CMD) and, on success, sleep for
+ * POST_SM_RESET_DELAY_MS to let the device come back up.
+ */
+static int idtcm_state_machine_reset(struct idtcm *idtcm)
+{
+       int err;
+       u8 byte = SM_RESET_CMD;
+
+       err = idtcm_write(idtcm, RESET_CTRL, SM_RESET, &byte, sizeof(byte));
+
+       if (!err)
+               msleep_interruptible(POST_SM_RESET_DELAY_MS);
+
+       return err;
+}
+
+/* Read the one-byte hardware revision ID from GENERAL_STATUS. */
+static int idtcm_read_hw_rev_id(struct idtcm *idtcm, u8 *hw_rev_id)
+{
+       return idtcm_read(idtcm,
+                         GENERAL_STATUS,
+                         HW_REV_ID,
+                         hw_rev_id,
+                         sizeof(u8));
+}
+
+/* Read the one-byte bond ID from GENERAL_STATUS. */
+static int idtcm_read_bond_id(struct idtcm *idtcm, u8 *bond_id)
+{
+       return idtcm_read(idtcm,
+                         GENERAL_STATUS,
+                         BOND_ID,
+                         bond_id,
+                         sizeof(u8));
+}
+
+/* Read the 16-bit (little endian) hardware CSR ID from GENERAL_STATUS.
+ * Note: *hw_csr_id is assembled even if the read failed (buf stays 0).
+ */
+static int idtcm_read_hw_csr_id(struct idtcm *idtcm, u16 *hw_csr_id)
+{
+       int err;
+       u8 buf[2] = {0};
+
+       err = idtcm_read(idtcm, GENERAL_STATUS, HW_CSR_ID, buf, sizeof(buf));
+
+       *hw_csr_id = (buf[1] << 8) | buf[0];
+
+       return err;
+}
+
+/* Read the 16-bit (little endian) hardware IRQ ID from GENERAL_STATUS. */
+static int idtcm_read_hw_irq_id(struct idtcm *idtcm, u16 *hw_irq_id)
+{
+       int err;
+       u8 buf[2] = {0};
+
+       err = idtcm_read(idtcm, GENERAL_STATUS, HW_IRQ_ID, buf, sizeof(buf));
+
+       *hw_irq_id = (buf[1] << 8) | buf[0];
+
+       return err;
+}
+
+/* Read the 16-bit (little endian) product ID from GENERAL_STATUS. */
+static int idtcm_read_product_id(struct idtcm *idtcm, u16 *product_id)
+{
+       int err;
+       u8 buf[2] = {0};
+
+       err = idtcm_read(idtcm, GENERAL_STATUS, PRODUCT_ID, buf, sizeof(buf));
+
+       *product_id = (buf[1] << 8) | buf[0];
+
+       return err;
+}
+
+/* Read the firmware major release number from MAJ_REL.  The register's
+ * LSB is the PR_BUILD flag, so the value is shifted right by one
+ * (MAJOR_SHIFT).
+ */
+static int idtcm_read_major_release(struct idtcm *idtcm, u8 *major)
+{
+       int err;
+       u8 buf = 0;
+
+       err = idtcm_read(idtcm, GENERAL_STATUS, MAJ_REL, &buf, sizeof(buf));
+
+       *major = buf >> 1;
+
+       return err;
+}
+
+/* Read the firmware minor release number from MIN_REL. */
+static int idtcm_read_minor_release(struct idtcm *idtcm, u8 *minor)
+{
+       return idtcm_read(idtcm, GENERAL_STATUS, MIN_REL, minor, sizeof(u8));
+}
+
+/* Read the firmware hotfix release number from HOTFIX_REL. */
+static int idtcm_read_hotfix_release(struct idtcm *idtcm, u8 *hotfix)
+{
+       return idtcm_read(idtcm,
+                         GENERAL_STATUS,
+                         HOTFIX_REL,
+                         hotfix,
+                         sizeof(u8));
+}
+
+/* Read the 32-bit (little endian) pipeline ID from GENERAL_STATUS. */
+static int idtcm_read_pipeline(struct idtcm *idtcm, u32 *pipeline)
+{
+       int err;
+       u8 buf[4] = {0};
+
+       err = idtcm_read(idtcm,
+                        GENERAL_STATUS,
+                        PIPELINE_ID,
+                        &buf[0],
+                        sizeof(buf));
+
+       *pipeline = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
+
+       return err;
+}
+
+/* If @addr is the firmware's PLL mask address, validate @val (exactly
+ * the low nibble may carry bits, and it must be non-zero) and store it
+ * in *mask.  Returns -EINVAL for an invalid mask, 0 otherwise.
+ *
+ * NOTE(review): *mask is updated even when validation fails — looks
+ * unintentional; confirm whether the assignment should be skipped on
+ * error.
+ */
+static int process_pll_mask(struct idtcm *idtcm, u32 addr, u8 val, u8 *mask)
+{
+       int err = 0;
+
+       if (addr == PLL_MASK_ADDR) {
+               if ((val & 0xf0) || !(val & 0xf)) {
+                       dev_err(&idtcm->client->dev,
+                               "Invalid PLL mask 0x%hhx\n", val);
+                       err = -EINVAL;
+               }
+               *mask = val;
+       }
+
+       return err;
+}
+
+/* Latch one byte of a per-PLL output mask from the firmware stream.
+ *
+ * Each PLL's 16-bit output mask occupies two consecutive config
+ * addresses (LSB at the base address, MSB at base + 1).  Returns
+ * -EINVAL when @addr is not one of the eight known output-mask bytes;
+ * the caller uses that to fall back to PLL-mask processing.
+ */
+static int set_pll_output_mask(struct idtcm *idtcm, u16 addr, u8 val)
+{
+       int err = 0;
+
+       switch (addr) {
+       case OUTPUT_MASK_PLL0_ADDR:
+               SET_U16_LSB(idtcm->channel[0].output_mask, val);
+               break;
+       case OUTPUT_MASK_PLL0_ADDR + 1:
+               SET_U16_MSB(idtcm->channel[0].output_mask, val);
+               break;
+       case OUTPUT_MASK_PLL1_ADDR:
+               SET_U16_LSB(idtcm->channel[1].output_mask, val);
+               break;
+       case OUTPUT_MASK_PLL1_ADDR + 1:
+               SET_U16_MSB(idtcm->channel[1].output_mask, val);
+               break;
+       case OUTPUT_MASK_PLL2_ADDR:
+               SET_U16_LSB(idtcm->channel[2].output_mask, val);
+               break;
+       case OUTPUT_MASK_PLL2_ADDR + 1:
+               SET_U16_MSB(idtcm->channel[2].output_mask, val);
+               break;
+       case OUTPUT_MASK_PLL3_ADDR:
+               SET_U16_LSB(idtcm->channel[3].output_mask, val);
+               break;
+       case OUTPUT_MASK_PLL3_ADDR + 1:
+               SET_U16_MSB(idtcm->channel[3].output_mask, val);
+               break;
+       default:
+               err = -EINVAL;
+               break;
+       }
+
+       return err;
+}
+
+/* Intercept firmware writes that carry PLL/output-mask configuration.
+ *
+ * Output-mask addresses are tried first; anything else is handed to
+ * process_pll_mask(), which returns 0 untouched for non-mask
+ * addresses.
+ */
+static int check_and_set_masks(struct idtcm *idtcm,
+                              u16 regaddr,
+                              u8 val)
+{
+       int err = 0;
+
+       if (set_pll_output_mask(idtcm, regaddr, val)) {
+               /* Not an output mask, check for pll mask */
+               err = process_pll_mask(idtcm, regaddr, val, &idtcm->pll_mask);
+       }
+
+       return err;
+}
+
+/* Dump the PLL mask and the per-PLL output masks (debug logging only). */
+static void display_pll_and_output_masks(struct idtcm *idtcm)
+{
+       u8 pll;
+
+       dev_dbg(&idtcm->client->dev, "pllmask = 0x%02x\n", idtcm->pll_mask);
+
+       for (pll = 0; pll < MAX_PHC_PLL; pll++) {
+               if (idtcm->pll_mask & (1 << pll))
+                       dev_dbg(&idtcm->client->dev,
+                               "PLL%d output_mask = 0x%04x\n",
+                               pll, idtcm->channel[pll].output_mask);
+       }
+}
+
+/* Download FW_FILENAME and replay its register records to the device.
+ *
+ * The image is a flat array of 4-byte idtcm_fwrc records (hiaddr,
+ * loaddr, value, reserved).  A non-zero reserved field aborts the
+ * load.  Mask-configuration addresses are latched into driver state
+ * by check_and_set_masks(); read-only register ranges and the last
+ * four bytes of each 128-byte page are skipped rather than written.
+ *
+ * NOTE(review): the loop assumes fw->size is a multiple of
+ * sizeof(struct idtcm_fwrc); a truncated image would read past the
+ * end of the buffer -- confirm the image format guarantees this.
+ */
+static int idtcm_load_firmware(struct idtcm *idtcm,
+                              struct device *dev)
+{
+       const struct firmware *fw;
+       struct idtcm_fwrc *rec;
+       u32 regaddr;
+       int err;
+       s32 len;
+       u8 val;
+       u8 loaddr;
+
+       dev_dbg(&idtcm->client->dev, "requesting firmware '%s'\n", FW_FILENAME);
+
+       err = request_firmware(&fw, FW_FILENAME, dev);
+
+       if (err)
+               return err;
+
+       dev_dbg(&idtcm->client->dev, "firmware size %zu bytes\n", fw->size);
+
+       rec = (struct idtcm_fwrc *) fw->data;
+
+       /* Reset the config state machine before replaying records. */
+       if (fw->size > 0)
+               idtcm_state_machine_reset(idtcm);
+
+       for (len = fw->size; len > 0; len -= sizeof(*rec)) {
+
+               if (rec->reserved) {
+                       dev_err(&idtcm->client->dev,
+                               "bad firmware, reserved field non-zero\n");
+                       err = -EINVAL;
+               } else {
+                       regaddr = rec->hiaddr << 8;
+                       regaddr |= rec->loaddr;
+
+                       val = rec->value;
+                       loaddr = rec->loaddr;
+
+                       rec++;
+
+                       err = check_and_set_masks(idtcm, regaddr, val);
+               }
+
+               if (err == 0) {
+                       /* Top (status registers) and bottom are read-only */
+                       if ((regaddr < GPIO_USER_CONTROL)
+                           || (regaddr >= SCRATCH))
+                               continue;
+
+                       /* Page size 128, last 4 bytes of page skipped */
+                       if (((loaddr > 0x7b) && (loaddr <= 0x7f))
+                            || ((loaddr > 0xfb) && (loaddr <= 0xff)))
+                               continue;
+
+                       err = idtcm_write(idtcm, regaddr, 0, &val, sizeof(val));
+               }
+
+               if (err)
+                       goto out;
+       }
+
+       display_pll_and_output_masks(idtcm);
+
+out:
+       release_firmware(fw);
+       return err;
+}
+
+/* Squelch or unsquelch the 1-PPS output associated with the channel's
+ * DPLL.  Setting SQUELCH_DISABLE in OUT_CTRL_1 enables the output;
+ * clearing it squelches (disables) it.
+ */
+static int idtcm_pps_enable(struct idtcm_channel *channel, bool enable)
+{
+       struct idtcm *idtcm = channel->idtcm;
+       u32 module;
+       u8 val;
+       int err;
+
+       /*
+        * This assumes that the 1-PPS is on the second of the two
+        * output.  But is this always true?
+        */
+       switch (channel->dpll_n) {
+       case DPLL_0:
+               module = OUTPUT_1;
+               break;
+       case DPLL_1:
+               module = OUTPUT_3;
+               break;
+       case DPLL_2:
+               module = OUTPUT_5;
+               break;
+       case DPLL_3:
+               module = OUTPUT_7;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* Read-modify-write of the output control register. */
+       err = idtcm_read(idtcm, module, OUT_CTRL_1, &val, sizeof(val));
+
+       if (err)
+               return err;
+
+       if (enable)
+               val |= SQUELCH_DISABLE;
+       else
+               val &= ~SQUELCH_DISABLE;
+
+       err = idtcm_write(idtcm, module, OUT_CTRL_1, &val, sizeof(val));
+
+       if (err)
+               return err;
+
+       return 0;
+}
+
+/* Program DPLL_N.DPLL_MODE.PLL_MODE for the channel.
+ *
+ * Read-modify-write of the DPLL mode register.  The cached
+ * channel->pll_mode is updated only after the register write
+ * succeeds, so a failed write cannot leave the cache claiming a mode
+ * the hardware never entered (callers such as idtcm_adjfreq() rely on
+ * the cache to decide whether the mode must be (re)programmed).
+ * Returns 0 on success or a negative error code from the bus access.
+ */
+static int idtcm_set_pll_mode(struct idtcm_channel *channel,
+                             enum pll_mode pll_mode)
+{
+       struct idtcm *idtcm = channel->idtcm;
+       int err;
+       u8 dpll_mode;
+
+       err = idtcm_read(idtcm, channel->dpll_n, DPLL_MODE,
+                        &dpll_mode, sizeof(dpll_mode));
+       if (err)
+               return err;
+
+       dpll_mode &= ~(PLL_MODE_MASK << PLL_MODE_SHIFT);
+
+       dpll_mode |= (pll_mode << PLL_MODE_SHIFT);
+
+       err = idtcm_write(idtcm, channel->dpll_n, DPLL_MODE,
+                         &dpll_mode, sizeof(dpll_mode));
+       if (err)
+               return err;
+
+       /* Cache only after the hardware write succeeded. */
+       channel->pll_mode = pll_mode;
+
+       return 0;
+}
+
+/* PTP Hardware Clock interface */
+
+/* PHC adjfreq: steer the DPLL frequency by @ppb parts per billion.
+ *
+ * Switches the PLL into write-frequency mode on first use, converts
+ * ppb into the hardware Frequency Control Word (see the unit
+ * derivation below) and writes it as a little-endian 6-byte value
+ * under reg_lock.  Returns 0 on success or a negative error code.
+ */
+static int idtcm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+       struct idtcm_channel *channel =
+               container_of(ptp, struct idtcm_channel, caps);
+       struct idtcm *idtcm = channel->idtcm;
+       u8 i;
+       bool neg_adj = 0;
+       int err;
+       u8 buf[6] = {0};
+       s64 fcw;
+
+       if (channel->pll_mode  != PLL_MODE_WRITE_FREQUENCY) {
+               err = idtcm_set_pll_mode(channel, PLL_MODE_WRITE_FREQUENCY);
+               if (err)
+                       return err;
+       }
+
+       /*
+        * Frequency Control Word unit is: 1.11 * 10^-10 ppm
+        *
+        * adjfreq:
+        *       ppb * 10^9
+        * FCW = ----------
+        *          111
+        *
+        * adjfine:
+        *       ppm_16 * 5^12
+        * FCW = -------------
+        *         111 * 2^4
+        */
+       if (ppb < 0) {
+               neg_adj = 1;
+               ppb = -ppb;
+       }
+
+       /* 2 ^ -53 = 1.1102230246251565404236316680908e-16 */
+       fcw = ppb * 1000000000000ULL;
+
+       /* div_u64 requires a non-negative dividend, hence neg_adj. */
+       fcw = div_u64(fcw, 111022);
+
+       if (neg_adj)
+               fcw = -fcw;
+
+       /* Serialize the 48-bit FCW least-significant byte first. */
+       for (i = 0; i < 6; i++) {
+               buf[i] = fcw & 0xff;
+               fcw >>= 8;
+       }
+
+       mutex_lock(&idtcm->reg_lock);
+
+       err = idtcm_write(idtcm, channel->dpll_freq, DPLL_WR_FREQ,
+                         buf, sizeof(buf));
+
+       mutex_unlock(&idtcm->reg_lock);
+       return err;
+}
+
+/* PHC gettime: read the channel's TOD under reg_lock. */
+static int idtcm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+       struct idtcm_channel *channel =
+               container_of(ptp, struct idtcm_channel, caps);
+       struct idtcm *idtcm = channel->idtcm;
+       int err;
+
+       mutex_lock(&idtcm->reg_lock);
+
+       err = _idtcm_gettime(channel, ts);
+
+       mutex_unlock(&idtcm->reg_lock);
+
+       return err;
+}
+
+/* PHC settime: write the channel's TOD under reg_lock, triggered on
+ * the MSB write (HW_TOD_WR_TRIG_SEL_MSB).
+ */
+static int idtcm_settime(struct ptp_clock_info *ptp,
+                        const struct timespec64 *ts)
+{
+       struct idtcm_channel *channel =
+               container_of(ptp, struct idtcm_channel, caps);
+       struct idtcm *idtcm = channel->idtcm;
+       int err;
+
+       mutex_lock(&idtcm->reg_lock);
+
+       err = _idtcm_settime(channel, ts, HW_TOD_WR_TRIG_SEL_MSB);
+
+       mutex_unlock(&idtcm->reg_lock);
+
+       return err;
+}
+
+/* PHC adjtime: apply a phase offset of @delta ns under reg_lock. */
+static int idtcm_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+       struct idtcm_channel *channel =
+               container_of(ptp, struct idtcm_channel, caps);
+       struct idtcm *idtcm = channel->idtcm;
+       int err;
+
+       mutex_lock(&idtcm->reg_lock);
+
+       err = _idtcm_adjtime(channel, delta);
+
+       mutex_unlock(&idtcm->reg_lock);
+
+       return err;
+}
+
+/* PHC enable: only PTP_CLK_REQ_PEROUT is supported, and only as a
+ * 1-PPS aligned to the second (period exactly 1 s, zero start/period
+ * nanoseconds).  Everything else returns -EOPNOTSUPP.
+ */
+static int idtcm_enable(struct ptp_clock_info *ptp,
+                       struct ptp_clock_request *rq, int on)
+{
+       struct idtcm_channel *channel =
+               container_of(ptp, struct idtcm_channel, caps);
+
+       switch (rq->type) {
+       case PTP_CLK_REQ_PEROUT:
+               if (!on)
+                       return idtcm_pps_enable(channel, false);
+
+               /* Only accept a 1-PPS aligned to the second. */
+               if (rq->perout.start.nsec || rq->perout.period.sec != 1 ||
+                   rq->perout.period.nsec)
+                       return -ERANGE;
+
+               return idtcm_pps_enable(channel, true);
+       default:
+               break;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+/* Start the channel's time-of-day counter: squelch the PPS output,
+ * set TOD_ENABLE in TOD_CFG, then write an all-zero timestamp to get
+ * the clock ticking.  Returns 0 on success or a negative error code.
+ */
+static int idtcm_enable_tod(struct idtcm_channel *channel)
+{
+       struct idtcm *idtcm = channel->idtcm;
+       struct timespec64 ts = {0, 0};
+       u8 cfg;
+       int err;
+
+       err = idtcm_pps_enable(channel, false);
+       if (err)
+               return err;
+
+       /*
+        * Start the TOD clock ticking.
+        */
+       err = idtcm_read(idtcm, channel->tod_n, TOD_CFG, &cfg, sizeof(cfg));
+       if (err)
+               return err;
+
+       cfg |= TOD_ENABLE;
+
+       err = idtcm_write(idtcm, channel->tod_n, TOD_CFG, &cfg, sizeof(cfg));
+       if (err)
+               return err;
+
+       return _idtcm_settime(channel, &ts, HW_TOD_WR_TRIG_SEL_MSB);
+}
+
+/* Log firmware/hardware identification at probe time.
+ *
+ * NOTE(review): the read helpers' return values are ignored; on bus
+ * failure some of these locals may be logged uninitialized (minor,
+ * hotfix, hw_rev_id and bond_id are written straight through to
+ * idtcm_read) -- consider checking the results.
+ */
+static void idtcm_display_version_info(struct idtcm *idtcm)
+{
+       u8 major;
+       u8 minor;
+       u8 hotfix;
+       u32 pipeline;
+       u16 product_id;
+       u16 csr_id;
+       u16 irq_id;
+       u8 hw_rev_id;
+       u8 bond_id;
+
+       idtcm_read_major_release(idtcm, &major);
+       idtcm_read_minor_release(idtcm, &minor);
+       idtcm_read_hotfix_release(idtcm, &hotfix);
+       idtcm_read_pipeline(idtcm, &pipeline);
+
+       idtcm_read_product_id(idtcm, &product_id);
+       idtcm_read_hw_rev_id(idtcm, &hw_rev_id);
+       idtcm_read_bond_id(idtcm, &bond_id);
+       idtcm_read_hw_csr_id(idtcm, &csr_id);
+       idtcm_read_hw_irq_id(idtcm, &irq_id);
+
+       dev_info(&idtcm->client->dev, "Version:  %d.%d.%d, Pipeline %u\t"
+                "0x%04x, Rev %d, Bond %d, CSR %d, IRQ %d\n",
+                major, minor, hotfix, pipeline,
+                product_id, hw_rev_id, bond_id, csr_id, irq_id);
+}
+
+/* PHC capability template shared by all channels; each channel takes
+ * a copy and fills in its own .name.  max_adj is in ppb.
+ */
+static struct ptp_clock_info idtcm_caps = {
+       .owner          = THIS_MODULE,
+       .max_adj        = 244000,
+       .n_per_out      = 1,
+       .adjfreq        = &idtcm_adjfreq,
+       .adjtime        = &idtcm_adjtime,
+       .gettime64      = &idtcm_gettime,
+       .settime64      = &idtcm_settime,
+       .enable         = &idtcm_enable,
+};
+
+/* Initialize channel @index and register it as a PTP hardware clock.
+ *
+ * Selects the channel's per-PLL register bases, copies the shared
+ * capability template, puts the PLL in write-frequency mode, starts
+ * its TOD and registers the PHC.  Returns 0 on success, a negative
+ * error code otherwise; -ENOTSUPP if ptp_clock_register() returned
+ * NULL (presumably PHC support is compiled out -- confirm against
+ * ptp_clock_register() semantics).
+ */
+static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
+{
+       struct idtcm_channel *channel;
+       int err;
+
+       if (!(index < MAX_PHC_PLL))
+               return -EINVAL;
+
+       channel = &idtcm->channel[index];
+
+       /* Per-channel register base addresses. */
+       switch (index) {
+       case 0:
+               channel->dpll_freq = DPLL_FREQ_0;
+               channel->dpll_n = DPLL_0;
+               channel->tod_read_primary = TOD_READ_PRIMARY_0;
+               channel->tod_write = TOD_WRITE_0;
+               channel->tod_n = TOD_0;
+               channel->hw_dpll_n = HW_DPLL_0;
+               channel->dpll_phase = DPLL_PHASE_0;
+               channel->dpll_ctrl_n = DPLL_CTRL_0;
+               channel->dpll_phase_pull_in = DPLL_PHASE_PULL_IN_0;
+               break;
+       case 1:
+               channel->dpll_freq = DPLL_FREQ_1;
+               channel->dpll_n = DPLL_1;
+               channel->tod_read_primary = TOD_READ_PRIMARY_1;
+               channel->tod_write = TOD_WRITE_1;
+               channel->tod_n = TOD_1;
+               channel->hw_dpll_n = HW_DPLL_1;
+               channel->dpll_phase = DPLL_PHASE_1;
+               channel->dpll_ctrl_n = DPLL_CTRL_1;
+               channel->dpll_phase_pull_in = DPLL_PHASE_PULL_IN_1;
+               break;
+       case 2:
+               channel->dpll_freq = DPLL_FREQ_2;
+               channel->dpll_n = DPLL_2;
+               channel->tod_read_primary = TOD_READ_PRIMARY_2;
+               channel->tod_write = TOD_WRITE_2;
+               channel->tod_n = TOD_2;
+               channel->hw_dpll_n = HW_DPLL_2;
+               channel->dpll_phase = DPLL_PHASE_2;
+               channel->dpll_ctrl_n = DPLL_CTRL_2;
+               channel->dpll_phase_pull_in = DPLL_PHASE_PULL_IN_2;
+               break;
+       case 3:
+               channel->dpll_freq = DPLL_FREQ_3;
+               channel->dpll_n = DPLL_3;
+               channel->tod_read_primary = TOD_READ_PRIMARY_3;
+               channel->tod_write = TOD_WRITE_3;
+               channel->tod_n = TOD_3;
+               channel->hw_dpll_n = HW_DPLL_3;
+               channel->dpll_phase = DPLL_PHASE_3;
+               channel->dpll_ctrl_n = DPLL_CTRL_3;
+               channel->dpll_phase_pull_in = DPLL_PHASE_PULL_IN_3;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       channel->idtcm = idtcm;
+
+       channel->caps = idtcm_caps;
+       snprintf(channel->caps.name, sizeof(channel->caps.name),
+                "IDT CM PLL%u", index);
+
+       err = idtcm_set_pll_mode(channel, PLL_MODE_WRITE_FREQUENCY);
+       if (err)
+               return err;
+
+       err = idtcm_enable_tod(channel);
+       if (err)
+               return err;
+
+       channel->ptp_clock = ptp_clock_register(&channel->caps, NULL);
+
+       if (IS_ERR(channel->ptp_clock)) {
+               err = PTR_ERR(channel->ptp_clock);
+               channel->ptp_clock = NULL;
+               return err;
+       }
+
+       if (!channel->ptp_clock)
+               return -ENOTSUPP;
+
+       dev_info(&idtcm->client->dev, "PLL%d registered as ptp%d\n",
+                index, channel->ptp_clock->index);
+
+       return 0;
+}
+
+/* Unregister every PTP clock that was successfully registered. */
+static void ptp_clock_unregister_all(struct idtcm *idtcm)
+{
+       struct idtcm_channel *chan;
+       u8 pll;
+
+       for (pll = 0; pll < MAX_PHC_PLL; pll++) {
+               chan = &idtcm->channel[pll];
+
+               if (chan->ptp_clock)
+                       ptp_clock_unregister(chan->ptp_clock);
+       }
+}
+
+/* Default PLL and per-PLL output masks, used until the firmware image
+ * overrides them via check_and_set_masks().
+ */
+static void set_default_masks(struct idtcm *idtcm)
+{
+       idtcm->pll_mask = DEFAULT_PLL_MASK;
+
+       idtcm->channel[0].output_mask = DEFAULT_OUTPUT_MASK_PLL0;
+       idtcm->channel[1].output_mask = DEFAULT_OUTPUT_MASK_PLL1;
+       idtcm->channel[2].output_mask = DEFAULT_OUTPUT_MASK_PLL2;
+       idtcm->channel[3].output_mask = DEFAULT_OUTPUT_MASK_PLL3;
+}
+
+/* Estimate the bus overhead of one TOD register write.
+ *
+ * Performs TOD_WRITE_OVERHEAD_COUNT_MAX timed writes to channel 2's
+ * HW_DPLL_TOD_OVR__0 registers and stores the average duration in
+ * idtcm->tod_write_overhead_ns.  Returns 0 on success or a negative
+ * error code.
+ */
+static int set_tod_write_overhead(struct idtcm *idtcm)
+{
+       int err;
+       u8 i;
+
+       s64 total_ns = 0;
+
+       ktime_t start;
+       ktime_t stop;
+
+       /* Zero-initialized so no uninitialized stack bytes reach the
+        * device.
+        */
+       char buf[TOD_BYTE_COUNT] = {0};
+
+       struct idtcm_channel *channel = &idtcm->channel[2];
+
+       /* Set page offset; bail out if even this write fails. */
+       err = idtcm_write(idtcm, channel->hw_dpll_n, HW_DPLL_TOD_OVR__0,
+                         buf, sizeof(buf));
+       if (err)
+               return err;
+
+       for (i = 0; i < TOD_WRITE_OVERHEAD_COUNT_MAX; i++) {
+
+               start = ktime_get_raw();
+
+               err = idtcm_write(idtcm, channel->hw_dpll_n,
+                                 HW_DPLL_TOD_OVR__0, buf, sizeof(buf));
+
+               if (err)
+                       return err;
+
+               stop = ktime_get_raw();
+
+               total_ns += ktime_to_ns(stop - start);
+       }
+
+       idtcm->tod_write_overhead_ns = div_s64(total_ns,
+                                              TOD_WRITE_OVERHEAD_COUNT_MAX);
+
+       return err;
+}
+
+/* I2C probe: allocate driver state, load firmware, and register one
+ * PTP clock per PLL flagged in pll_mask.
+ *
+ * All early register traffic runs under reg_lock; every exit path
+ * taken after mutex_lock() now releases the lock (the original early
+ * return on set_tod_write_overhead() failure left it held).
+ */
+static int idtcm_probe(struct i2c_client *client,
+                      const struct i2c_device_id *id)
+{
+       struct idtcm *idtcm;
+       int err;
+       u8 i;
+
+       /* Unused for now */
+       (void)id;
+
+       idtcm = devm_kzalloc(&client->dev, sizeof(struct idtcm), GFP_KERNEL);
+
+       if (!idtcm)
+               return -ENOMEM;
+
+       idtcm->client = client;
+       idtcm->page_offset = 0xff;
+       idtcm->calculate_overhead_flag = 0;
+
+       set_default_masks(idtcm);
+
+       mutex_init(&idtcm->reg_lock);
+       mutex_lock(&idtcm->reg_lock);
+
+       idtcm_display_version_info(idtcm);
+
+       err = set_tod_write_overhead(idtcm);
+
+       if (err) {
+               mutex_unlock(&idtcm->reg_lock);
+               return err;
+       }
+
+       err = idtcm_load_firmware(idtcm, &client->dev);
+
+       if (err)
+               dev_warn(&idtcm->client->dev,
+                        "loading firmware failed with %d\n", err);
+
+       if (idtcm->pll_mask) {
+               for (i = 0; i < MAX_PHC_PLL; i++) {
+                       if (idtcm->pll_mask & (1 << i)) {
+                               err = idtcm_enable_channel(idtcm, i);
+                               if (err)
+                                       break;
+                       }
+               }
+       } else {
+               dev_err(&idtcm->client->dev,
+                       "no PLLs flagged as PHCs, nothing to do\n");
+               err = -ENODEV;
+       }
+
+       mutex_unlock(&idtcm->reg_lock);
+
+       if (err) {
+               ptp_clock_unregister_all(idtcm);
+               return err;
+       }
+
+       i2c_set_clientdata(client, idtcm);
+
+       return 0;
+}
+
+/* I2C remove: unregister all PHCs and tear down the register lock.
+ * The idtcm state itself is devm-allocated and freed automatically.
+ */
+static int idtcm_remove(struct i2c_client *client)
+{
+       struct idtcm *idtcm = i2c_get_clientdata(client);
+
+       ptp_clock_unregister_all(idtcm);
+
+       mutex_destroy(&idtcm->reg_lock);
+
+       return 0;
+}
+
+/* Device-tree match table: all supported 8A340xx ClockMatrix parts. */
+#ifdef CONFIG_OF
+static const struct of_device_id idtcm_dt_id[] = {
+       { .compatible = "idt,8a34000" },
+       { .compatible = "idt,8a34001" },
+       { .compatible = "idt,8a34002" },
+       { .compatible = "idt,8a34003" },
+       { .compatible = "idt,8a34004" },
+       { .compatible = "idt,8a34005" },
+       { .compatible = "idt,8a34006" },
+       { .compatible = "idt,8a34007" },
+       { .compatible = "idt,8a34008" },
+       { .compatible = "idt,8a34009" },
+       { .compatible = "idt,8a34010" },
+       { .compatible = "idt,8a34011" },
+       { .compatible = "idt,8a34012" },
+       { .compatible = "idt,8a34013" },
+       { .compatible = "idt,8a34014" },
+       { .compatible = "idt,8a34015" },
+       { .compatible = "idt,8a34016" },
+       { .compatible = "idt,8a34017" },
+       { .compatible = "idt,8a34018" },
+       { .compatible = "idt,8a34019" },
+       { .compatible = "idt,8a34040" },
+       { .compatible = "idt,8a34041" },
+       { .compatible = "idt,8a34042" },
+       { .compatible = "idt,8a34043" },
+       { .compatible = "idt,8a34044" },
+       { .compatible = "idt,8a34045" },
+       { .compatible = "idt,8a34046" },
+       { .compatible = "idt,8a34047" },
+       { .compatible = "idt,8a34048" },
+       { .compatible = "idt,8a34049" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, idtcm_dt_id);
+#endif
+
+/* Legacy I2C ID table, mirroring the device-tree match table above. */
+static const struct i2c_device_id idtcm_i2c_id[] = {
+       { "8a34000" },
+       { "8a34001" },
+       { "8a34002" },
+       { "8a34003" },
+       { "8a34004" },
+       { "8a34005" },
+       { "8a34006" },
+       { "8a34007" },
+       { "8a34008" },
+       { "8a34009" },
+       { "8a34010" },
+       { "8a34011" },
+       { "8a34012" },
+       { "8a34013" },
+       { "8a34014" },
+       { "8a34015" },
+       { "8a34016" },
+       { "8a34017" },
+       { "8a34018" },
+       { "8a34019" },
+       { "8a34040" },
+       { "8a34041" },
+       { "8a34042" },
+       { "8a34043" },
+       { "8a34044" },
+       { "8a34045" },
+       { "8a34046" },
+       { "8a34047" },
+       { "8a34048" },
+       { "8a34049" },
+       {},
+};
+MODULE_DEVICE_TABLE(i2c, idtcm_i2c_id);
+
+/* I2C driver glue; module_i2c_driver() generates init/exit. */
+static struct i2c_driver idtcm_driver = {
+       .driver = {
+               .of_match_table = of_match_ptr(idtcm_dt_id),
+               .name           = "idtcm",
+       },
+       .probe          = idtcm_probe,
+       .remove         = idtcm_remove,
+       .id_table       = idtcm_i2c_id,
+};
+
+module_i2c_driver(idtcm_driver);
diff --git a/drivers/ptp/ptp_clockmatrix.h b/drivers/ptp/ptp_clockmatrix.h
new file mode 100644 (file)
index 0000000..6c1f93a
--- /dev/null
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PTP hardware clock driver for the IDT ClockMatrix(TM) family of timing and
+ * synchronization devices.
+ *
+ * Copyright (C) 2019 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#ifndef PTP_IDTCLOCKMATRIX_H
+#define PTP_IDTCLOCKMATRIX_H
+
+#include <linux/ktime.h>
+
+#include "idt8a340_reg.h"
+
+#define FW_FILENAME    "idtcm.bin"
+#define MAX_PHC_PLL    4
+
+/* Firmware config-space addresses intercepted during download. */
+#define PLL_MASK_ADDR          (0xFFA5)
+#define DEFAULT_PLL_MASK       (0x04)
+
+/* Update one byte of a u16 in place. */
+#define SET_U16_LSB(orig, val8) (orig = (0xff00 & (orig)) | (val8))
+#define SET_U16_MSB(orig, val8) (orig = (0x00ff & (orig)) | (val8 << 8))
+
+#define OUTPUT_MASK_PLL0_ADDR          (0xFFB0)
+#define OUTPUT_MASK_PLL1_ADDR          (0xFFB2)
+#define OUTPUT_MASK_PLL2_ADDR          (0xFFB4)
+#define OUTPUT_MASK_PLL3_ADDR          (0xFFB6)
+
+#define DEFAULT_OUTPUT_MASK_PLL0       (0x003)
+#define DEFAULT_OUTPUT_MASK_PLL1       (0x00c)
+#define DEFAULT_OUTPUT_MASK_PLL2       (0x030)
+#define DEFAULT_OUTPUT_MASK_PLL3       (0x0c0)
+
+/* Timing constants for reset, phase pull-in and overhead sampling. */
+#define POST_SM_RESET_DELAY_MS         (3000)
+#define PHASE_PULL_IN_THRESHOLD_NS     (150000)
+#define TOD_WRITE_OVERHEAD_COUNT_MAX    (5)
+#define TOD_BYTE_COUNT                  (11)
+
+/* Values of DPLL_N.DPLL_MODE.PLL_MODE */
+enum pll_mode {
+       PLL_MODE_MIN = 0,
+       PLL_MODE_NORMAL = PLL_MODE_MIN,
+       PLL_MODE_WRITE_PHASE = 1,
+       PLL_MODE_WRITE_FREQUENCY = 2,
+       PLL_MODE_GPIO_INC_DEC = 3,
+       PLL_MODE_SYNTHESIS = 4,
+       PLL_MODE_PHASE_MEASUREMENT = 5,
+       PLL_MODE_MAX = PLL_MODE_PHASE_MEASUREMENT,
+};
+
+/* Trigger sources for hardware TOD writes. */
+enum hw_tod_write_trig_sel {
+       HW_TOD_WR_TRIG_SEL_MIN = 0,
+       HW_TOD_WR_TRIG_SEL_MSB = HW_TOD_WR_TRIG_SEL_MIN,
+       HW_TOD_WR_TRIG_SEL_RESERVED = 1,
+       HW_TOD_WR_TRIG_SEL_TOD_PPS = 2,
+       HW_TOD_WR_TRIG_SEL_IRIGB_PPS = 3,
+       HW_TOD_WR_TRIG_SEL_PWM_PPS = 4,
+       HW_TOD_WR_TRIG_SEL_GPIO = 5,
+       HW_TOD_WR_TRIG_SEL_FOD_SYNC = 6,
+       WR_TRIG_SEL_MAX = HW_TOD_WR_TRIG_SEL_FOD_SYNC,
+};
+
+struct idtcm;
+
+/* Per-PLL (per-PHC) state; the u16 fields are per-channel register
+ * base addresses selected in idtcm_enable_channel().
+ */
+struct idtcm_channel {
+       struct ptp_clock_info   caps;
+       struct ptp_clock        *ptp_clock;
+       struct idtcm            *idtcm;
+       u16                     dpll_phase;
+       u16                     dpll_freq;
+       u16                     dpll_n;
+       u16                     dpll_ctrl_n;
+       u16                     dpll_phase_pull_in;
+       u16                     tod_read_primary;
+       u16                     tod_write;
+       u16                     tod_n;
+       u16                     hw_dpll_n;
+       enum pll_mode           pll_mode;
+       u16                     output_mask;
+};
+
+/* Driver instance state, one per I2C client. */
+struct idtcm {
+       struct idtcm_channel    channel[MAX_PHC_PLL];
+       struct i2c_client       *client;
+       u8                      page_offset;
+       u8                      pll_mask;
+
+       /* Overhead calculation for adjtime */
+       u8                      calculate_overhead_flag;
+       s64                     tod_write_overhead_ns;
+       ktime_t                 start_time;
+
+       /* Protects I2C read/modify/write registers from concurrent access */
+       struct mutex            reg_lock;
+};
+
+/* One 4-byte firmware record: write @value to (@hiaddr << 8 | @loaddr). */
+struct idtcm_fwrc {
+       u8 hiaddr;
+       u8 loaddr;
+       u8 value;
+       u8 reserved;
+} __packed;
+
+#endif /* PTP_IDTCLOCKMATRIX_H */
index 0dcfdc8..82d31ba 100644 (file)
@@ -240,14 +240,12 @@ static int ptp_dte_probe(struct platform_device *pdev)
 {
        struct ptp_dte *ptp_dte;
        struct device *dev = &pdev->dev;
-       struct resource *res;
 
        ptp_dte = devm_kzalloc(dev, sizeof(struct ptp_dte), GFP_KERNEL);
        if (!ptp_dte)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       ptp_dte->regs = devm_ioremap_resource(dev, res);
+       ptp_dte->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(ptp_dte->regs))
                return PTR_ERR(ptp_dte->regs);
 
index a58b45d..2a34a2a 100644 (file)
@@ -82,6 +82,7 @@ enum qdio_irq_states {
 #define QDIO_SIGA_WRITE                0x00
 #define QDIO_SIGA_READ         0x01
 #define QDIO_SIGA_SYNC         0x02
+#define QDIO_SIGA_WRITEM       0x03
 #define QDIO_SIGA_WRITEQ       0x04
 #define QDIO_SIGA_QEBSM_FLAG   0x80
 
index 5b63c50..7368407 100644 (file)
@@ -310,18 +310,19 @@ static inline int qdio_siga_sync_q(struct qdio_q *q)
                return qdio_siga_sync(q, q->mask, 0);
 }
 
-static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit,
-       unsigned long aob)
+static int qdio_siga_output(struct qdio_q *q, unsigned int count,
+                           unsigned int *busy_bit, unsigned long aob)
 {
        unsigned long schid = *((u32 *) &q->irq_ptr->schid);
        unsigned int fc = QDIO_SIGA_WRITE;
        u64 start_time = 0;
        int retries = 0, cc;
-       unsigned long laob = 0;
 
-       if (aob) {
-               fc = QDIO_SIGA_WRITEQ;
-               laob = aob;
+       if (queue_type(q) == QDIO_IQDIO_QFMT && !multicast_outbound(q)) {
+               if (count > 1)
+                       fc = QDIO_SIGA_WRITEM;
+               else if (aob)
+                       fc = QDIO_SIGA_WRITEQ;
        }
 
        if (is_qebsm(q)) {
@@ -329,7 +330,7 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit,
                fc |= QDIO_SIGA_QEBSM_FLAG;
        }
 again:
-       cc = do_siga_output(schid, q->mask, busy_bit, fc, laob);
+       cc = do_siga_output(schid, q->mask, busy_bit, fc, aob);
 
        /* hipersocket busy condition */
        if (unlikely(*busy_bit)) {
@@ -781,7 +782,8 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q, unsigned int start)
        return count;
 }
 
-static int qdio_kick_outbound_q(struct qdio_q *q, unsigned long aob)
+static int qdio_kick_outbound_q(struct qdio_q *q, unsigned int count,
+                               unsigned long aob)
 {
        int retries = 0, cc;
        unsigned int busy_bit;
@@ -793,7 +795,7 @@ static int qdio_kick_outbound_q(struct qdio_q *q, unsigned long aob)
 retry:
        qperf_inc(q, siga_write);
 
-       cc = qdio_siga_output(q, &busy_bit, aob);
+       cc = qdio_siga_output(q, count, &busy_bit, aob);
        switch (cc) {
        case 0:
                break;
@@ -1526,7 +1528,7 @@ set:
  * @count: how many buffers are filled
  */
 static int handle_outbound(struct qdio_q *q, unsigned int callflags,
-                          int bufnr, int count)
+                          unsigned int bufnr, unsigned int count)
 {
        const unsigned int scan_threshold = q->irq_ptr->scan_threshold;
        unsigned char state = 0;
@@ -1549,13 +1551,10 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags,
        if (queue_type(q) == QDIO_IQDIO_QFMT) {
                unsigned long phys_aob = 0;
 
-               /* One SIGA-W per buffer required for unicast HSI */
-               WARN_ON_ONCE(count > 1 && !multicast_outbound(q));
-
-               if (q->u.out.use_cq)
+               if (q->u.out.use_cq && count == 1)
                        phys_aob = qdio_aob_for_buffer(&q->u.out, bufnr);
 
-               rc = qdio_kick_outbound_q(q, phys_aob);
+               rc = qdio_kick_outbound_q(q, count, phys_aob);
        } else if (need_siga_sync(q)) {
                rc = qdio_siga_sync_q(q);
        } else if (count < QDIO_MAX_BUFFERS_PER_Q &&
@@ -1564,7 +1563,7 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags,
                /* The previous buffer is not processed yet, tack on. */
                qperf_inc(q, fast_requeue);
        } else {
-               rc = qdio_kick_outbound_q(q, 0);
+               rc = qdio_kick_outbound_q(q, count, 0);
        }
 
        /* Let drivers implement their own completion scanning: */
index e4b55f9..d081545 100644 (file)
@@ -532,6 +532,8 @@ struct qeth_qdio_out_q {
        struct timer_list timer;
        struct qeth_hdr *prev_hdr;
        u8 bulk_start;
+       u8 bulk_count;
+       u8 bulk_max;
 };
 
 #define qeth_for_each_output_queue(card, q, i)         \
@@ -878,6 +880,13 @@ static inline u16 qeth_iqd_translate_txq(struct net_device *dev, u16 txq)
        return txq;
 }
 
+static inline bool qeth_iqd_is_mcast_queue(struct qeth_card *card,
+                                          struct qeth_qdio_out_q *queue)
+{
+       return qeth_iqd_translate_txq(card->dev, queue->queue_no) ==
+              QETH_IQD_MCAST_TXQ;
+}
+
 static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
                                          unsigned int elements)
 {
index dda2743..9e8bd8e 100644 (file)
@@ -1513,7 +1513,6 @@ int qeth_qdio_clear_card(struct qeth_card *card, int use_halt)
        rc = qeth_clear_halt_card(card, use_halt);
        if (rc)
                QETH_CARD_TEXT_(card, 3, "2err%d", rc);
-       card->state = CARD_STATE_DOWN;
        return rc;
 }
 EXPORT_SYMBOL_GPL(qeth_qdio_clear_card);
@@ -2634,6 +2633,18 @@ static int qeth_init_input_buffer(struct qeth_card *card,
        return 0;
 }
 
+static unsigned int qeth_tx_select_bulk_max(struct qeth_card *card,
+                                           struct qeth_qdio_out_q *queue)
+{
+       if (!IS_IQD(card) ||
+           qeth_iqd_is_mcast_queue(card, queue) ||
+           card->options.cq == QETH_CQ_ENABLED ||
+           qdio_get_ssqd_desc(CARD_DDEV(card), &card->ssqd))
+               return 1;
+
+       return card->ssqd.mmwc ? card->ssqd.mmwc : 1;
+}
+
 int qeth_init_qdio_queues(struct qeth_card *card)
 {
        unsigned int i;
@@ -2673,6 +2684,8 @@ int qeth_init_qdio_queues(struct qeth_card *card)
                queue->do_pack = 0;
                queue->prev_hdr = NULL;
                queue->bulk_start = 0;
+               queue->bulk_count = 0;
+               queue->bulk_max = qeth_tx_select_bulk_max(card, queue);
                atomic_set(&queue->used_buffers, 0);
                atomic_set(&queue->set_pci_flags_count, 0);
                atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
@@ -3107,7 +3120,7 @@ static void qeth_queue_input_buffer(struct qeth_card *card, int index)
                for (i = queue->next_buf_to_init;
                     i < queue->next_buf_to_init + count; ++i) {
                        if (qeth_init_input_buffer(card,
-                               &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q])) {
+                               &queue->bufs[QDIO_BUFNR(i)])) {
                                break;
                        } else {
                                newcount++;
@@ -3149,8 +3162,8 @@ static void qeth_queue_input_buffer(struct qeth_card *card, int index)
                if (rc) {
                        QETH_CARD_TEXT(card, 2, "qinberr");
                }
-               queue->next_buf_to_init = (queue->next_buf_to_init + count) %
-                                         QDIO_MAX_BUFFERS_PER_Q;
+               queue->next_buf_to_init = QDIO_BUFNR(queue->next_buf_to_init +
+                                                    count);
        }
 }
 
@@ -3198,7 +3211,7 @@ static int qeth_prep_flush_pack_buffer(struct qeth_qdio_out_q *queue)
                /* it's a packing buffer */
                atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
                queue->next_buf_to_fill =
-                       (queue->next_buf_to_fill + 1) % QDIO_MAX_BUFFERS_PER_Q;
+                       QDIO_BUFNR(queue->next_buf_to_fill + 1);
                return 1;
        }
        return 0;
@@ -3252,7 +3265,8 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
        unsigned int qdio_flags;
 
        for (i = index; i < index + count; ++i) {
-               int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+               unsigned int bidx = QDIO_BUFNR(i);
+
                buf = queue->bufs[bidx];
                buf->buffer->element[buf->next_element_to_fill - 1].eflags |=
                                SBAL_EFLAGS_LAST_ENTRY;
@@ -3318,10 +3332,11 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 
 static void qeth_flush_queue(struct qeth_qdio_out_q *queue)
 {
-       qeth_flush_buffers(queue, queue->bulk_start, 1);
+       qeth_flush_buffers(queue, queue->bulk_start, queue->bulk_count);
 
-       queue->bulk_start = QDIO_BUFNR(queue->bulk_start + 1);
+       queue->bulk_start = QDIO_BUFNR(queue->bulk_start + queue->bulk_count);
        queue->prev_hdr = NULL;
+       queue->bulk_count = 0;
 }
 
 static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
@@ -3419,8 +3434,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
        }
 
        for (i = first_element; i < first_element + count; ++i) {
-               int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
-               struct qdio_buffer *buffer = cq->qdio_bufs[bidx];
+               struct qdio_buffer *buffer = cq->qdio_bufs[QDIO_BUFNR(i)];
                int e = 0;
 
                while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
@@ -3441,8 +3455,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
                        "QDIO reported an error, rc=%i\n", rc);
                QETH_CARD_TEXT(card, 2, "qcqherr");
        }
-       card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init
-                                  + count) % QDIO_MAX_BUFFERS_PER_Q;
+
+       cq->next_buf_to_init = QDIO_BUFNR(cq->next_buf_to_init + count);
 }
 
 static void qeth_qdio_input_handler(struct ccw_device *ccwdev,
@@ -3468,7 +3482,6 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
 {
        struct qeth_card *card        = (struct qeth_card *) card_ptr;
        struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue];
-       struct qeth_qdio_out_buffer *buffer;
        struct net_device *dev = card->dev;
        struct netdev_queue *txq;
        int i;
@@ -3482,10 +3495,10 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
        }
 
        for (i = first_element; i < (first_element + count); ++i) {
-               int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
-               buffer = queue->bufs[bidx];
-               qeth_handle_send_error(card, buffer, qdio_error);
-               qeth_clear_output_buffer(queue, buffer, qdio_error, 0);
+               struct qeth_qdio_out_buffer *buf = queue->bufs[QDIO_BUFNR(i)];
+
+               qeth_handle_send_error(card, buf, qdio_error);
+               qeth_clear_output_buffer(queue, buf, qdio_error, 0);
        }
 
        atomic_sub(count, &queue->used_buffers);
@@ -3680,10 +3693,10 @@ check_layout:
 }
 
 static bool qeth_iqd_may_bulk(struct qeth_qdio_out_q *queue,
-                             struct qeth_qdio_out_buffer *buffer,
                              struct sk_buff *curr_skb,
                              struct qeth_hdr *curr_hdr)
 {
+       struct qeth_qdio_out_buffer *buffer = queue->bufs[queue->bulk_start];
        struct qeth_hdr *prev_hdr = queue->prev_hdr;
 
        if (!prev_hdr)
@@ -3803,13 +3816,14 @@ static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue,
                       struct qeth_hdr *hdr, unsigned int offset,
                       unsigned int hd_len)
 {
-       struct qeth_qdio_out_buffer *buffer = queue->bufs[queue->bulk_start];
        unsigned int bytes = qdisc_pkt_len(skb);
+       struct qeth_qdio_out_buffer *buffer;
        unsigned int next_element;
        struct netdev_queue *txq;
        bool stopped = false;
        bool flush;
 
+       buffer = queue->bufs[QDIO_BUFNR(queue->bulk_start + queue->bulk_count)];
        txq = netdev_get_tx_queue(card->dev, skb_get_queue_mapping(skb));
 
        /* Just a sanity check, the wake/stop logic should ensure that we always
@@ -3818,11 +3832,23 @@ static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue,
        if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
                return -EBUSY;
 
-       if ((buffer->next_element_to_fill + elements > queue->max_elements) ||
-           !qeth_iqd_may_bulk(queue, buffer, skb, hdr)) {
-               atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
-               qeth_flush_queue(queue);
-               buffer = queue->bufs[queue->bulk_start];
+       flush = !qeth_iqd_may_bulk(queue, skb, hdr);
+
+       if (flush ||
+           (buffer->next_element_to_fill + elements > queue->max_elements)) {
+               if (buffer->next_element_to_fill > 0) {
+                       atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
+                       queue->bulk_count++;
+               }
+
+               if (queue->bulk_count >= queue->bulk_max)
+                       flush = true;
+
+               if (flush)
+                       qeth_flush_queue(queue);
+
+               buffer = queue->bufs[QDIO_BUFNR(queue->bulk_start +
+                                               queue->bulk_count)];
 
                /* Sanity-check again: */
                if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
@@ -3848,7 +3874,13 @@ static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue,
 
        if (flush || next_element >= queue->max_elements) {
                atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
-               qeth_flush_queue(queue);
+               queue->bulk_count++;
+
+               if (queue->bulk_count >= queue->bulk_max)
+                       flush = true;
+
+               if (flush)
+                       qeth_flush_queue(queue);
        }
 
        if (stopped && !qeth_out_queue_is_full(queue))
@@ -3898,8 +3930,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
                        atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
                        flush_count++;
                        queue->next_buf_to_fill =
-                               (queue->next_buf_to_fill + 1) %
-                               QDIO_MAX_BUFFERS_PER_Q;
+                               QDIO_BUFNR(queue->next_buf_to_fill + 1);
                        buffer = queue->bufs[queue->next_buf_to_fill];
 
                        /* We stepped forward, so sanity-check again: */
@@ -3932,8 +3963,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
        if (!queue->do_pack || stopped || next_element >= queue->max_elements) {
                flush_count++;
                atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
-               queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) %
-                                         QDIO_MAX_BUFFERS_PER_Q;
+               queue->next_buf_to_fill =
+                               QDIO_BUFNR(queue->next_buf_to_fill + 1);
        }
 
        if (flush_count)
@@ -4261,7 +4292,6 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback)
        }
        return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_set_access_ctrl_online);
 
 void qeth_tx_timeout(struct net_device *dev)
 {
@@ -4977,6 +5007,15 @@ retriable:
                        goto out;
                }
        }
+
+       if (!qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP) ||
+           (card->info.hwtrap && qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM)))
+               card->info.hwtrap = 0;
+
+       rc = qeth_set_access_ctrl_online(card, 0);
+       if (rc)
+               goto out;
+
        return 0;
 out:
        dev_warn(&card->gdev->dev, "The qeth device driver failed to recover "
@@ -5165,8 +5204,7 @@ int qeth_poll(struct napi_struct *napi, int budget)
                                card->rx.b_count--;
                                if (card->rx.b_count) {
                                        card->rx.b_index =
-                                               (card->rx.b_index + 1) %
-                                               QDIO_MAX_BUFFERS_PER_Q;
+                                               QDIO_BUFNR(card->rx.b_index + 1);
                                        card->rx.b_element =
                                                &card->qdio.in_q
                                                ->bufs[card->rx.b_index]
@@ -5182,9 +5220,9 @@ int qeth_poll(struct napi_struct *napi, int budget)
                }
        }
 
-       napi_complete_done(napi, work_done);
-       if (qdio_start_irq(card->data.ccwdev, 0))
-               napi_schedule(&card->napi);
+       if (napi_complete_done(napi, work_done) &&
+           qdio_start_irq(CARD_DDEV(card), 0))
+               napi_schedule(napi);
 out:
        return work_done;
 }
index 6420b58..9ad0d6f 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <asm/qeth.h>
 #include <uapi/linux/if_ether.h>
+#include <uapi/linux/in6.h>
 
 #define IPA_PDU_HEADER_SIZE    0x40
 #define QETH_IPA_PDU_LEN_TOTAL(buffer) (buffer + 0x0e)
@@ -365,8 +366,7 @@ struct qeth_ipacmd_setdelip6 {
 struct qeth_ipacmd_setdelipm {
        __u8 mac[6];
        __u8 padding[2];
-       __u8 ip6[12];
-       __u8 ip4[4];
+       struct in6_addr ip;
 } __attribute__ ((packed));
 
 struct qeth_ipacmd_layer2setdelmac {
index bd8143e..8f3093d 100644 (file)
@@ -759,14 +759,6 @@ add_napi:
        return rc;
 }
 
-static int qeth_l2_start_ipassists(struct qeth_card *card)
-{
-       /* configure isolation level */
-       if (qeth_set_access_ctrl_online(card, 0))
-               return -ENODEV;
-       return 0;
-}
-
 static void qeth_l2_trace_features(struct qeth_card *card)
 {
        /* Set BridgePort features */
@@ -797,13 +789,6 @@ static int qeth_l2_set_online(struct ccwgroup_device *gdev)
                goto out_remove;
        }
 
-       if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) {
-               if (card->info.hwtrap &&
-                   qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM))
-                       card->info.hwtrap = 0;
-       } else
-               card->info.hwtrap = 0;
-
        qeth_bridgeport_query_support(card);
        if (card->options.sbp.supported_funcs)
                dev_info(&card->gdev->dev,
@@ -825,12 +810,6 @@ static int qeth_l2_set_online(struct ccwgroup_device *gdev)
        /* softsetup */
        QETH_CARD_TEXT(card, 2, "softsetp");
 
-       if (IS_OSD(card) || IS_OSX(card)) {
-               rc = qeth_l2_start_ipassists(card);
-               if (rc)
-                       goto out_remove;
-       }
-
        rc = qeth_init_qdio_queues(card);
        if (rc) {
                QETH_CARD_TEXT_(card, 2, "6err%d", rc);
index 87659cf..ba913d1 100644 (file)
@@ -24,7 +24,6 @@ enum qeth_ip_types {
 struct qeth_ipaddr {
        struct hlist_node hnode;
        enum qeth_ip_types type;
-       unsigned char mac[ETH_ALEN];
        u8 is_multicast:1;
        u8 in_progress:1;
        u8 disp_flag:2;
@@ -37,7 +36,7 @@ struct qeth_ipaddr {
        enum qeth_prot_versions proto;
        union {
                struct {
-                       unsigned int addr;
+                       __be32 addr;
                        unsigned int mask;
                } a4;
                struct {
@@ -74,12 +73,10 @@ static inline bool qeth_l3_addr_match_all(struct qeth_ipaddr *a1,
         * so 'proto' and 'addr' match for sure.
         *
         * For ucast:
-        * -    'mac' is always 0.
         * -    'mask'/'pfxlen' for RXIP/VIPA is always 0. For NORMAL, matching
         *      values are required to avoid mixups in takeover eligibility.
         *
         * For mcast,
-        * -    'mac' is mapped from the IP, and thus always matches.
         * -    'mask'/'pfxlen' is always 0.
         */
        if (a1->type != a2->type)
@@ -89,21 +86,12 @@ static inline bool qeth_l3_addr_match_all(struct qeth_ipaddr *a1,
        return a1->u.a4.mask == a2->u.a4.mask;
 }
 
-static inline  u64 qeth_l3_ipaddr_hash(struct qeth_ipaddr *addr)
+static inline u32 qeth_l3_ipaddr_hash(struct qeth_ipaddr *addr)
 {
-       u64  ret = 0;
-       u8 *point;
-
-       if (addr->proto == QETH_PROT_IPV6) {
-               point = (u8 *) &addr->u.a6.addr;
-               ret = get_unaligned((u64 *)point) ^
-                       get_unaligned((u64 *) (point + 8));
-       }
-       if (addr->proto == QETH_PROT_IPV4) {
-               point = (u8 *) &addr->u.a4.addr;
-               ret = get_unaligned((u32 *) point);
-       }
-       return ret;
+       if (addr->proto == QETH_PROT_IPV6)
+               return ipv6_addr_hash(&addr->u.a6.addr);
+       else
+               return ipv4_addr_hash(addr->u.a4.addr);
 }
 
 struct qeth_ipato_entry {
index d7bfc7a..70d4586 100644 (file)
@@ -76,7 +76,7 @@ static struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
 static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
                                                   struct qeth_ipaddr *query)
 {
-       u64 key = qeth_l3_ipaddr_hash(query);
+       u32 key = qeth_l3_ipaddr_hash(query);
        struct qeth_ipaddr *addr;
 
        if (query->is_multicast) {
@@ -381,12 +381,13 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
        if (!iob)
                return -ENOMEM;
        cmd = __ipa_cmd(iob);
-       ether_addr_copy(cmd->data.setdelipm.mac, addr->mac);
-       if (addr->proto == QETH_PROT_IPV6)
-               memcpy(cmd->data.setdelipm.ip6, &addr->u.a6.addr,
-                      sizeof(struct in6_addr));
-       else
-               memcpy(&cmd->data.setdelipm.ip4, &addr->u.a4.addr, 4);
+       if (addr->proto == QETH_PROT_IPV6) {
+               cmd->data.setdelipm.ip = addr->u.a6.addr;
+               ipv6_eth_mc_map(&addr->u.a6.addr, cmd->data.setdelipm.mac);
+       } else {
+               cmd->data.setdelipm.ip.s6_addr32[3] = addr->u.a4.addr;
+               ip_eth_mc_map(addr->u.a4.addr, cmd->data.setdelipm.mac);
+       }
 
        return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL);
 }
@@ -953,8 +954,6 @@ static int qeth_l3_start_ipassists(struct qeth_card *card)
 {
        QETH_CARD_TEXT(card, 3, "strtipas");
 
-       if (qeth_set_access_ctrl_online(card, 0))
-               return -EIO;
        qeth_l3_start_ipa_arp_processing(card); /* go on*/
        qeth_l3_start_ipa_source_mac(card);     /* go on*/
        qeth_l3_start_ipa_vlan(card);           /* go on*/
@@ -1129,8 +1128,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev)
 
        for (im4 = rcu_dereference(in4_dev->mc_list); im4 != NULL;
             im4 = rcu_dereference(im4->next_rcu)) {
-               ip_eth_mc_map(im4->multiaddr, tmp->mac);
-               tmp->u.a4.addr = be32_to_cpu(im4->multiaddr);
+               tmp->u.a4.addr = im4->multiaddr;
                tmp->is_multicast = 1;
 
                ipm = qeth_l3_find_addr_by_ip(card, tmp);
@@ -1141,8 +1139,8 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev)
                        ipm = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
                        if (!ipm)
                                continue;
-                       ether_addr_copy(ipm->mac, tmp->mac);
-                       ipm->u.a4.addr = be32_to_cpu(im4->multiaddr);
+
+                       ipm->u.a4.addr = im4->multiaddr;
                        ipm->is_multicast = 1;
                        ipm->disp_flag = QETH_DISP_ADDR_ADD;
                        hash_add(card->ip_mc_htable,
@@ -1209,9 +1207,7 @@ static void qeth_l3_add_mc6_to_hash(struct qeth_card *card,
                return;
 
        for (im6 = in6_dev->mc_list; im6 != NULL; im6 = im6->next) {
-               ipv6_eth_mc_map(&im6->mca_addr, tmp->mac);
-               memcpy(&tmp->u.a6.addr, &im6->mca_addr.s6_addr,
-                      sizeof(struct in6_addr));
+               tmp->u.a6.addr = im6->mca_addr;
                tmp->is_multicast = 1;
 
                ipm = qeth_l3_find_addr_by_ip(card, tmp);
@@ -1225,9 +1221,7 @@ static void qeth_l3_add_mc6_to_hash(struct qeth_card *card,
                if (!ipm)
                        continue;
 
-               ether_addr_copy(ipm->mac, tmp->mac);
-               memcpy(&ipm->u.a6.addr, &im6->mca_addr.s6_addr,
-                      sizeof(struct in6_addr));
+               ipm->u.a6.addr = im6->mca_addr;
                ipm->is_multicast = 1;
                ipm->disp_flag = QETH_DISP_ADDR_ADD;
                hash_add(card->ip_mc_htable,
@@ -2313,13 +2307,6 @@ static int qeth_l3_set_online(struct ccwgroup_device *gdev)
                goto out_remove;
        }
 
-       if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) {
-               if (card->info.hwtrap &&
-                   qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM))
-                       card->info.hwtrap = 0;
-       } else
-               card->info.hwtrap = 0;
-
        card->state = CARD_STATE_HARDSETUP;
        qeth_print_status_message(card);
 
@@ -2557,7 +2544,7 @@ static int qeth_l3_ip_event(struct notifier_block *this,
        QETH_CARD_TEXT(card, 3, "ipevent");
 
        qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4);
-       addr.u.a4.addr = be32_to_cpu(ifa->ifa_address);
+       addr.u.a4.addr = ifa->ifa_address;
        addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask);
 
        return qeth_l3_handle_ip_event(card, &addr, event);
index bf68d86..1e164e0 100644 (file)
@@ -1749,6 +1749,13 @@ struct qman_portal *qman_get_affine_portal(int cpu)
 }
 EXPORT_SYMBOL(qman_get_affine_portal);
 
+int qman_start_using_portal(struct qman_portal *p, struct device *dev)
+{
+       return (!device_link_add(dev, p->config->dev,
+                                DL_FLAG_AUTOREMOVE_CONSUMER)) ? -EINVAL : 0;
+}
+EXPORT_SYMBOL(qman_start_using_portal);
+
 int qman_p_poll_dqrr(struct qman_portal *p, unsigned int limit)
 {
        return __poll_portal_fast(p, limit);
index f9502db..9bb36c3 100644 (file)
@@ -1171,6 +1171,11 @@ static int spi_transfer_one_message(struct spi_controller *ctlr,
                spi_statistics_add_transfer_stats(statm, xfer, ctlr);
                spi_statistics_add_transfer_stats(stats, xfer, ctlr);
 
+               if (!ctlr->ptp_sts_supported) {
+                       xfer->ptp_sts_word_pre = 0;
+                       ptp_read_system_prets(xfer->ptp_sts);
+               }
+
                if (xfer->tx_buf || xfer->rx_buf) {
                        reinit_completion(&ctlr->xfer_completion);
 
@@ -1197,6 +1202,11 @@ static int spi_transfer_one_message(struct spi_controller *ctlr,
                                        xfer->len);
                }
 
+               if (!ctlr->ptp_sts_supported) {
+                       ptp_read_system_postts(xfer->ptp_sts);
+                       xfer->ptp_sts_word_post = xfer->len;
+               }
+
                trace_spi_transfer_stop(msg, xfer);
 
                if (msg->status != -EINPROGRESS)
@@ -1265,6 +1275,7 @@ EXPORT_SYMBOL_GPL(spi_finalize_current_transfer);
  */
 static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 {
+       struct spi_transfer *xfer;
        struct spi_message *msg;
        bool was_busy = false;
        unsigned long flags;
@@ -1391,6 +1402,13 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
                goto out;
        }
 
+       if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) {
+               list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+                       xfer->ptp_sts_word_pre = 0;
+                       ptp_read_system_prets(xfer->ptp_sts);
+               }
+       }
+
        ret = ctlr->transfer_one_message(ctlr, msg);
        if (ret) {
                dev_err(&ctlr->dev,
@@ -1418,6 +1436,99 @@ static void spi_pump_messages(struct kthread_work *work)
        __spi_pump_messages(ctlr, true);
 }
 
+/**
+ * spi_take_timestamp_pre - helper for drivers to collect the beginning of the
+ *                         TX timestamp for the requested byte from the SPI
+ *                         transfer. The frequency with which this function
+ *                         must be called (once per word, once for the whole
+ *                         transfer, once per batch of words etc) is arbitrary
+ *                         as long as the @tx buffer offset is greater than or
+ *                         equal to the requested byte at the time of the
+ *                         call. The timestamp is only taken once, at the
+ *                         first such call. It is assumed that the driver
+ *                         advances its @tx buffer pointer monotonically.
+ * @ctlr: Pointer to the spi_controller structure of the driver
+ * @xfer: Pointer to the transfer being timestamped
+ * @tx: Pointer to the current word within the xfer->tx_buf that the driver is
+ *     preparing to transmit right now.
+ * @irqs_off: If true, will disable IRQs and preemption for the duration of the
+ *           transfer, for less jitter in time measurement. Only compatible
+ *           with PIO drivers. If true, must follow up with
+ *           spi_take_timestamp_post or otherwise system will crash.
+ *           WARNING: for fully predictable results, the CPU frequency must
+ *           also be under control (governor).
+ */
+void spi_take_timestamp_pre(struct spi_controller *ctlr,
+                           struct spi_transfer *xfer,
+                           const void *tx, bool irqs_off)
+{
+       u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8);
+
+       if (!xfer->ptp_sts)
+               return;
+
+       if (xfer->timestamped_pre)
+               return;
+
+       if (tx < (xfer->tx_buf + xfer->ptp_sts_word_pre * bytes_per_word))
+               return;
+
+       /* Capture the resolution of the timestamp */
+       xfer->ptp_sts_word_pre = (tx - xfer->tx_buf) / bytes_per_word;
+
+       xfer->timestamped_pre = true;
+
+       if (irqs_off) {
+               local_irq_save(ctlr->irq_flags);
+               preempt_disable();
+       }
+
+       ptp_read_system_prets(xfer->ptp_sts);
+}
+EXPORT_SYMBOL_GPL(spi_take_timestamp_pre);
+
+/**
+ * spi_take_timestamp_post - helper for drivers to collect the end of the
+ *                          TX timestamp for the requested byte from the SPI
+ *                          transfer. Can be called with an arbitrary
+ *                          frequency: only the first call where @tx exceeds
+ *                          or is equal to the requested word will be
+ *                          timestamped.
+ * @ctlr: Pointer to the spi_controller structure of the driver
+ * @xfer: Pointer to the transfer being timestamped
+ * @tx: Pointer to the current word within the xfer->tx_buf that the driver has
+ *     just transmitted.
+ * @irqs_off: If true, will re-enable IRQs and preemption for the local CPU.
+ */
+void spi_take_timestamp_post(struct spi_controller *ctlr,
+                            struct spi_transfer *xfer,
+                            const void *tx, bool irqs_off)
+{
+       u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8);
+
+       if (!xfer->ptp_sts)
+               return;
+
+       if (xfer->timestamped_post)
+               return;
+
+       if (tx < (xfer->tx_buf + xfer->ptp_sts_word_post * bytes_per_word))
+               return;
+
+       ptp_read_system_postts(xfer->ptp_sts);
+
+       if (irqs_off) {
+               local_irq_restore(ctlr->irq_flags);
+               preempt_enable();
+       }
+
+       /* Capture the resolution of the timestamp */
+       xfer->ptp_sts_word_post = (tx - xfer->tx_buf) / bytes_per_word;
+
+       xfer->timestamped_post = true;
+}
+EXPORT_SYMBOL_GPL(spi_take_timestamp_post);
+
 /**
  * spi_set_thread_rt - set the controller to pump at realtime priority
  * @ctlr: controller to boost priority of
@@ -1503,6 +1614,7 @@ EXPORT_SYMBOL_GPL(spi_get_next_queued_message);
  */
 void spi_finalize_current_message(struct spi_controller *ctlr)
 {
+       struct spi_transfer *xfer;
        struct spi_message *mesg;
        unsigned long flags;
        int ret;
@@ -1511,6 +1623,13 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
        mesg = ctlr->cur_msg;
        spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
+       if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) {
+               list_for_each_entry(xfer, &mesg->transfers, transfer_list) {
+                       ptp_read_system_postts(xfer->ptp_sts);
+                       xfer->ptp_sts_word_post = xfer->len;
+               }
+       }
+
        spi_unmap_msg(ctlr, mesg);
 
        if (ctlr->cur_msg_prepared && ctlr->unprepare_message) {
@@ -3273,6 +3392,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 static int __spi_async(struct spi_device *spi, struct spi_message *message)
 {
        struct spi_controller *ctlr = spi->controller;
+       struct spi_transfer *xfer;
 
        /*
         * Some controllers do not support doing regular SPI transfers. Return
@@ -3288,6 +3408,13 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message)
 
        trace_spi_message_submit(message);
 
+       if (!ctlr->ptp_sts_supported) {
+               list_for_each_entry(xfer, &message->transfers, transfer_list) {
+                       xfer->ptp_sts_word_pre = 0;
+                       ptp_read_system_prets(xfer->ptp_sts);
+               }
+       }
+
        return ctlr->transfer(spi, message);
 }
 
index 6f1fa4c..333308f 100644 (file)
@@ -125,4 +125,6 @@ source "drivers/staging/exfat/Kconfig"
 
 source "drivers/staging/qlge/Kconfig"
 
+source "drivers/staging/hp/Kconfig"
+
 endif # STAGING
index a90f9b3..e4943cd 100644 (file)
@@ -53,3 +53,4 @@ obj-$(CONFIG_UWB)             += uwb/
 obj-$(CONFIG_USB_WUSB)         += wusbcore/
 obj-$(CONFIG_EXFAT_FS)         += exfat/
 obj-$(CONFIG_QLGE)             += qlge/
+obj-$(CONFIG_NET_VENDOR_HP)    += hp/
diff --git a/drivers/staging/hp/Kconfig b/drivers/staging/hp/Kconfig
new file mode 100644 (file)
index 0000000..fb395cf
--- /dev/null
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# HP network device configuration
+#
+
+config NET_VENDOR_HP
+       bool "HP devices"
+       default y
+       depends on ISA || EISA || PCI
+       ---help---
+         If you have a network (Ethernet) card belonging to this class, say Y.
+
+         Note that the answer to this question doesn't directly affect the
+         kernel: saying N will just cause the configurator to skip all
+         the questions about HP cards. If you say Y, you will be asked for
+         your specific card in the following questions.
+
+if NET_VENDOR_HP
+
+config HP100
+       tristate "HP 10/100VG PCLAN (ISA, EISA, PCI) support"
+       depends on (ISA || EISA || PCI)
+       ---help---
+         If you have a network (Ethernet) card of this type, say Y here.
+
+         To compile this driver as a module, choose M here. The module
+         will be called hp100.
+
+endif # NET_VENDOR_HP
diff --git a/drivers/staging/hp/Makefile b/drivers/staging/hp/Makefile
new file mode 100644 (file)
index 0000000..5ed723b
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the HP network device drivers.
+#
+
+obj-$(CONFIG_HP100) += hp100.o
diff --git a/drivers/staging/hp/hp100.c b/drivers/staging/hp/hp100.c
new file mode 100644 (file)
index 0000000..6ec78f5
--- /dev/null
@@ -0,0 +1,3037 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+** hp100.c
+** HP CASCADE Architecture Driver for 100VG-AnyLAN Network Adapters
+**
+** $Id: hp100.c,v 1.58 2001/09/24 18:03:01 perex Exp perex $
+**
+** Based on the HP100 driver written by Jaroslav Kysela <perex@jcu.cz>
+** Extended for new busmaster capable chipsets by
+** Siegfried "Frieder" Loeffler (dg1sek) <floeff@mathematik.uni-stuttgart.de>
+**
+** Maintained by: Jaroslav Kysela <perex@perex.cz>
+**
+** This driver has only been tested with
+** -- HP J2585B 10/100 Mbit/s PCI Busmaster
+** -- HP J2585A 10/100 Mbit/s PCI
+** -- HP J2970A 10 Mbit/s PCI Combo 10base-T/BNC
+** -- HP J2973A 10 Mbit/s PCI 10base-T
+** -- HP J2573  10/100 ISA
+** -- Compex ReadyLink ENET100-VG4  10/100 Mbit/s PCI / EISA
+** -- Compex FreedomLine 100/VG  10/100 Mbit/s ISA / EISA / PCI
+**
+** but it should also work with the other CASCADE based adapters.
+**
+** TODO:
+**       -  J2573 seems to hang sometimes when in shared memory mode.
+**       -  Mode for Priority TX
+**       -  Check PCI registers, performance might be improved?
+**       -  To reduce interrupt load in busmaster, one could switch off
+**          the interrupts that are used to refill the queues whenever the
+**          queues are filled up to more than a certain threshold.
+**       -  some updates for EISA version of card
+**
+**
+**
+** 1.57c -> 1.58
+**   - used indent to change coding-style
+**   - added KTI DP-200 EISA ID
+**   - ioremap is also used for low (<1MB) memory (multi-architecture support)
+**
+** 1.57b -> 1.57c - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+**   - release resources on failure in init_module
+**
+** 1.57 -> 1.57b - Jean II
+**   - fix spinlocks, SMP is now working !
+**
+** 1.56 -> 1.57
+**   - updates for new PCI interface for 2.1 kernels
+**
+** 1.55 -> 1.56
+**   - removed printk in misc. interrupt and update statistics to allow
+**     monitoring of card status
+**   - timing changes in xmit routines, relogin to 100VG hub added when
+**     driver does reset
+**   - included fix for Compex FreedomLine PCI adapter
+**
+** 1.54 -> 1.55
+**   - fixed bad initialization in init_module
+**   - added Compex FreedomLine adapter
+**   - some fixes in card initialization
+**
+** 1.53 -> 1.54
+**   - added hardware multicast filter support (doesn't work)
+**   - little changes in hp100_sense_lan routine
+**     - added support for Coax and AUI (J2970)
+**   - fix for multiple cards and hp100_mode parameter (insmod)
+**   - fix for shared IRQ
+**
+** 1.52 -> 1.53
+**   - fixed bug in multicast support
+**
+*/
+
+#define HP100_DEFAULT_PRIORITY_TX 0
+
+#undef HP100_DEBUG
+#undef HP100_DEBUG_B           /* Trace  */
+#undef HP100_DEBUG_BM          /* Debug busmaster code (PDL stuff) */
+
+#undef HP100_DEBUG_TRAINING    /* Debug login-to-hub procedure */
+#undef HP100_DEBUG_TX
+#undef HP100_DEBUG_IRQ
+#undef HP100_DEBUG_RX
+
+#undef HP100_MULTICAST_FILTER  /* Need to be debugged... */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/eisa.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/jiffies.h>
+
+#include <asm/io.h>
+
+#include "hp100.h"
+
+/*
+ *  defines
+ */
+
+#define HP100_BUS_ISA     0
+#define HP100_BUS_EISA    1
+#define HP100_BUS_PCI     2
+
+#define HP100_REGION_SIZE      0x20    /* for ioports */
+#define HP100_SIG_LEN          8       /* same as EISA_SIG_LEN */
+
+#define HP100_MAX_PACKET_SIZE  (1536+4)
+#define HP100_MIN_PACKET_SIZE  60
+
+#ifndef HP100_DEFAULT_RX_RATIO
+/* default - 75% onboard memory on the card are used for RX packets */
+#define HP100_DEFAULT_RX_RATIO 75
+#endif
+
+#ifndef HP100_DEFAULT_PRIORITY_TX
+/* default - don't enable transmit outgoing packets as priority */
+#define HP100_DEFAULT_PRIORITY_TX 0
+#endif
+
+/*
+ *  structures
+ */
+
+struct hp100_private {
+       spinlock_t lock;
+       char id[HP100_SIG_LEN];
+       u_short chip;
+       u_short soft_model;
+       u_int memory_size;
+       u_int virt_memory_size;
+       u_short rx_ratio;       /* 1 - 99 */
+       u_short priority_tx;    /* != 0 - priority tx */
+       u_short mode;           /* PIO, Shared Mem or Busmaster */
+       u_char bus;
+       struct pci_dev *pci_dev;
+       short mem_mapped;       /* memory mapped access */
+       void __iomem *mem_ptr_virt;     /* virtual memory mapped area, maybe NULL */
+       unsigned long mem_ptr_phys;     /* physical memory mapped area */
+       short lan_type;         /* 10Mb/s, 100Mb/s or -1 (error) */
+       int hub_status;         /* was login to hub successful? */
+       u_char mac1_mode;
+       u_char mac2_mode;
+       u_char hash_bytes[8];
+
+       /* Rings for busmaster mode: */
+       hp100_ring_t *rxrhead;  /* Head (oldest) index into rxring */
+       hp100_ring_t *rxrtail;  /* Tail (newest) index into rxring */
+       hp100_ring_t *txrhead;  /* Head (oldest) index into txring */
+       hp100_ring_t *txrtail;  /* Tail (newest) index into txring */
+
+       hp100_ring_t rxring[MAX_RX_PDL];
+       hp100_ring_t txring[MAX_TX_PDL];
+
+       u_int *page_vaddr_algn; /* Aligned virtual address of allocated page */
+       u_long whatever_offset; /* Offset to bus/phys/dma address */
+       int rxrcommit;          /* # Rx PDLs committed to adapter */
+       int txrcommit;          /* # Tx PDLs committed to adapter */
+};
+
+/*
+ *  variables
+ */
+#ifdef CONFIG_ISA
+static const char *hp100_isa_tbl[] = {
+       "HWPF150", /* HP J2573 rev A */
+       "HWP1950", /* HP J2573 */
+};
+#endif
+
+static const struct eisa_device_id hp100_eisa_tbl[] = {
+       { "HWPF180" }, /* HP J2577 rev A */
+       { "HWP1920" }, /* HP 27248B */
+       { "HWP1940" }, /* HP J2577 */
+       { "HWP1990" }, /* HP J2577 */
+       { "CPX0301" }, /* ReadyLink ENET100-VG4 */
+       { "CPX0401" }, /* FreedomLine 100/VG */
+       { "" }         /* Mandatory final entry ! */
+};
+MODULE_DEVICE_TABLE(eisa, hp100_eisa_tbl);
+
+static const struct pci_device_id hp100_pci_tbl[] = {
+       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585A, PCI_ANY_ID, PCI_ANY_ID,},
+       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585B, PCI_ANY_ID, PCI_ANY_ID,},
+       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2970A, PCI_ANY_ID, PCI_ANY_ID,},
+       {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2973A, PCI_ANY_ID, PCI_ANY_ID,},
+       {PCI_VENDOR_ID_COMPEX, PCI_DEVICE_ID_COMPEX_ENET100VG4, PCI_ANY_ID, PCI_ANY_ID,},
+       {PCI_VENDOR_ID_COMPEX2, PCI_DEVICE_ID_COMPEX2_100VG, PCI_ANY_ID, PCI_ANY_ID,},
+/*     {PCI_VENDOR_ID_KTI, PCI_DEVICE_ID_KTI_DP200, PCI_ANY_ID, PCI_ANY_ID }, */
+       {}                      /* Terminating entry */
+};
+MODULE_DEVICE_TABLE(pci, hp100_pci_tbl);
+
+static int hp100_rx_ratio = HP100_DEFAULT_RX_RATIO;
+static int hp100_priority_tx = HP100_DEFAULT_PRIORITY_TX;
+static int hp100_mode = 1;
+
+module_param(hp100_rx_ratio, int, 0);
+module_param(hp100_priority_tx, int, 0);
+module_param(hp100_mode, int, 0);
+
+/*
+ *  prototypes
+ */
+
+static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus,
+                       struct pci_dev *pci_dev);
+
+
+static int hp100_open(struct net_device *dev);
+static int hp100_close(struct net_device *dev);
+static netdev_tx_t hp100_start_xmit(struct sk_buff *skb,
+                                   struct net_device *dev);
+static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb,
+                                      struct net_device *dev);
+static void hp100_rx(struct net_device *dev);
+static struct net_device_stats *hp100_get_stats(struct net_device *dev);
+static void hp100_misc_interrupt(struct net_device *dev);
+static void hp100_update_stats(struct net_device *dev);
+static void hp100_clear_stats(struct hp100_private *lp, int ioaddr);
+static void hp100_set_multicast_list(struct net_device *dev);
+static irqreturn_t hp100_interrupt(int irq, void *dev_id);
+static void hp100_start_interface(struct net_device *dev);
+static void hp100_stop_interface(struct net_device *dev);
+static void hp100_load_eeprom(struct net_device *dev, u_short ioaddr);
+static int hp100_sense_lan(struct net_device *dev);
+static int hp100_login_to_vg_hub(struct net_device *dev,
+                                u_short force_relogin);
+static int hp100_down_vg_link(struct net_device *dev);
+static void hp100_cascade_reset(struct net_device *dev, u_short enable);
+static void hp100_BM_shutdown(struct net_device *dev);
+static void hp100_mmuinit(struct net_device *dev);
+static void hp100_init_pdls(struct net_device *dev);
+static int hp100_init_rxpdl(struct net_device *dev,
+                           register hp100_ring_t * ringptr,
+                           register u_int * pdlptr);
+static int hp100_init_txpdl(struct net_device *dev,
+                           register hp100_ring_t * ringptr,
+                           register u_int * pdlptr);
+static void hp100_rxfill(struct net_device *dev);
+static void hp100_hwinit(struct net_device *dev);
+static void hp100_clean_txring(struct net_device *dev);
+#ifdef HP100_DEBUG
+static void hp100_RegisterDump(struct net_device *dev);
+#endif
+
+/* Conversion to new PCI API :
+ * Convert an address in a kernel buffer to a bus/phys/dma address.
+ * This works *only* for memory fragments part of lp->page_vaddr,
+ * because it was properly DMA allocated via pci_alloc_consistent(),
+ * so we just need to "retrieve" the original mapping to bus/phys/dma
+ * address - Jean II */
+static inline dma_addr_t virt_to_whatever(struct net_device *dev, u32 * ptr)
+{
+       struct hp100_private *lp = netdev_priv(dev);
+       return ((u_long) ptr) + lp->whatever_offset;
+}
+
+static inline u_int pdl_map_data(struct hp100_private *lp, void *data)
+{
+       return pci_map_single(lp->pci_dev, data,
+                             MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE);
+}
+
+/* TODO: This function should not really be needed in a good design... */
+static void wait(void)
+{
+       mdelay(1);
+}
+
+/*
+ *  probe functions
+ *  These functions should - if possible - avoid doing write operations
+ *  since this could cause problems when the card is not installed.
+ */
+
+/*
+ * Read board id and convert to string.
+ * Effectively same code as decode_eisa_sig
+ */
+static const char *hp100_read_id(int ioaddr)
+{
+       int i;
+       static char str[HP100_SIG_LEN];
+       unsigned char sig[4], sum;
+        unsigned short rev;
+
+       hp100_page(ID_MAC_ADDR);
+       sum = 0;
+       for (i = 0; i < 4; i++) {
+               sig[i] = hp100_inb(BOARD_ID + i);
+               sum += sig[i];
+       }
+
+       sum += hp100_inb(BOARD_ID + i);
+       if (sum != 0xff)
+               return NULL;    /* bad checksum */
+
+        str[0] = ((sig[0] >> 2) & 0x1f) + ('A' - 1);
+        str[1] = (((sig[0] & 3) << 3) | (sig[1] >> 5)) + ('A' - 1);
+        str[2] = (sig[1] & 0x1f) + ('A' - 1);
+        rev = (sig[2] << 8) | sig[3];
+        sprintf(str + 3, "%04X", rev);
+
+       return str;
+}
+
+#ifdef CONFIG_ISA
+static __init int hp100_isa_probe1(struct net_device *dev, int ioaddr)
+{
+       const char *sig;
+       int i;
+
+       if (!request_region(ioaddr, HP100_REGION_SIZE, "hp100"))
+               goto err;
+
+       if (hp100_inw(HW_ID) != HP100_HW_ID_CASCADE) {
+               release_region(ioaddr, HP100_REGION_SIZE);
+               goto err;
+       }
+
+       sig = hp100_read_id(ioaddr);
+       release_region(ioaddr, HP100_REGION_SIZE);
+
+       if (sig == NULL)
+               goto err;
+
+       for (i = 0; i < ARRAY_SIZE(hp100_isa_tbl); i++) {
+               if (!strcmp(hp100_isa_tbl[i], sig))
+                       break;
+
+       }
+
+       if (i < ARRAY_SIZE(hp100_isa_tbl))
+               return hp100_probe1(dev, ioaddr, HP100_BUS_ISA, NULL);
+ err:
+       return -ENODEV;
+
+}
+/*
+ * Probe for ISA board.
+ * EISA and PCI are handled by device infrastructure.
+ */
+
+static int  __init hp100_isa_probe(struct net_device *dev, int addr)
+{
+       int err = -ENODEV;
+
+       /* Probe for a specific ISA address */
+       if (addr > 0xff && addr < 0x400)
+               err = hp100_isa_probe1(dev, addr);
+
+       else if (addr != 0)
+               err = -ENXIO;
+
+       else {
+               /* Probe all ISA possible port regions */
+               for (addr = 0x100; addr < 0x400; addr += 0x20) {
+                       err = hp100_isa_probe1(dev, addr);
+                       if (!err)
+                               break;
+               }
+       }
+       return err;
+}
+#endif /* CONFIG_ISA */
+
+#if !defined(MODULE) && defined(CONFIG_ISA)
+struct net_device * __init hp100_probe(int unit)
+{
+       struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
+       int err;
+
+       if (!dev)
+               return ERR_PTR(-ENODEV);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4200, TRACE);
+       printk("hp100: %s: probe\n", dev->name);
+#endif
+
+       if (unit >= 0) {
+               sprintf(dev->name, "eth%d", unit);
+               netdev_boot_setup_check(dev);
+       }
+
+       err = hp100_isa_probe(dev, dev->base_addr);
+       if (err)
+               goto out;
+
+       return dev;
+ out:
+       free_netdev(dev);
+       return ERR_PTR(err);
+}
+#endif /* !MODULE && CONFIG_ISA */
+
+static const struct net_device_ops hp100_bm_netdev_ops = {
+       .ndo_open               = hp100_open,
+       .ndo_stop               = hp100_close,
+       .ndo_start_xmit         = hp100_start_xmit_bm,
+       .ndo_get_stats          = hp100_get_stats,
+       .ndo_set_rx_mode        = hp100_set_multicast_list,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_validate_addr      = eth_validate_addr,
+};
+
+static const struct net_device_ops hp100_netdev_ops = {
+       .ndo_open               = hp100_open,
+       .ndo_stop               = hp100_close,
+       .ndo_start_xmit         = hp100_start_xmit,
+       .ndo_get_stats          = hp100_get_stats,
+       .ndo_set_rx_mode        = hp100_set_multicast_list,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_validate_addr      = eth_validate_addr,
+};
+
+static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus,
+                       struct pci_dev *pci_dev)
+{
+       int i;
+       int err = -ENODEV;
+       const char *eid;
+       u_int chip;
+       u_char uc;
+       u_int memory_size = 0, virt_memory_size = 0;
+       u_short local_mode, lsw;
+       short mem_mapped;
+       unsigned long mem_ptr_phys;
+       void __iomem *mem_ptr_virt;
+       struct hp100_private *lp;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4201, TRACE);
+       printk("hp100: %s: probe1\n", dev->name);
+#endif
+
+       /* memory region for programmed i/o */
+       if (!request_region(ioaddr, HP100_REGION_SIZE, "hp100"))
+               goto out1;
+
+       if (hp100_inw(HW_ID) != HP100_HW_ID_CASCADE)
+               goto out2;
+
+       chip = hp100_inw(PAGING) & HP100_CHIPID_MASK;
+#ifdef HP100_DEBUG
+       if (chip == HP100_CHIPID_SHASTA)
+               printk("hp100: %s: Shasta Chip detected. (This is a pre 802.12 chip)\n", dev->name);
+       else if (chip == HP100_CHIPID_RAINIER)
+               printk("hp100: %s: Rainier Chip detected. (This is a pre 802.12 chip)\n", dev->name);
+       else if (chip == HP100_CHIPID_LASSEN)
+               printk("hp100: %s: Lassen Chip detected.\n", dev->name);
+       else
+               printk("hp100: %s: Warning: Unknown CASCADE chip (id=0x%.4x).\n", dev->name, chip);
+#endif
+
+       dev->base_addr = ioaddr;
+
+       eid = hp100_read_id(ioaddr);
+       if (eid == NULL) {      /* bad checksum? */
+               printk(KERN_WARNING "%s: bad ID checksum at base port 0x%x\n",
+                      __func__, ioaddr);
+               goto out2;
+       }
+
+       hp100_page(ID_MAC_ADDR);
+       for (i = uc = 0; i < 7; i++)
+               uc += hp100_inb(LAN_ADDR + i);
+       if (uc != 0xff) {
+               printk(KERN_WARNING
+                      "%s: bad lan address checksum at port 0x%x)\n",
+                      __func__, ioaddr);
+               err = -EIO;
+               goto out2;
+       }
+
+       /* Make sure, that all registers are correctly updated... */
+
+       hp100_load_eeprom(dev, ioaddr);
+       wait();
+
+       /*
+        * Determine driver operation mode
+        *
+        * Use the variable "hp100_mode" upon insmod or as kernel parameter to
+        * force driver modes:
+        * hp100_mode=1 -> default, use busmaster mode if configured.
+        * hp100_mode=2 -> enable shared memory mode
+        * hp100_mode=3 -> force use of i/o mapped mode.
+        * hp100_mode=4 -> same as 1, but re-set the enable bit on the card.
+        */
+
+       /*
+        * LSW values:
+        *   0x2278 -> J2585B, PnP shared memory mode
+        *   0x2270 -> J2585B, shared memory mode, 0xdc000
+        *   0xa23c -> J2585B, I/O mapped mode
+        *   0x2240 -> EISA COMPEX, BusMaster (Shasta Chip)
+        *   0x2220 -> EISA HP, I/O (Shasta Chip)
+        *   0x2260 -> EISA HP, BusMaster (Shasta Chip)
+        */
+
+#if 0
+       local_mode = 0x2270;
+       hp100_outw(0xfefe, OPTION_LSW);
+       hp100_outw(local_mode | HP100_SET_LB | HP100_SET_HB, OPTION_LSW);
+#endif
+
+       /* hp100_mode value maybe used in future by another card */
+       local_mode = hp100_mode;
+       if (local_mode < 1 || local_mode > 4)
+               local_mode = 1; /* default */
+#ifdef HP100_DEBUG
+       printk("hp100: %s: original LSW = 0x%x\n", dev->name,
+              hp100_inw(OPTION_LSW));
+#endif
+
+       if (local_mode == 3) {
+               hp100_outw(HP100_MEM_EN | HP100_RESET_LB, OPTION_LSW);
+               hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
+               hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
+               printk("hp100: IO mapped mode forced.\n");
+       } else if (local_mode == 2) {
+               hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
+               hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
+               hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
+               printk("hp100: Shared memory mode requested.\n");
+       } else if (local_mode == 4) {
+               if (chip == HP100_CHIPID_LASSEN) {
+                       hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_SET_HB, OPTION_LSW);
+                       hp100_outw(HP100_IO_EN | HP100_MEM_EN | HP100_RESET_LB, OPTION_LSW);
+                       printk("hp100: Busmaster mode requested.\n");
+               }
+               local_mode = 1;
+       }
+
+       if (local_mode == 1) {  /* default behaviour */
+               lsw = hp100_inw(OPTION_LSW);
+
+               if ((lsw & HP100_IO_EN) && (~lsw & HP100_MEM_EN) &&
+                   (~lsw & (HP100_BM_WRITE | HP100_BM_READ))) {
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: IO_EN bit is set on card.\n", dev->name);
+#endif
+                       local_mode = 3;
+               } else if (chip == HP100_CHIPID_LASSEN &&
+                          (lsw & (HP100_BM_WRITE | HP100_BM_READ)) == (HP100_BM_WRITE | HP100_BM_READ)) {
+                       /* Conversion to new PCI API :
+                        * I don't have the doc, but I assume that the card
+                        * can map the full 32bit address space.
+                        * Also, we can have EISA Busmaster cards (not tested),
+                        * so beware !!! - Jean II */
+                       if((bus == HP100_BUS_PCI) &&
+                          (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)))) {
+                               /* Gracefully fallback to shared memory */
+                               goto busmasterfail;
+                       }
+                       printk("hp100: Busmaster mode enabled.\n");
+                       hp100_outw(HP100_MEM_EN | HP100_IO_EN | HP100_RESET_LB, OPTION_LSW);
+               } else {
+               busmasterfail:
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: Card not configured for BM or BM not supported with this card.\n", dev->name);
+                       printk("hp100: %s: Trying shared memory mode.\n", dev->name);
+#endif
+                       /* In this case, try shared memory mode */
+                       local_mode = 2;
+                       hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
+                       /* hp100_outw(HP100_IO_EN|HP100_RESET_LB, OPTION_LSW); */
+               }
+       }
+#ifdef HP100_DEBUG
+       printk("hp100: %s: new LSW = 0x%x\n", dev->name, hp100_inw(OPTION_LSW));
+#endif
+
+       /* Check for shared memory on the card, eventually remap it */
+       hp100_page(HW_MAP);
+       mem_mapped = ((hp100_inw(OPTION_LSW) & (HP100_MEM_EN)) != 0);
+       mem_ptr_phys = 0UL;
+       mem_ptr_virt = NULL;
+       memory_size = (8192 << ((hp100_inb(SRAM) >> 5) & 0x07));
+       virt_memory_size = 0;
+
+       /* For memory mapped or busmaster mode, we want the memory address */
+       if (mem_mapped || (local_mode == 1)) {
+               mem_ptr_phys = (hp100_inw(MEM_MAP_LSW) | (hp100_inw(MEM_MAP_MSW) << 16));
+               mem_ptr_phys &= ~0x1fff;        /* 8k alignment */
+
+               if (bus == HP100_BUS_ISA && (mem_ptr_phys & ~0xfffff) != 0) {
+                       printk("hp100: Can only use programmed i/o mode.\n");
+                       mem_ptr_phys = 0;
+                       mem_mapped = 0;
+                       local_mode = 3; /* Use programmed i/o */
+               }
+
+               /* We do not need access to shared memory in busmaster mode */
+               /* However in slave mode we need to remap high (>1GB) card memory  */
+               if (local_mode != 1) {  /* = not busmaster */
+                       /* We try with smaller memory sizes, if ioremap fails */
+                       for (virt_memory_size = memory_size; virt_memory_size > 16383; virt_memory_size >>= 1) {
+                               if ((mem_ptr_virt = ioremap((u_long) mem_ptr_phys, virt_memory_size)) == NULL) {
+#ifdef HP100_DEBUG
+                                       printk("hp100: %s: ioremap for 0x%x bytes high PCI memory at 0x%lx failed\n", dev->name, virt_memory_size, mem_ptr_phys);
+#endif
+                               } else {
+#ifdef HP100_DEBUG
+                                       printk("hp100: %s: remapped 0x%x bytes high PCI memory at 0x%lx to %p.\n", dev->name, virt_memory_size, mem_ptr_phys, mem_ptr_virt);
+#endif
+                                       break;
+                               }
+                       }
+
+                       if (mem_ptr_virt == NULL) {     /* all ioremap tries failed */
+                               printk("hp100: Failed to ioremap the PCI card memory. Will have to use i/o mapped mode.\n");
+                               local_mode = 3;
+                               virt_memory_size = 0;
+                       }
+               }
+       }
+
+       if (local_mode == 3) {  /* io mapped forced */
+               mem_mapped = 0;
+               mem_ptr_phys = 0;
+               mem_ptr_virt = NULL;
+               printk("hp100: Using (slow) programmed i/o mode.\n");
+       }
+
+       /* Initialise the "private" data structure for this card. */
+       lp = netdev_priv(dev);
+
+       spin_lock_init(&lp->lock);
+       strlcpy(lp->id, eid, HP100_SIG_LEN);
+       lp->chip = chip;
+       lp->mode = local_mode;
+       lp->bus = bus;
+       lp->pci_dev = pci_dev;
+       lp->priority_tx = hp100_priority_tx;
+       lp->rx_ratio = hp100_rx_ratio;
+       lp->mem_ptr_phys = mem_ptr_phys;
+       lp->mem_ptr_virt = mem_ptr_virt;
+       hp100_page(ID_MAC_ADDR);
+       lp->soft_model = hp100_inb(SOFT_MODEL);
+       lp->mac1_mode = HP100_MAC1MODE3;
+       lp->mac2_mode = HP100_MAC2MODE3;
+       memset(&lp->hash_bytes, 0x00, 8);
+
+       dev->base_addr = ioaddr;
+
+       lp->memory_size = memory_size;
+       lp->virt_memory_size = virt_memory_size;
+       lp->rx_ratio = hp100_rx_ratio;  /* can be conf'd with insmod */
+
+       if (lp->mode == 1)      /* busmaster */
+               dev->netdev_ops = &hp100_bm_netdev_ops;
+       else
+               dev->netdev_ops = &hp100_netdev_ops;
+
+       /* Ask the card for which IRQ line it is configured */
+       if (bus == HP100_BUS_PCI) {
+               dev->irq = pci_dev->irq;
+       } else {
+               hp100_page(HW_MAP);
+               dev->irq = hp100_inb(IRQ_CHANNEL) & HP100_IRQMASK;
+               if (dev->irq == 2)
+                       dev->irq = 9;
+       }
+
+       if (lp->mode == 1)      /* busmaster */
+               dev->dma = 4;
+
+       /* Ask the card for its MAC address and store it for later use. */
+       hp100_page(ID_MAC_ADDR);
+       for (i = uc = 0; i < 6; i++)
+               dev->dev_addr[i] = hp100_inb(LAN_ADDR + i);
+
+       /* Reset statistics (counters) */
+       hp100_clear_stats(lp, ioaddr);
+
+       /* If busmaster mode is wanted, a dma-capable memory area is needed for
+        * the rx and tx PDLs
+        * PCI cards can access the whole PC memory. Therefore GFP_DMA is not
+        * needed for the allocation of the memory area.
+        */
+
+       /* TODO: We do not need this with old cards, where PDLs are stored
+        * in the cards shared memory area. But currently, busmaster has been
+        * implemented/tested only with the lassen chip anyway... */
+       if (lp->mode == 1) {    /* busmaster */
+               dma_addr_t page_baddr;
+               /* Get physically continuous memory for TX & RX PDLs    */
+               /* Conversion to new PCI API :
+                * Pages are always aligned and zeroed, no need to do it ourselves.
+                * Doc says should be OK for EISA bus as well - Jean II */
+               lp->page_vaddr_algn = pci_alloc_consistent(lp->pci_dev, MAX_RINGSIZE, &page_baddr);
+               if (!lp->page_vaddr_algn) {
+                       err = -ENOMEM;
+                       goto out_mem_ptr;
+               }
+               lp->whatever_offset = ((u_long) page_baddr) - ((u_long) lp->page_vaddr_algn);
+
+#ifdef HP100_DEBUG_BM
+               printk("hp100: %s: Reserved DMA memory from 0x%x to 0x%x\n", dev->name, (u_int) lp->page_vaddr_algn, (u_int) lp->page_vaddr_algn + MAX_RINGSIZE);
+#endif
+               lp->rxrcommit = lp->txrcommit = 0;
+               lp->rxrhead = lp->rxrtail = &(lp->rxring[0]);
+               lp->txrhead = lp->txrtail = &(lp->txring[0]);
+       }
+
+       /* Initialise the card. */
+       /* (I'm not really sure if it's a good idea to do this during probing, but
+        * like this it's assured that the lan connection type can be sensed
+        * correctly)
+        */
+       hp100_hwinit(dev);
+
+       /* Try to find out which kind of LAN the card is connected to. */
+       lp->lan_type = hp100_sense_lan(dev);
+
+       /* Print out a message what about what we think we have probed. */
+       printk("hp100: at 0x%x, IRQ %d, ", ioaddr, dev->irq);
+       switch (bus) {
+       case HP100_BUS_EISA:
+               printk("EISA");
+               break;
+       case HP100_BUS_PCI:
+               printk("PCI");
+               break;
+       default:
+               printk("ISA");
+               break;
+       }
+       printk(" bus, %dk SRAM (rx/tx %d%%).\n", lp->memory_size >> 10, lp->rx_ratio);
+
+       if (lp->mode == 2) {    /* memory mapped */
+               printk("hp100: Memory area at 0x%lx-0x%lx", mem_ptr_phys,
+                               (mem_ptr_phys + (mem_ptr_phys > 0x100000 ? (u_long) lp->memory_size : 16 * 1024)) - 1);
+               if (mem_ptr_virt)
+                       printk(" (virtual base %p)", mem_ptr_virt);
+               printk(".\n");
+
+               /* Set for info when doing ifconfig */
+               dev->mem_start = mem_ptr_phys;
+               dev->mem_end = mem_ptr_phys + lp->memory_size;
+       }
+
+       printk("hp100: ");
+       if (lp->lan_type != HP100_LAN_ERR)
+               printk("Adapter is attached to ");
+       switch (lp->lan_type) {
+       case HP100_LAN_100:
+               printk("100Mb/s Voice Grade AnyLAN network.\n");
+               break;
+       case HP100_LAN_10:
+               printk("10Mb/s network (10baseT).\n");
+               break;
+       case HP100_LAN_COAX:
+               printk("10Mb/s network (coax).\n");
+               break;
+       default:
+               printk("Warning! Link down.\n");
+       }
+
+       err = register_netdev(dev);
+       if (err)
+               goto out3;
+
+       return 0;
+out3:
+       if (local_mode == 1)
+               pci_free_consistent(lp->pci_dev, MAX_RINGSIZE + 0x0f,
+                                   lp->page_vaddr_algn,
+                                   virt_to_whatever(dev, lp->page_vaddr_algn));
+out_mem_ptr:
+       if (mem_ptr_virt)
+               iounmap(mem_ptr_virt);
+out2:
+       release_region(ioaddr, HP100_REGION_SIZE);
+out1:
+       return err;
+}
+
+/* This procedure puts the card into a stable init state */
+static void hp100_hwinit(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4202, TRACE);
+       printk("hp100: %s: hwinit\n", dev->name);
+#endif
+
+       /* Initialise the card. -------------------------------------------- */
+
+       /* Clear all pending Ints and disable Ints */
+       hp100_page(PERFORMANCE);
+       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
+       hp100_outw(0xffff, IRQ_STATUS); /* clear all pending ints */
+
+       hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
+       hp100_outw(HP100_TRI_INT | HP100_SET_HB, OPTION_LSW);
+
+       if (lp->mode == 1) {
+               hp100_BM_shutdown(dev); /* disables BM, puts cascade in reset */
+               wait();
+       } else {
+               hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
+               hp100_cascade_reset(dev, 1);
+               hp100_page(MAC_CTRL);
+               hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);
+       }
+
+       /* Initiate EEPROM reload */
+       hp100_load_eeprom(dev, 0);
+
+       wait();
+
+       /* Go into reset again. */
+       hp100_cascade_reset(dev, 1);
+
+       /* Set Option Registers to a safe state  */
+       hp100_outw(HP100_DEBUG_EN |
+                  HP100_RX_HDR |
+                  HP100_EE_EN |
+                  HP100_BM_WRITE |
+                  HP100_BM_READ | HP100_RESET_HB |
+                  HP100_FAKE_INT |
+                  HP100_INT_EN |
+                  HP100_MEM_EN |
+                  HP100_IO_EN | HP100_RESET_LB, OPTION_LSW);
+
+       hp100_outw(HP100_TRI_INT |
+                  HP100_MMAP_DIS | HP100_SET_HB, OPTION_LSW);
+
+       hp100_outb(HP100_PRIORITY_TX |
+                  HP100_ADV_NXT_PKT |
+                  HP100_TX_CMD | HP100_RESET_LB, OPTION_MSW);
+
+       /* TODO: Configure MMU for Ram Test. */
+       /* TODO: Ram Test. */
+
+       /* Re-check if adapter is still at same i/o location      */
+       /* (If the base i/o in eeprom has been changed but the    */
+       /* registers had not been changed, a reload of the eeprom */
+       /* would move the adapter to the address stored in eeprom */
+
+       /* TODO: Code to implement. */
+
+       /* Until here it was code from HWdiscover procedure. */
+       /* Next comes code from mmuinit procedure of SCO BM driver which is
+        * called from HWconfigure in the SCO driver.  */
+
+       /* Initialise MMU, eventually switch on Busmaster Mode, initialise
+        * multicast filter...
+        */
+       hp100_mmuinit(dev);
+
+       /* We don't turn the interrupts on here - this is done by start_interface. */
+       wait();                 /* TODO: Do we really need this? */
+
+       /* Enable Hardware (e.g. unreset) */
+       hp100_cascade_reset(dev, 0);
+
+       /* ------- initialisation complete ----------- */
+
+       /* Finally try to log in the Hub if there may be a VG connection. */
+       if ((lp->lan_type == HP100_LAN_100) || (lp->lan_type == HP100_LAN_ERR))
+               hp100_login_to_vg_hub(dev, 0);  /* relogin */
+
+}
+
+
+/*
+ * mmuinit - Reinitialise Cascade MMU and MAC settings.
+ * Note: Must already be in reset and leaves card in reset.
+ *
+ * Sequence: mask and ack all IRQs, program the OPTION registers for the
+ * configured mode (lp->mode: 1 = busmaster, 2 = memory mapped,
+ * 3 = i/o mapped), divide the card memory into Rx/Tx (and, on non-ETR
+ * chips, PDL) regions, write the station address, clear the multicast
+ * hash registers, set MAC defaults and, in busmaster mode, initialise
+ * the PDL rings.
+ */
+static void hp100_mmuinit(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;    /* NOTE(review): apparently consumed by the hp100_* I/O macros */
+       struct hp100_private *lp = netdev_priv(dev);
+       int i;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4203, TRACE);
+       printk("hp100: %s: mmuinit\n", dev->name);
+#endif
+
+#ifdef HP100_DEBUG
+       /* Sanity check: the caller must hold the card in hardware reset. */
+       if (0 != (hp100_inw(OPTION_LSW) & HP100_HW_RST)) {
+               printk("hp100: %s: Not in reset when entering mmuinit. Fix me.\n", dev->name);
+               return;
+       }
+#endif
+
+       /* Make sure IRQs are masked off and ack'ed. */
+       hp100_page(PERFORMANCE);
+       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
+       hp100_outw(0xffff, IRQ_STATUS); /* ack IRQ */
+
+       /*
+        * Enable Hardware
+        * - Clear Debug En, Rx Hdr Pipe, EE En, I/O En, Fake Int and Intr En
+        * - Set Tri-State Int, Bus Master Rd/Wr, and Mem Map Disable
+        * - Clear Priority, Advance Pkt and Xmit Cmd
+        */
+
+       hp100_outw(HP100_DEBUG_EN |
+                  HP100_RX_HDR |
+                  HP100_EE_EN | HP100_RESET_HB |
+                  HP100_IO_EN |
+                  HP100_FAKE_INT |
+                  HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
+
+       hp100_outw(HP100_TRI_INT | HP100_SET_HB, OPTION_LSW);
+
+       if (lp->mode == 1) {    /* busmaster */
+               hp100_outw(HP100_BM_WRITE |
+                          HP100_BM_READ |
+                          HP100_MMAP_DIS | HP100_SET_HB, OPTION_LSW);
+       } else if (lp->mode == 2) {     /* memory mapped */
+               hp100_outw(HP100_BM_WRITE |
+                          HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
+               hp100_outw(HP100_MMAP_DIS | HP100_RESET_HB, OPTION_LSW);
+               hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
+               hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
+       } else if (lp->mode == 3) {     /* i/o mapped mode */
+               hp100_outw(HP100_MMAP_DIS | HP100_SET_HB |
+                          HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
+       }
+
+       /* Clear the early Rx/Tx configuration registers. */
+       hp100_page(HW_MAP);
+       hp100_outb(0, EARLYRXCFG);
+       hp100_outw(0, EARLYTXCFG);
+
+       /*
+        * Enable Bus Master mode
+        */
+       if (lp->mode == 1) {    /* busmaster */
+               /* Experimental: Set some PCI configuration bits */
+               hp100_page(HW_MAP);
+               hp100_andb(~HP100_PDL_USE3, MODECTRL1); /* BM engine read maximum */
+               hp100_andb(~HP100_TX_DUALQ, MODECTRL1); /* No Queue for Priority TX */
+
+               /* PCI Bus failures should result in a Misc. Interrupt */
+               hp100_orb(HP100_EN_BUS_FAIL, MODECTRL2);
+
+               hp100_outw(HP100_BM_READ | HP100_BM_WRITE | HP100_SET_HB, OPTION_LSW);
+               hp100_page(HW_MAP);
+               /* Use Burst Mode and switch on PAGE_CK */
+               hp100_orb(HP100_BM_BURST_RD | HP100_BM_BURST_WR, BM);
+               if ((lp->chip == HP100_CHIPID_RAINIER) || (lp->chip == HP100_CHIPID_SHASTA))
+                       hp100_orb(HP100_BM_PAGE_CK, BM);
+               hp100_orb(HP100_BM_MASTER, BM);
+       } else {                /* not busmaster */
+
+               hp100_page(HW_MAP);
+               hp100_andb(~HP100_BM_MASTER, BM);
+       }
+
+       /*
+        * Divide card memory into regions for Rx, Tx and, if non-ETR chip, PDLs
+        */
+       hp100_page(MMU_CFG);
+       if (lp->mode == 1) {    /* only needed for Busmaster */
+               int xmit_stop, recv_stop;
+
+               if ((lp->chip == HP100_CHIPID_RAINIER) ||
+                   (lp->chip == HP100_CHIPID_SHASTA)) {
+                       int pdl_stop;
+
+                       /*
+                        * Each pdl is 508 bytes long. (63 frags * 4 bytes for address and
+                        * 4 bytes for header). We will leave NUM_RXPDLS * 508 (rounded
+                        * to the next higher 1k boundary) bytes for the rx-pdl's
+                        * Note: For non-etr chips the transmit stop register must be
+                        * programmed on a 1k boundary, i.e. bits 9:0 must be zero.
+                        */
+                       pdl_stop = lp->memory_size;
+                       xmit_stop = (pdl_stop - 508 * (MAX_RX_PDL) - 16) & ~(0x03ff);
+                       recv_stop = (xmit_stop * (lp->rx_ratio) / 100) & ~(0x03ff);
+                       hp100_outw((pdl_stop >> 4) - 1, PDL_MEM_STOP);
+#ifdef HP100_DEBUG_BM
+                       printk("hp100: %s: PDL_STOP = 0x%x\n", dev->name, pdl_stop);
+#endif
+               } else {
+                       /* ETR chip (Lassen) in busmaster mode */
+                       xmit_stop = (lp->memory_size) - 1;
+                       recv_stop = ((lp->memory_size * lp->rx_ratio) / 100) & ~(0x03ff);
+               }
+
+               hp100_outw(xmit_stop >> 4, TX_MEM_STOP);
+               hp100_outw(recv_stop >> 4, RX_MEM_STOP);
+#ifdef HP100_DEBUG_BM
+               printk("hp100: %s: TX_STOP  = 0x%x\n", dev->name, xmit_stop >> 4);
+               printk("hp100: %s: RX_STOP  = 0x%x\n", dev->name, recv_stop >> 4);
+#endif
+       } else {
+               /* Slave modes (memory mapped and programmed io)  */
+               hp100_outw((((lp->memory_size * lp->rx_ratio) / 100) >> 4), RX_MEM_STOP);
+               hp100_outw(((lp->memory_size - 1) >> 4), TX_MEM_STOP);
+#ifdef HP100_DEBUG
+               printk("hp100: %s: TX_MEM_STOP: 0x%x\n", dev->name, hp100_inw(TX_MEM_STOP));
+               printk("hp100: %s: RX_MEM_STOP: 0x%x\n", dev->name, hp100_inw(RX_MEM_STOP));
+#endif
+       }
+
+       /* Write MAC address into page 1 */
+       hp100_page(MAC_ADDRESS);
+       for (i = 0; i < 6; i++)
+               hp100_outb(dev->dev_addr[i], MAC_ADDR + i);
+
+       /* Zero the multicast hash registers */
+       for (i = 0; i < 8; i++)
+               hp100_outb(0x0, HASH_BYTE0 + i);
+
+       /* Set up MAC defaults */
+       hp100_page(MAC_CTRL);
+
+       /* Go to LAN Page and zero all filter bits */
+       /* Zero accept error, accept multicast, accept broadcast and accept */
+       /* all directed packet bits */
+       hp100_andb(~(HP100_RX_EN |
+                    HP100_TX_EN |
+                    HP100_ACC_ERRORED |
+                    HP100_ACC_MC |
+                    HP100_ACC_BC | HP100_ACC_PHY), MAC_CFG_1);
+
+       hp100_outb(0x00, MAC_CFG_2);
+
+       /* Zero the frame format bit. This works around a training bug in the */
+       /* new hubs. */
+       hp100_outb(0x00, VG_LAN_CFG_2); /* (use 802.3) */
+
+       if (lp->priority_tx)
+               hp100_outb(HP100_PRIORITY_TX | HP100_SET_LB, OPTION_MSW);
+       else
+               hp100_outb(HP100_PRIORITY_TX | HP100_RESET_LB, OPTION_MSW);
+
+       hp100_outb(HP100_ADV_NXT_PKT |
+                  HP100_TX_CMD | HP100_RESET_LB, OPTION_MSW);
+
+       /* If busmaster, initialize the PDLs */
+       if (lp->mode == 1)
+               hp100_init_pdls(dev);
+
+       /* Go to performance page and initialize isr and imr registers */
+       hp100_page(PERFORMANCE);
+       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
+       hp100_outw(0xffff, IRQ_STATUS); /* ack IRQ */
+}
+
+/*
+ *  open/close functions
+ */
+
+/*
+ * hp100_open - bring the interface up (net_device open callback)
+ *
+ * Requests the interrupt line (shared on PCI and EISA buses), senses
+ * the LAN type, resets and reinitialises the adapter, and finally
+ * starts the interface (which sets the MAC modes and enables
+ * interrupts).
+ *
+ * Return: 0 on success, -EAGAIN if the IRQ could not be obtained.
+ */
+static int hp100_open(struct net_device *dev)
+{
+       struct hp100_private *lp = netdev_priv(dev);
+#ifdef HP100_DEBUG_B
+       int ioaddr = dev->base_addr;
+#endif
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4204, TRACE);
+       printk("hp100: %s: open\n", dev->name);
+#endif
+
+       /* New: if bus is PCI or EISA, interrupts might be shared interrupts */
+       if (request_irq(dev->irq, hp100_interrupt,
+                       lp->bus == HP100_BUS_PCI || lp->bus ==
+                       HP100_BUS_EISA ? IRQF_SHARED : 0,
+                       dev->name, dev)) {
+               printk("hp100: %s: unable to get IRQ %d\n", dev->name, dev->irq);
+               return -EAGAIN;
+       }
+
+       netif_trans_update(dev); /* prevent tx timeout */
+       netif_start_queue(dev);
+
+       lp->lan_type = hp100_sense_lan(dev);
+       lp->mac1_mode = HP100_MAC1MODE3;
+       lp->mac2_mode = HP100_MAC2MODE3;
+       memset(&lp->hash_bytes, 0x00, 8);       /* 8 bytes, mirrors the HASH_BYTE0..7 registers */
+
+       hp100_stop_interface(dev);
+
+       hp100_hwinit(dev);
+
+       hp100_start_interface(dev);     /* sets mac modes, enables interrupts */
+
+       return 0;
+}
+
+/*
+ * hp100_close - bring the interface down (net_device stop callback)
+ *
+ * Masks all adapter interrupts, stops the interface, refreshes the
+ * hub login status when running 100Mb/s, stops the tx queue and frees
+ * the IRQ.  Always returns 0.
+ */
+static int hp100_close(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4205, TRACE);
+       printk("hp100: %s: close\n", dev->name);
+#endif
+
+       hp100_page(PERFORMANCE);
+       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all IRQs */
+
+       hp100_stop_interface(dev);
+
+       /* NOTE(review): calling login_to_vg_hub on close mirrors the open
+        * path; presumably this updates hub_status for the next open. */
+       if (lp->lan_type == HP100_LAN_100)
+               lp->hub_status = hp100_login_to_vg_hub(dev, 0);
+
+       netif_stop_queue(dev);
+
+       free_irq(dev->irq, dev);
+
+#ifdef HP100_DEBUG
+       printk("hp100: %s: close LSW = 0x%x\n", dev->name,
+              hp100_inw(OPTION_LSW));
+#endif
+
+       return 0;
+}
+
+
+/*
+ * hp100_init_pdls - configure the PDL Rx/Tx rings (busmaster mode)
+ *
+ * Carves the pre-allocated DMA page (lp->page_vaddr_algn) into
+ * per-descriptor PDL areas and links the rxring/txring entries into
+ * circular lists.  Logs a warning and does nothing if the DMA page was
+ * never allocated.
+ */
+static void hp100_init_pdls(struct net_device *dev)
+{
+       struct hp100_private *lp = netdev_priv(dev);
+       hp100_ring_t *ringptr;
+       u_int *pageptr;         /* Warning : increment by 4 - Jean II */
+       int i;
+
+#ifdef HP100_DEBUG_B
+       int ioaddr = dev->base_addr;
+#endif
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4206, TRACE);
+       printk("hp100: %s: init pdls\n", dev->name);
+#endif
+
+       if (!lp->page_vaddr_algn)
+               printk("hp100: %s: Warning: lp->page_vaddr_algn not initialised!\n", dev->name);
+       else {
+               /* pageptr shall point into the DMA accessible memory region  */
+               /* we use this pointer to status the upper limit of allocated */
+               /* memory in the allocated page. */
+               /* note: align the pointers to the pci cache line size */
+               memset(lp->page_vaddr_algn, 0, MAX_RINGSIZE);   /* Zero  Rx/Tx ring page */
+               pageptr = lp->page_vaddr_algn;
+
+               lp->rxrcommit = 0;
+               ringptr = lp->rxrhead = lp->rxrtail = &(lp->rxring[0]);
+
+               /* Initialise Rx Ring */
+               /* Walk backwards so each entry's .next points at the
+                * following entry, with the last one wrapping to [0]. */
+               for (i = MAX_RX_PDL - 1; i >= 0; i--) {
+                       lp->rxring[i].next = ringptr;
+                       ringptr = &(lp->rxring[i]);
+                       /* init_rxpdl returns the number of u32 slots consumed */
+                       pageptr += hp100_init_rxpdl(dev, ringptr, pageptr);
+               }
+
+               /* Initialise Tx Ring */
+               lp->txrcommit = 0;
+               ringptr = lp->txrhead = lp->txrtail = &(lp->txring[0]);
+               for (i = MAX_TX_PDL - 1; i >= 0; i--) {
+                       lp->txring[i].next = ringptr;
+                       ringptr = &(lp->txring[i]);
+                       pageptr += hp100_init_txpdl(dev, ringptr, pageptr);
+               }
+       }
+}
+
+
+/*
+ * Format the static part of a receive PDL.
+ * Returns the number of u32 slots of DMA page memory this PDL consumes.
+ */
+static int hp100_init_rxpdl(struct net_device *dev,
+                           register hp100_ring_t * ringptr,
+                           register u32 * pdlptr)
+{
+       unsigned long addr = (unsigned long) pdlptr;
+
+       /* The PDL memory area handed to the card must start on a
+        * 16-byte boundary. */
+       if ((addr & 0xf) != 0)
+               printk("hp100: %s: Init rxpdl: Unaligned pdlptr 0x%lx.\n",
+                      dev->name, addr);
+
+       /* The PDH itself lives one u32 past the start of the area;
+        * the u32 before it holds the 4-byte RX status header. */
+       ringptr->pdl = pdlptr + 1;
+       ringptr->pdl_paddr = virt_to_whatever(dev, pdlptr + 1);
+       ringptr->skb = NULL;
+
+       /*
+        * Fragment #1 is used for the RX status header: store its
+        * address/length pair at slots 2 and 3 of the area (i.e. the
+        * first fragment entry after the PDH).
+        */
+       pdlptr[2] = (u_int) virt_to_whatever(dev, pdlptr);      /* Address Frag 1 */
+       pdlptr[3] = 4;  /* Length  Frag 1 */
+
+       return roundup(MAX_RX_FRAG * 2 + 2, 4);
+}
+
+
+/*
+ * Format the static part of a transmit PDL.
+ * Returns the number of u32 slots of DMA page memory this PDL consumes.
+ */
+static int hp100_init_txpdl(struct net_device *dev,
+                           register hp100_ring_t * ringptr,
+                           register u32 * pdlptr)
+{
+       unsigned long addr = (unsigned long) pdlptr;
+
+       /* PDL memory must start on a 16-byte boundary. */
+       if ((addr & 0xf) != 0)
+               printk("hp100: %s: Init txpdl: Unaligned pdlptr 0x%lx.\n", dev->name, addr);
+
+       /* Unlike rx, the tx PDH starts right at the area. */
+       ringptr->pdl = pdlptr;  /* +1; */
+       ringptr->pdl_paddr = virt_to_whatever(dev, pdlptr);     /* +1 */
+       ringptr->skb = NULL;
+
+       return roundup(MAX_TX_FRAG * 2 + 2, 4);
+}
+
+/*
+ * hp100_build_rx_pdl allocates an skb_buff of maximum size plus two bytes
+ * for possible odd word alignment rounding up to next dword and sets the
+ * PDL address for fragment#2 (the packet data).
+ * Returns: 0 if unable to allocate skb_buff
+ *          1 if successful
+ */
+static int hp100_build_rx_pdl(hp100_ring_t * ringptr,
+                             struct net_device *dev)
+{
+#ifdef HP100_DEBUG_B
+       int ioaddr = dev->base_addr;
+#endif
+#ifdef HP100_DEBUG_BM
+       u_int *p;
+#endif
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4207, TRACE);
+       printk("hp100: %s: build rx pdl\n", dev->name);
+#endif
+
+       /* Allocate skb buffer of maximum size */
+       /* Note: This depends on the alloc_skb functions allocating more
+        * space than requested, i.e. aligning to 16bytes */
+
+       ringptr->skb = netdev_alloc_skb(dev, roundup(MAX_ETHER_SIZE + 2, 4));
+
+       if (NULL != ringptr->skb) {
+               /*
+                * Reserve 2 bytes at the head of the buffer to land the IP header
+                * on a long word boundary (According to the Network Driver section
+                * in the Linux KHG, this should help to increase performance.)
+                */
+               skb_reserve(ringptr->skb, 2);
+
+               /* NOTE(review): skb_put() extends the (still empty) skb and
+                * returns the old tail, which equals skb->data here, so this
+                * assignment is effectively a no-op. */
+               ringptr->skb->data = skb_put(ringptr->skb, MAX_ETHER_SIZE);
+
+               /* ringptr->pdl points to the beginning of the PDL, i.e. the PDH */
+               /* Note: 1st Fragment is used for the 4 byte packet status
+                * (receive header). Its PDL entries are set up by init_rxpdl. So
+                * here we only have to set up the PDL fragment entries for the data
+                * part. Those 4 bytes will be stored in the DMA memory region
+                * directly before the PDL.
+                */
+#ifdef HP100_DEBUG_BM
+               printk("hp100: %s: build_rx_pdl: PDH@0x%x, skb->data (len %d) at 0x%x\n",
+                                    dev->name, (u_int) ringptr->pdl,
+                                    roundup(MAX_ETHER_SIZE + 2, 4),
+                                    (unsigned int) ringptr->skb->data);
+#endif
+
+               /* Conversion to new PCI API : map skbuf data to PCI bus.
+                * Doc says it's OK for EISA as well - Jean II */
+               ringptr->pdl[0] = 0x00020000;   /* Write PDH: fragment count = 2 */
+               ringptr->pdl[3] = pdl_map_data(netdev_priv(dev),
+                                              ringptr->skb->data);
+               ringptr->pdl[4] = MAX_ETHER_SIZE;       /* Length of Data */
+
+#ifdef HP100_DEBUG_BM
+               for (p = (ringptr->pdl); p < (ringptr->pdl + 5); p++)
+                       printk("hp100: %s: Adr 0x%.8x = 0x%.8x\n", dev->name, (u_int) p, (u_int) * p);
+#endif
+               return 1;
+       }
+       /* else: */
+       /* alloc_skb failed (no memory) -> still can receive the header
+        * fragment into PDL memory. make PDL safe by clearing msgptr and
+        * making the PDL only 1 fragment (i.e. the 4 byte packet status)
+        */
+#ifdef HP100_DEBUG_BM
+       printk("hp100: %s: build_rx_pdl: PDH@0x%x, No space for skb.\n", dev->name, (u_int) ringptr->pdl);
+#endif
+
+       ringptr->pdl[0] = 0x00010000;   /* PDH: Count=1 Fragment */
+
+       return 0;
+}
+
+/*
+ *  hp100_rxfill - attempt to fill the Rx Ring with empty skb's
+ *
+ * Makes assumption that skb's are always contiguous memory areas and
+ * therefore PDLs contain only 2 physical fragments.
+ * -  While the number of Rx PDLs with buffers is less than maximum
+ *      a.  Get a maximum packet size skb
+ *      b.  Put the physical address of the buffer into the PDL.
+ *      c.  Output physical address of PDL to adapter.
+ *
+ * Stops early (without error) when skb allocation fails.
+ */
+static void hp100_rxfill(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+
+       struct hp100_private *lp = netdev_priv(dev);
+       hp100_ring_t *ringptr;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4208, TRACE);
+       printk("hp100: %s: rxfill\n", dev->name);
+#endif
+
+       /* The RX_PDA writes below require the performance page. */
+       hp100_page(PERFORMANCE);
+
+       while (lp->rxrcommit < MAX_RX_PDL) {
+               /*
+                  ** Attempt to get a buffer and build a Rx PDL.
+                */
+               ringptr = lp->rxrtail;
+               if (0 == hp100_build_rx_pdl(ringptr, dev)) {
+                       return; /* None available, return */
+               }
+
+               /* Hand this PDL over to the card */
+               /* Note: This needs performance page selected! */
+#ifdef HP100_DEBUG_BM
+               printk("hp100: %s: rxfill: Hand to card: pdl #%d @0x%x phys:0x%x, buffer: 0x%x\n",
+                                    dev->name, lp->rxrcommit, (u_int) ringptr->pdl,
+                                    (u_int) ringptr->pdl_paddr, (u_int) ringptr->pdl[3]);
+#endif
+
+               hp100_outl((u32) ringptr->pdl_paddr, RX_PDA);
+
+               lp->rxrcommit += 1;
+               lp->rxrtail = ringptr->next;
+       }
+}
+
+/*
+ * BM_shutdown - shutdown bus mastering and leave chip in reset state
+ *
+ * Masks/acks all interrupts, stops MAC Rx/Tx, then waits (bounded
+ * polling loops) for the MAC and the bus-master engine to go idle —
+ * with chip-specific sequences for ETR (Lassen) versus Shasta/Rainier
+ * — before putting the cascade chip back into reset.
+ */
+
+static void hp100_BM_shutdown(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+       unsigned long time;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4209, TRACE);
+       printk("hp100: %s: bm shutdown\n", dev->name);
+#endif
+
+       hp100_page(PERFORMANCE);
+       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
+       hp100_outw(0xffff, IRQ_STATUS); /* Ack all ints */
+
+       /* Ensure Interrupts are off */
+       hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
+
+       /* Disable all MAC activity */
+       hp100_page(MAC_CTRL);
+       hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);    /* stop rx/tx */
+
+       /* If cascade MMU is not already in reset */
+       if (0 != (hp100_inw(OPTION_LSW) & HP100_HW_RST)) {
+               /* Wait 1.3ms (10Mb max packet time) to ensure MAC is idle so
+                * MMU pointers will not be reset out from underneath
+                */
+               hp100_page(MAC_CTRL);
+               for (time = 0; time < 5000; time++) {
+                       if ((hp100_inb(MAC_CFG_1) & (HP100_TX_IDLE | HP100_RX_IDLE)) == (HP100_TX_IDLE | HP100_RX_IDLE))
+                               break;
+               }
+
+               /* Shutdown algorithm depends on the generation of Cascade */
+               if (lp->chip == HP100_CHIPID_LASSEN) {  /* ETR shutdown/reset */
+                       /* Disable Busmaster mode and wait for bit to go to zero. */
+                       hp100_page(HW_MAP);
+                       hp100_andb(~HP100_BM_MASTER, BM);
+                       /* 100 ms timeout */
+                       for (time = 0; time < 32000; time++) {
+                               if (0 == (hp100_inb(BM) & HP100_BM_MASTER))
+                                       break;
+                       }
+               } else {        /* Shasta or Rainier Shutdown/Reset */
+                       /* To ensure all bus master inloading activity has ceased,
+                        * wait for no Rx PDAs or no Rx packets on card.
+                        */
+                       hp100_page(PERFORMANCE);
+                       /* 100 ms timeout */
+                       for (time = 0; time < 10000; time++) {
+                               /* RX_PDL: PDLs not executed. */
+                               /* RX_PKT_CNT: RX'd packets on card. */
+                               if ((hp100_inb(RX_PDL) == 0) && (hp100_inb(RX_PKT_CNT) == 0))
+                                       break;
+                       }
+
+                       if (time >= 10000)
+                               printk("hp100: %s: BM shutdown error.\n", dev->name);
+
+                       /* To ensure all bus master outloading activity has ceased,
+                        * wait until the Tx PDA count goes to zero or no more Tx space
+                        * available in the Tx region of the card.
+                        */
+                       /* 100 ms timeout */
+                       for (time = 0; time < 10000; time++) {
+                               if ((0 == hp100_inb(TX_PKT_CNT)) &&
+                                   (0 != (hp100_inb(TX_MEM_FREE) & HP100_AUTO_COMPARE)))
+                                       break;
+                       }
+
+                       /* Disable Busmaster mode */
+                       hp100_page(HW_MAP);
+                       hp100_andb(~HP100_BM_MASTER, BM);
+               }       /* end of shutdown procedure for non-etr parts */
+
+               hp100_cascade_reset(dev, 1);
+       }
+       hp100_page(PERFORMANCE);
+       /* hp100_outw( HP100_BM_READ | HP100_BM_WRITE | HP100_RESET_HB, OPTION_LSW ); */
+       /* Busmaster mode should be shut down now. */
+}
+
+/*
+ * Ensure a LAN type has been sensed for this interface.
+ * Returns 0 on success, -EIO when no connection could be found.
+ */
+static int hp100_check_lan(struct net_device *dev)
+{
+       struct hp100_private *lp = netdev_priv(dev);
+
+       if (lp->lan_type >= 0)  /* LAN type already detected */
+               return 0;
+
+       hp100_stop_interface(dev);
+       lp->lan_type = hp100_sense_lan(dev);
+       if (lp->lan_type < 0) {
+               printk("hp100: %s: no connection found - check wire\n", dev->name);
+               hp100_start_interface(dev);     /* 10Mb/s RX packets maybe handled */
+               return -EIO;
+       }
+       if (lp->lan_type == HP100_LAN_100)
+               lp->hub_status = hp100_login_to_vg_hub(dev, 0); /* relogin */
+       hp100_start_interface(dev);
+       return 0;
+}
+
+/*
+ *  transmit functions
+ */
+
+/*
+ * tx function for busmaster mode: hand the packet (as a single
+ * fragment) to the card via a Tx PDL.  When no PDL is free, try to
+ * diagnose/recover the link (hub relogin, LAN re-sense, interface
+ * reset) and drop the packet.  Always returns NETDEV_TX_OK; dropped
+ * packets are freed here.
+ */
+static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb,
+                                      struct net_device *dev)
+{
+       unsigned long flags;
+       int i, ok_flag;
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+       hp100_ring_t *ringptr;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4210, TRACE);
+       printk("hp100: %s: start_xmit_bm\n", dev->name);
+#endif
+       if (skb->len <= 0)
+               goto drop;
+
+       /* Shasta needs the frame padded to the minimum Ethernet length
+        * in host memory; skb_padto frees the skb itself on failure. */
+       if (lp->chip == HP100_CHIPID_SHASTA && skb_padto(skb, ETH_ZLEN))
+               return NETDEV_TX_OK;
+
+       /* Get Tx ring tail pointer */
+       if (lp->txrtail->next == lp->txrhead) {
+               /* No memory. */
+#ifdef HP100_DEBUG
+               printk("hp100: %s: start_xmit_bm: No TX PDL available.\n", dev->name);
+#endif
+               /* not waited long enough since last tx? */
+               if (time_before(jiffies, dev_trans_start(dev) + HZ))
+                       goto drop;
+
+               if (hp100_check_lan(dev))
+                       goto drop;
+
+               if (lp->lan_type == HP100_LAN_100 && lp->hub_status < 0) {
+                       /* we have a 100Mb/s adapter but it isn't connected to hub */
+                       printk("hp100: %s: login to 100Mb/s hub retry\n", dev->name);
+                       hp100_stop_interface(dev);
+                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
+                       hp100_start_interface(dev);
+               } else {
+                       spin_lock_irqsave(&lp->lock, flags);
+                       hp100_ints_off();       /* Useful ? Jean II */
+                       i = hp100_sense_lan(dev);
+                       hp100_ints_on();
+                       spin_unlock_irqrestore(&lp->lock, flags);
+                       if (i == HP100_LAN_ERR)
+                               printk("hp100: %s: link down detected\n", dev->name);
+                       else if (lp->lan_type != i) {   /* cable change! */
+                               /* it's very hard - all network settings must be changed!!! */
+                               printk("hp100: %s: cable change 10Mb/s <-> 100Mb/s detected\n", dev->name);
+                               lp->lan_type = i;
+                               hp100_stop_interface(dev);
+                               if (lp->lan_type == HP100_LAN_100)
+                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
+                               hp100_start_interface(dev);
+                       } else {
+                               printk("hp100: %s: interface reset\n", dev->name);
+                               hp100_stop_interface(dev);
+                               if (lp->lan_type == HP100_LAN_100)
+                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
+                               hp100_start_interface(dev);
+                       }
+               }
+
+               goto drop;
+       }
+
+       /*
+        * we have to turn int's off before modifying this, otherwise
+        * a tx_pdl_cleanup could occur at the same time
+        */
+       spin_lock_irqsave(&lp->lock, flags);
+       ringptr = lp->txrtail;
+       lp->txrtail = ringptr->next;
+
+       /* Check whether packet has minimal packet size */
+       ok_flag = skb->len >= HP100_MIN_PACKET_SIZE;
+       i = ok_flag ? skb->len : HP100_MIN_PACKET_SIZE;
+
+       ringptr->skb = skb;
+       ringptr->pdl[0] = ((1 << 16) | i);      /* PDH: 1 Fragment & length */
+       if (lp->chip == HP100_CHIPID_SHASTA) {
+               /* TODO:Could someone who has the EISA card please check if this works? */
+               ringptr->pdl[2] = i;
+       } else {                /* Lassen */
+               /* In the PDL, don't use the padded size but the real packet size: */
+               ringptr->pdl[2] = skb->len;     /* 1st Frag: Length of frag */
+       }
+       /* Conversion to new PCI API : map skbuf data to PCI bus.
+        * Doc says it's OK for EISA as well - Jean II */
+       ringptr->pdl[1] = ((u32) pci_map_single(lp->pci_dev, skb->data, ringptr->pdl[2], PCI_DMA_TODEVICE));    /* 1st Frag: Adr. of data */
+
+       /* Hand this PDL to the card. */
+       hp100_outl(ringptr->pdl_paddr, TX_PDA_L);       /* Low Prio. Queue */
+
+       lp->txrcommit++;
+
+       /* Stats are accounted when the packet is queued to the card;
+        * completion/unmap happens later in hp100_clean_txring(). */
+       dev->stats.tx_packets++;
+       dev->stats.tx_bytes += skb->len;
+
+       spin_unlock_irqrestore(&lp->lock, flags);
+
+       return NETDEV_TX_OK;
+
+drop:
+       dev_kfree_skb(skb);
+       return NETDEV_TX_OK;
+}
+
+
+/* clean_txring checks if packets have been sent by the card by reading
+ * the TX_PDL register from the performance page and comparing it to the
+ * number of committed packets. It then frees the skb's of the packets that
+ * obviously have been sent to the network.
+ *
+ * Needs the PERFORMANCE page selected.
+ */
+static void hp100_clean_txring(struct net_device *dev)
+{
+       struct hp100_private *lp = netdev_priv(dev);
+       int ioaddr = dev->base_addr;
+       int donecount;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4211, TRACE);
+       printk("hp100: %s: clean txring\n", dev->name);
+#endif
+
+       /* How many PDLs have been transmitted? */
+       donecount = (lp->txrcommit) - hp100_inb(TX_PDL);
+
+#ifdef HP100_DEBUG
+       if (donecount > MAX_TX_PDL)
+               printk("hp100: %s: Warning: More PDLs transmitted than committed to card???\n", dev->name);
+#endif
+
+       /* Walk the head of the ring forward, releasing each completed PDL. */
+       for (; 0 != donecount; donecount--) {
+#ifdef HP100_DEBUG_BM
+               printk("hp100: %s: Free skb: data @0x%.8x txrcommit=0x%x TXPDL=0x%x, done=0x%x\n",
+                               dev->name, (u_int) lp->txrhead->skb->data,
+                               lp->txrcommit, hp100_inb(TX_PDL), donecount);
+#endif
+               /* Conversion to new PCI API : NOP */
+               /* Undo the pci_map_single() done in hp100_start_xmit_bm
+                * (pdl[1] = bus address, pdl[2] = mapped length). */
+               pci_unmap_single(lp->pci_dev, (dma_addr_t) lp->txrhead->pdl[1], lp->txrhead->pdl[2], PCI_DMA_TODEVICE);
+               dev_consume_skb_any(lp->txrhead->skb);
+               lp->txrhead->skb = NULL;
+               lp->txrhead = lp->txrhead->next;
+               lp->txrcommit--;
+       }
+}
+
+/* tx function for slave modes */
+static netdev_tx_t hp100_start_xmit(struct sk_buff *skb,
+                                   struct net_device *dev)
+{
+       unsigned long flags;
+       int i, ok_flag;
+       int ioaddr = dev->base_addr;
+       u_short val;
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4212, TRACE);
+       printk("hp100: %s: start_xmit\n", dev->name);
+#endif
+       if (skb->len <= 0)
+               goto drop;
+
+       if (hp100_check_lan(dev))
+               goto drop;
+
+       /* If there is not enough free memory on the card... */
+       i = hp100_inl(TX_MEM_FREE) & 0x7fffffff;
+       if (!(((i / 2) - 539) > (skb->len + 16) && (hp100_inb(TX_PKT_CNT) < 255))) {
+#ifdef HP100_DEBUG
+               printk("hp100: %s: start_xmit: tx free mem = 0x%x\n", dev->name, i);
+#endif
+               /* not waited long enough since last failed tx try? */
+               if (time_before(jiffies, dev_trans_start(dev) + HZ)) {
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: trans_start timing problem\n",
+                              dev->name);
+#endif
+                       goto drop;
+               }
+               if (lp->lan_type == HP100_LAN_100 && lp->hub_status < 0) {
+                       /* we have a 100Mb/s adapter but it isn't connected to hub */
+                       printk("hp100: %s: login to 100Mb/s hub retry\n", dev->name);
+                       hp100_stop_interface(dev);
+                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
+                       hp100_start_interface(dev);
+               } else {
+                       spin_lock_irqsave(&lp->lock, flags);
+                       hp100_ints_off();       /* Useful ? Jean II */
+                       i = hp100_sense_lan(dev);
+                       hp100_ints_on();
+                       spin_unlock_irqrestore(&lp->lock, flags);
+                       if (i == HP100_LAN_ERR)
+                               printk("hp100: %s: link down detected\n", dev->name);
+                       else if (lp->lan_type != i) {   /* cable change! */
+                               /* it's very hard - all network setting must be changed!!! */
+                               printk("hp100: %s: cable change 10Mb/s <-> 100Mb/s detected\n", dev->name);
+                               lp->lan_type = i;
+                               hp100_stop_interface(dev);
+                               if (lp->lan_type == HP100_LAN_100)
+                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
+                               hp100_start_interface(dev);
+                       } else {
+                               printk("hp100: %s: interface reset\n", dev->name);
+                               hp100_stop_interface(dev);
+                               if (lp->lan_type == HP100_LAN_100)
+                                       lp->hub_status = hp100_login_to_vg_hub(dev, 0);
+                               hp100_start_interface(dev);
+                               mdelay(1);
+                       }
+               }
+               goto drop;
+       }
+
+       for (i = 0; i < 6000 && (hp100_inb(OPTION_MSW) & HP100_TX_CMD); i++) {
+#ifdef HP100_DEBUG_TX
+               printk("hp100: %s: start_xmit: busy\n", dev->name);
+#endif
+       }
+
+       spin_lock_irqsave(&lp->lock, flags);
+       hp100_ints_off();
+       val = hp100_inw(IRQ_STATUS);
+       /* Ack / clear the TX_COMPLETE interrupt - it is set when the packet
+        * currently being transmitted on the wire has completed. */
+       hp100_outw(HP100_TX_COMPLETE, IRQ_STATUS);
+#ifdef HP100_DEBUG_TX
+       printk("hp100: %s: start_xmit: irq_status=0x%.4x, irqmask=0x%.4x, len=%d\n",
+                       dev->name, val, hp100_inw(IRQ_MASK), (int) skb->len);
+#endif
+
+       ok_flag = skb->len >= HP100_MIN_PACKET_SIZE;
+       i = ok_flag ? skb->len : HP100_MIN_PACKET_SIZE;
+
+       hp100_outw(i, DATA32);  /* tell card the total packet length */
+       hp100_outw(i, FRAGMENT_LEN);    /* and first/only fragment length    */
+
+       if (lp->mode == 2) {    /* memory mapped */
+               /* Note: The J2585B needs alignment to 32bits here!  */
+               memcpy_toio(lp->mem_ptr_virt, skb->data, (skb->len + 3) & ~3);
+               if (!ok_flag)
+                       memset_io(lp->mem_ptr_virt, 0, HP100_MIN_PACKET_SIZE - skb->len);
+       } else {                /* programmed i/o */
+               outsl(ioaddr + HP100_REG_DATA32, skb->data,
+                     (skb->len + 3) >> 2);
+               if (!ok_flag)
+                       for (i = (skb->len + 3) & ~3; i < HP100_MIN_PACKET_SIZE; i += 4)
+                               hp100_outl(0, DATA32);
+       }
+
+       hp100_outb(HP100_TX_CMD | HP100_SET_LB, OPTION_MSW);    /* send packet */
+
+       dev->stats.tx_packets++;
+       dev->stats.tx_bytes += skb->len;
+       hp100_ints_on();
+       spin_unlock_irqrestore(&lp->lock, flags);
+
+       dev_consume_skb_any(skb);
+
+#ifdef HP100_DEBUG_TX
+       printk("hp100: %s: start_xmit: end\n", dev->name);
+#endif
+
+       return NETDEV_TX_OK;
+
+drop:
+       dev_kfree_skb(skb);
+       return NETDEV_TX_OK;
+
+}
+
+
+/*
+ * Receive Function (Non-Busmaster mode)
+ * Called when a "Receive Packet" interrupt occurs, i.e. the receive
+ * packet counter is non-zero.
+ * For non-busmaster, this function does the whole work of transferring
+ * the packet to the host memory and then up to higher layers via skb
+ * and netif_rx.
+ */
+
+static void hp100_rx(struct net_device *dev)
+{
+       int packets, pkt_len;
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+       u_int header;
+       struct sk_buff *skb;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4213, TRACE);
+       printk("hp100: %s: rx\n", dev->name);
+#endif
+
+       /* First get indication of received lan packet */
+       /* RX_PKT_CNT indicates the number of packets which have been fully */
+       /* received onto the card but have not been fully transferred off the card */
+       packets = hp100_inb(RX_PKT_CNT);
+#ifdef HP100_DEBUG_RX
+       if (packets > 1)
+               printk("hp100: %s: rx: waiting packets = %d\n", dev->name, packets);
+#endif
+
+       while (packets-- > 0) {
+               /* If ADV_NXT_PKT is still set, we have to wait until the card has */
+               /* really advanced to the next packet. */
+               /* Note: pkt_len is reused here as a bounded busy-wait counter. */
+               for (pkt_len = 0; pkt_len < 6000 && (hp100_inb(OPTION_MSW) & HP100_ADV_NXT_PKT); pkt_len++) {
+#ifdef HP100_DEBUG_RX
+                       printk ("hp100: %s: rx: busy, remaining packets = %d\n", dev->name, packets);
+#endif
+               }
+
+               /* First we get the header, which contains information about the */
+               /* actual length of the received packet. */
+               if (lp->mode == 2) {    /* memory mapped mode */
+                       header = readl(lp->mem_ptr_virt);
+               } else          /* programmed i/o */
+                       header = hp100_inl(DATA32);
+
+               /* Round the payload length up to a 32-bit boundary, since the */
+               /* data is always transferred from the card in 32-bit units. */
+               pkt_len = ((header & HP100_PKT_LEN_MASK) + 3) & ~3;
+
+#ifdef HP100_DEBUG_RX
+               printk("hp100: %s: rx: new packet - length=%d, errors=0x%x, dest=0x%x\n",
+                                    dev->name, header & HP100_PKT_LEN_MASK,
+                                    (header >> 16) & 0xfff8, (header >> 16) & 7);
+#endif
+
+               /* Now we allocate the skb and transfer the data into it. */
+               skb = netdev_alloc_skb(dev, pkt_len + 2);
+               if (skb == NULL) {      /* Not enough memory->drop packet */
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: rx: couldn't allocate a sk_buff of size %d\n",
+                                            dev->name, pkt_len);
+#endif
+                       dev->stats.rx_dropped++;
+               } else {        /* skb successfully allocated */
+
+                       u_char *ptr;
+
+                       /* 2-byte reserve so the IP header lands 16-byte aligned. */
+                       skb_reserve(skb,2);
+
+                       /* ptr to start of the sk_buff data area */
+                       skb_put(skb, pkt_len);
+                       ptr = skb->data;
+
+                       /* Now transfer the data from the card into that area */
+                       if (lp->mode == 2)
+                               memcpy_fromio(ptr, lp->mem_ptr_virt,pkt_len);
+                       else    /* io mapped */
+                               insl(ioaddr + HP100_REG_DATA32, ptr, pkt_len >> 2);
+
+                       skb->protocol = eth_type_trans(skb, dev);
+
+#ifdef HP100_DEBUG_RX
+                       printk("hp100: %s: rx: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
+                                       dev->name, ptr[0], ptr[1], ptr[2], ptr[3],
+                                       ptr[4], ptr[5], ptr[6], ptr[7], ptr[8],
+                                       ptr[9], ptr[10], ptr[11]);
+#endif
+                       netif_rx(skb);
+                       dev->stats.rx_packets++;
+                       dev->stats.rx_bytes += pkt_len;
+               }
+
+               /* Indicate the card that we have got the packet */
+               hp100_outb(HP100_ADV_NXT_PKT | HP100_SET_LB, OPTION_MSW);
+
+               /* Count multicast receptions (hashed or exact multicast match). */
+               switch (header & 0x00070000) {
+               case (HP100_MULTI_ADDR_HASH << 16):
+               case (HP100_MULTI_ADDR_NO_HASH << 16):
+                       dev->stats.multicast++;
+                       break;
+               }
+       }                       /* end of while(there are packets) loop */
+#ifdef HP100_DEBUG_RX
+       printk("hp100_rx: %s: end\n", dev->name);
+#endif
+}
+
+/*
+ * Receive Function for Busmaster Mode
+ */
+/* Drain every PDL the busmaster engine has completed: unmap the DMA buffer,
+ * hand the skb up the stack, then rebuild and re-queue a fresh PDL at the
+ * ring tail so the committed-PDL count stays constant. */
+static void hp100_rx_bm(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+       hp100_ring_t *ptr;
+       u_int header;
+       int pkt_len;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4214, TRACE);
+       printk("hp100: %s: rx_bm\n", dev->name);
+#endif
+
+#ifdef HP100_DEBUG
+       /* Sanity checks: we must have committed PDLs, and the card cannot
+        * have completed more PDLs than we ever handed to it. */
+       if (0 == lp->rxrcommit) {
+               printk("hp100: %s: rx_bm called although no PDLs were committed to adapter?\n", dev->name);
+               return;
+       } else
+               /* RX_PKT_CNT states how many PDLs are currently formatted and available to
+                * the cards BM engine */
+       if ((hp100_inw(RX_PKT_CNT) & 0x00ff) >= lp->rxrcommit) {
+               printk("hp100: %s: More packets received than committed? RX_PKT_CNT=0x%x, commit=0x%x\n",
+                                    dev->name, hp100_inw(RX_PKT_CNT) & 0x00ff,
+                                    lp->rxrcommit);
+               return;
+       }
+#endif
+
+       /* RX_PDL is the number of PDLs still outstanding on the card; the
+        * difference to rxrcommit is how many completed packets await us. */
+       while ((lp->rxrcommit > hp100_inb(RX_PDL))) {
+               /*
+                * The packet was received into the pdl pointed to by lp->rxrhead
+                * (the oldest pdl in the ring).
+                */
+
+               /* First we get the header, which contains information about the */
+               /* actual length of the received packet. */
+
+               ptr = lp->rxrhead;
+
+               /* The card writes the status/length header just before the PDL. */
+               header = *(ptr->pdl - 1);
+               pkt_len = (header & HP100_PKT_LEN_MASK);
+
+               /* Conversion to new PCI API : NOP */
+               /* pdl[3] holds the DMA address of the data buffer (see debug
+                * printk below, which dumps it as "Data@"). */
+               pci_unmap_single(lp->pci_dev, (dma_addr_t) ptr->pdl[3], MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE);
+
+#ifdef HP100_DEBUG_BM
+               printk("hp100: %s: rx_bm: header@0x%x=0x%x length=%d, errors=0x%x, dest=0x%x\n",
+                               dev->name, (u_int) (ptr->pdl - 1), (u_int) header,
+                               pkt_len, (header >> 16) & 0xfff8, (header >> 16) & 7);
+               printk("hp100: %s: RX_PDL_COUNT:0x%x TX_PDL_COUNT:0x%x, RX_PKT_CNT=0x%x PDH=0x%x, Data@0x%x len=0x%x\n",
+                               dev->name, hp100_inb(RX_PDL), hp100_inb(TX_PDL),
+                               hp100_inb(RX_PKT_CNT), (u_int) * (ptr->pdl),
+                               (u_int) * (ptr->pdl + 3), (u_int) * (ptr->pdl + 4));
+#endif
+
+               if ((pkt_len >= MIN_ETHER_SIZE) &&
+                   (pkt_len <= MAX_ETHER_SIZE)) {
+                       if (ptr->skb == NULL) {
+                               printk("hp100: %s: rx_bm: skb null\n", dev->name);
+                               /* can happen if we only allocated room for the pdh due to memory shortage. */
+                               dev->stats.rx_dropped++;
+                       } else {
+                               skb_trim(ptr->skb, pkt_len);    /* Shorten it */
+                               ptr->skb->protocol =
+                                   eth_type_trans(ptr->skb, dev);
+
+                               netif_rx(ptr->skb);     /* Up and away... */
+
+                               dev->stats.rx_packets++;
+                               dev->stats.rx_bytes += pkt_len;
+                       }
+
+                       /* Count multicast receptions (hashed or exact match). */
+                       switch (header & 0x00070000) {
+                       case (HP100_MULTI_ADDR_HASH << 16):
+                       case (HP100_MULTI_ADDR_NO_HASH << 16):
+                               dev->stats.multicast++;
+                               break;
+                       }
+               } else {
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: rx_bm: Received bad packet (length=%d)\n", dev->name, pkt_len);
+#endif
+                       if (ptr->skb != NULL)
+                               dev_kfree_skb_any(ptr->skb);
+                       dev->stats.rx_errors++;
+               }
+
+               /* Consume the head PDL... */
+               lp->rxrhead = lp->rxrhead->next;
+
+               /* Allocate a new rx PDL (so lp->rxrcommit stays the same) */
+               if (0 == hp100_build_rx_pdl(lp->rxrtail, dev)) {
+                       /* No space for skb, header can still be received. */
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: rx_bm: No space for new PDL.\n", dev->name);
+#endif
+                       return;
+               } else {        /* successfully allocated new PDL - put it in ringlist at tail. */
+                       hp100_outl((u32) lp->rxrtail->pdl_paddr, RX_PDA);
+                       lp->rxrtail = lp->rxrtail->next;
+               }
+
+       }
+}
+
+/*
+ *  statistics
+ */
+/* Return the device statistics, first folding in the hardware's
+ * read-and-clear counters via hp100_update_stats().  Interrupts from the
+ * card are masked around the register reads because hp100_update_stats()
+ * switches register pages. */
+static struct net_device_stats *hp100_get_stats(struct net_device *dev)
+{
+       unsigned long flags;
+       int ioaddr = dev->base_addr;    /* used implicitly by the hp100_* macros */
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4215, TRACE);
+#endif
+
+       spin_lock_irqsave(&lp->lock, flags);
+       hp100_ints_off();       /* Useful ? Jean II */
+       hp100_update_stats(dev);
+       hp100_ints_on();
+       spin_unlock_irqrestore(&lp->lock, flags);
+       return &(dev->stats);
+}
+
+/* Fold the card's hardware error counters into dev->stats.  The counters
+ * clear when read, so each value is accumulated exactly once.  Caller must
+ * hold lp->lock / have card interrupts masked (see hp100_get_stats). */
+static void hp100_update_stats(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       u_short val;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4216, TRACE);
+       printk("hp100: %s: update-stats\n", dev->name);
+#endif
+
+       /* Note: Statistics counters clear when read. */
+       hp100_page(MAC_CTRL);
+       val = hp100_inw(DROPPED) & 0x0fff;      /* dropped-frame counter is 12 bits */
+       dev->stats.rx_errors += val;
+       dev->stats.rx_over_errors += val;
+       val = hp100_inb(CRC);
+       dev->stats.rx_errors += val;
+       dev->stats.rx_crc_errors += val;
+       val = hp100_inb(ABORT);
+       dev->stats.tx_errors += val;
+       dev->stats.tx_aborted_errors += val;
+       /* Restore the register page expected by the rest of the driver. */
+       hp100_page(PERFORMANCE);
+}
+
+static void hp100_misc_interrupt(struct net_device *dev)
+{
+#ifdef HP100_DEBUG_B
+       int ioaddr = dev->base_addr;
+#endif
+
+#ifdef HP100_DEBUG_B
+       int ioaddr = dev->base_addr;
+       hp100_outw(0x4216, TRACE);
+       printk("hp100: %s: misc_interrupt\n", dev->name);
+#endif
+
+       /* Note: Statistics counters clear when read. */
+       dev->stats.rx_errors++;
+       dev->stats.tx_errors++;
+}
+
+/* Discard the card's pending statistics: the hardware counters clear on
+ * read, so reading DROPPED/CRC/ABORT and ignoring the values zeroes them. */
+static void hp100_clear_stats(struct hp100_private *lp, int ioaddr)
+{
+       unsigned long flags;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4217, TRACE);
+       /* No struct net_device in scope here; the old message dereferenced
+        * the nonexistent "dev" and broke the HP100_DEBUG_B build. */
+       printk("hp100: clear_stats\n");
+#endif
+
+       spin_lock_irqsave(&lp->lock, flags);
+       hp100_page(MAC_CTRL);   /* get all statistics bytes */
+       hp100_inw(DROPPED);
+       hp100_inb(CRC);
+       hp100_inb(ABORT);
+       hp100_page(PERFORMANCE);
+       spin_unlock_irqrestore(&lp->lock, flags);
+}
+
+
+/*
+ *  multicast setup
+ */
+
+/*
+ *  Set or clear the multicast filter for this adapter.
+ */
+
+/* Program the MAC receive mode (promiscuous / multicast / normal) and the
+ * 64-bit multicast hash filter from dev->flags and the mc list.  Rx/Tx are
+ * stopped while reconfiguring; on 100VG a MAC mode change forces a hub
+ * relogin. */
+static void hp100_set_multicast_list(struct net_device *dev)
+{
+       unsigned long flags;
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4218, TRACE);
+       printk("hp100: %s: set_mc_list\n", dev->name);
+#endif
+
+       spin_lock_irqsave(&lp->lock, flags);
+       hp100_ints_off();
+       hp100_page(MAC_CTRL);
+       hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);    /* stop rx/tx */
+
+       if (dev->flags & IFF_PROMISC) {
+               lp->mac2_mode = HP100_MAC2MODE6;        /* promiscuous mode = get all good */
+               lp->mac1_mode = HP100_MAC1MODE6;        /* packets on the net */
+               memset(&lp->hash_bytes, 0xff, 8);
+       } else if (!netdev_mc_empty(dev) || (dev->flags & IFF_ALLMULTI)) {
+               lp->mac2_mode = HP100_MAC2MODE5;        /* multicast mode = get packets for */
+               lp->mac1_mode = HP100_MAC1MODE5;        /* me, broadcasts and all multicasts */
+#ifdef HP100_MULTICAST_FILTER  /* doesn't work!!! */
+               if (dev->flags & IFF_ALLMULTI) {
+                       /* set hash filter to receive all multicast packets */
+                       memset(&lp->hash_bytes, 0xff, 8);
+               } else {
+                       int i, idx;
+                       u_char *addrs;
+                       struct netdev_hw_addr *ha;
+
+                       memset(&lp->hash_bytes, 0x00, 8);
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: computing hash filter - mc_count = %i\n",
+                              dev->name, netdev_mc_count(dev));
+#endif
+                       netdev_for_each_mc_addr(ha, dev) {
+                               addrs = ha->addr;
+#ifdef HP100_DEBUG
+                               printk("hp100: %s: multicast = %pM, ",
+                                            dev->name, addrs);
+#endif
+                               /* NOTE(review): this printk is not guarded by
+                                * HP100_DEBUG like its neighbours; the whole
+                                * block is dead code ("doesn't work") so it is
+                                * left as-is. */
+                               for (i = idx = 0; i < 6; i++) {
+                                       idx ^= *addrs++ & 0x3f;
+                                       printk(":%02x:", idx);
+                               }
+#ifdef HP100_DEBUG
+                               printk("idx = %i\n", idx);
+#endif
+                               lp->hash_bytes[idx >> 3] |= (1 << (idx & 7));
+                       }
+               }
+#else
+               /* Hash filtering disabled: accept all multicast. */
+               memset(&lp->hash_bytes, 0xff, 8);
+#endif
+       } else {
+               lp->mac2_mode = HP100_MAC2MODE3;        /* normal mode = get packets for me */
+               lp->mac1_mode = HP100_MAC1MODE3;        /* and broadcasts */
+               memset(&lp->hash_bytes, 0x00, 8);
+       }
+
+       /* Only rewrite the MAC mode registers when the mode actually changed,
+        * since reconfiguring triggers a 100VG hub relogin. */
+       if (((hp100_inb(MAC_CFG_1) & 0x0f) != lp->mac1_mode) ||
+           (hp100_inb(MAC_CFG_2) != lp->mac2_mode)) {
+               int i;
+
+               hp100_outb(lp->mac2_mode, MAC_CFG_2);
+               hp100_andb(HP100_MAC1MODEMASK, MAC_CFG_1);      /* clear mac1 mode bits */
+               hp100_orb(lp->mac1_mode, MAC_CFG_1);    /* and set the new mode */
+
+               hp100_page(MAC_ADDRESS);
+               for (i = 0; i < 8; i++)
+                       hp100_outb(lp->hash_bytes[i], HASH_BYTE0 + i);
+#ifdef HP100_DEBUG
+               printk("hp100: %s: mac1 = 0x%x, mac2 = 0x%x, multicast hash = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
+                                    dev->name, lp->mac1_mode, lp->mac2_mode,
+                                    lp->hash_bytes[0], lp->hash_bytes[1],
+                                    lp->hash_bytes[2], lp->hash_bytes[3],
+                                    lp->hash_bytes[4], lp->hash_bytes[5],
+                                    lp->hash_bytes[6], lp->hash_bytes[7]);
+#endif
+
+               if (lp->lan_type == HP100_LAN_100) {
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: 100VG MAC settings have changed - relogin.\n", dev->name);
+#endif
+                       lp->hub_status = hp100_login_to_vg_hub(dev, 1); /* force a relogin to the hub */
+               }
+       } else {
+               /* Mode unchanged: still refresh the hash bytes if they differ
+                * from what the card currently holds. */
+               int i;
+               u_char old_hash_bytes[8];
+
+               hp100_page(MAC_ADDRESS);
+               for (i = 0; i < 8; i++)
+                       old_hash_bytes[i] = hp100_inb(HASH_BYTE0 + i);
+               if (memcmp(old_hash_bytes, &lp->hash_bytes, 8)) {
+                       for (i = 0; i < 8; i++)
+                               hp100_outb(lp->hash_bytes[i], HASH_BYTE0 + i);
+#ifdef HP100_DEBUG
+                       printk("hp100: %s: multicast hash = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
+                                       dev->name, lp->hash_bytes[0],
+                                       lp->hash_bytes[1], lp->hash_bytes[2],
+                                       lp->hash_bytes[3], lp->hash_bytes[4],
+                                       lp->hash_bytes[5], lp->hash_bytes[6],
+                                       lp->hash_bytes[7]);
+#endif
+
+                       if (lp->lan_type == HP100_LAN_100) {
+#ifdef HP100_DEBUG
+                               printk("hp100: %s: 100VG MAC settings have changed - relogin.\n", dev->name);
+#endif
+                               lp->hub_status = hp100_login_to_vg_hub(dev, 1); /* force a relogin to the hub */
+                       }
+               }
+       }
+
+       /* Re-enable rx/tx that we stopped at the top. */
+       hp100_page(MAC_CTRL);
+       hp100_orb(HP100_RX_EN | HP100_RX_IDLE | /* enable rx */
+                 HP100_TX_EN | HP100_TX_IDLE, MAC_CFG_1);      /* enable tx */
+
+       hp100_page(PERFORMANCE);
+       hp100_ints_on();
+       spin_unlock_irqrestore(&lp->lock, flags);
+}
+
+/*
+ *  hardware interrupt handling
+ */
+
+/* Main interrupt service routine.  Reads IRQ_STATUS once, dispatches each
+ * cause (rx completion, errors, PDA/tx housekeeping for busmaster mode),
+ * and acks the status bits.  lp->mode == 1 means busmaster, 2 means memory
+ * mapped, otherwise programmed I/O. */
+static irqreturn_t hp100_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = (struct net_device *) dev_id;
+       struct hp100_private *lp = netdev_priv(dev);
+
+       int ioaddr;
+       u_int val;
+
+       if (dev == NULL)
+               return IRQ_NONE;
+       ioaddr = dev->base_addr;
+
+       spin_lock(&lp->lock);
+
+       hp100_ints_off();
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4219, TRACE);
+#endif
+
+       /*  hp100_page( PERFORMANCE ); */
+       val = hp100_inw(IRQ_STATUS);
+#ifdef HP100_DEBUG_IRQ
+       printk("hp100: %s: mode=%x,IRQ_STAT=0x%.4x,RXPKTCNT=0x%.2x RXPDL=0x%.2x TXPKTCNT=0x%.2x TXPDL=0x%.2x\n",
+                            dev->name, lp->mode, (u_int) val, hp100_inb(RX_PKT_CNT),
+                            hp100_inb(RX_PDL), hp100_inb(TX_PKT_CNT), hp100_inb(TX_PDL));
+#endif
+
+       if (val == 0) {         /* might be a shared interrupt */
+               /* NOTE(review): ints are re-enabled after dropping the lock,
+                * mirroring the ordering at the end of this function -
+                * confirm this is the intended sequence. */
+               spin_unlock(&lp->lock);
+               hp100_ints_on();
+               return IRQ_NONE;
+       }
+       /* We're only interested in those interrupts we really enabled. */
+       /* val &= hp100_inw( IRQ_MASK ); */
+
+       /*
+        * RX_PDL_FILL_COMPL is set whenever a RX_PDL has been executed. A RX_PDL
+        * is considered executed whenever the RX_PDL data structure is no longer
+        * needed.
+        */
+       if (val & HP100_RX_PDL_FILL_COMPL) {
+               if (lp->mode == 1)
+                       hp100_rx_bm(dev);
+               else {
+                       printk("hp100: %s: rx_pdl_fill_compl interrupt although not busmaster?\n", dev->name);
+               }
+       }
+
+       /*
+        * The RX_PACKET interrupt is set, when the receive packet counter is
+        * non zero. We use this interrupt for receiving in slave mode. In
+        * busmaster mode, we use it to make sure we did not miss any rx_pdl_fill
+        * interrupts. If rx_pdl_fill_compl is not set and rx_packet is set, then
+        * we somehow have missed a rx_pdl_fill_compl interrupt.
+        */
+
+       if (val & HP100_RX_PACKET) {    /* Receive Packet Counter is non zero */
+               if (lp->mode != 1)      /* non busmaster */
+                       hp100_rx(dev);
+               else if (!(val & HP100_RX_PDL_FILL_COMPL)) {
+                       /* Shouldn't happen - maybe we missed a RX_PDL_FILL Interrupt?  */
+                       hp100_rx_bm(dev);
+               }
+       }
+
+       /*
+        * Ack. that we have noticed the interrupt and thereby allow next one.
+        * Note that this is now done after the slave rx function, since first
+        * acknowledging and then setting ADV_NXT_PKT caused an extra interrupt
+        * on the J2573.
+        */
+       hp100_outw(val, IRQ_STATUS);
+
+       /*
+        * RX_ERROR is set when a packet is dropped due to no memory resources on
+        * the card or when a RCV_ERR occurs.
+        * TX_ERROR is set when a TX_ABORT condition occurs in the MAC->exists
+        * only in the 802.3 MAC and happens when 16 collisions occur during a TX
+        */
+       if (val & (HP100_TX_ERROR | HP100_RX_ERROR)) {
+#ifdef HP100_DEBUG_IRQ
+               printk("hp100: %s: TX/RX Error IRQ\n", dev->name);
+#endif
+               hp100_update_stats(dev);
+               if (lp->mode == 1) {
+                       hp100_rxfill(dev);
+                       hp100_clean_txring(dev);
+               }
+       }
+
+       /*
+        * RX_PDA_ZERO is set when the PDA count goes from non-zero to zero.
+        */
+       if ((lp->mode == 1) && (val & (HP100_RX_PDA_ZERO)))
+               hp100_rxfill(dev);
+
+       /*
+        * HP100_TX_COMPLETE interrupt occurs when packet transmitted on wire
+        * is completed
+        */
+       if ((lp->mode == 1) && (val & (HP100_TX_COMPLETE)))
+               hp100_clean_txring(dev);
+
+       /*
+        * MISC_ERROR is set when either the LAN link goes down or a detected
+        * bus error occurs.
+        */
+       if (val & HP100_MISC_ERROR) {   /* New for J2585B */
+#ifdef HP100_DEBUG_IRQ
+               printk
+                   ("hp100: %s: Misc. Error Interrupt - Check cabling.\n",
+                    dev->name);
+#endif
+               if (lp->mode == 1) {
+                       hp100_clean_txring(dev);
+                       hp100_rxfill(dev);
+               }
+               hp100_misc_interrupt(dev);
+       }
+
+       spin_unlock(&lp->lock);
+       hp100_ints_on();
+       return IRQ_HANDLED;
+}
+
+/*
+ *  some misc functions
+ */
+
+/* Bring the adapter out of its quiescent state: un-tristate the IRQ line,
+ * (re)enable busmaster DMA or memory mapping depending on lp->mode, unmask
+ * the interrupts this driver handles, then enable the MAC via
+ * hp100_set_multicast_list(). */
+static void hp100_start_interface(struct net_device *dev)
+{
+       unsigned long flags;
+       int ioaddr = dev->base_addr;
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4220, TRACE);
+       printk("hp100: %s: hp100_start_interface\n", dev->name);
+#endif
+
+       spin_lock_irqsave(&lp->lock, flags);
+
+       /* Ensure the adapter does not want to request an interrupt when */
+       /* enabling the IRQ line to be active on the bus (i.e. not tri-stated) */
+       hp100_page(PERFORMANCE);
+       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
+       hp100_outw(0xffff, IRQ_STATUS); /* ack all IRQs */
+       hp100_outw(HP100_FAKE_INT | HP100_INT_EN | HP100_RESET_LB,
+                  OPTION_LSW);
+       /* Un Tri-state int. TODO: Check if shared interrupts can be realised? */
+       hp100_outw(HP100_TRI_INT | HP100_RESET_HB, OPTION_LSW);
+
+       if (lp->mode == 1) {
+               /* Make sure BM bit is set... */
+               hp100_page(HW_MAP);
+               hp100_orb(HP100_BM_MASTER, BM);
+               hp100_rxfill(dev);      /* pre-commit rx PDLs to the card */
+       } else if (lp->mode == 2) {
+               /* Enable memory mapping. Note: Don't do this when busmaster. */
+               hp100_outw(HP100_MMAP_DIS | HP100_RESET_HB, OPTION_LSW);
+       }
+
+       hp100_page(PERFORMANCE);
+       hp100_outw(0xfefe, IRQ_MASK);   /* mask off all ints */
+       hp100_outw(0xffff, IRQ_STATUS); /* ack IRQ */
+
+       /* enable a few interrupts: */
+       if (lp->mode == 1) {    /* busmaster mode */
+               hp100_outw(HP100_RX_PDL_FILL_COMPL |
+                          HP100_RX_PDA_ZERO | HP100_RX_ERROR |
+                          /* HP100_RX_PACKET    | */
+                          /* HP100_RX_EARLY_INT |  */ HP100_SET_HB |
+                          /* HP100_TX_PDA_ZERO  |  */
+                          HP100_TX_COMPLETE |
+                          /* HP100_MISC_ERROR   |  */
+                          HP100_TX_ERROR | HP100_SET_LB, IRQ_MASK);
+       } else {
+               hp100_outw(HP100_RX_PACKET |
+                          HP100_RX_ERROR | HP100_SET_HB |
+                          HP100_TX_ERROR | HP100_SET_LB, IRQ_MASK);
+       }
+
+       /* Note : before hp100_set_multicast_list(), because it will play with
+        * spinlock itself... Jean II */
+       spin_unlock_irqrestore(&lp->lock, flags);
+
+       /* Enable MAC Tx and RX, set MAC modes, ... */
+       hp100_set_multicast_list(dev);
+}
+
+/* Quiesce the adapter: shut down busmaster DMA if active, otherwise
+ * tri-state the IRQ line, disable rx/tx and busy-wait (bounded) until the
+ * MAC reports both rx and tx idle. */
+static void hp100_stop_interface(struct net_device *dev)
+{
+       struct hp100_private *lp = netdev_priv(dev);
+       int ioaddr = dev->base_addr;
+       u_int val;
+
+#ifdef HP100_DEBUG_B
+       printk("hp100: %s: hp100_stop_interface\n", dev->name);
+       hp100_outw(0x4221, TRACE);
+#endif
+
+       if (lp->mode == 1)
+               hp100_BM_shutdown(dev);
+       else {
+               /* Note: MMAP_DIS will be reenabled by start_interface */
+               hp100_outw(HP100_INT_EN | HP100_RESET_LB |
+                          HP100_TRI_INT | HP100_MMAP_DIS | HP100_SET_HB,
+                          OPTION_LSW);
+               val = hp100_inw(OPTION_LSW);
+
+               hp100_page(MAC_CTRL);
+               hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);
+
+               if (!(val & HP100_HW_RST))
+                       return; /* If reset, imm. return ... */
+               /* ... else: busy wait until idle */
+               /* Bounded poll (6000 reads) for TX_IDLE and RX_IDLE both set. */
+               for (val = 0; val < 6000; val++)
+                       if ((hp100_inb(MAC_CFG_1) & (HP100_TX_IDLE | HP100_RX_IDLE)) == (HP100_TX_IDLE | HP100_RX_IDLE)) {
+                               hp100_page(PERFORMANCE);
+                               return;
+                       }
+               printk("hp100: %s: hp100_stop_interface - timeout\n", dev->name);
+               hp100_page(PERFORMANCE);
+       }
+}
+
+/* Trigger a reload of the adapter configuration from EEPROM and wait
+ * (bounded poll) for the card to signal completion.  probe_ioaddr, when
+ * non-zero, overrides dev->base_addr so this can run before registration. */
+static void hp100_load_eeprom(struct net_device *dev, u_short probe_ioaddr)
+{
+       int attempt;
+       int ioaddr = probe_ioaddr > 0 ? probe_ioaddr : dev->base_addr;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4222, TRACE);
+#endif
+
+       /* Pulse the EEPROM-load bit: clear, then set it again. */
+       hp100_page(EEPROM_CTRL);
+       hp100_andw(~HP100_EEPROM_LOAD, EEPROM_CTRL);
+       hp100_orw(HP100_EEPROM_LOAD, EEPROM_CTRL);
+
+       /* Poll until the card clears EE_LOAD, giving up after 10000 reads. */
+       attempt = 0;
+       while (attempt++ < 10000) {
+               if (!(hp100_inb(OPTION_MSW) & HP100_EE_LOAD))
+                       return;
+       }
+       printk("hp100: %s: hp100_load_eeprom - timeout\n", dev->name);
+}
+
+/*  Sense connection status.
+ *  return values: LAN_10  - Connected to 10Mbit/s network
+ *                 LAN_100 - Connected to 100Mbit/s network
+ *                 LAN_ERR - not connected or 100Mbit/s Hub down
+ */
+static int hp100_sense_lan(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       u_short val_VG, val_10;
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4223, TRACE);
+#endif
+
+       /* Sample both the 10Mb and the 100VG link status registers. */
+       hp100_page(MAC_CTRL);
+       val_10 = hp100_inb(10_LAN_CFG_1);
+       val_VG = hp100_inb(VG_LAN_CFG_1);
+       hp100_page(PERFORMANCE);
+#ifdef HP100_DEBUG
+       printk("hp100: %s: sense_lan: val_VG = 0x%04x, val_10 = 0x%04x\n",
+              dev->name, val_VG, val_10);
+#endif
+
+       if (val_10 & HP100_LINK_BEAT_ST)        /* 10Mb connection is active */
+               return HP100_LAN_10;
+
+       if (val_10 & HP100_AUI_ST) {    /* have we BNC or AUI onboard? */
+               /*
+                * This can be overriden by dos utility, so if this has no effect,
+                * perhaps you need to download that utility from HP and set card
+                * back to "auto detect".
+                */
+               val_10 |= HP100_AUI_SEL | HP100_LOW_TH;
+               hp100_page(MAC_CTRL);
+               hp100_outb(val_10, 10_LAN_CFG_1);
+               hp100_page(PERFORMANCE);
+               return HP100_LAN_COAX;
+       }
+
+       /* Those cards don't have a 100 Mbit connector.
+        * Fixed: PCI_VENDOR_ID is the config-space register offset (0x00),
+        * not HP's vendor number - the intended constant is PCI_VENDOR_ID_HP,
+        * otherwise this comparison could never match. */
+       if ( !strcmp(lp->id, "HWP1920")  ||
+            (lp->pci_dev &&
+             lp->pci_dev->vendor == PCI_VENDOR_ID_HP &&
+             (lp->pci_dev->device == PCI_DEVICE_ID_HP_J2970A ||
+              lp->pci_dev->device == PCI_DEVICE_ID_HP_J2973A)))
+               return HP100_LAN_ERR;
+
+       if (val_VG & HP100_LINK_CABLE_ST)       /* Can hear the HUBs tone. */
+               return HP100_LAN_100;
+       return HP100_LAN_ERR;
+}
+
+/*
+ * hp100_down_vg_link(): log the adapter out of the 100VG hub and bring
+ * the VG MAC back to its idle state.
+ *
+ * Returns 0 on success or when no hub tones are heard (nothing to log
+ * out from), -EIO if the link refuses to drop within the timeout.
+ */
+static int hp100_down_vg_link(struct net_device *dev)
+{
+       struct hp100_private *lp = netdev_priv(dev);
+       int ioaddr = dev->base_addr;    /* used implicitly by the hp100_* register macros */
+       unsigned long time;
+       long savelan, newlan;
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4224, TRACE);
+       printk("hp100: %s: down_vg_link\n", dev->name);
+#endif
+
+       hp100_page(MAC_CTRL);
+       /* Wait up to 250ms for hub tones (LINK_CABLE_ST) before attempting logout. */
+       time = jiffies + (HZ / 4);
+       do {
+               if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST)
+                       break;
+               if (!in_interrupt())
+                       schedule_timeout_interruptible(1);
+       } while (time_after(time, jiffies));
+
+       if (time_after_eq(jiffies, time))       /* no signal->no logout */
+               return 0;
+
+       /* Drop the VG Link by clearing the link up cmd and load addr. */
+
+       hp100_andb(~(HP100_LOAD_ADDR | HP100_LINK_CMD), VG_LAN_CFG_1);
+       hp100_orb(HP100_VG_SEL, VG_LAN_CFG_1);
+
+       /* Conditionally stall for >250ms on Link-Up Status (to go down) */
+       time = jiffies + (HZ / 2);
+       do {
+               if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
+                       break;
+               if (!in_interrupt())
+                       schedule_timeout_interruptible(1);
+       } while (time_after(time, jiffies));
+
+#ifdef HP100_DEBUG
+       if (time_after_eq(jiffies, time))
+               printk("hp100: %s: down_vg_link: Link does not go down?\n", dev->name);
+#endif
+
+       /* To prevent condition where Rev 1 VG MAC and old hubs do not complete */
+       /* logout under traffic (even though all the status bits are cleared),  */
+       /* do this workaround to get the Rev 1 MAC in its idle state */
+       if (lp->chip == HP100_CHIPID_LASSEN) {
+               /* Reset VG MAC to insure it leaves the logoff state even if */
+               /* the Hub is still emitting tones */
+               hp100_andb(~HP100_VG_RESET, VG_LAN_CFG_1);
+               udelay(1500);   /* wait for >1ms */
+               hp100_orb(HP100_VG_RESET, VG_LAN_CFG_1);        /* Release Reset */
+               udelay(1500);
+       }
+
+       /* New: For lassen, switch to 10 Mbps mac briefly to clear training ACK */
+       /* to get the VG mac to full reset. This is not req.d with later chips */
+       /* Note: It will take the between 1 and 2 seconds for the VG mac to be */
+       /* selected again! This will be left to the connect hub function to */
+       /* perform if desired.  */
+       if (lp->chip == HP100_CHIPID_LASSEN) {
+               /* Have to write to 10 and 100VG control registers simultaneously */
+               savelan = newlan = hp100_inl(10_LAN_CFG_1);     /* read 10+100 LAN_CFG regs */
+               newlan &= ~(HP100_VG_SEL << 16);
+               newlan |= (HP100_DOT3_MAC) << 8;
+               hp100_andb(~HP100_AUTO_MODE, MAC_CFG_3);        /* Autosel off */
+               hp100_outl(newlan, 10_LAN_CFG_1);
+
+               /* Conditionally stall for 5sec on VG selected. */
+               time = jiffies + (HZ * 5);
+               do {
+                       if (!(hp100_inb(MAC_CFG_4) & HP100_MAC_SEL_ST))
+                               break;
+                       if (!in_interrupt())
+                               schedule_timeout_interruptible(1);
+               } while (time_after(time, jiffies));
+
+               hp100_orb(HP100_AUTO_MODE, MAC_CFG_3);  /* Autosel back on */
+               hp100_outl(savelan, 10_LAN_CFG_1);      /* restore original 10/100 LAN cfg */
+       }
+
+       /* Wait up to 3s for the hub tones to disappear, confirming logout. */
+       time = jiffies + (3 * HZ);      /* Timeout 3s */
+       do {
+               if ((hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST) == 0)
+                       break;
+               if (!in_interrupt())
+                       schedule_timeout_interruptible(1);
+       } while (time_after(time, jiffies));
+
+       if (time_before_eq(time, jiffies)) {
+#ifdef HP100_DEBUG
+               printk("hp100: %s: down_vg_link: timeout\n", dev->name);
+#endif
+               return -EIO;
+       }
+
+       /* Unconditional 2s settling delay before reporting success. */
+       time = jiffies + (2 * HZ);      /* This seems to take a while.... */
+       do {
+               if (!in_interrupt())
+                       schedule_timeout_interruptible(1);
+       } while (time_after(time, jiffies));
+
+       return 0;
+}
+
+/*
+ * hp100_login_to_vg_hub(): run the 100VG training/login sequence with
+ * the hub, optionally requesting promiscuous-port access.
+ *
+ * @force_relogin: when 1, retrain even if already logged in (e.g. after
+ *                 a MAC address or promiscuous-mode change).
+ *
+ * Returns 0 when the login completed (LINK_UP_ST set), -EIO on training
+ * failure or when no training was initiated.
+ */
+static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin)
+{
+       int ioaddr = dev->base_addr;    /* used implicitly by the hp100_* register macros */
+       struct hp100_private *lp = netdev_priv(dev);
+       u_short val = 0;
+       unsigned long time;
+       int startst;            /* VG_LAN_CFG_1 snapshot taken on entry */
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4225, TRACE);
+       printk("hp100: %s: login_to_vg_hub\n", dev->name);
+#endif
+
+       /* Initiate a login sequence iff VG MAC is enabled and either Load Address
+        * bit is zero or the force relogin flag is set (e.g. due to MAC address or
+        * promiscuous mode change)
+        */
+       hp100_page(MAC_CTRL);
+       startst = hp100_inb(VG_LAN_CFG_1);
+       /* NOTE(review): the comment above mentions the Load Address bit, but the
+        * code tests HP100_MAC_SEL_ST in MAC_CFG_4 — confirm against the datasheet
+        * that this is the intended condition. */
+       if ((force_relogin == 1) || (hp100_inb(MAC_CFG_4) & HP100_MAC_SEL_ST)) {
+#ifdef HP100_DEBUG_TRAINING
+               printk("hp100: %s: Start training\n", dev->name);
+#endif
+
+               /* Ensure VG Reset bit is 1 (i.e., do not reset) */
+               hp100_orb(HP100_VG_RESET, VG_LAN_CFG_1);
+
+               /* If Lassen AND auto-select-mode AND VG tones were sensed on */
+               /* entry then temporarily put them into force 100Mbit mode */
+               if ((lp->chip == HP100_CHIPID_LASSEN) && (startst & HP100_LINK_CABLE_ST))
+                       hp100_andb(~HP100_DOT3_MAC, 10_LAN_CFG_2);
+
+               /* Drop the VG link by zeroing Link Up Command and Load Address  */
+               hp100_andb(~(HP100_LINK_CMD /* |HP100_LOAD_ADDR */ ), VG_LAN_CFG_1);
+
+#ifdef HP100_DEBUG_TRAINING
+               printk("hp100: %s: Bring down the link\n", dev->name);
+#endif
+
+               /* Wait for link to drop (up to 100ms) */
+               time = jiffies + (HZ / 10);
+               do {
+                       if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
+                               break;
+                       if (!in_interrupt())
+                               schedule_timeout_interruptible(1);
+               } while (time_after(time, jiffies));
+
+               /* Start an addressed training and optionally request promiscuous port */
+               if ((dev->flags) & IFF_PROMISC) {
+                       hp100_orb(HP100_PROM_MODE, VG_LAN_CFG_2);
+                       if (lp->chip == HP100_CHIPID_LASSEN)
+                               hp100_orw(HP100_MACRQ_PROMSC, TRAIN_REQUEST);
+               } else {
+                       hp100_andb(~HP100_PROM_MODE, VG_LAN_CFG_2);
+                       /* For ETR parts we need to reset the prom. bit in the training
+                        * register, otherwise promiscuous mode won't be disabled.
+                        */
+                       if (lp->chip == HP100_CHIPID_LASSEN) {
+                               hp100_andw(~HP100_MACRQ_PROMSC, TRAIN_REQUEST);
+                       }
+               }
+
+               /* With ETR parts, frame format request bits can be set. */
+               if (lp->chip == HP100_CHIPID_LASSEN)
+                       hp100_orb(HP100_MACRQ_FRAMEFMT_EITHER, TRAIN_REQUEST);
+
+               /* Kick off the login: set Link Up Command + Load Address. */
+               hp100_orb(HP100_LINK_CMD | HP100_LOAD_ADDR | HP100_VG_RESET, VG_LAN_CFG_1);
+
+               /* Note: Next wait could be omitted for Hood and earlier chips under */
+               /* certain circumstances */
+               /* TODO: check if hood/earlier and skip wait. */
+
+               /* Wait for either short timeout for VG tones or long for login    */
+               /* Wait for the card hardware to signalise link cable status ok... */
+               hp100_page(MAC_CTRL);
+               time = jiffies + (1 * HZ);      /* 1 sec timeout for cable st */
+               do {
+                       if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST)
+                               break;
+                       if (!in_interrupt())
+                               schedule_timeout_interruptible(1);
+               } while (time_before(jiffies, time));
+
+               if (time_after_eq(jiffies, time)) {
+#ifdef HP100_DEBUG_TRAINING
+                       printk("hp100: %s: Link cable status not ok? Training aborted.\n", dev->name);
+#endif
+               } else {
+#ifdef HP100_DEBUG_TRAINING
+                       printk
+                           ("hp100: %s: HUB tones detected. Trying to train.\n",
+                            dev->name);
+#endif
+
+                       /* Poll up to 2s for training to complete (LINK_UP_ST). */
+                       time = jiffies + (2 * HZ);      /* again a timeout */
+                       do {
+                               val = hp100_inb(VG_LAN_CFG_1);
+                               if ((val & (HP100_LINK_UP_ST))) {
+#ifdef HP100_DEBUG_TRAINING
+                                       printk("hp100: %s: Passed training.\n", dev->name);
+#endif
+                                       break;
+                               }
+                               if (!in_interrupt())
+                                       schedule_timeout_interruptible(1);
+                       } while (time_after(time, jiffies));
+               }
+
+               /* If LINK_UP_ST is set, then we are logged into the hub. */
+               if (time_before_eq(jiffies, time) && (val & HP100_LINK_UP_ST)) {
+#ifdef HP100_DEBUG_TRAINING
+                       printk("hp100: %s: Successfully logged into the HUB.\n", dev->name);
+                       if (lp->chip == HP100_CHIPID_LASSEN) {
+                               val = hp100_inw(TRAIN_ALLOW);
+                               printk("hp100: %s: Card supports 100VG MAC Version \"%s\" ",
+                                            dev->name, (hp100_inw(TRAIN_REQUEST) & HP100_CARD_MACVER) ? "802.12" : "Pre");
+                               printk("Driver will use MAC Version \"%s\"\n", (val & HP100_HUB_MACVER) ? "802.12" : "Pre");
+                               printk("hp100: %s: Frame format is %s.\n", dev->name, (val & HP100_MALLOW_FRAMEFMT) ? "802.5" : "802.3");
+                       }
+#endif
+               } else {
+                       /* If LINK_UP_ST is not set, login was not successful */
+                       printk("hp100: %s: Problem logging into the HUB.\n", dev->name);
+                       if (lp->chip == HP100_CHIPID_LASSEN) {
+                               /* Check allowed Register to find out why there is a problem. */
+                               val = hp100_inw(TRAIN_ALLOW);   /* won't work on non-ETR card */
+#ifdef HP100_DEBUG_TRAINING
+                               printk("hp100: %s: MAC Configuration requested: 0x%04x, HUB allowed: 0x%04x\n", dev->name, hp100_inw(TRAIN_REQUEST), val);
+#endif
+                               if (val & HP100_MALLOW_ACCDENIED)
+                                       printk("hp100: %s: HUB access denied.\n", dev->name);
+                               if (val & HP100_MALLOW_CONFIGURE)
+                                       printk("hp100: %s: MAC Configuration is incompatible with the Network.\n", dev->name);
+                               if (val & HP100_MALLOW_DUPADDR)
+                                       printk("hp100: %s: Duplicate MAC Address on the Network.\n", dev->name);
+                       }
+               }
+
+               /* If we have put the chip into forced 100 Mbit mode earlier, go back */
+               /* to auto-select mode */
+
+               if ((lp->chip == HP100_CHIPID_LASSEN) && (startst & HP100_LINK_CABLE_ST)) {
+                       hp100_page(MAC_CTRL);
+                       hp100_orb(HP100_DOT3_MAC, 10_LAN_CFG_2);
+               }
+
+               /* Re-read final link state to decide the return value. */
+               val = hp100_inb(VG_LAN_CFG_1);
+
+               /* Clear the MISC_ERROR Interrupt, which might be generated when doing the relogin */
+               hp100_page(PERFORMANCE);
+               hp100_outw(HP100_MISC_ERROR, IRQ_STATUS);
+
+               if (val & HP100_LINK_UP_ST)
+                       return 0;       /* login was ok */
+               else {
+                       printk("hp100: %s: Training failed.\n", dev->name);
+                       hp100_down_vg_link(dev);
+                       return -EIO;
+               }
+       }
+       /* no forced relogin & already link there->no training. */
+       return -EIO;
+}
+
+/*
+ * hp100_cascade_reset(): assert (enable != 0) or release (enable == 0)
+ * the Cascade chip's hardware reset via OPTION_LSW.  Lassen chips get
+ * an additional PCI transmit-FIFO reset pulse while held in reset.
+ */
+static void hp100_cascade_reset(struct net_device *dev, u_short enable)
+{
+       int ioaddr = dev->base_addr;    /* used implicitly by the hp100_* register macros */
+       struct hp100_private *lp = netdev_priv(dev);
+
+#ifdef HP100_DEBUG_B
+       hp100_outw(0x4226, TRACE);
+       printk("hp100: %s: cascade_reset\n", dev->name);
+#endif
+
+       if (enable) {
+               hp100_outw(HP100_HW_RST | HP100_RESET_LB, OPTION_LSW);
+               if (lp->chip == HP100_CHIPID_LASSEN) {
+                       /* Lassen requires a PCI transmit fifo reset */
+                       hp100_page(HW_MAP);
+                       hp100_andb(~HP100_PCI_RESET, PCICTRL2);
+                       hp100_orb(HP100_PCI_RESET, PCICTRL2);
+                       /* Wait for min. 300 ns */
+                       /* we can't use jiffies here, because it may be */
+                       /* that we have disabled the timer... */
+                       udelay(400);
+                       hp100_andb(~HP100_PCI_RESET, PCICTRL2);
+                       hp100_page(PERFORMANCE);
+               }
+       } else {                /* bring out of reset */
+               hp100_outw(HP100_HW_RST | HP100_SET_LB, OPTION_LSW);
+               udelay(400);
+               hp100_page(PERFORMANCE);
+       }
+}
+
+#ifdef HP100_DEBUG
+/*
+ * hp100_RegisterDump(): debug-only helper that prints the common
+ * registers and then every paged register (pages 0-7, offsets
+ * 0x08-0x20), skipping the page-0 data ports (0x10/0x12) whose reads
+ * have side effects.  Leaves the card on the PERFORMANCE page.
+ */
+void hp100_RegisterDump(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       int Page;
+       int Register;
+
+       /* Dump common registers */
+       printk("hp100: %s: Cascade Register Dump\n", dev->name);
+       printk("hardware id #1: 0x%.2x\n", hp100_inb(HW_ID));
+       printk("hardware id #2/paging: 0x%.2x\n", hp100_inb(PAGING));
+       printk("option #1: 0x%.4x\n", hp100_inw(OPTION_LSW));
+       printk("option #2: 0x%.4x\n", hp100_inw(OPTION_MSW));
+
+       /* Dump paged registers */
+       for (Page = 0; Page < 8; Page++) {
+               /* Dump registers */
+               printk("page: 0x%.2x\n", Page);
+               outw(Page, ioaddr + 0x02);      /* raw write to the paging register */
+               for (Register = 0x8; Register < 0x22; Register += 2) {
+                       /* Display Register contents except data port */
+                       if (((Register != 0x10) && (Register != 0x12)) || (Page > 0)) {
+                               printk("0x%.2x = 0x%.4x\n", Register, inw(ioaddr + Register));
+                       }
+               }
+       }
+       hp100_page(PERFORMANCE);
+}
+#endif
+
+
+/*
+ * cleanup_dev(): tear down one registered hp100 device — unregister it,
+ * release its I/O region, free the busmaster DMA ring (mode == 1),
+ * unmap any memory-mapped window, and free the netdev itself.
+ */
+static void cleanup_dev(struct net_device *d)
+{
+       struct hp100_private *p = netdev_priv(d);
+
+       unregister_netdev(d);
+       release_region(d->base_addr, HP100_REGION_SIZE);
+
+       if (p->mode == 1)       /* busmaster */
+               pci_free_consistent(p->pci_dev, MAX_RINGSIZE + 0x0f,
+                                   p->page_vaddr_algn,
+                                   virt_to_whatever(d, p->page_vaddr_algn));
+       if (p->mem_ptr_virt)
+               iounmap(p->mem_ptr_virt);
+
+       free_netdev(d);
+}
+
+static int hp100_eisa_probe(struct device *gendev)
+{
+       struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
+       struct eisa_device *edev = to_eisa_device(gendev);
+       int err;
+
+       if (!dev)
+               return -ENOMEM;
+
+       SET_NETDEV_DEV(dev, &edev->dev);
+
+       err = hp100_probe1(dev, edev->base_addr + 0xC38, HP100_BUS_EISA, NULL);
+       if (err)
+               goto out1;
+
+#ifdef HP100_DEBUG
+       printk("hp100: %s: EISA adapter found at 0x%x\n", dev->name,
+              dev->base_addr);
+#endif
+       dev_set_drvdata(gendev, dev);
+       return 0;
+ out1:
+       free_netdev(dev);
+       return err;
+}
+
+/* hp100_eisa_remove(): EISA remove callback — tear down the device
+ * stored in driver data.  Always returns 0. */
+static int hp100_eisa_remove(struct device *gendev)
+{
+       struct net_device *dev = dev_get_drvdata(gendev);
+       cleanup_dev(dev);
+       return 0;
+}
+
+/* EISA driver glue.  NOTE(review): the .remove line is indented with a
+ * tab while the rest uses spaces — cosmetic only, worth normalising. */
+static struct eisa_driver hp100_eisa_driver = {
+        .id_table = hp100_eisa_tbl,
+        .driver   = {
+                .name    = "hp100",
+                .probe   = hp100_eisa_probe,
+               .remove  = hp100_eisa_remove,
+        }
+};
+
+/*
+ * hp100_pci_probe(): PCI probe callback.  Enables the device, forces on
+ * the I/O-space and bus-master bits in PCI_COMMAND if the BIOS left
+ * them clear, then runs the common probe against BAR 0.
+ *
+ * Returns 0 on success or a negative errno; on failure the netdev is
+ * freed and the PCI device disabled.
+ */
+static int hp100_pci_probe(struct pci_dev *pdev,
+                          const struct pci_device_id *ent)
+{
+       struct net_device *dev;
+       int ioaddr;
+       u_short pci_command;
+       int err;
+
+       if (pci_enable_device(pdev))
+               return -ENODEV;
+
+       dev = alloc_etherdev(sizeof(struct hp100_private));
+       if (!dev) {
+               err = -ENOMEM;
+               goto out0;
+       }
+
+       SET_NETDEV_DEV(dev, &pdev->dev);
+
+       pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+       if (!(pci_command & PCI_COMMAND_IO)) {
+#ifdef HP100_DEBUG
+               /* NOTE(review): dev->name is still the unregistered "eth%d"
+                * template at this point — confirm this is acceptable here. */
+               printk("hp100: %s: PCI I/O Bit has not been set. Setting...\n", dev->name);
+#endif
+               pci_command |= PCI_COMMAND_IO;
+               pci_write_config_word(pdev, PCI_COMMAND, pci_command);
+       }
+
+       if (!(pci_command & PCI_COMMAND_MASTER)) {
+#ifdef HP100_DEBUG
+               printk("hp100: %s: PCI Master Bit has not been set. Setting...\n", dev->name);
+#endif
+               pci_command |= PCI_COMMAND_MASTER;
+               pci_write_config_word(pdev, PCI_COMMAND, pci_command);
+       }
+
+       ioaddr = pci_resource_start(pdev, 0);
+       err = hp100_probe1(dev, ioaddr, HP100_BUS_PCI, pdev);
+       if (err)
+               goto out1;
+
+#ifdef HP100_DEBUG
+       printk("hp100: %s: PCI adapter found at 0x%x\n", dev->name, ioaddr);
+#endif
+       pci_set_drvdata(pdev, dev);
+       return 0;
+ out1:
+       free_netdev(dev);
+ out0:
+       pci_disable_device(pdev);
+       return err;
+}
+
+/* hp100_pci_remove(): PCI remove callback — tear down the netdev and
+ * disable the PCI device. */
+static void hp100_pci_remove(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+
+       cleanup_dev(dev);
+       pci_disable_device(pdev);
+}
+
+
+/* PCI driver glue: matches hp100_pci_tbl (defined elsewhere in this file). */
+static struct pci_driver hp100_pci_driver = {
+       .name           = "hp100",
+       .id_table       = hp100_pci_tbl,
+       .probe          = hp100_pci_probe,
+       .remove         = hp100_pci_remove,
+};
+
+/*
+ *  module section
+ */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, "
+              "Siegfried \"Frieder\" Loeffler (dg1sek) <floeff@mathematik.uni-stuttgart.de>");
+MODULE_DESCRIPTION("HP CASCADE Architecture Driver for 100VG-AnyLan Network Adapters");
+
+/*
+ * Note: to register three isa devices, use:
+ * option hp100 hp100_port=0,0,0
+ *        to register one card at io 0x280 as eth239, use:
+ * option hp100 hp100_port=0x280
+ */
+#if defined(MODULE) && defined(CONFIG_ISA)
+/* Up to 5 ISA cards may be specified via the hp100_port= module option. */
+#define HP100_DEVICES 5
+/* Parameters set by insmod */
+/* hp100_port[0] defaults to 0 (no autoprobe); remaining slots to -1 (unused). */
+static int hp100_port[HP100_DEVICES] = { 0, [1 ... (HP100_DEVICES-1)] = -1 };
+module_param_hw_array(hp100_port, int, ioport, NULL, 0);
+
+/* List of devices */
+static struct net_device *hp100_devlist[HP100_DEVICES];
+
+/*
+ * hp100_isa_init(): probe each user-supplied ISA I/O port.  Returns 0 if
+ * at least one card was found, -ENODEV if none (or no ports were given),
+ * -ENOMEM on allocation failure (already-probed cards are torn down).
+ */
+static int __init hp100_isa_init(void)
+{
+       struct net_device *dev;
+       int i, err, cards = 0;
+
+       /* Don't autoprobe ISA bus */
+       if (hp100_port[0] == 0)
+               return -ENODEV;
+
+       /* Loop on all possible base addresses */
+       for (i = 0; i < HP100_DEVICES && hp100_port[i] != -1; ++i) {
+               dev = alloc_etherdev(sizeof(struct hp100_private));
+               if (!dev) {
+                       /* Roll back every card registered so far. */
+                       while (cards > 0)
+                               cleanup_dev(hp100_devlist[--cards]);
+
+                       return -ENOMEM;
+               }
+
+               err = hp100_isa_probe(dev, hp100_port[i]);
+               if (!err)
+                       hp100_devlist[cards++] = dev;
+               else
+                       free_netdev(dev);
+       }
+
+       return cards > 0 ? 0 : -ENODEV;
+}
+
+/* hp100_isa_cleanup(): tear down every ISA device found by hp100_isa_init(). */
+static void hp100_isa_cleanup(void)
+{
+       int i;
+
+       for (i = 0; i < HP100_DEVICES; i++) {
+               struct net_device *dev = hp100_devlist[i];
+               if (dev)
+                       cleanup_dev(dev);
+       }
+}
+#else
+/* Non-modular or ISA-less builds: ISA support compiles away to no-ops. */
+#define hp100_isa_init()       (0)
+#define hp100_isa_cleanup()    do { } while(0)
+#endif
+
+/*
+ * hp100_module_init(): register ISA, EISA and PCI support in turn.
+ * -ENODEV from any bus is tolerated (that bus simply has no cards);
+ * any other error unwinds the buses registered so far.
+ *
+ * NOTE(review): the value returned is that of the last registration
+ * attempted, so if ISA found cards but pci_register_driver() returned
+ * -ENODEV, -ENODEV is returned despite working ISA devices — confirm
+ * this is the intended behaviour.
+ */
+static int __init hp100_module_init(void)
+{
+       int err;
+
+       err = hp100_isa_init();
+       if (err && err != -ENODEV)
+               goto out;
+       err = eisa_driver_register(&hp100_eisa_driver);
+       if (err && err != -ENODEV)
+               goto out2;
+       err = pci_register_driver(&hp100_pci_driver);
+       if (err && err != -ENODEV)
+               goto out3;
+ out:
+       return err;
+ out3:
+       eisa_driver_unregister (&hp100_eisa_driver);
+ out2:
+       hp100_isa_cleanup();
+       goto out;
+}
+
+
+/* hp100_module_exit(): unwind all three buses in init order. */
+static void __exit hp100_module_exit(void)
+{
+       hp100_isa_cleanup();
+       eisa_driver_unregister (&hp100_eisa_driver);
+       pci_unregister_driver (&hp100_pci_driver);
+}
+
+module_init(hp100_module_init)
+module_exit(hp100_module_exit)
diff --git a/drivers/staging/hp/hp100.h b/drivers/staging/hp/hp100.h
new file mode 100644 (file)
index 0000000..7239b94
--- /dev/null
@@ -0,0 +1,611 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * hp100.h: Hewlett Packard HP10/100VG ANY LAN ethernet driver for Linux.
+ *
+ * $Id: hp100.h,v 1.51 1997/04/08 14:26:42 floeff Exp floeff $
+ *
+ * Authors:  Jaroslav Kysela, <perex@pf.jcu.cz>
+ *           Siegfried Loeffler <floeff@tunix.mathematik.uni-stuttgart.de>
+ *
+ * This driver is based on the 'hpfepkt' crynwr packet driver.
+ */
+
+/****************************************************************************
+ *  Hardware Constants
+ ****************************************************************************/
+
+/*
+ * Page Identifiers
+ * (Swap Paging Register, PAGING, bits 3:0, Offset 0x02)
+ */
+
+#define HP100_PAGE_PERFORMANCE 0x0     /* Page 0 */
+#define HP100_PAGE_MAC_ADDRESS 0x1     /* Page 1 */
+#define HP100_PAGE_HW_MAP      0x2     /* Page 2 */
+#define HP100_PAGE_EEPROM_CTRL 0x3     /* Page 3 */
+#define HP100_PAGE_MAC_CTRL    0x4     /* Page 4 */
+#define HP100_PAGE_MMU_CFG     0x5     /* Page 5 */
+#define HP100_PAGE_ID_MAC_ADDR 0x6     /* Page 6 */
+#define HP100_PAGE_MMU_POINTER 0x7     /* Page 7 */
+
+
+/* Registers that are present on all pages  */
+
+#define HP100_REG_HW_ID                0x00    /* R:  (16) Unique card ID           */
+#define HP100_REG_TRACE                0x00    /* W:  (16) Used for debug output    */
+#define HP100_REG_PAGING       0x02    /* R:  (16),15:4 Card ID             */
+                                       /* W:  (16),3:0 Switch pages         */
+#define HP100_REG_OPTION_LSW   0x04    /* RW: (16) Select card functions    */
+#define HP100_REG_OPTION_MSW   0x06    /* RW: (16) Select card functions    */
+
+/*  Page 0 - Performance  */
+
+#define HP100_REG_IRQ_STATUS   0x08    /* RW: (16) Which ints are pending   */
+#define HP100_REG_IRQ_MASK     0x0a    /* RW: (16) Select ints to allow     */
+#define HP100_REG_FRAGMENT_LEN 0x0c    /* W: (16)12:0 Current fragment len */
+/* Note: For 32 bit systems, fragment len and offset registers are available */
+/*       at offset 0x28 and 0x2c, where they can be written as 32bit values. */
+#define HP100_REG_OFFSET       0x0e    /* RW: (16)12:0 Offset to start read */
+#define HP100_REG_DATA32       0x10    /* RW: (32) I/O mode data port       */
+#define HP100_REG_DATA16       0x12    /* RW: WORDs must be read from here  */
+#define HP100_REG_TX_MEM_FREE  0x14    /* RD: (32) Amount of free Tx mem    */
+#define HP100_REG_TX_PDA_L      0x14   /* W: (32) BM: Ptr to PDL, Low Pri  */
+#define HP100_REG_TX_PDA_H      0x1c   /* W: (32) BM: Ptr to PDL, High Pri */
+#define HP100_REG_RX_PKT_CNT   0x18    /* RD: (8) Rx count of pkts on card  */
+#define HP100_REG_TX_PKT_CNT   0x19    /* RD: (8) Tx count of pkts on card  */
+#define HP100_REG_RX_PDL        0x1a   /* R: (8) BM: # rx pdl not executed */
+#define HP100_REG_TX_PDL        0x1b   /* R: (8) BM: # tx pdl not executed */
+#define HP100_REG_RX_PDA        0x18   /* W: (32) BM: Up to 31 addresses */
+                                       /*             which point to a PDL */
+#define HP100_REG_SL_EARLY      0x1c   /*    (32) Enhanced Slave Early Rx */
+#define HP100_REG_STAT_DROPPED  0x20   /* R (12) Dropped Packet Counter */
+#define HP100_REG_STAT_ERRORED  0x22   /* R (8) Errored Packet Counter */
+#define HP100_REG_STAT_ABORT    0x23   /* R (8) Abort Counter/OW Coll. Flag */
+#define HP100_REG_RX_RING       0x24   /* W (32) Slave: RX Ring Pointers */
+#define HP100_REG_32_FRAGMENT_LEN 0x28 /* W (13) Slave: Fragment Length Reg */
+#define HP100_REG_32_OFFSET     0x2c   /* W (16) Slave: Offset Register */
+
+/*  Page 1 - MAC Address/Hash Table  */
+
+#define HP100_REG_MAC_ADDR     0x08    /* RW: (8) Cards MAC address         */
+#define HP100_REG_HASH_BYTE0   0x10    /* RW: (8) Cards multicast filter    */
+
+/*  Page 2 - Hardware Mapping  */
+
+#define HP100_REG_MEM_MAP_LSW  0x08    /* RW: (16) LSW of cards mem addr    */
+#define HP100_REG_MEM_MAP_MSW  0x0a    /* RW: (16) MSW of cards mem addr    */
+#define HP100_REG_IO_MAP       0x0c    /* RW: (8) Cards I/O address         */
+#define HP100_REG_IRQ_CHANNEL  0x0d    /* RW: (8) IRQ and edge/level int    */
+#define HP100_REG_SRAM         0x0e    /* RW: (8) How much RAM on card      */
+#define HP100_REG_BM           0x0f    /* RW: (8) Controls BM functions     */
+
+/* New on Page 2 for ETR chips: */
+#define HP100_REG_MODECTRL1     0x10   /* RW: (8) Mode Control 1 */
+#define HP100_REG_MODECTRL2     0x11   /* RW: (8) Mode Control 2 */
+#define HP100_REG_PCICTRL1      0x12   /* RW: (8) PCI Cfg 1 */
+#define HP100_REG_PCICTRL2      0x13   /* RW: (8) PCI Cfg 2 */
+#define HP100_REG_PCIBUSMLAT    0x15   /* RW: (8) PCI Bus Master Latency */
+#define HP100_REG_EARLYTXCFG    0x16   /* RW: (16) Early TX Cfg/Cntrl Reg */
+#define HP100_REG_EARLYRXCFG    0x18   /* RW: (8) Early RX Cfg/Cntrl Reg */
+#define HP100_REG_ISAPNPCFG1    0x1a   /* RW: (8) ISA PnP Cfg/Cntrl Reg 1 */
+#define HP100_REG_ISAPNPCFG2    0x1b   /* RW: (8) ISA PnP Cfg/Cntrl Reg 2 */
+
+/*  Page 3 - EEPROM/Boot ROM  */
+
+#define HP100_REG_EEPROM_CTRL  0x08    /* RW: (16) Used to load EEPROM      */
+#define HP100_REG_BOOTROM_CTRL  0x0a
+
+/*  Page 4 - LAN Configuration  (MAC_CTRL) */
+
+#define HP100_REG_10_LAN_CFG_1 0x08    /* RW: (8) Set 10M XCVR functions   */
+#define HP100_REG_10_LAN_CFG_2  0x09   /* RW: (8)     10M XCVR functions   */
+#define HP100_REG_VG_LAN_CFG_1 0x0a    /* RW: (8) Set 100M XCVR functions  */
+#define HP100_REG_VG_LAN_CFG_2  0x0b   /* RW: (8) 100M LAN Training cfgregs */
+#define HP100_REG_MAC_CFG_1    0x0c    /* RW: (8) Types of pkts to accept   */
+#define HP100_REG_MAC_CFG_2    0x0d    /* RW: (8) Misc MAC functions        */
+#define HP100_REG_MAC_CFG_3     0x0e   /* RW: (8) Misc MAC functions */
+#define HP100_REG_MAC_CFG_4     0x0f   /* R:  (8) Misc MAC states */
+#define HP100_REG_DROPPED      0x10    /* R:  (16),11:0 Pkts can't fit in mem */
+#define HP100_REG_CRC          0x12    /* R:  (8) Pkts with CRC             */
+#define HP100_REG_ABORT                0x13    /* R:  (8) Aborted Tx pkts           */
+#define HP100_REG_TRAIN_REQUEST 0x14   /* RW: (16) Endnode MAC register. */
+#define HP100_REG_TRAIN_ALLOW   0x16   /* R:  (16) Hub allowed register */
+
+/*  Page 5 - MMU  */
+
+#define HP100_REG_RX_MEM_STOP  0x0c    /* RW: (16) End of Rx ring addr      */
+#define HP100_REG_TX_MEM_STOP  0x0e    /* RW: (16) End of Tx ring addr      */
+#define HP100_REG_PDL_MEM_STOP  0x10   /* Not used by 802.12 devices */
+#define HP100_REG_ECB_MEM_STOP  0x14   /* I've no idea what this is */
+
+/*  Page 6 - Card ID/Physical LAN Address  */
+
+#define HP100_REG_BOARD_ID     0x08    /* R:  (8) EISA/ISA card ID          */
+#define HP100_REG_BOARD_IO_CHCK 0x0c   /* R:  (8) Added to ID to get FFh    */
+#define HP100_REG_SOFT_MODEL   0x0d    /* R:  (8) Config program defined    */
+#define HP100_REG_LAN_ADDR     0x10    /* R:  (8) MAC addr of card          */
+#define HP100_REG_LAN_ADDR_CHCK 0x16   /* R:  (8) Added to addr to get FFh  */
+
+/*  Page 7 - MMU Current Pointers  */
+
+#define HP100_REG_PTR_RXSTART  0x08    /* R:  (16) Current begin of Rx ring */
+#define HP100_REG_PTR_RXEND    0x0a    /* R:  (16) Current end of Rx ring   */
+#define HP100_REG_PTR_TXSTART  0x0c    /* R:  (16) Current begin of Tx ring */
+#define HP100_REG_PTR_TXEND    0x0e    /* R:  (16) Current end of Rx ring   */
+#define HP100_REG_PTR_RPDLSTART 0x10
+#define HP100_REG_PTR_RPDLEND   0x12
+#define HP100_REG_PTR_RINGPTRS  0x14
+#define HP100_REG_PTR_MEMDEBUG  0x1a
+/* ------------------------------------------------------------------------ */
+
+
+/*
+ * Hardware ID Register I (Always available, HW_ID, Offset 0x00)
+ */
+#define HP100_HW_ID_CASCADE     0x4850 /* Identifies Cascade Chip */
+
+/*
+ * Hardware ID Register 2 & Paging Register
+ * (Always available, PAGING, Offset 0x02)
+ * Bits 15:4 are for the Chip ID
+ */
+#define HP100_CHIPID_MASK        0xFFF0
+#define HP100_CHIPID_SHASTA      0x5350        /* Not 802.12 compliant */
+                                        /* EISA BM/SL, MCA16/32 SL, ISA SL */
+#define HP100_CHIPID_RAINIER     0x5360        /* Not 802.12 compliant EISA BM, */
+                                        /* PCI SL, MCA16/32 SL, ISA SL */
+#define HP100_CHIPID_LASSEN      0x5370        /* 802.12 compliant PCI BM, PCI SL */
+                                        /* LRF supported */
+
+/*
+ *  Option Registers I and II
+ * (Always available, OPTION_LSW, Offset 0x04-0x05)
+ */
+#define HP100_DEBUG_EN         0x8000  /* 0:Dis., 1:Enable Debug Dump Ptr. */
+#define HP100_RX_HDR           0x4000  /* 0:Dis., 1:Enable putting pkt into */
+                                       /*   system mem. before Rx interrupt */
+#define HP100_MMAP_DIS         0x2000  /* 0:Enable, 1:Disable mem.mapping. */
+                                       /*   MMAP_DIS must be 0 and MEM_EN */
+                                       /*   must be 1 for memory-mapped */
+                                       /*   mode to be enabled */
+#define HP100_EE_EN            0x1000  /* 0:Disable,1:Enable EEPROM writing */
+#define HP100_BM_WRITE         0x0800  /* 0:Slave, 1:Bus Master for Tx data */
+#define HP100_BM_READ          0x0400  /* 0:Slave, 1:Bus Master for Rx data */
+#define HP100_TRI_INT          0x0200  /* 0:Don't, 1:Do tri-state the int */
+#define HP100_MEM_EN           0x0040  /* Config program set this to */
+                                       /*   0:Disable, 1:Enable mem map. */
+                                       /*   See MMAP_DIS. */
+#define HP100_IO_EN            0x0020  /* 1:Enable I/O transfers */
+#define HP100_BOOT_EN          0x0010  /* 1:Enable boot ROM access */
+#define HP100_FAKE_INT         0x0008  /* 1:int */
+#define HP100_INT_EN           0x0004  /* 1:Enable ints from card */
+#define HP100_HW_RST           0x0002  /* 0:Reset, 1:Out of reset */
+                                       /* NIC reset on 0 to 1 transition */
+
+/*
+ *  Option Register III
+ * (Always available, OPTION_MSW, Offset 0x06)
+ */
+#define HP100_PRIORITY_TX      0x0080  /* 1:Do all Tx pkts as priority */
+#define HP100_EE_LOAD          0x0040  /* 1:EEPROM loading, 0 when done */
+#define HP100_ADV_NXT_PKT      0x0004  /* 1:Advance to next pkt in Rx queue */
+                                       /*   h/w will set to 0 when done */
+#define HP100_TX_CMD           0x0002  /* 1:Tell h/w download done, h/w */
+                                       /*   will set to 0 when done */
+
+/*
+ * Interrupt Status Registers I and II
+ * (Page PERFORMANCE, IRQ_STATUS, Offset 0x08-0x09)
+ * Note: With old chips, these Registers will clear when 1 is written to them
+ *       with new chips this depends on setting of CLR_ISMODE
+ */
+#define HP100_RX_EARLY_INT      0x2000
+#define HP100_RX_PDA_ZERO       0x1000
+#define HP100_RX_PDL_FILL_COMPL 0x0800
+#define HP100_RX_PACKET                0x0400  /* 0:No, 1:Yes pkt has been Rx */
+#define HP100_RX_ERROR         0x0200  /* 0:No, 1:Yes Rx pkt had error */
+#define HP100_TX_PDA_ZERO       0x0020 /* 1 when PDA count goes to zero */
+#define HP100_TX_SPACE_AVAIL   0x0010  /* 0:<8192, 1:>=8192 Tx free bytes */
+#define HP100_TX_COMPLETE      0x0008  /* 0:No, 1:Yes a Tx has completed */
+#define HP100_MISC_ERROR        0x0004 /* 0:No, 1:Lan Link down or bus error */
+#define HP100_TX_ERROR         0x0002  /* 0:No, 1:Yes Tx pkt had error */
+
+/*
+ * Xmit Memory Free Count
+ * (Page PERFORMANCE, TX_MEM_FREE, Offset 0x14) (Read only, 32bit)
+ */
+#define HP100_AUTO_COMPARE     0x80000000      /* Tx Space avail & pkts<255 */
+#define HP100_FREE_SPACE       0x7fffffe0      /* Tx free memory */
+
+/*
+ *  IRQ Channel
+ * (Page HW_MAP, IRQ_CHANNEL, Offset 0x0d)
+ */
+#define HP100_ZERO_WAIT_EN     0x80    /* 0:No, 1:Yes asserts NOWS signal */
+#define HP100_IRQ_SCRAMBLE      0x40
+#define HP100_BOND_HP           0x20
+#define HP100_LEVEL_IRQ                0x10    /* 0:Edge, 1:Level type interrupts. */
+                                       /* (Only valid on EISA cards) */
+#define HP100_IRQMASK          0x0F    /* Isolate the IRQ bits */
+
+/*
+ * SRAM Parameters
+ * (Page HW_MAP, SRAM, Offset 0x0e)
+ */
+#define HP100_RAM_SIZE_MASK    0xe0    /* AND to get SRAM size index */
+#define HP100_RAM_SIZE_SHIFT   0x05    /* Shift count(put index in lwr bits) */
+
+/*
+ * Bus Master Register
+ * (Page HW_MAP, BM, Offset 0x0f)
+ */
+#define HP100_BM_BURST_RD       0x01   /* EISA only: 1=Use burst trans. fm system */
+                                       /* memory to chip (tx) */
+#define HP100_BM_BURST_WR       0x02   /* EISA only: 1=Use burst trans. fm system */
+                                       /* memory to chip (rx) */
+#define HP100_BM_MASTER                0x04    /* 0:Slave, 1:BM mode */
+#define HP100_BM_PAGE_CK        0x08   /* This bit should be set whenever in */
+                                       /* an EISA system */
+#define HP100_BM_PCI_8CLK       0x40   /* ... cycles 8 clocks apart */
+
+
+/*
+ * Mode Control Register I
+ * (Page HW_MAP, MODECTRL1, Offset0x10)
+ */
+#define HP100_TX_DUALQ          0x10
+   /* If set and BM -> dual tx pda queues */
+#define HP100_ISR_CLRMODE       0x02   /* If set ISR will clear all pending */
+                                      /* interrupts on read (etr only?) */
+#define HP100_EE_NOLOAD         0x04   /* Status whether res will be loaded */
+                                      /* from the eeprom */
+#define HP100_TX_CNT_FLG        0x08   /* Controls Early TX Reg Cnt Field */
+#define HP100_PDL_USE3          0x10   /* If set BM engine will read only */
+                                      /* first three data elements of a PDL */
+                                      /* on the first access. */
+#define HP100_BUSTYPE_MASK      0xe0   /* Three bit bus type info */
+
+/*
+ * Mode Control Register II
+ * (Page HW_MAP, MODECTRL2, Offset0x11)
+ */
+#define HP100_EE_MASK           0x0f   /* Tell EEPROM circuit not to load */
+                                      /* certain resources */
+#define HP100_DIS_CANCEL        0x20   /* For tx dualq mode operation */
+#define HP100_EN_PDL_WB         0x40   /* 1: Status of PDL completion may be */
+                                      /* written back to system mem */
+#define HP100_EN_BUS_FAIL       0x80   /* Enables bus-fail portion of misc */
+                                      /* interrupt */
+
+/*
+ * PCI Configuration and Control Register I
+ * (Page HW_MAP, PCICTRL1, Offset 0x12)
+ */
+#define HP100_LO_MEM            0x01   /* 1: Mapped Mem requested below 1MB */
+#define HP100_NO_MEM            0x02   /* 1: Disables Req for sysmem to PCI */
+                                      /* bios */
+#define HP100_USE_ISA           0x04   /* 1: isa type decodes will occur */
+                                      /* simultaneously with PCI decodes */
+#define HP100_IRQ_HI_MASK       0xf0   /* pgmed by pci bios */
+#define HP100_PCI_IRQ_HI_MASK   0x78   /* Isolate 4 bits for PCI IRQ  */
+
+/*
+ * PCI Configuration and Control Register II
+ * (Page HW_MAP, PCICTRL2, Offset 0x13)
+ */
+#define HP100_RD_LINE_PDL       0x01   /* 1: PCI command Memory Read Line en */
+#define HP100_RD_TX_DATA_MASK   0x06   /* choose PCI memread cmds for TX */
+#define HP100_MWI               0x08   /* 1: en. PCI memory write invalidate */
+#define HP100_ARB_MODE          0x10   /* Select PCI arbiter type */
+#define HP100_STOP_EN           0x20   /* Enables PCI state machine to issue */
+                                      /* pci stop if cascade not ready */
+#define HP100_IGNORE_PAR        0x40   /* 1: PCI state machine ignores parity */
+#define HP100_PCI_RESET         0x80   /* 0->1: Reset PCI block */
+
+/*
+ * Early TX Configuration and Control Register
+ * (Page HW_MAP, EARLYTXCFG, Offset 0x16)
+ */
+#define HP100_EN_EARLY_TX       0x8000 /* 1=Enable Early TX */
+#define HP100_EN_ADAPTIVE       0x4000 /* 1=Enable adaptive mode */
+#define HP100_EN_TX_UR_IRQ      0x2000 /* reserved, must be 0 */
+#define HP100_EN_LOW_TX         0x1000 /* reserved, must be 0 */
+#define HP100_ET_CNT_MASK       0x0fff /* bits 11..0: ET counters */
+
+/*
+ * Early RX Configuration and Control Register
+ * (Page HW_MAP, EARLYRXCFG, Offset 0x18)
+ */
+#define HP100_EN_EARLY_RX       0x80   /* 1=Enable Early RX */
+#define HP100_EN_LOW_RX         0x40   /* reserved, must be 0 */
+#define HP100_RX_TRIP_MASK      0x1f   /* bits 4..0: threshold at which the
+                                        * early rx circuit will start the
+                                        * dma of received packet into system
+                                        * memory for BM */
+
+/*
+ *  Serial Devices Control Register
+ * (Page EEPROM_CTRL, EEPROM_CTRL, Offset 0x08)
+ */
+#define HP100_EEPROM_LOAD      0x0001  /* 0->1 loads EEPROM into registers. */
+                                       /* When it goes back to 0, load is   */
+                                       /* complete. This should take ~600us. */
+
+/*
+ * 10MB LAN Control and Configuration Register I
+ * (Page MAC_CTRL, 10_LAN_CFG_1, Offset 0x08)
+ */
+#define HP100_MAC10_SEL                0xc0    /* Get bits to indicate MAC */
+#define HP100_AUI_SEL          0x20    /* Status of AUI selection */
+#define HP100_LOW_TH           0x10    /* 0:No, 1:Yes allow better cabling */
+#define HP100_LINK_BEAT_DIS    0x08    /* 0:Enable, 1:Disable link beat */
+#define HP100_LINK_BEAT_ST     0x04    /* 0:No, 1:Yes link beat being Rx */
+#define HP100_R_ROL_ST         0x02    /* 0:No, 1:Yes Rx twisted pair has */
+                                       /*             been reversed */
+#define HP100_AUI_ST           0x01    /* 0:No, 1:Yes use AUI on TP card */
+
+/*
+ * 10 MB LAN Control and Configuration Register II
+ * (Page MAC_CTRL, 10_LAN_CFG_2, Offset 0x09)
+ */
+#define HP100_SQU_ST           0x01    /* 0:No, 1:Yes collision signal sent */
+                                       /*       after Tx.Only used for AUI. */
+#define HP100_FULLDUP           0x02   /* 1: LXT901 XCVR fullduplx enabled */
+#define HP100_DOT3_MAC          0x04   /* 1: DOT 3 Mac sel. unless Autosel */
+
+/*
+ * MAC Selection, use with MAC10_SEL bits
+ */
+#define HP100_AUTO_SEL_10      0x0     /* Auto select */
+#define HP100_XCVR_LXT901_10   0x1     /* LXT901 10BaseT transceiver */
+#define HP100_XCVR_7213                0x2     /* 7213 transceiver */
+#define HP100_XCVR_82503       0x3     /* 82503 transceiver */
+
+/*
+ *  100MB LAN Training Register
+ * (Page MAC_CTRL, VG_LAN_CFG_2, Offset 0x0b) (old, pre 802.12)
+ */
+#define HP100_FRAME_FORMAT     0x08    /* 0:802.3, 1:802.5 frames */
+#define HP100_BRIDGE           0x04    /* 0:No, 1:Yes tell hub i am a bridge */
+#define HP100_PROM_MODE                0x02    /* 0:No, 1:Yes tell hub card is */
+                                       /*         promiscuous */
+#define HP100_REPEATER         0x01    /* 0:No, 1:Yes tell hub MAC wants to */
+                                       /*         be a cascaded repeater */
+
+/*
+ * 100MB LAN Control and Configuration Register
+ * (Page MAC_CTRL, VG_LAN_CFG_1, Offset 0x0a)
+ */
+#define HP100_VG_SEL           0x80    /* 0:No, 1:Yes use 100 Mbit MAC */
+#define HP100_LINK_UP_ST       0x40    /* 0:No, 1:Yes endnode logged in */
+#define HP100_LINK_CABLE_ST    0x20    /* 0:No, 1:Yes cable can hear tones */
+                                       /*         from  hub */
+#define HP100_LOAD_ADDR                0x10    /* 0->1 card addr will be sent  */
+                                       /* 100ms later the link status  */
+                                       /* bits are valid */
+#define HP100_LINK_CMD         0x08    /* 0->1 link will attempt to log in. */
+                                       /* 100ms later the link status */
+                                       /* bits are valid */
+#define HP100_TRN_DONE          0x04   /* NEW ETR-Chips only: Will be reset */
+                                       /* after LinkUp Cmd is given and set */
+                                       /* when training has completed. */
+#define HP100_LINK_GOOD_ST     0x02    /* 0:No, 1:Yes cable passed training */
+#define HP100_VG_RESET         0x01    /* 0:Yes, 1:No reset the 100VG MAC */
+
+
+/*
+ *  MAC Configuration Register I
+ * (Page MAC_CTRL, MAC_CFG_1, Offset 0x0c)
+ */
+#define HP100_RX_IDLE          0x80    /* 0:Yes, 1:No currently receiving pkts */
+#define HP100_TX_IDLE          0x40    /* 0:Yes, 1:No currently Txing pkts */
+#define HP100_RX_EN            0x20    /* 1: allow receiving of pkts */
+#define HP100_TX_EN            0x10    /* 1: allow transmitting of pkts */
+#define HP100_ACC_ERRORED      0x08    /* 0:No, 1:Yes allow Rx of errored pkts */
+#define HP100_ACC_MC           0x04    /* 0:No, 1:Yes allow Rx of multicast pkts */
+#define HP100_ACC_BC           0x02    /* 0:No, 1:Yes allow Rx of broadcast pkts */
+#define HP100_ACC_PHY          0x01    /* 0:No, 1:Yes allow Rx of ALL phys. pkts */
+#define HP100_MAC1MODEMASK     0xf0    /* Hide ACC bits */
+#define HP100_MAC1MODE1                0x00    /* Receive nothing, must also disable RX */
+#define HP100_MAC1MODE2                0x00
+#define HP100_MAC1MODE3                HP100_MAC1MODE2 | HP100_ACC_BC
+#define HP100_MAC1MODE4                HP100_MAC1MODE3 | HP100_ACC_MC
+#define HP100_MAC1MODE5                HP100_MAC1MODE4 /* set mc hash to all ones also */
+#define HP100_MAC1MODE6                HP100_MAC1MODE5 | HP100_ACC_PHY /* Promiscuous */
+/* Note MODE6 will receive all GOOD packets on the LAN. This really needs
+   a mode 7 defined to be LAN Analyzer mode, which will receive errored and
+   runt packets, and keep the CRC bytes. */
+#define HP100_MAC1MODE7                HP100_MAC1MODE6 | HP100_ACC_ERRORED
+
+/*
+ *  MAC Configuration Register II
+ * (Page MAC_CTRL, MAC_CFG_2, Offset 0x0d)
+ */
+#define HP100_TR_MODE          0x80    /* 0:No, 1:Yes support Token Ring formats */
+#define HP100_TX_SAME          0x40    /* 0:No, 1:Yes Tx same packet continuous */
+#define HP100_LBK_XCVR         0x20    /* 0:No, 1:Yes loopback through MAC & */
+                                       /*   transceiver */
+#define HP100_LBK_MAC          0x10    /* 0:No, 1:Yes loopback through MAC */
+#define HP100_CRC_I            0x08    /* 0:No, 1:Yes inhibit CRC on Tx packets */
+#define HP100_ACCNA             0x04   /* 1: For 802.5: Accept only token ring
+                                        * group addr that matches NA mask */
+#define HP100_KEEP_CRC         0x02    /* 0:No, 1:Yes keep CRC on Rx packets. */
+                                       /*   The length will reflect this. */
+#define HP100_ACCFA             0x01   /* 1: For 802.5: Accept only functional
+                                        * addrs that match FA mask (page1) */
+#define HP100_MAC2MODEMASK     0x02
+#define HP100_MAC2MODE1                0x00
+#define HP100_MAC2MODE2                0x00
+#define HP100_MAC2MODE3                0x00
+#define HP100_MAC2MODE4                0x00
+#define HP100_MAC2MODE5                0x00
+#define HP100_MAC2MODE6                0x00
+#define HP100_MAC2MODE7                KEEP_CRC
+
+/*
+ * MAC Configuration Register III
+ * (Page MAC_CTRL, MAC_CFG_3, Offset 0x0e)
+ */
+#define HP100_PACKET_PACE       0x03   /* Packet Pacing:
+                                        * 00: No packet pacing
+                                        * 01: 8 to 16 uS delay
+                                        * 10: 16 to 32 uS delay
+                                        * 11: 32 to 64 uS delay
+                                        */
+#define HP100_LRF_EN            0x04   /* 1: External LAN Rcv Filter and
+                                        * TCP/IP Checksumming enabled. */
+#define HP100_AUTO_MODE         0x10   /* 1: AutoSelect between 10/100 */
+
+/*
+ * MAC Configuration Register IV
+ * (Page MAC_CTRL, MAC_CFG_4, Offset 0x0f)
+ */
+#define HP100_MAC_SEL_ST        0x01   /* (R): Status of external VGSEL
+                                        * Signal, 1=100VG, 0=10Mbit sel. */
+#define HP100_LINK_FAIL_ST      0x02   /* (R): Status of Link Fail portion
+                                        * of the Misc. Interrupt */
+
+/*
+ *  100 MB LAN Training Request/Allowed Registers
+ * (Page MAC_CTRL, TRAIN_REQUEST and TRAIN_ALLOW, Offset 0x14-0x16)(ETR parts only)
+ */
+#define HP100_MACRQ_REPEATER         0x0001    /* 1: MAC tells HUB it wants to be
+                                                *    a cascaded repeater
+                                                * 0: ... wants to be a DTE */
+#define HP100_MACRQ_PROMSC           0x0006    /* 2 bits: Promiscuous mode
+                                                * 00: Rcv only unicast packets
+                                                *     specifically addr to this
+                                                *     endnode
+                                                * 10: Rcv all pckts fwded by
+                                                *     the local repeater */
+#define HP100_MACRQ_FRAMEFMT_EITHER  0x0018    /* 11: either format allowed */
+#define HP100_MACRQ_FRAMEFMT_802_3   0x0000    /* 00: 802.3 is requested */
+#define HP100_MACRQ_FRAMEFMT_802_5   0x0010    /* 10: 802.5 format is requested */
+#define HP100_CARD_MACVER            0xe000    /* R: 3 bit Cards 100VG MAC version */
+#define HP100_MALLOW_REPEATER        0x0001    /* If reset, requested access as an
+                                                * end node is allowed */
+#define HP100_MALLOW_PROMSC          0x0004    /* 2 bits: Promiscuous mode
+                                                * 00: Rcv only unicast packets
+                                                *     specifically addr to this
+                                                *     endnode
+                                                * 10: Rcv all pckts fwded by
+                                                *     the local repeater */
+#define HP100_MALLOW_FRAMEFMT        0x00e0    /* 2 bits: Frame Format
+                                                * 00: 802.3 format will be used
+                                                * 10: 802.5 format will be used */
+#define HP100_MALLOW_ACCDENIED       0x0400    /* N bit */
+#define HP100_MALLOW_CONFIGURE       0x0f00    /* C bit */
+#define HP100_MALLOW_DUPADDR         0x1000    /* D bit */
+#define HP100_HUB_MACVER             0xe000    /* R: 3 bit 802.12 MAC/RMAC training */
+                                            /*    protocol of repeater */
+
+/* ****************************************************************************** */
+
+/*
+ *  Set/Reset bits
+ */
+#define HP100_SET_HB           0x0100  /* 0:Set fields to 0 whose mask is 1 */
+#define HP100_SET_LB           0x0001  /* HB sets upper byte, LB sets lower byte */
+#define HP100_RESET_HB         0x0000  /* For readability when resetting bits */
+#define HP100_RESET_LB         0x0000  /* For readability when resetting bits */
+
+/*
+ *  Misc. Constants
+ */
+#define HP100_LAN_100          100     /* lan_type value for VG */
+#define HP100_LAN_10           10      /* lan_type value for 10BaseT */
+#define HP100_LAN_COAX         9       /* lan_type value for Coax */
+#define HP100_LAN_ERR          (-1)    /* lan_type value for link down */
+
+/*
+ * Bus Master Data Structures  ----------------------------------------------
+ */
+
+#define MAX_RX_PDL              30     /* Card limit = 31 */
+#define MAX_RX_FRAG             2      /* Don't need more... */
+#define MAX_TX_PDL              29
+#define MAX_TX_FRAG             2      /* Limit = 31 */
+
+/* Define total PDL area size in bytes (should be 4096) */
+/* This is the size of kernel (dma) memory that will be allocated. */
+#define MAX_RINGSIZE ((MAX_RX_FRAG*8+4+4)*MAX_RX_PDL+(MAX_TX_FRAG*8+4+4)*MAX_TX_PDL)+16
+
+/* Ethernet Packet Sizes */
+#define MIN_ETHER_SIZE          60
+#define MAX_ETHER_SIZE          1514   /* Needed for preallocation of */
+                                       /* skb buffer when busmastering */
+
+/* Tx or Rx Ring Entry */
+typedef struct hp100_ring {
+       u_int *pdl;             /* Address of PDLs PDH, dword before
+                                * this address is used for rx hdr */
+       u_int pdl_paddr;        /* Physical address of PDL */
+       struct sk_buff *skb;
+       struct hp100_ring *next;
+} hp100_ring_t;
+
+
+
+/* Mask for Header Descriptor */
+#define HP100_PKT_LEN_MASK     0x1FFF  /* AND with RxLength to get length */
+
+
+/* Receive Packet Status.  Note, the error bits are only valid if ACC_ERRORED
+   bit in the MAC Configuration Register 1 is set. */
+#define HP100_RX_PRI           0x8000  /* 0:No, 1:Yes packet is priority */
+#define HP100_SDF_ERR          0x4000  /* 0:No, 1:Yes start of frame error */
+#define HP100_SKEW_ERR         0x2000  /* 0:No, 1:Yes skew out of range */
+#define HP100_BAD_SYMBOL_ERR   0x1000  /* 0:No, 1:Yes invalid symbol received */
+#define HP100_RCV_IPM_ERR      0x0800  /* 0:No, 1:Yes pkt had an invalid packet */
+                                       /*   marker */
+#define HP100_SYMBOL_BAL_ERR   0x0400  /* 0:No, 1:Yes symbol balance error */
+#define HP100_VG_ALN_ERR       0x0200  /* 0:No, 1:Yes non-octet received */
+#define HP100_TRUNC_ERR                0x0100  /* 0:No, 1:Yes the packet was truncated */
+#define HP100_RUNT_ERR         0x0040  /* 0:No, 1:Yes pkt length < Min Pkt */
+                                       /*   Length Reg. */
+#define HP100_ALN_ERR          0x0010  /* 0:No, 1:Yes align error. */
+#define HP100_CRC_ERR          0x0008  /* 0:No, 1:Yes CRC occurred. */
+
+/* The last three bits indicate the type of destination address */
+
+#define HP100_MULTI_ADDR_HASH  0x0006  /* 110: Addr multicast, matched hash */
+#define HP100_BROADCAST_ADDR   0x0003  /* x11: Addr broadcast */
+#define HP100_MULTI_ADDR_NO_HASH 0x0002        /* 010: Addr multicast, didn't match hash */
+#define HP100_PHYS_ADDR_MATCH  0x0001  /* x01: Addr was physical and mine */
+#define HP100_PHYS_ADDR_NO_MATCH 0x0000        /* x00: Addr was physical but not mine */
+
+/*
+ *  macros
+ */
+
+#define hp100_inb( reg ) \
+        inb( ioaddr + HP100_REG_##reg )
+#define hp100_inw( reg ) \
+       inw( ioaddr + HP100_REG_##reg )
+#define hp100_inl( reg ) \
+       inl( ioaddr + HP100_REG_##reg )
+#define hp100_outb( data, reg ) \
+       outb( data, ioaddr + HP100_REG_##reg )
+#define hp100_outw( data, reg ) \
+       outw( data, ioaddr + HP100_REG_##reg )
+#define hp100_outl( data, reg ) \
+       outl( data, ioaddr + HP100_REG_##reg )
+#define hp100_orb( data, reg ) \
+       outb( inb( ioaddr + HP100_REG_##reg ) | (data), ioaddr + HP100_REG_##reg )
+#define hp100_orw( data, reg ) \
+       outw( inw( ioaddr + HP100_REG_##reg ) | (data), ioaddr + HP100_REG_##reg )
+#define hp100_andb( data, reg ) \
+       outb( inb( ioaddr + HP100_REG_##reg ) & (data), ioaddr + HP100_REG_##reg )
+#define hp100_andw( data, reg ) \
+       outw( inw( ioaddr + HP100_REG_##reg ) & (data), ioaddr + HP100_REG_##reg )
+
+#define hp100_page( page ) \
+       outw( HP100_PAGE_##page, ioaddr + HP100_REG_PAGING )
+#define hp100_ints_off() \
+       outw( HP100_INT_EN | HP100_RESET_LB, ioaddr + HP100_REG_OPTION_LSW )
+#define hp100_ints_on() \
+       outw( HP100_INT_EN | HP100_SET_LB, ioaddr + HP100_REG_OPTION_LSW )
+#define hp100_mem_map_enable() \
+       outw( HP100_MMAP_DIS | HP100_RESET_HB, ioaddr + HP100_REG_OPTION_LSW )
+#define hp100_mem_map_disable() \
+       outw( HP100_MMAP_DIS | HP100_SET_HB, ioaddr + HP100_REG_OPTION_LSW )
index 5b9d223..7c7f518 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/u64_stats_sync.h>
 
 struct bpf_verifier_env;
+struct bpf_verifier_log;
 struct perf_event;
 struct bpf_prog;
 struct bpf_map;
@@ -23,6 +24,7 @@ struct sock;
 struct seq_file;
 struct btf;
 struct btf_type;
+struct exception_table_entry;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -211,6 +213,7 @@ enum bpf_arg_type {
        ARG_PTR_TO_INT,         /* pointer to int */
        ARG_PTR_TO_LONG,        /* pointer to long */
        ARG_PTR_TO_SOCKET,      /* pointer to bpf_sock (fullsock) */
+       ARG_PTR_TO_BTF_ID,      /* pointer to in-kernel struct */
 };
 
 /* type of values returned from helper functions */
@@ -233,11 +236,17 @@ struct bpf_func_proto {
        bool gpl_only;
        bool pkt_access;
        enum bpf_return_type ret_type;
-       enum bpf_arg_type arg1_type;
-       enum bpf_arg_type arg2_type;
-       enum bpf_arg_type arg3_type;
-       enum bpf_arg_type arg4_type;
-       enum bpf_arg_type arg5_type;
+       union {
+               struct {
+                       enum bpf_arg_type arg1_type;
+                       enum bpf_arg_type arg2_type;
+                       enum bpf_arg_type arg3_type;
+                       enum bpf_arg_type arg4_type;
+                       enum bpf_arg_type arg5_type;
+               };
+               enum bpf_arg_type arg_type[5];
+       };
+       u32 *btf_id; /* BTF ids of arguments */
 };
 
 /* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -281,6 +290,7 @@ enum bpf_reg_type {
        PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
        PTR_TO_TP_BUFFER,        /* reg points to a writable raw tp's buffer */
        PTR_TO_XDP_SOCK,         /* reg points to struct xdp_sock */
+       PTR_TO_BTF_ID,           /* reg points to kernel struct */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -288,7 +298,11 @@ enum bpf_reg_type {
  */
 struct bpf_insn_access_aux {
        enum bpf_reg_type reg_type;
-       int ctx_field_size;
+       union {
+               int ctx_field_size;
+               u32 btf_id;
+       };
+       struct bpf_verifier_log *log; /* for verbose logs */
 };
 
 static inline void
@@ -359,11 +373,16 @@ enum bpf_cgroup_storage_type {
 
 #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
 
+/* The longest tracepoint has 12 args.
+ * See include/trace/bpf_probe.h
+ */
+#define MAX_BPF_FUNC_ARGS 12
+
 struct bpf_prog_stats {
        u64 cnt;
        u64 nsecs;
        struct u64_stats_sync syncp;
-};
+} __aligned(2 * sizeof(u64));
 
 struct bpf_prog_aux {
        atomic_t refcnt;
@@ -375,8 +394,14 @@ struct bpf_prog_aux {
        u32 id;
        u32 func_cnt; /* used by non-func prog as the number of func progs */
        u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
+       u32 attach_btf_id; /* in-kernel BTF type id to attach to */
        bool verifier_zext; /* Zero extensions has been inserted by verifier. */
        bool offload_requested;
+       bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
+       /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
+       const struct btf_type *attach_func_proto;
+       /* function name for valid attach_btf_id */
+       const char *attach_func_name;
        struct bpf_prog **func;
        void *jit_data; /* JIT specific data. arch dependent */
        struct latch_tree_node ksym_tnode;
@@ -416,6 +441,8 @@ struct bpf_prog_aux {
         * main prog always has linfo_idx == 0
         */
        u32 linfo_idx;
+       u32 num_exentries;
+       struct exception_table_entry *extable;
        struct bpf_prog_stats __percpu *stats;
        union {
                struct work_struct work;
@@ -482,6 +509,7 @@ struct bpf_event_entry {
 
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
 int bpf_prog_calc_tag(struct bpf_prog *fp);
+const char *kernel_type_name(u32 btf_type_id);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
@@ -747,6 +775,15 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
                                     const union bpf_attr *kattr,
                                     union bpf_attr __user *uattr);
+bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+                   const struct bpf_prog *prog,
+                   struct bpf_insn_access_aux *info);
+int btf_struct_access(struct bpf_verifier_log *log,
+                     const struct btf_type *t, int off, int size,
+                     enum bpf_access_type atype,
+                     u32 *next_btf_id);
+u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int);
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -972,31 +1009,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
 }
 #endif
 
-#if defined(CONFIG_XDP_SOCKETS)
-struct xdp_sock;
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-                      struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
-#else
-struct xdp_sock;
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-                                                    u32 key)
-{
-       return NULL;
-}
-
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-                                    struct xdp_sock *xs)
-{
-       return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(struct bpf_map *map)
-{
-}
-#endif
-
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 void bpf_sk_reuseport_detach(struct sock *sk);
 int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
index 36a9c23..de14872 100644 (file)
@@ -26,6 +26,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
index 26a6d58..6e7284e 100644 (file)
@@ -52,6 +52,8 @@ struct bpf_reg_state {
                 */
                struct bpf_map *map_ptr;
 
+               u32 btf_id; /* for PTR_TO_BTF_ID */
+
                /* Max size from any of the above. */
                unsigned long raw;
        };
@@ -330,10 +332,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 #define BPF_LOG_STATS  4
 #define BPF_LOG_LEVEL  (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
 #define BPF_LOG_MASK   (BPF_LOG_LEVEL | BPF_LOG_STATS)
+#define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */
 
 static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 {
-       return log->level && log->ubuf && !bpf_verifier_log_full(log);
+       return (log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
+               log->level == BPF_LOG_KERNEL;
 }
 
 #define BPF_MAX_SUBPROGS 256
@@ -397,6 +401,8 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
                                      const char *fmt, va_list args);
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
                                           const char *fmt, ...);
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+                           const char *fmt, ...);
 
 static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
 {
index 6db2d9a..b475e7f 100644 (file)
 #define BCM5482_SHD_SSD                0x14    /* 10100: Secondary SerDes control */
 #define BCM5482_SHD_SSD_LEDM   0x0008  /* SSD LED Mode enable */
 #define BCM5482_SHD_SSD_EN     0x0001  /* SSD enable */
-#define BCM5482_SHD_MODE       0x1f    /* 11111: Mode Control Register */
-#define BCM5482_SHD_MODE_1000BX        0x0001  /* Enable 1000BASE-X registers */
 
+/* 10011: SerDes 100-FX Control Register */
+#define BCM54616S_SHD_100FX_CTRL       0x13
+#define        BCM54616S_100FX_MODE            BIT(0)  /* 100-FX SerDes Enable */
+
+/* 11111: Mode Control Register */
+#define BCM54XX_SHD_MODE               0x1f
+#define BCM54XX_SHD_INTF_SEL_MASK      GENMASK(2, 1)   /* INTERF_SEL[1:0] */
+#define BCM54XX_SHD_MODE_1000BX                BIT(0)  /* Enable 1000-X registers */
 
 /*
  * EXPANSION SHADOW ACCESS REGISTERS.  (PHY REG 0x15, 0x16, and 0x17)
index 64cdf2a..9dee008 100644 (file)
@@ -5,6 +5,7 @@
 #define _LINUX_BTF_H 1
 
 #include <linux/types.h>
+#include <uapi/linux/btf.h>
 
 struct btf;
 struct btf_member;
@@ -53,9 +54,40 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 bool btf_type_is_void(const struct btf_type *t);
 
+static inline bool btf_type_is_ptr(const struct btf_type *t)
+{
+       return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
+}
+
+static inline bool btf_type_is_int(const struct btf_type *t)
+{
+       return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
+}
+
+static inline bool btf_type_is_enum(const struct btf_type *t)
+{
+       return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
+}
+
+static inline bool btf_type_is_typedef(const struct btf_type *t)
+{
+       return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
+}
+
+static inline bool btf_type_is_func(const struct btf_type *t)
+{
+       return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
+}
+
+static inline bool btf_type_is_func_proto(const struct btf_type *t)
+{
+       return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
+struct btf *btf_parse_vmlinux(void);
 #else
 static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
                                                    u32 type_id)
index 9fa4b3f..b698266 100644 (file)
@@ -4,22 +4,26 @@
 #ifndef DIM_H
 #define DIM_H
 
+#include <linux/bits.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
 
-/**
+/*
  * Number of events between DIM iterations.
  * Causes a moderation of the algorithm run.
  */
 #define DIM_NEVENTS 64
 
-/**
+/*
  * Is a difference between values justifies taking an action.
  * We consider 10% difference as significant.
  */
 #define IS_SIGNIFICANT_DIFF(val, ref) \
        (((100UL * abs((val) - (ref))) / (ref)) > 10)
 
-/**
+/*
  * Calculate the gap between two values.
  * Take wrap-around and variable size into consideration.
  */
                & (BIT_ULL(bits) - 1))
 
 /**
- * Structure for CQ moderation values.
+ * struct dim_cq_moder - Structure for CQ moderation values.
  * Used for communications between DIM and its consumer.
  *
  * @usec: CQ timer suggestion (by DIM)
  * @pkts: CQ packet counter suggestion (by DIM)
- * @cq_period_mode: CQ priod count mode (from CQE/EQE)
+ * @comps: Completion counter
+ * @cq_period_mode: CQ period count mode (from CQE/EQE)
  */
 struct dim_cq_moder {
        u16 usec;
@@ -42,13 +47,14 @@ struct dim_cq_moder {
 };
 
 /**
- * Structure for DIM sample data.
+ * struct dim_sample - Structure for DIM sample data.
  * Used for communications between DIM and its consumer.
  *
  * @time: Sample timestamp
  * @pkt_ctr: Number of packets
  * @byte_ctr: Number of bytes
  * @event_ctr: Number of events
+ * @comp_ctr: Current completion counter
  */
 struct dim_sample {
        ktime_t time;
@@ -59,12 +65,14 @@ struct dim_sample {
 };
 
 /**
- * Structure for DIM stats.
+ * struct dim_stats - Structure for DIM stats.
  * Used for holding current measured rates.
  *
  * @ppms: Packets per msec
  * @bpms: Bytes per msec
  * @epms: Events per msec
+ * @cpms: Completions per msec
+ * @cpe_ratio: Ratio of completions to events
  */
 struct dim_stats {
        int ppms; /* packets per msec */
@@ -75,12 +83,13 @@ struct dim_stats {
 };
 
 /**
- * Main structure for dynamic interrupt moderation (DIM).
+ * struct dim - Main structure for dynamic interrupt moderation (DIM).
  * Used for holding all information about a specific DIM instance.
  *
  * @state: Algorithm state (see below)
  * @prev_stats: Measured rates from previous iteration (for comparison)
  * @start_sample: Sampled data at start of current iteration
+ * @measuring_sample: A &dim_sample that is used to update the current events
  * @work: Work to perform on action required
  * @priv: A pointer to the struct that points to dim
  * @profile_ix: Current moderation profile
@@ -106,24 +115,21 @@ struct dim {
 };
 
 /**
- * enum dim_cq_period_mode
- *
- * These are the modes for CQ period count.
+ * enum dim_cq_period_mode - Modes for CQ period count
  *
  * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
  * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
  * @DIM_CQ_PERIOD_NUM_MODES: Number of modes
  */
-enum {
+enum dim_cq_period_mode {
        DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
        DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
        DIM_CQ_PERIOD_NUM_MODES
 };
 
 /**
- * enum dim_state
+ * enum dim_state - DIM algorithm states
  *
- * These are the DIM algorithm states.
  * These will determine if the algorithm is in a valid state to start an iteration.
  *
  * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
@@ -131,16 +137,15 @@ enum {
  * need to perform an action
  * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
  */
-enum {
+enum dim_state {
        DIM_START_MEASURE,
        DIM_MEASURE_IN_PROGRESS,
        DIM_APPLY_NEW_PROFILE,
 };
 
 /**
- * enum dim_tune_state
+ * enum dim_tune_state - DIM algorithm tune states
  *
- * These are the DIM algorithm tune states.
  * These will determine which action the algorithm should perform.
  *
  * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
@@ -148,7 +153,7 @@ enum {
  * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
  * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
  */
-enum {
+enum dim_tune_state {
        DIM_PARKING_ON_TOP,
        DIM_PARKING_TIRED,
        DIM_GOING_RIGHT,
@@ -156,25 +161,23 @@ enum {
 };
 
 /**
- * enum dim_stats_state
+ * enum dim_stats_state - DIM algorithm statistics states
  *
- * These are the DIM algorithm statistics states.
  * These will determine the verdict of current iteration.
  *
  * @DIM_STATS_WORSE: Current iteration shows worse performance than before
- * @DIM_STATS_WORSE: Current iteration shows same performance than before
- * @DIM_STATS_WORSE: Current iteration shows better performance than before
+ * @DIM_STATS_SAME:  Current iteration shows same performance than before
+ * @DIM_STATS_BETTER: Current iteration shows better performance than before
  */
-enum {
+enum dim_stats_state {
        DIM_STATS_WORSE,
        DIM_STATS_SAME,
        DIM_STATS_BETTER,
 };
 
 /**
- * enum dim_step_result
+ * enum dim_step_result - DIM algorithm step results
  *
- * These are the DIM algorithm step results.
  * These describe the result of a step.
  *
  * @DIM_STEPPED: Performed a regular step
@@ -182,7 +185,7 @@ enum {
  * tired parking
  * @DIM_ON_EDGE: Stepped to the most left/right profile
  */
-enum {
+enum dim_step_result {
        DIM_STEPPED,
        DIM_TOO_TIRED,
        DIM_ON_EDGE,
@@ -199,7 +202,7 @@ enum {
 bool dim_on_top(struct dim *dim);
 
 /**
- *     dim_turn - change profile alterning direction
+ *     dim_turn - change profile altering direction
  *     @dim: DIM context
  *
  * Go left if we were going right and vice-versa.
@@ -238,7 +241,7 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
                    struct dim_stats *curr_stats);
 
 /**
- *     dim_update_sample - set a sample's fields with give values
+ *     dim_update_sample - set a sample's fields with given values
  *     @event_ctr: number of events to set
  *     @packets: number of packets to set
  *     @bytes: number of bytes to set
@@ -304,8 +307,8 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
  *     @end_sample: Current data measurement
  *
  * Called by the consumer.
- * This is the main logic of the algorithm, where data is processed in order to decide on next
- * required action.
+ * This is the main logic of the algorithm, where data is processed in order
+ * to decide on next required action.
  */
 void net_dim(struct dim *dim, struct dim_sample end_sample);
 
index 81ecfaa..4ab9e78 100644 (file)
@@ -33,4 +33,14 @@ search_module_extables(unsigned long addr)
 }
 #endif /*CONFIG_MODULES*/
 
+#ifdef CONFIG_BPF_JIT
+const struct exception_table_entry *search_bpf_extables(unsigned long addr);
+#else
+static inline const struct exception_table_entry *
+search_bpf_extables(unsigned long addr)
+{
+       return NULL;
+}
+#endif
+
 #endif /* _LINUX_EXTABLE_H */
index 0367a75..7a6f8f6 100644 (file)
@@ -65,6 +65,9 @@ struct ctl_table_header;
 /* unused opcode to mark special call to bpf_tail_call() helper */
 #define BPF_TAIL_CALL  0xf0
 
+/* unused opcode to mark special load instruction. Same as BPF_ABS */
+#define BPF_PROBE_MEM  0x20
+
 /* unused opcode to mark call to interpreter with arguments */
 #define BPF_CALL_ARGS  0xe0
 
@@ -464,10 +467,11 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
 #define BPF_CALL_x(x, name, ...)                                              \
        static __always_inline                                                 \
        u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));   \
+       typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
        u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__));         \
        u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__))          \
        {                                                                      \
-               return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
+               return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
        }                                                                      \
        static __always_inline                                                 \
        u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
diff --git a/include/linux/firmware/broadcom/tee_bnxt_fw.h b/include/linux/firmware/broadcom/tee_bnxt_fw.h
new file mode 100644 (file)
index 0000000..f24c82d
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * Copyright 2019 Broadcom.
+ */
+
+#ifndef _BROADCOM_TEE_BNXT_FW_H
+#define _BROADCOM_TEE_BNXT_FW_H
+
+#include <linux/types.h>
+
+int tee_bnxt_fw_load(void);
+int tee_bnxt_copy_coredump(void *buf, u32 offset, u32 size);
+
+#endif /* _BROADCOM_TEE_BNXT_FW_H */
index 975553a..54d9436 100644 (file)
@@ -403,6 +403,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev);
 
 void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
 
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev);
+
 extern struct bus_type fsl_mc_bus_type;
 
 extern struct device_type fsl_mc_bus_dprc_type;
index 2d8aaf7..81ca84c 100644 (file)
@@ -20,4 +20,19 @@ static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb)
 {
        return (struct icmphdr *)skb_transport_header(skb);
 }
+
+static inline bool icmp_is_err(int type)
+{
+       switch (type) {
+       case ICMP_DEST_UNREACH:
+       case ICMP_SOURCE_QUENCH:
+       case ICMP_REDIRECT:
+       case ICMP_TIME_EXCEEDED:
+       case ICMP_PARAMETERPROB:
+               return true;
+       }
+
+       return false;
+}
+
 #endif /* _LINUX_ICMP_H */
index a8f8889..ef1cbb5 100644 (file)
@@ -46,4 +46,18 @@ extern void                          icmpv6_flow_init(struct sock *sk,
                                                         const struct in6_addr *saddr,
                                                         const struct in6_addr *daddr,
                                                         int oif);
+
+static inline bool icmpv6_is_err(int type)
+{
+       switch (type) {
+       case ICMPV6_DEST_UNREACH:
+       case ICMPV6_PKT_TOOBIG:
+       case ICMPV6_TIME_EXCEED:
+       case ICMPV6_PARAMPROB:
+               return true;
+       }
+
+       return false;
+}
+
 #endif
index a99c588..fe74003 100644 (file)
@@ -82,4 +82,10 @@ static inline int linkmode_equal(const unsigned long *src1,
        return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
+static inline int linkmode_subset(const unsigned long *src1,
+                                 const unsigned long *src2)
+{
+       return bitmap_subset(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
 #endif /* __LINKMODE_H */
index 34de06b..8071148 100644 (file)
@@ -47,16 +47,16 @@ struct vif_entry_notifier_info {
 };
 
 static inline int mr_call_vif_notifier(struct notifier_block *nb,
-                                      struct net *net,
                                       unsigned short family,
                                       enum fib_event_type event_type,
                                       struct vif_device *vif,
-                                      unsigned short vif_index, u32 tb_id)
+                                      unsigned short vif_index, u32 tb_id,
+                                      struct netlink_ext_ack *extack)
 {
        struct vif_entry_notifier_info info = {
                .info = {
                        .family = family,
-                       .net = net,
+                       .extack = extack,
                },
                .dev = vif->dev,
                .vif_index = vif_index,
@@ -64,7 +64,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
                .tb_id = tb_id,
        };
 
-       return call_fib_notifier(nb, net, event_type, &info.info);
+       return call_fib_notifier(nb, event_type, &info.info);
 }
 
 static inline int mr_call_vif_notifiers(struct net *net,
@@ -77,7 +77,6 @@ static inline int mr_call_vif_notifiers(struct net *net,
        struct vif_entry_notifier_info info = {
                .info = {
                        .family = family,
-                       .net = net,
                },
                .dev = vif->dev,
                .vif_index = vif_index,
@@ -173,21 +172,21 @@ struct mfc_entry_notifier_info {
 };
 
 static inline int mr_call_mfc_notifier(struct notifier_block *nb,
-                                      struct net *net,
                                       unsigned short family,
                                       enum fib_event_type event_type,
-                                      struct mr_mfc *mfc, u32 tb_id)
+                                      struct mr_mfc *mfc, u32 tb_id,
+                                      struct netlink_ext_ack *extack)
 {
        struct mfc_entry_notifier_info info = {
                .info = {
                        .family = family,
-                       .net = net,
+                       .extack = extack,
                },
                .mfc = mfc,
                .tb_id = tb_id
        };
 
-       return call_fib_notifier(nb, net, event_type, &info.info);
+       return call_fib_notifier(nb, event_type, &info.info);
 }
 
 static inline int mr_call_mfc_notifiers(struct net *net,
@@ -199,7 +198,6 @@ static inline int mr_call_mfc_notifiers(struct net *net,
        struct mfc_entry_notifier_info info = {
                .info = {
                        .family = family,
-                       .net = net,
                },
                .mfc = mfc,
                .tb_id = tb_id
@@ -301,10 +299,11 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 
 int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
            int (*rules_dump)(struct net *net,
-                             struct notifier_block *nb),
+                             struct notifier_block *nb,
+                             struct netlink_ext_ack *extack),
            struct mr_table *(*mr_iter)(struct net *net,
                                        struct mr_table *mrt),
-           rwlock_t *mrt_lock);
+           rwlock_t *mrt_lock, struct netlink_ext_ack *extack);
 #else
 static inline void vif_device_init(struct vif_device *v,
                                   struct net_device *dev,
@@ -355,10 +354,11 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 static inline int mr_dump(struct net *net, struct notifier_block *nb,
                          unsigned short family,
                          int (*rules_dump)(struct net *net,
-                                           struct notifier_block *nb),
+                                           struct notifier_block *nb,
+                                           struct netlink_ext_ack *extack),
                          struct mr_table *(*mr_iter)(struct net *net,
                                                      struct mr_table *mrt),
-                         rwlock_t *mrt_lock)
+                         rwlock_t *mrt_lock, struct netlink_ext_ack *extack)
 {
        return -EINVAL;
 }
index c20f190..1f140a6 100644 (file)
@@ -925,6 +925,15 @@ struct dev_ifalias {
 struct devlink;
 struct tlsdev_ops;
 
+struct netdev_name_node {
+       struct hlist_node hlist;
+       struct list_head list;
+       struct net_device *dev;
+       const char *name;
+};
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name);
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
 
 /*
  * This structure defines the management hooks for network devices.
@@ -1564,7 +1573,7 @@ enum netdev_priv_flags {
  *             (i.e. as seen by users in the "Space.c" file).  It is the name
  *             of the interface.
  *
- *     @name_hlist:    Device name hash chain, please keep it close to name[]
+ *     @name_node:     Name hashlist node
  *     @ifalias:       SNMP alias
  *     @mem_end:       Shared memory end
  *     @mem_start:     Shared memory start
@@ -1780,7 +1789,7 @@ enum netdev_priv_flags {
 
 struct net_device {
        char                    name[IFNAMSIZ];
-       struct hlist_node       name_hlist;
+       struct netdev_name_node *name_node;
        struct dev_ifalias      __rcu *ifalias;
        /*
         *      I/O specific fields
@@ -2487,6 +2496,9 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd);
 
 int register_netdevice_notifier(struct notifier_block *nb);
 int unregister_netdevice_notifier(struct notifier_block *nb);
+int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb);
+int unregister_netdevice_notifier_net(struct net *net,
+                                     struct notifier_block *nb);
 
 struct netdev_notifier_info {
        struct net_device       *dev;
@@ -2557,6 +2569,9 @@ extern rwlock_t                           dev_base_lock;          /* Device list lock */
                list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_continue(net, d)               \
                list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list)
+#define for_each_netdev_continue_reverse(net, d)               \
+               list_for_each_entry_continue_reverse(d, &(net)->dev_base_head, \
+                                                    dev_list)
 #define for_each_netdev_continue_rcu(net, d)           \
        list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_in_bond_rcu(bond, slave)       \
@@ -4081,9 +4096,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
                                    unsigned char name_assign_type,
                                    void (*setup)(struct net_device *),
                                    unsigned int txqs, unsigned int rxqs);
-int dev_get_valid_name(struct net *net, struct net_device *dev,
-                      const char *name);
-
 #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
        alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1)
 
index 77ebb61..eb312e7 100644 (file)
@@ -199,6 +199,8 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
                 const struct nf_hook_entries *e, unsigned int i);
 
+void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
+                      const struct nf_hook_entries *e);
 /**
  *     nf_hook - call a netfilter hook
  *
@@ -311,17 +313,36 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
             struct list_head *head, struct net_device *in, struct net_device *out,
             int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
-       struct sk_buff *skb, *next;
-       struct list_head sublist;
-
-       INIT_LIST_HEAD(&sublist);
-       list_for_each_entry_safe(skb, next, head, list) {
-               list_del(&skb->list);
-               if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
-                       list_add_tail(&skb->list, &sublist);
+       struct nf_hook_entries *hook_head = NULL;
+
+#ifdef CONFIG_JUMP_LABEL
+       if (__builtin_constant_p(pf) &&
+           __builtin_constant_p(hook) &&
+           !static_key_false(&nf_hooks_needed[pf][hook]))
+               return;
+#endif
+
+       rcu_read_lock();
+       switch (pf) {
+       case NFPROTO_IPV4:
+               hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
+               break;
+       case NFPROTO_IPV6:
+               hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               break;
        }
-       /* Put passed packets back on main list */
-       list_splice(&sublist, head);
+
+       if (hook_head) {
+               struct nf_hook_state state;
+
+               nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
+
+               nf_hook_slow_list(head, &state, hook_head);
+       }
+       rcu_read_unlock();
 }
 
 /* Call setsockopt() */
index 9bc255a..4d8b1ea 100644 (file)
@@ -269,34 +269,15 @@ ip_set_ext_destroy(struct ip_set *set, void *data)
        /* Check that the extension is enabled for the set and
         * call it's destroy function for its extension part in data.
         */
-       if (SET_WITH_COMMENT(set))
-               ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
-                       set, ext_comment(data, set));
-}
+       if (SET_WITH_COMMENT(set)) {
+               struct ip_set_comment *c = ext_comment(data, set);
 
-static inline int
-ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
-{
-       u32 cadt_flags = 0;
-
-       if (SET_WITH_TIMEOUT(set))
-               if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-                                          htonl(set->timeout))))
-                       return -EMSGSIZE;
-       if (SET_WITH_COUNTER(set))
-               cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
-       if (SET_WITH_COMMENT(set))
-               cadt_flags |= IPSET_FLAG_WITH_COMMENT;
-       if (SET_WITH_SKBINFO(set))
-               cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
-       if (SET_WITH_FORCEADD(set))
-               cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
-
-       if (!cadt_flags)
-               return 0;
-       return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+               ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(set, c);
+       }
 }
 
+int ip_set_put_flags(struct sk_buff *skb, struct ip_set *set);
+
 /* Netlink CB args */
 enum {
        IPSET_CB_NET = 0,       /* net namespace */
@@ -506,144 +487,8 @@ ip_set_timeout_set(unsigned long *timeout, u32 value)
        *timeout = t;
 }
 
-static inline u32
-ip_set_timeout_get(const unsigned long *timeout)
-{
-       u32 t;
-
-       if (*timeout == IPSET_ELEM_PERMANENT)
-               return 0;
-
-       t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
-       /* Zero value in userspace means no timeout */
-       return t == 0 ? 1 : t;
-}
-
-static inline char*
-ip_set_comment_uget(struct nlattr *tb)
-{
-       return nla_data(tb);
-}
-
-/* Called from uadd only, protected by the set spinlock.
- * The kadt functions don't use the comment extensions in any way.
- */
-static inline void
-ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
-                   const struct ip_set_ext *ext)
-{
-       struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
-       size_t len = ext->comment ? strlen(ext->comment) : 0;
-
-       if (unlikely(c)) {
-               set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
-               kfree_rcu(c, rcu);
-               rcu_assign_pointer(comment->c, NULL);
-       }
-       if (!len)
-               return;
-       if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
-               len = IPSET_MAX_COMMENT_SIZE;
-       c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
-       if (unlikely(!c))
-               return;
-       strlcpy(c->str, ext->comment, len + 1);
-       set->ext_size += sizeof(*c) + strlen(c->str) + 1;
-       rcu_assign_pointer(comment->c, c);
-}
-
-/* Used only when dumping a set, protected by rcu_read_lock() */
-static inline int
-ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
-{
-       struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
-
-       if (!c)
-               return 0;
-       return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
-}
-
-/* Called from uadd/udel, flush or the garbage collectors protected
- * by the set spinlock.
- * Called when the set is destroyed and when there can't be any user
- * of the set data anymore.
- */
-static inline void
-ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
-{
-       struct ip_set_comment_rcu *c;
-
-       c = rcu_dereference_protected(comment->c, 1);
-       if (unlikely(!c))
-               return;
-       set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
-       kfree_rcu(c, rcu);
-       rcu_assign_pointer(comment->c, NULL);
-}
-
-static inline void
-ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
-{
-       atomic64_add((long long)bytes, &(counter)->bytes);
-}
-
-static inline void
-ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
-{
-       atomic64_add((long long)packets, &(counter)->packets);
-}
-
-static inline u64
-ip_set_get_bytes(const struct ip_set_counter *counter)
-{
-       return (u64)atomic64_read(&(counter)->bytes);
-}
-
-static inline u64
-ip_set_get_packets(const struct ip_set_counter *counter)
-{
-       return (u64)atomic64_read(&(counter)->packets);
-}
-
-static inline bool
-ip_set_match_counter(u64 counter, u64 match, u8 op)
-{
-       switch (op) {
-       case IPSET_COUNTER_NONE:
-               return true;
-       case IPSET_COUNTER_EQ:
-               return counter == match;
-       case IPSET_COUNTER_NE:
-               return counter != match;
-       case IPSET_COUNTER_LT:
-               return counter < match;
-       case IPSET_COUNTER_GT:
-               return counter > match;
-       }
-       return false;
-}
-
-static inline void
-ip_set_update_counter(struct ip_set_counter *counter,
-                     const struct ip_set_ext *ext, u32 flags)
-{
-       if (ext->packets != ULLONG_MAX &&
-           !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
-               ip_set_add_bytes(ext->bytes, counter);
-               ip_set_add_packets(ext->packets, counter);
-       }
-}
-
-static inline bool
-ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
-{
-       return nla_put_net64(skb, IPSET_ATTR_BYTES,
-                            cpu_to_be64(ip_set_get_bytes(counter)),
-                            IPSET_ATTR_PAD) ||
-              nla_put_net64(skb, IPSET_ATTR_PACKETS,
-                            cpu_to_be64(ip_set_get_packets(counter)),
-                            IPSET_ATTR_PAD);
-}
+void ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
+                        const struct ip_set_ext *ext);
 
 static inline void
 ip_set_init_counter(struct ip_set_counter *counter,
@@ -655,31 +500,6 @@ ip_set_init_counter(struct ip_set_counter *counter,
                atomic64_set(&(counter)->packets, (long long)(ext->packets));
 }
 
-static inline void
-ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
-                  const struct ip_set_ext *ext,
-                  struct ip_set_ext *mext, u32 flags)
-{
-       mext->skbinfo = *skbinfo;
-}
-
-static inline bool
-ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
-{
-       /* Send nonzero parameters only */
-       return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
-               nla_put_net64(skb, IPSET_ATTR_SKBMARK,
-                             cpu_to_be64((u64)skbinfo->skbmark << 32 |
-                                         skbinfo->skbmarkmask),
-                             IPSET_ATTR_PAD)) ||
-              (skbinfo->skbprio &&
-               nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
-                             cpu_to_be32(skbinfo->skbprio))) ||
-              (skbinfo->skbqueue &&
-               nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
-                             cpu_to_be16(skbinfo->skbqueue)));
-}
-
 static inline void
 ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
                    const struct ip_set_ext *ext)
index 2dddbc6..fcc4d21 100644 (file)
@@ -12,18 +12,4 @@ enum {
        IPSET_ADD_START_STORED_TIMEOUT,
 };
 
-/* Common functions */
-
-static inline u32
-range_to_mask(u32 from, u32 to, u8 *bits)
-{
-       u32 mask = 0xFFFFFFFE;
-
-       *bits = 32;
-       while (--(*bits) > 0 && mask && (to & mask) != from)
-               mask <<= 1;
-
-       return mask;
-}
-
 #endif /* __IP_SET_BITMAP_H */
index d74cd11..1ecaabd 100644 (file)
@@ -20,9 +20,6 @@ static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
 }
 #endif
 
-extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
-                               __be16 *port);
-
 static inline bool ip_set_proto_with_ports(u8 proto)
 {
        switch (proto) {
index 6aeaea1..71bbfcf 100644 (file)
@@ -6,15 +6,18 @@
 #ifndef __LINUX_OF_NET_H
 #define __LINUX_OF_NET_H
 
+#include <linux/phy.h>
+
 #ifdef CONFIG_OF_NET
 #include <linux/of.h>
 
 struct net_device;
-extern int of_get_phy_mode(struct device_node *np);
+extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface);
 extern const void *of_get_mac_address(struct device_node *np);
 extern struct net_device *of_find_net_device_by_node(struct device_node *np);
 #else
-static inline int of_get_phy_mode(struct device_node *np)
+static inline int of_get_phy_mode(struct device_node *np,
+                                 phy_interface_t *interface)
 {
        return -ENODEV;
 }
index 9a0e981..78436d5 100644 (file)
@@ -1106,6 +1106,10 @@ int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
 int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
                                 u16 regnum, u16 val);
 
+/* Clause 37 */
+int genphy_c37_config_aneg(struct phy_device *phydev);
+int genphy_c37_read_status(struct phy_device *phydev);
+
 /* Clause 45 PHY */
 int genphy_c45_restart_aneg(struct phy_device *phydev);
 int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart);
index 1c35428..355a08a 100644 (file)
@@ -508,9 +508,9 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
                          u8 *data);
 void sfp_upstream_start(struct sfp_bus *bus);
 void sfp_upstream_stop(struct sfp_bus *bus);
-struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode,
-                                     void *upstream,
-                                     const struct sfp_upstream_ops *ops);
+struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode,
+                                          void *upstream,
+                                          const struct sfp_upstream_ops *ops);
 void sfp_unregister_upstream(struct sfp_bus *bus);
 #else
 static inline int sfp_parse_port(struct sfp_bus *bus,
@@ -553,11 +553,11 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus)
 {
 }
 
-static inline struct sfp_bus *sfp_register_upstream(
+static inline struct sfp_bus *sfp_register_upstream_node(
        struct fwnode_handle *fwnode, void *upstream,
        const struct sfp_upstream_ops *ops)
 {
-       return (struct sfp_bus *)-1;
+       return NULL;
 }
 
 static inline void sfp_unregister_upstream(struct sfp_bus *bus)
index 64a395c..53238ac 100644 (file)
@@ -2277,12 +2277,12 @@ static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
        return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
 }
 
-static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
+static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len)
 {
        if (likely(len <= skb_headlen(skb)))
-               return 1;
+               return true;
        if (unlikely(len > skb->len))
-               return 0;
+               return false;
        return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
 }
 
index e4b3fb4..fe80d53 100644 (file)
@@ -28,13 +28,14 @@ struct sk_msg_sg {
        u32                             end;
        u32                             size;
        u32                             copybreak;
-       bool                            copy[MAX_MSG_FRAGS];
+       unsigned long                   copy;
        /* The extra element is used for chaining the front and sections when
         * the list becomes partitioned (e.g. end < start). The crypto APIs
         * require the chaining.
         */
        struct scatterlist              data[MAX_MSG_FRAGS + 1];
 };
+static_assert(BITS_PER_LONG >= MAX_MSG_FRAGS);
 
 /* UAPI in filter.c depends on struct sk_msg_sg being first element. */
 struct sk_msg {
@@ -227,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
 {
        struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start);
 
-       if (msg->sg.copy[msg->sg.start]) {
+       if (test_bit(msg->sg.start, &msg->sg.copy)) {
                msg->data = NULL;
                msg->data_end = NULL;
        } else {
@@ -246,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
        sg_set_page(sge, page, len, offset);
        sg_unmark_end(sge);
 
-       msg->sg.copy[msg->sg.end] = true;
+       __set_bit(msg->sg.end, &msg->sg.copy);
        msg->sg.size += len;
        sk_msg_iter_next(msg, end);
 }
@@ -254,7 +255,10 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
 static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state)
 {
        do {
-               msg->sg.copy[i] = copy_state;
+               if (copy_state)
+                       __set_bit(i, &msg->sg.copy);
+               else
+                       __clear_bit(i, &msg->sg.copy);
                sk_msg_iter_var_next(i);
                if (i == msg->sg.end)
                        break;
index af4f265..27f6b04 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/completion.h>
 #include <linux/scatterlist.h>
 #include <linux/gpio/consumer.h>
+#include <linux/ptp_clock_kernel.h>
 
 struct dma_chan;
 struct property_entry;
@@ -409,6 +410,12 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @fw_translate_cs: If the boot firmware uses different numbering scheme
  *     what Linux expects, this optional hook can be used to translate
  *     between the two.
+ * @ptp_sts_supported: If the driver sets this to true, it must provide a
+ *     time snapshot in @spi_transfer->ptp_sts as close as possible to the
+ *     moment in time when @spi_transfer->ptp_sts_word_pre and
+ *     @spi_transfer->ptp_sts_word_post were transmitted.
+ *     If the driver does not set this, the SPI core takes the snapshot as
+ *     close to the driver hand-over as possible.
  *
  * Each SPI controller can communicate with one or more @spi_device
  * children.  These make a small bus, sharing MOSI, MISO and SCK signals
@@ -604,6 +611,15 @@ struct spi_controller {
        void                    *dummy_tx;
 
        int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs);
+
+       /*
+        * Driver sets this field to indicate it is able to snapshot SPI
+        * transfers (needed e.g. for reading the time of POSIX clocks)
+        */
+       bool                    ptp_sts_supported;
+
+       /* Interrupt enable state during PTP system timestamping */
+       unsigned long           irq_flags;
 };
 
 static inline void *spi_controller_get_devdata(struct spi_controller *ctlr)
@@ -644,6 +660,14 @@ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ct
 extern void spi_finalize_current_message(struct spi_controller *ctlr);
 extern void spi_finalize_current_transfer(struct spi_controller *ctlr);
 
+/* Helper calls for driver to timestamp transfer */
+void spi_take_timestamp_pre(struct spi_controller *ctlr,
+                           struct spi_transfer *xfer,
+                           const void *tx, bool irqs_off);
+void spi_take_timestamp_post(struct spi_controller *ctlr,
+                            struct spi_transfer *xfer,
+                            const void *tx, bool irqs_off);
+
 /* the spi driver core manages memory for the spi_controller classdev */
 extern struct spi_controller *__spi_alloc_controller(struct device *host,
                                                unsigned int size, bool slave);
@@ -753,6 +777,35 @@ extern void spi_res_release(struct spi_controller *ctlr,
  * @transfer_list: transfers are sequenced through @spi_message.transfers
  * @tx_sg: Scatterlist for transmit, currently not for client use
  * @rx_sg: Scatterlist for receive, currently not for client use
+ * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset
+ *     within @tx_buf for which the SPI device is requesting that the time
+ *     snapshot for this transfer begins. Upon completing the SPI transfer,
+ *     this value may have changed compared to what was requested, depending
+ *     on the available snapshotting resolution (DMA transfer,
+ *     @ptp_sts_supported is false, etc).
+ * @ptp_sts_word_post: See @ptp_sts_word_pre. The two can be equal (meaning
+ *     that a single byte should be snapshotted).
+ *     If the core takes care of the timestamp (if @ptp_sts_supported is false
+ *     for this controller), it will set @ptp_sts_word_pre to 0, and
+ *     @ptp_sts_word_post to the length of the transfer. This is done
+ *     purposefully (instead of setting to spi_transfer->len - 1) to denote
+ *     that a transfer-level snapshot taken from within the driver may still
+ *     be of higher quality.
+ * @ptp_sts: Pointer to a memory location held by the SPI slave device where a
+ *     PTP system timestamp structure may lie. If drivers use PIO or their
+ *     hardware has some sort of assist for retrieving exact transfer timing,
+ *     they can (and should) assert @ptp_sts_supported and populate this
+ *     structure using the ptp_read_system_*ts helper functions.
+ *     The timestamp must represent the time at which the SPI slave device has
+ *     processed the word, i.e. the "pre" timestamp should be taken before
+ *     transmitting the "pre" word, and the "post" timestamp after receiving
+ *     transmit confirmation from the controller for the "post" word.
+ * @timestamped_pre: Set by the SPI controller driver to denote it has acted
+ *     upon the @ptp_sts request. Not set when the SPI core has taken care of
+ *     the task. SPI device drivers are free to print a warning if this comes
+ *     back unset and they need the better resolution.
+ * @timestamped_post: See above. The reason why both exist is that these
+ *     booleans are also used to keep state in the core SPI logic.
  *
  * SPI transfers always write the same number of bytes as they read.
  * Protocol drivers should always provide @rx_buf and/or @tx_buf.
@@ -842,6 +895,14 @@ struct spi_transfer {
 
        u32             effective_speed_hz;
 
+       unsigned int    ptp_sts_word_pre;
+       unsigned int    ptp_sts_word_post;
+
+       struct ptp_system_timestamp *ptp_sts;
+
+       bool            timestamped_pre;
+       bool            timestamped_post;
+
        struct list_head transfer_list;
 };
 
index dc60d03..d4bcd93 100644 (file)
@@ -13,6 +13,7 @@
 #define __STMMAC_PLATFORM_DATA
 
 #include <linux/platform_device.h>
+#include <linux/phy.h>
 
 #define MTL_MAX_RX_QUEUES      8
 #define MTL_MAX_TX_QUEUES      8
@@ -92,6 +93,7 @@ struct stmmac_dma_cfg {
        int fixed_burst;
        int mixed_burst;
        bool aal;
+       bool eame;
 };
 
 #define AXI_BLEN       7
@@ -131,7 +133,7 @@ struct plat_stmmacenet_data {
        int bus_id;
        int phy_addr;
        int interface;
-       int phy_interface;
+       phy_interface_t phy_interface;
        struct stmmac_mdio_bus_data *mdio_bus_data;
        struct device_node *phy_node;
        struct device_node *phylink_node;
index 2673691..85ec745 100644 (file)
@@ -10,6 +10,8 @@
 #ifndef __SXGBE_PLATFORM_H__
 #define __SXGBE_PLATFORM_H__
 
+#include <linux/phy.h>
+
 /* MDC Clock Selection define*/
 #define SXGBE_CSR_100_150M     0x0     /* MDC = clk_scr_i/62 */
 #define SXGBE_CSR_150_250M     0x1     /* MDC = clk_scr_i/102 */
@@ -38,7 +40,7 @@ struct sxgbe_plat_data {
        char *phy_bus_name;
        int bus_id;
        int phy_addr;
-       int interface;
+       phy_interface_t interface;
        struct sxgbe_mdio_bus_data *mdio_bus_data;
        struct sxgbe_dma_cfg *dma_cfg;
        int clk_csr;
index 668e25a..ca6f015 100644 (file)
@@ -223,7 +223,7 @@ struct tcp_sock {
                fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
                fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
                is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
-               unused:2;
+               fastopen_client_fail:2; /* reason why fastopen failed */
        u8      nonagle     : 4,/* Disable Nagle algorithm?             */
                thin_lto    : 1,/* Use linear timeouts for thin streams */
                recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
index d4ee6e9..67f0160 100644 (file)
@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
  * happens, handle that and return -EFAULT.
  */
 extern long probe_kernel_read(void *dst, const void *src, size_t size);
+extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
 extern long __probe_kernel_read(void *dst, const void *src, size_t size);
 
 /*
@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
 extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
 extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
 
+/*
+ * probe_user_write(): safely attempt to write to a location in user space
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
+extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
+
 extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+                                      long count);
+extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
                                     long count);
 extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
index b18c699..0495bdc 100644 (file)
@@ -41,6 +41,7 @@ struct tc_action {
        struct gnet_stats_queue __percpu *cpu_qstats;
        struct tc_cookie        __rcu *act_cookie;
        struct tcf_chain        __rcu *goto_chain;
+       u32                     tcfa_flags;
 };
 #define tcf_index      common.tcfa_index
 #define tcf_refcnt     common.tcfa_refcnt
@@ -94,7 +95,7 @@ struct tc_action_ops {
        int     (*init)(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act, int ovr,
                        int bind, bool rtnl_held, struct tcf_proto *tp,
-                       struct netlink_ext_ack *extack);
+                       u32 flags, struct netlink_ext_ack *extack);
        int     (*walk)(struct net *, struct sk_buff *,
                        struct netlink_callback *, int,
                        const struct tc_action_ops *,
@@ -154,7 +155,11 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                   struct tc_action **a, const struct tc_action_ops *ops,
-                  int bind, bool cpustats);
+                  int bind, bool cpustats, u32 flags);
+int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
+                             struct nlattr *est, struct tc_action **a,
+                             const struct tc_action_ops *ops, int bind,
+                             u32 flags);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
@@ -186,6 +191,43 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
                    int ref);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
+
+static inline void tcf_action_update_bstats(struct tc_action *a,
+                                           struct sk_buff *skb)
+{
+       if (likely(a->cpu_bstats)) {
+               bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+               return;
+       }
+       spin_lock(&a->tcfa_lock);
+       bstats_update(&a->tcfa_bstats, skb);
+       spin_unlock(&a->tcfa_lock);
+}
+
+static inline void tcf_action_inc_drop_qstats(struct tc_action *a)
+{
+       if (likely(a->cpu_qstats)) {
+               qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
+               return;
+       }
+       spin_lock(&a->tcfa_lock);
+       qstats_drop_inc(&a->tcfa_qstats);
+       spin_unlock(&a->tcfa_lock);
+}
+
+static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
+{
+       if (likely(a->cpu_qstats)) {
+               qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats));
+               return;
+       }
+       spin_lock(&a->tcfa_lock);
+       qstats_overlimit_inc(&a->tcfa_qstats);
+       spin_unlock(&a->tcfa_lock);
+}
+
+void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
+                            bool drop, bool hw);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 
 int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
index 3f62b34..1bab881 100644 (file)
@@ -202,11 +202,11 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr,
 /*
  *     multicast prototypes (mcast.c)
  */
-static inline int ipv6_mc_may_pull(struct sk_buff *skb,
-                                  unsigned int len)
+static inline bool ipv6_mc_may_pull(struct sk_buff *skb,
+                                   unsigned int len)
 {
        if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len)
-               return 0;
+               return false;
 
        return pskb_may_pull(skb, len);
 }
index 4ab2c49..ab6850b 100644 (file)
@@ -6593,7 +6593,7 @@ struct cfg80211_roam_info {
  * time it is accessed in __cfg80211_roamed() due to delay in scheduling
  * rdev->event_work. In case of any failures, the reference is released
  * either in cfg80211_roamed() or in __cfg80211_romed(), Otherwise, it will be
- * released while diconneting from the current bss.
+ * released while disconnecting from the current bss.
  */
 void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
                     gfp_t gfp);
index 23e4b65..6bf3b9e 100644 (file)
@@ -39,6 +39,7 @@ struct devlink {
        possible_net_t _net;
        struct mutex lock;
        bool reload_failed;
+       bool registered;
        char priv[0] __aligned(NETDEV_ALIGN);
 };
 
@@ -506,11 +507,13 @@ enum devlink_health_reporter_state {
 struct devlink_health_reporter_ops {
        char *name;
        int (*recover)(struct devlink_health_reporter *reporter,
-                      void *priv_ctx);
+                      void *priv_ctx, struct netlink_ext_ack *extack);
        int (*dump)(struct devlink_health_reporter *reporter,
-                   struct devlink_fmsg *fmsg, void *priv_ctx);
+                   struct devlink_fmsg *fmsg, void *priv_ctx,
+                   struct netlink_ext_ack *extack);
        int (*diagnose)(struct devlink_health_reporter *reporter,
-                       struct devlink_fmsg *fmsg);
+                       struct devlink_fmsg *fmsg,
+                       struct netlink_ext_ack *extack);
 };
 
 /**
@@ -643,7 +646,7 @@ enum devlink_trap_group_generic_id {
        }
 
 struct devlink_ops {
-       int (*reload_down)(struct devlink *devlink,
+       int (*reload_down)(struct devlink *devlink, bool netns_change,
                           struct netlink_ext_ack *extack);
        int (*reload_up)(struct devlink *devlink,
                         struct netlink_ext_ack *extack);
@@ -771,6 +774,8 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev)
 
 struct ib_device;
 
+struct net *devlink_net(const struct devlink *devlink);
+void devlink_net_set(struct devlink *devlink, struct net *net);
 struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size);
 int devlink_register(struct devlink *devlink, struct device *dev);
 void devlink_unregister(struct devlink *devlink);
index 541fb51..9507611 100644 (file)
@@ -94,8 +94,6 @@ struct __dsa_skb_cb {
        u8 priv[48 - sizeof(struct dsa_skb_cb)];
 };
 
-#define __DSA_SKB_CB(skb) ((struct __dsa_skb_cb *)((skb)->cb))
-
 #define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb))
 
 #define DSA_SKB_CB_PRIV(skb)                   \
@@ -122,15 +120,11 @@ struct dsa_switch_tree {
         */
        struct dsa_platform_data        *pd;
 
-       /*
-        * The switch port to which the CPU is attached.
-        */
-       struct dsa_port         *cpu_dp;
+       /* List of switch ports */
+       struct list_head ports;
 
-       /*
-        * Data for the individual switch chips.
-        */
-       struct dsa_switch       *ds[DSA_MAX_SWITCHES];
+       /* List of DSA links composing the routing table */
+       struct list_head rtable;
 };
 
 /* TC matchall action types, only mirroring for now */
@@ -197,6 +191,8 @@ struct dsa_port {
        struct work_struct      xmit_work;
        struct sk_buff_head     xmit_queue;
 
+       struct list_head list;
+
        /*
         * Give the switch driver somewhere to hang its per-port private data
         * structures (accessible from the tagger).
@@ -212,9 +208,24 @@ struct dsa_port {
         * Original copy of the master netdev net_device_ops
         */
        const struct net_device_ops *orig_ndo_ops;
+
+       bool setup;
+};
+
+/* TODO: ideally DSA ports would have a single dp->link_dp member,
+ * and no dst->rtable nor this struct dsa_link would be needed,
+ * but this would require some more complex tree walking,
+ * so keep it stupid at the moment and list them all.
+ */
+struct dsa_link {
+       struct dsa_port *dp;
+       struct dsa_port *link_dp;
+       struct list_head list;
 };
 
 struct dsa_switch {
+       bool setup;
+
        struct device *dev;
 
        /*
@@ -242,13 +253,6 @@ struct dsa_switch {
         */
        const struct dsa_switch_ops     *ops;
 
-       /*
-        * An array of which element [a] indicates which port on this
-        * switch should be used to send packets to that are destined
-        * for switch a. Can be NULL if there is only one switch chip.
-        */
-       s8              rtable[DSA_MAX_SWITCHES];
-
        /*
         * Slave mii_bus and devices for the individual ports.
         */
@@ -275,14 +279,19 @@ struct dsa_switch {
         */
        bool                    vlan_filtering;
 
-       /* Dynamically allocated ports, keep last */
        size_t num_ports;
-       struct dsa_port ports[];
 };
 
-static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
+static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
 {
-       return &ds->ports[p];
+       struct dsa_switch_tree *dst = ds->dst;
+       struct dsa_port *dp;
+
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dp->ds == ds && dp->index == p)
+                       return dp;
+
+       return NULL;
 }
 
 static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
@@ -317,6 +326,19 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds)
        return mask;
 }
 
+/* Return the local port used to reach an arbitrary switch device */
+static inline unsigned int dsa_routing_port(struct dsa_switch *ds, int device)
+{
+       struct dsa_switch_tree *dst = ds->dst;
+       struct dsa_link *dl;
+
+       list_for_each_entry(dl, &dst->rtable, list)
+               if (dl->dp->ds == ds && dl->link_dp->ds->index == device)
+                       return dl->dp->index;
+
+       return ds->num_ports;
+}
+
 /* Return the local port used to reach an arbitrary switch port */
 static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device,
                                            int port)
@@ -324,7 +346,7 @@ static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device,
        if (device == ds->index)
                return port;
        else
-               return ds->rtable[device];
+               return dsa_routing_port(ds, device);
 }
 
 /* Return the local port used to reach the dedicated CPU port */
@@ -543,6 +565,45 @@ struct dsa_switch_ops {
         */
        netdev_tx_t (*port_deferred_xmit)(struct dsa_switch *ds, int port,
                                          struct sk_buff *skb);
+       /* Devlink parameters */
+       int     (*devlink_param_get)(struct dsa_switch *ds, u32 id,
+                                    struct devlink_param_gset_ctx *ctx);
+       int     (*devlink_param_set)(struct dsa_switch *ds, u32 id,
+                                    struct devlink_param_gset_ctx *ctx);
+};
+
+#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes)           \
+       DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes,                \
+                            dsa_devlink_param_get, dsa_devlink_param_set, NULL)
+
+int dsa_devlink_param_get(struct devlink *dl, u32 id,
+                         struct devlink_param_gset_ctx *ctx);
+int dsa_devlink_param_set(struct devlink *dl, u32 id,
+                         struct devlink_param_gset_ctx *ctx);
+int dsa_devlink_params_register(struct dsa_switch *ds,
+                               const struct devlink_param *params,
+                               size_t params_count);
+void dsa_devlink_params_unregister(struct dsa_switch *ds,
+                                  const struct devlink_param *params,
+                                  size_t params_count);
+int dsa_devlink_resource_register(struct dsa_switch *ds,
+                                 const char *resource_name,
+                                 u64 resource_size,
+                                 u64 resource_id,
+                                 u64 parent_resource_id,
+                                 const struct devlink_resource_size_params *size_params);
+
+void dsa_devlink_resources_unregister(struct dsa_switch *ds);
+
+void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
+                                          u64 resource_id,
+                                          devlink_resource_occ_get_t *occ_get,
+                                          void *occ_get_priv);
+void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
+                                            u64 resource_id);
+
+struct dsa_devlink_priv {
+       struct dsa_switch *ds;
 };
 
 struct dsa_switch_driver {
@@ -570,7 +631,6 @@ static inline bool dsa_can_decode(const struct sk_buff *skb,
        return false;
 }
 
-struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n);
 void dsa_unregister_switch(struct dsa_switch *ds);
 int dsa_register_switch(struct dsa_switch *ds);
 #ifdef CONFIG_PM_SLEEP
index c49d7bf..6d59221 100644 (file)
@@ -8,7 +8,6 @@
 struct module;
 
 struct fib_notifier_info {
-       struct net *net;
        int family;
        struct netlink_ext_ack  *extack;
 };
@@ -30,19 +29,21 @@ struct fib_notifier_ops {
        int family;
        struct list_head list;
        unsigned int (*fib_seq_read)(struct net *net);
-       int (*fib_dump)(struct net *net, struct notifier_block *nb);
+       int (*fib_dump)(struct net *net, struct notifier_block *nb,
+                       struct netlink_ext_ack *extack);
        struct module *owner;
        struct rcu_head rcu;
 };
 
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
+int call_fib_notifier(struct notifier_block *nb,
                      enum fib_event_type event_type,
                      struct fib_notifier_info *info);
 int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
                       struct fib_notifier_info *info);
-int register_fib_notifier(struct notifier_block *nb,
-                         void (*cb)(struct notifier_block *nb));
-int unregister_fib_notifier(struct notifier_block *nb);
+int register_fib_notifier(struct net *net, struct notifier_block *nb,
+                         void (*cb)(struct notifier_block *nb),
+                         struct netlink_ext_ack *extack);
+int unregister_fib_notifier(struct net *net, struct notifier_block *nb);
 struct fib_notifier_ops *
 fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net);
 void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
index 20dcadd..54e227e 100644 (file)
@@ -194,7 +194,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
 int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
                         u32 flags);
 bool fib_rule_matchall(const struct fib_rule *rule);
-int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
+                  struct netlink_ext_ack *extack);
 unsigned int fib_rules_seq_read(struct net *net, int family);
 
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
index 5cd1227..b1063db 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/siphash.h>
 #include <uapi/linux/if_ether.h>
 
+struct sk_buff;
+
 /**
  * struct flow_dissector_key_control:
  * @thoff: Transport header offset
@@ -157,19 +159,16 @@ struct flow_dissector_key_ports {
 
 /**
  * flow_dissector_key_icmp:
- *     @ports: type and code of ICMP header
- *             icmp: ICMP type (high) and code (low)
  *             type: ICMP type
  *             code: ICMP code
+ *             id:   session identifier
  */
 struct flow_dissector_key_icmp {
-       union {
-               __be16 icmp;
-               struct {
-                       u8 type;
-                       u8 code;
-               };
+       struct {
+               u8 type;
+               u8 code;
        };
+       u16 id;
 };
 
 /**
@@ -283,6 +282,8 @@ struct flow_keys {
        struct flow_dissector_key_vlan cvlan;
        struct flow_dissector_key_keyid keyid;
        struct flow_dissector_key_ports ports;
+       struct flow_dissector_key_icmp icmp;
+       /* 'addrs' must be the last member */
        struct flow_dissector_key_addrs addrs;
 };
 
@@ -316,6 +317,9 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 }
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+                          struct flow_dissector_key_icmp *key_icmp,
+                          void *data, int thoff, int hlen);
 
 static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
                                      enum flow_dissector_key_id key_id)
index ca23860..1424e02 100644 (file)
@@ -7,6 +7,12 @@
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
 
+/* Note: this used to be in include/uapi/linux/gen_stats.h */
+struct gnet_stats_basic_packed {
+       __u64   bytes;
+       __u64   packets;
+};
+
 struct gnet_stats_basic_cpu {
        struct gnet_stats_basic_packed bstats;
        struct u64_stats_sync syncp;
index 9292f1c..7495066 100644 (file)
@@ -75,8 +75,6 @@ struct genl_family {
        struct module           *module;
 };
 
-struct nlattr **genl_family_attrbuf(const struct genl_family *family);
-
 /**
  * struct genl_info - receiving information
  * @snd_seq: sending sequence number
@@ -127,6 +125,24 @@ enum genl_validate_flags {
        GENL_DONT_VALIDATE_DUMP_STRICT          = BIT(2),
 };
 
+/**
+ * struct genl_dumpit_info - info that is available during dumpit op call
+ * @family: generic netlink family - for internal genl code usage
+ * @ops: generic netlink ops - for internal genl code usage
+ * @attrs: netlink attributes
+ */
+struct genl_dumpit_info {
+       const struct genl_family *family;
+       const struct genl_ops *ops;
+       struct nlattr **attrs;
+};
+
+static inline const struct genl_dumpit_info *
+genl_dumpit_info(struct netlink_callback *cb)
+{
+       return cb->data;
+}
+
 /**
  * struct genl_ops - generic netlink operations
  * @cmd: command identifier
index 4b5656c..5d16154 100644 (file)
@@ -478,7 +478,7 @@ struct ipv6_route_iter {
 
 extern const struct seq_operations ipv6_route_seq_ops;
 
-int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+int call_fib6_notifier(struct notifier_block *nb,
                       enum fib_event_type event_type,
                       struct fib_notifier_info *info);
 int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
@@ -488,7 +488,8 @@ int __net_init fib6_notifier_init(struct net *net);
 void __net_exit fib6_notifier_exit(struct net *net);
 
 unsigned int fib6_tables_seq_read(struct net *net);
-int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+int fib6_tables_dump(struct net *net, struct notifier_block *nb,
+                    struct netlink_ext_ack *extack);
 
 void fib6_update_sernum(struct net *net, struct fib6_info *rt);
 void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
@@ -504,7 +505,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
-int fib6_rules_dump(struct net *net, struct notifier_block *nb);
+int fib6_rules_dump(struct net *net, struct notifier_block *nb,
+                   struct netlink_ext_ack *extack);
 unsigned int fib6_rules_seq_read(struct net *net);
 
 static inline bool fib6_rules_early_flow_dissect(struct net *net,
@@ -537,7 +539,8 @@ static inline bool fib6_rule_default(const struct fib_rule *rule)
 {
        return true;
 }
-static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb,
+                                 struct netlink_ext_ack *extack)
 {
        return 0;
 }
index ab1ca9e..52b2406 100644 (file)
@@ -219,7 +219,7 @@ struct fib_nh_notifier_info {
        struct fib_nh *fib_nh;
 };
 
-int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+int call_fib4_notifier(struct notifier_block *nb,
                       enum fib_event_type event_type,
                       struct fib_notifier_info *info);
 int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
@@ -229,7 +229,8 @@ int __net_init fib4_notifier_init(struct net *net);
 void __net_exit fib4_notifier_exit(struct net *net);
 
 void fib_info_notify_update(struct net *net, struct nl_info *info);
-void fib_notify(struct net *net, struct notifier_block *nb);
+int fib_notify(struct net *net, struct notifier_block *nb,
+              struct netlink_ext_ack *extack);
 
 struct fib_table {
        struct hlist_node       tb_hlist;
@@ -315,7 +316,8 @@ static inline bool fib4_rule_default(const struct fib_rule *rule)
        return true;
 }
 
-static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb,
+                                 struct netlink_ext_ack *extack)
 {
        return 0;
 }
@@ -377,7 +379,8 @@ out:
 }
 
 bool fib4_rule_default(const struct fib_rule *rule);
-int fib4_rules_dump(struct net *net, struct notifier_block *nb);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb,
+                   struct netlink_ext_ack *extack);
 unsigned int fib4_rules_seq_read(struct net *net);
 
 static inline bool fib4_rules_early_flow_dissect(struct net *net,
index 078887c..83be2d9 100644 (file)
@@ -1325,7 +1325,7 @@ void ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs);
 void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
 void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
 void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs);
+void ip_vs_service_nets_cleanup(struct list_head *net_list);
 
 /* IPVS application functions
  * (from ip_vs_app.c)
index 009605c..d04b7ab 100644 (file)
@@ -696,6 +696,11 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
                                        cpu_to_be32(0x0000ffff))) == 0UL;
 }
 
+static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a)
+{
+       return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]);
+}
+
 static inline u32 ipv6_portaddr_hash(const struct net *net,
                                     const struct in6_addr *addr6,
                                     unsigned int port)
index 523c6a0..d69081c 100644 (file)
@@ -3095,7 +3095,9 @@ enum ieee80211_filter_flags {
  *
  * @IEEE80211_AMPDU_RX_START: start RX aggregation
  * @IEEE80211_AMPDU_RX_STOP: stop RX aggregation
- * @IEEE80211_AMPDU_TX_START: start TX aggregation
+ * @IEEE80211_AMPDU_TX_START: start TX aggregation, the driver must either
+ *     call ieee80211_start_tx_ba_cb_irqsafe() or return the special
+ *     status %IEEE80211_AMPDU_TX_START_IMMEDIATE.
  * @IEEE80211_AMPDU_TX_OPERATIONAL: TX aggregation has become operational
  * @IEEE80211_AMPDU_TX_STOP_CONT: stop TX aggregation but continue transmitting
  *     queued packets, now unaggregated. After all packets are transmitted the
@@ -3119,6 +3121,8 @@ enum ieee80211_ampdu_mlme_action {
        IEEE80211_AMPDU_TX_OPERATIONAL,
 };
 
+#define IEEE80211_AMPDU_TX_START_IMMEDIATE 1
+
 /**
  * struct ieee80211_ampdu_params - AMPDU action parameters
  *
@@ -3896,7 +3900,10 @@ struct ieee80211_ops {
         *
         * Even ``189`` would be wrong since 1 could be lost again.
         *
-        * Returns a negative error code on failure.
+        * Returns a negative error code on failure. The driver may return
+        * %IEEE80211_AMPDU_TX_START_IMMEDIATE for %IEEE80211_AMPDU_TX_START
+        * if the session can start immediately.
+        *
         * The callback can sleep.
         */
        int (*ampdu_action)(struct ieee80211_hw *hw,
index c7e15a2..b8ceaf0 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/ns_common.h>
 #include <linux/idr.h>
 #include <linux/skbuff.h>
+#include <linux/notifier.h>
 
 struct user_namespace;
 struct proc_dir_entry;
@@ -104,6 +105,8 @@ struct net {
 
        struct hlist_head       *dev_name_head;
        struct hlist_head       *dev_index_head;
+       struct raw_notifier_head        netdev_chain;
+
        /* Note that @hash_mix can be read millions times per second,
         * it is critical that it is on a read_mostly cache line.
         */
@@ -326,7 +329,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
 /* Protected by net_rwsem */
 #define for_each_net(VAR)                              \
        list_for_each_entry(VAR, &net_namespace_list, list)
-
+#define for_each_net_continue_reverse(VAR)             \
+       list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list)
 #define for_each_net_rcu(VAR)                          \
        list_for_each_entry_rcu(VAR, &net_namespace_list, list)
 
index 112a6f4..5ae5295 100644 (file)
@@ -43,7 +43,6 @@ enum nf_ct_ext_id {
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
-       struct rcu_head rcu;
        u8 offset[NF_CT_EXT_NUM];
        u8 len;
        char data[0];
@@ -72,15 +71,6 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
 /* Destroy all relationships */
 void nf_ct_ext_destroy(struct nf_conn *ct);
 
-/* Free operation. If you want to free a object referred from private area,
- * please implement __nf_ct_ext_free() and call it.
- */
-static inline void nf_ct_ext_free(struct nf_conn *ct)
-{
-       if (ct->ext)
-               kfree_rcu(ct->ext, rcu);
-}
-
 /* Add this type, returns pointer to data or NULL. */
 void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
 
index b37a7d6..1585142 100644 (file)
@@ -24,6 +24,7 @@ struct nf_flowtable_type {
 struct nf_flowtable {
        struct list_head                list;
        struct rhashtable               rhashtable;
+       int                             priority;
        const struct nf_flowtable_type  *type;
        struct delayed_work             gc_work;
 };
index 001d294..5bf569e 100644 (file)
@@ -963,25 +963,31 @@ struct nft_stats {
        struct u64_stats_sync   syncp;
 };
 
+struct nft_hook {
+       struct list_head        list;
+       struct nf_hook_ops      ops;
+       struct rcu_head         rcu;
+};
+
 /**
  *     struct nft_base_chain - nf_tables base chain
  *
  *     @ops: netfilter hook ops
+ *     @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family)
  *     @type: chain type
  *     @policy: default policy
  *     @stats: per-cpu chain stats
  *     @chain: the chain
- *     @dev_name: device name that this base chain is attached to (if any)
  *     @flow_block: flow block (for hardware offload)
  */
 struct nft_base_chain {
        struct nf_hook_ops              ops;
+       struct list_head                hook_list;
        const struct nft_chain_type     *type;
        u8                              policy;
        u8                              flags;
        struct nft_stats __percpu       *stats;
        struct nft_chain                chain;
-       char                            dev_name[IFNAMSIZ];
        struct flow_block               flow_block;
 };
 
@@ -1146,7 +1152,7 @@ struct nft_object_ops {
 int nft_register_obj(struct nft_object_type *obj_type);
 void nft_unregister_obj(struct nft_object_type *obj_type);
 
-#define NFT_FLOWTABLE_DEVICE_MAX       8
+#define NFT_NETDEVICE_MAX      256
 
 /**
  *     struct nft_flowtable - nf_tables flow table
@@ -1155,7 +1161,6 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
  *     @table: the table the flow table is contained in
  *     @name: name of this flow table
  *     @hooknum: hook number
- *     @priority: hook priority
  *     @ops_len: number of hooks in array
  *     @genmask: generation mask
  *     @use: number of references to this flow table
@@ -1169,13 +1174,12 @@ struct nft_flowtable {
        struct nft_table                *table;
        char                            *name;
        int                             hooknum;
-       int                             priority;
        int                             ops_len;
        u32                             genmask:2,
                                        use:30;
        u64                             handle;
        /* runtime data below here */
-       struct nf_hook_ops              *ops ____cacheline_aligned;
+       struct list_head                hook_list ____cacheline_aligned;
        struct nf_flowtable             data;
 };
 
index 830bdf3..b5fdb10 100644 (file)
@@ -24,6 +24,9 @@ struct netns_mib {
 #ifdef CONFIG_XFRM_STATISTICS
        DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics);
 #endif
+#if IS_ENABLED(CONFIG_TLS)
+       DEFINE_SNMP_STAT(struct linux_tls_mib, tls_statistics);
+#endif
 };
 
 #endif
index 637548d..a8b0a9a 100644 (file)
@@ -1286,17 +1286,9 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
                          struct mini_Qdisc __rcu **p_miniq);
 
-static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
 {
-       struct gnet_stats_queue *stats = res->qstats;
-       int ret;
-
-       if (res->ingress)
-               ret = netif_receive_skb(skb);
-       else
-               ret = dev_queue_xmit(skb);
-       if (ret && stats)
-               qstats_overlimit_inc(res->qstats);
+       return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);
 }
 
 #endif
index e1a92c4..0b032b9 100644 (file)
@@ -80,13 +80,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
        struct sctp_chunk *chunk,
        gfp_t gfp);
 
-struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
-       const struct sctp_association *asoc,
-       const struct sockaddr_storage *aaddr,
-       int flags,
-       int state,
-       int error,
-       gfp_t gfp);
+void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+                                          int state, int error);
 
 struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
        const struct sctp_association *asoc,
@@ -100,6 +95,13 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
        __u32 error,
        gfp_t gfp);
 
+struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event(
+       const struct sctp_association *asoc,
+       struct sctp_chunk *chunk,
+       __u16 flags,
+       __u32 error,
+       gfp_t gfp);
+
 struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
        const struct sctp_association *asoc,
        __u16 flags,
index bd9c0fb..05174ae 100644 (file)
@@ -75,6 +75,9 @@ struct smcd_dev {
        struct workqueue_struct *event_wq;
        u8 pnetid[SMC_MAX_PNETID_LEN];
        bool pnetid_by_user;
+       struct list_head lgr_list;
+       spinlock_t lgr_lock;
+       u8 going_away : 1;
 };
 
 struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
index cb8ced4..468a678 100644 (file)
@@ -111,6 +111,12 @@ struct linux_xfrm_mib {
        unsigned long   mibs[LINUX_MIB_XFRMMAX];
 };
 
+/* Linux TLS */
+#define LINUX_MIB_TLSMAX       __LINUX_MIB_TLSMAX
+struct linux_tls_mib {
+       unsigned long   mibs[LINUX_MIB_TLSMAX];
+};
+
 #define DEFINE_SNMP_STAT(type, name)   \
        __typeof__(type) __percpu *name
 #define DEFINE_SNMP_STAT_ATOMIC(type, name)    \
index 8f9adcf..ac6042d 100644 (file)
@@ -66,7 +66,6 @@
 #include <net/checksum.h>
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
-#include <net/smc.h>
 #include <net/l3mdev.h>
 
 /*
@@ -2528,7 +2527,7 @@ static inline bool sk_listener(const struct sock *sk)
        return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
 }
 
-void sock_enable_timestamp(struct sock *sk, int flag);
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag);
 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
                       int type);
 
index c664e6d..41265e5 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
 
+#include <net/net_namespace.h>
 #include <net/tcp.h>
 #include <net/strparser.h>
 #include <crypto/aead.h>
@@ -60,7 +61,6 @@
 #define TLS_RECORD_TYPE_DATA           0x17
 
 #define TLS_AAD_SPACE_SIZE             13
-#define TLS_DEVICE_NAME_MAX            32
 
 #define MAX_IV_SIZE                    16
 #define TLS_MAX_REC_SEQ_SIZE           8
  */
 #define TLS_AES_CCM_IV_B0_BYTE         2
 
-/*
- * This structure defines the routines for Inline TLS driver.
- * The following routines are optional and filled with a
- * null pointer if not defined.
- *
- * @name: Its the name of registered Inline tls device
- * @dev_list: Inline tls device list
- * int (*feature)(struct tls_device *device);
- *     Called to return Inline TLS driver capability
- *
- * int (*hash)(struct tls_device *device, struct sock *sk);
- *     This function sets Inline driver for listen and program
- *     device specific functioanlity as required
- *
- * void (*unhash)(struct tls_device *device, struct sock *sk);
- *     This function cleans listen state set by Inline TLS driver
- *
- * void (*release)(struct kref *kref);
- *     Release the registered device and allocated resources
- * @kref: Number of reference to tls_device
- */
-struct tls_device {
-       char name[TLS_DEVICE_NAME_MAX];
-       struct list_head dev_list;
-       int  (*feature)(struct tls_device *device);
-       int  (*hash)(struct tls_device *device, struct sock *sk);
-       void (*unhash)(struct tls_device *device, struct sock *sk);
-       void (*release)(struct kref *kref);
-       struct kref kref;
-};
+#define __TLS_INC_STATS(net, field)                            \
+       __SNMP_INC_STATS((net)->mib.tls_statistics, field)
+#define TLS_INC_STATS(net, field)                              \
+       SNMP_INC_STATS((net)->mib.tls_statistics, field)
+#define __TLS_DEC_STATS(net, field)                            \
+       __SNMP_DEC_STATS((net)->mib.tls_statistics, field)
+#define TLS_DEC_STATS(net, field)                              \
+       SNMP_DEC_STATS((net)->mib.tls_statistics, field)
 
 enum {
        TLS_BASE,
@@ -158,7 +136,7 @@ struct tls_sw_context_tx {
        struct list_head tx_list;
        atomic_t encrypt_pending;
        int async_notify;
-       int async_capable;
+       u8 async_capable:1;
 
 #define BIT_TX_SCHEDULED       0
 #define BIT_TX_CLOSING         1
@@ -174,8 +152,8 @@ struct tls_sw_context_rx {
 
        struct sk_buff *recv_pkt;
        u8 control;
-       int async_capable;
-       bool decrypted;
+       u8 async_capable:1;
+       u8 decrypted:1;
        atomic_t decrypt_pending;
        bool async_notify;
 };
@@ -340,7 +318,10 @@ struct tls_offload_context_rx {
 #define TLS_OFFLOAD_CONTEXT_SIZE_RX                                    \
        (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
 
+struct tls_context *tls_ctx_create(struct sock *sk);
 void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
+void update_sk_prot(struct sock *sk, struct tls_context *ctx);
+
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
                int __user *optlen);
@@ -623,13 +604,6 @@ tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type)
        tls_offload_ctx_rx(tls_ctx)->resync_type = type;
 }
 
-static inline void tls_offload_tx_resync_request(struct sock *sk)
-{
-       struct tls_context *tls_ctx = tls_get_ctx(sk);
-
-       WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags));
-}
-
 /* Driver's seq tracking has to be disabled until resync succeeded */
 static inline bool tls_offload_tx_resync_pending(struct sock *sk)
 {
@@ -641,10 +615,11 @@ static inline bool tls_offload_tx_resync_pending(struct sock *sk)
        return ret;
 }
 
+int __net_init tls_proc_init(struct net *net);
+void __net_exit tls_proc_fini(struct net *net);
+
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
                      unsigned char *record_type);
-void tls_register_device(struct tls_device *device);
-void tls_unregister_device(struct tls_device *device);
 int decrypt_skb(struct sock *sk, struct sk_buff *skb,
                struct scatterlist *sgout);
 struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
@@ -665,7 +640,9 @@ void tls_device_free_resources_tx(struct sock *sk);
 int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
 void tls_device_offload_cleanup_rx(struct sock *sk);
 void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
+void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq);
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+                        struct sk_buff *skb, struct strp_msg *rxm);
 #else
 static inline void tls_device_init(void) {}
 static inline void tls_device_cleanup(void) {}
@@ -688,7 +665,9 @@ static inline void tls_device_offload_cleanup_rx(struct sock *sk) {}
 static inline void
 tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {}
 
-static inline int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+static inline int
+tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+                    struct sk_buff *skb, struct strp_msg *rxm)
 {
        return 0;
 }
diff --git a/include/net/tls_toe.h b/include/net/tls_toe.h
new file mode 100644 (file)
index 0000000..b3aa759
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kref.h>
+#include <linux/list.h>
+
+struct sock;
+
+#define TLS_TOE_DEVICE_NAME_MAX                32
+
+/*
+ * This structure defines the routines for Inline TLS driver.
+ * The following routines are optional and filled with a
+ * null pointer if not defined.
+ *
+ * @name: It's the name of the registered Inline TLS device
+ * @dev_list: Inline tls device list
+ * int (*feature)(struct tls_toe_device *device);
+ *     Called to return Inline TLS driver capability
+ *
+ * int (*hash)(struct tls_toe_device *device, struct sock *sk);
+ *     This function sets Inline driver for listen and program
+ *     device specific functionality as required
+ *
+ * void (*unhash)(struct tls_toe_device *device, struct sock *sk);
+ *     This function cleans listen state set by Inline TLS driver
+ *
+ * void (*release)(struct kref *kref);
+ *     Release the registered device and allocated resources
+ * @kref: Number of reference to tls_toe_device
+ */
+struct tls_toe_device {
+       char name[TLS_TOE_DEVICE_NAME_MAX];
+       struct list_head dev_list;
+       int  (*feature)(struct tls_toe_device *device);
+       int  (*hash)(struct tls_toe_device *device, struct sock *sk);
+       void (*unhash)(struct tls_toe_device *device, struct sock *sk);
+       void (*release)(struct kref *kref);
+       struct kref kref;
+};
+
+int tls_toe_bypass(struct sock *sk);
+int tls_toe_hash(struct sock *sk);
+void tls_toe_unhash(struct sock *sk);
+
+void tls_toe_register_device(struct tls_toe_device *device);
+void tls_toe_unregister_device(struct tls_toe_device *device);
index c9398ce..e3780e4 100644 (file)
@@ -69,7 +69,14 @@ struct xdp_umem {
 /* Nodes are linked in the struct xdp_sock map_list field, and used to
  * track which maps a certain socket reside in.
  */
-struct xsk_map;
+
+struct xsk_map {
+       struct bpf_map map;
+       struct list_head __percpu *flush_list;
+       spinlock_t lock; /* Synchronize map updates */
+       struct xdp_sock *xsk_map[];
+};
+
 struct xsk_map_node {
        struct list_head node;
        struct xsk_map *map;
@@ -109,8 +116,6 @@ struct xdp_sock {
 struct xdp_buff;
 #ifdef CONFIG_XDP_SOCKETS
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-void xsk_flush(struct xdp_sock *xs);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
 /* Used from netdev driver */
 bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
@@ -134,6 +139,22 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
                             struct xdp_sock **map_entry);
 int xsk_map_inc(struct xsk_map *map);
 void xsk_map_put(struct xsk_map *map);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+                      struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+                                                    u32 key)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct xdp_sock *xs;
+
+       if (key >= map->max_entries)
+               return NULL;
+
+       xs = READ_ONCE(m->xsk_map[key]);
+       return xs;
+}
 
 static inline u64 xsk_umem_extract_addr(u64 addr)
 {
@@ -224,15 +245,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
        return -ENOTSUPP;
 }
 
-static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
-{
-       return -ENOTSUPP;
-}
-
-static inline void xsk_flush(struct xdp_sock *xs)
-{
-}
-
 static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
        return false;
@@ -357,6 +369,21 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
        return 0;
 }
 
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+                                    struct xdp_sock *xs)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+                                                    u32 key)
+{
+       return NULL;
+}
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
index aa31c05..cfe00e0 100644 (file)
@@ -32,6 +32,7 @@
 #define __FSL_QMAN_H
 
 #include <linux/bitops.h>
+#include <linux/device.h>
 
 /* Hardware constants */
 #define QM_CHANNEL_SWPORTAL0 0
@@ -914,6 +915,16 @@ u16 qman_affine_channel(int cpu);
  */
 struct qman_portal *qman_get_affine_portal(int cpu);
 
+/**
+ * qman_start_using_portal - register a device link for the portal user
+ * @p: the portal that will be in use
+ * @dev: the device that will use the portal
+ *
+ * Makes sure that the devices that use the portal are unbound when the
+ * portal is unbound
+ */
+int qman_start_using_portal(struct qman_portal *p, struct device *dev);
+
 /**
  * qman_p_poll_dqrr - process DQRR (fast-path) entries
  * @limit: the maximum number of DQRR entries to process
index d6e556c..b04c292 100644 (file)
@@ -74,11 +74,12 @@ static inline void bpf_test_probe_##call(void)                              \
 {                                                                      \
        check_trace_callback_type_##call(__bpf_trace_##template);       \
 }                                                                      \
+typedef void (*btf_trace_##call)(void *__data, proto);                 \
 static struct bpf_raw_event_map        __used                                  \
        __attribute__((section("__bpf_raw_tp_map")))                    \
 __bpf_trace_tp_map_##call = {                                          \
        .tp             = &__tracepoint_##call,                         \
-       .bpf_func       = (void *)__bpf_trace_##template,               \
+       .bpf_func       = (void *)(btf_trace_##call)__bpf_trace_##template,     \
        .num_args       = COUNT_ARGS(args),                             \
        .writable_size  = size,                                         \
 };
index 8ea9664..6b20005 100644 (file)
@@ -95,16 +95,16 @@ TRACE_EVENT(fdb_delete,
 TRACE_EVENT(br_fdb_update,
 
        TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
-                const unsigned char *addr, u16 vid, bool added_by_user),
+                const unsigned char *addr, u16 vid, unsigned long flags),
 
-       TP_ARGS(br, source, addr, vid, added_by_user),
+       TP_ARGS(br, source, addr, vid, flags),
 
        TP_STRUCT__entry(
                __string(br_dev, br->dev->name)
                __string(dev, source->dev->name)
                __array(unsigned char, addr, ETH_ALEN)
                __field(u16, vid)
-               __field(bool, added_by_user)
+               __field(unsigned long, flags)
        ),
 
        TP_fast_assign(
@@ -112,14 +112,14 @@ TRACE_EVENT(br_fdb_update,
                __assign_str(dev, source->dev->name);
                memcpy(__entry->addr, addr, ETH_ALEN);
                __entry->vid = vid;
-               __entry->added_by_user = added_by_user;
+               __entry->flags = flags;
        ),
 
-       TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d",
+       TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u flags 0x%lx",
                  __get_str(br_dev), __get_str(dev), __entry->addr[0],
                  __entry->addr[1], __entry->addr[2], __entry->addr[3],
                  __entry->addr[4], __entry->addr[5], __entry->vid,
-                 __entry->added_by_user)
+                 __entry->flags)
 );
 
 
index 8c84202..c7e3c9c 100644 (file)
@@ -22,7 +22,7 @@
 #define __XDP_ACT_SYM_FN(x)    \
        { XDP_##x, #x },
 #define __XDP_ACT_SYM_TAB      \
-       __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, 0 }
+       __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, NULL }
 __XDP_ACT_MAP(__XDP_ACT_TP_FN)
 
 TRACE_EVENT(xdp_exception,
index 77c6be9..df6809a 100644 (file)
@@ -173,6 +173,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_CGROUP_SYSCTL,
        BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
        BPF_PROG_TYPE_CGROUP_SOCKOPT,
+       BPF_PROG_TYPE_TRACING,
 };
 
 enum bpf_attach_type {
@@ -199,6 +200,7 @@ enum bpf_attach_type {
        BPF_CGROUP_UDP6_RECVMSG,
        BPF_CGROUP_GETSOCKOPT,
        BPF_CGROUP_SETSOCKOPT,
+       BPF_TRACE_RAW_TP,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -420,6 +422,7 @@ union bpf_attr {
                __u32           line_info_rec_size;     /* userspace bpf_line_info size */
                __aligned_u64   line_info;      /* line info */
                __u32           line_info_cnt;  /* number of bpf_line_info records */
+               __u32           attach_btf_id;  /* in-kernel BTF type id to attach to */
        };
 
        struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -560,10 +563,13 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
  *     Description
  *             For tracing programs, safely attempt to read *size* bytes from
- *             address *src* and store the data in *dst*.
+ *             kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ *             Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ *             instead.
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
@@ -794,7 +800,7 @@ union bpf_attr {
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
  *
- * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * int bpf_get_current_comm(void *buf, u32 size_of_buf)
  *     Description
  *             Copy the **comm** attribute of the current task into *buf* of
  *             *size_of_buf*. The **comm** attribute contains the name of
@@ -1023,7 +1029,7 @@ union bpf_attr {
  *             The realm of the route for the packet associated to *skb*, or 0
  *             if none was found.
  *
- * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  *     Description
  *             Write raw *data* blob into a special BPF perf event held by
  *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1068,7 +1074,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
  *     Description
  *             This helper was provided as an easy way to load data from a
  *             packet. It can be used to load *len* bytes from *offset* from
@@ -1085,7 +1091,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
+ * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
  *     Description
  *             Walk a user or a kernel stack and return its id. To achieve
  *             this, the helper needs *ctx*, which is a pointer to the context
@@ -1154,7 +1160,7 @@ union bpf_attr {
  *             The checksum result, or a negative error code in case of
  *             failure.
  *
- * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  *     Description
  *             Retrieve tunnel options metadata for the packet associated to
  *             *skb*, and store the raw tunnel option data to the buffer *opt*
@@ -1172,7 +1178,7 @@ union bpf_attr {
  *     Return
  *             The size of the option data retrieved.
  *
- * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  *     Description
  *             Set tunnel options metadata for the packet associated to *skb*
  *             to the option data contained in the raw buffer *opt* of *size*.
@@ -1425,45 +1431,14 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  *     Description
- *             Copy a NUL terminated string from an unsafe address
- *             *unsafe_ptr* to *dst*. The *size* should include the
- *             terminating NUL byte. In case the string length is smaller than
- *             *size*, the target is not padded with further NUL bytes. If the
- *             string length is larger than *size*, just *size*-1 bytes are
- *             copied and the last byte is set to NUL.
- *
- *             On success, the length of the copied string is returned. This
- *             makes this helper useful in tracing programs for reading
- *             strings, and more importantly to get its length at runtime. See
- *             the following snippet:
- *
- *             ::
- *
- *                     SEC("kprobe/sys_open")
- *                     void bpf_sys_open(struct pt_regs *ctx)
- *                     {
- *                             char buf[PATHLEN]; // PATHLEN is defined to 256
- *                             int res = bpf_probe_read_str(buf, sizeof(buf),
- *                                                          ctx->di);
- *
- *                             // Consume buf, for example push it to
- *                             // userspace via bpf_perf_event_output(); we
- *                             // can use res (the string length) as event
- *                             // size, after checking its boundaries.
- *                     }
- *
- *             In comparison, using **bpf_probe_read()** helper here instead
- *             to read the string would require to estimate the length at
- *             compile time, and would often result in copying more memory
- *             than necessary.
+ *             Copy a NUL terminated string from an unsafe kernel address
+ *             *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ *             more details.
  *
- *             Another useful use case is when parsing individual process
- *             arguments or individual environment variables navigating
- *             *current*\ **->mm->arg_start** and *current*\
- *             **->mm->env_start**: using this helper and the return value,
- *             one can quickly iterate at the right offset of the memory area.
+ *             Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ *             instead.
  *     Return
  *             On success, the strictly positive length of the string,
  *             including the trailing NUL character. On error, a negative
@@ -1511,7 +1486,7 @@ union bpf_attr {
  *     Return
  *             0
  *
- * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
  *     Description
  *             Emulate a call to **setsockopt()** on the socket associated to
  *             *bpf_socket*, which must be a full socket. The *level* at
@@ -1595,7 +1570,7 @@ union bpf_attr {
  *     Return
  *             **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
  *
- * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
  *     Description
  *             Redirect the packet to the socket referenced by *map* (of type
  *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
@@ -1715,7 +1690,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
  *     Description
  *             Emulate a call to **getsockopt()** on the socket associated to
  *             *bpf_socket*, which must be a full socket. The *level* at
@@ -1947,7 +1922,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
  *     Description
  *             Return a user or a kernel stack in bpf program provided buffer.
  *             To achieve this, the helper needs *ctx*, which is a pointer
@@ -1980,7 +1955,7 @@ union bpf_attr {
  *             A non-negative value equal to or less than *size* on success,
  *             or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  *     Description
  *             This helper is similar to **bpf_skb_load_bytes**\ () in that
  *             it provides an easy way to load *len* bytes from *offset*
@@ -2033,7 +2008,7 @@ union bpf_attr {
  *             * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *               packet is not forwarded or needs assist from full stack
  *
- * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *     Description
  *             Add an entry to, or update a sockhash *map* referencing sockets.
  *             The *skops* is used as a new value for the entry associated to
@@ -2392,7 +2367,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *     Description
  *             For socket policies, insert *len* bytes into *msg* at offset
  *             *start*.
@@ -2408,9 +2383,9 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *     Description
- *             Will remove *pop* bytes from a *msg* starting at byte *start*.
+ *             Will remove *len* bytes from a *msg* starting at byte *start*.
  *             This may result in **ENOMEM** errors under certain situations if
  *             an allocation and copy are required due to a full ring buffer.
  *             However, the helper will try to avoid doing the allocation
@@ -2505,7 +2480,7 @@ union bpf_attr {
  *             A **struct bpf_tcp_sock** pointer on success, or **NULL** in
  *             case of failure.
  *
- * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
+ * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
  *     Description
  *             Set ECN (Explicit Congestion Notification) field of IP header
  *             to **CE** (Congestion Encountered) if current value is **ECT**
@@ -2750,6 +2725,96 @@ union bpf_attr {
  *             **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
  *
  *             **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *             The value to write, of *size*, is passed through eBPF stack and
+ *             pointed by *data*.
+ *
+ *             *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ *             This helper is similar to **bpf_perf_event_output**\ () but
+ *             restricted to raw_tracepoint bpf programs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Safely attempt to read *size* bytes from user space address
+ *             *unsafe_ptr* and store the data in *dst*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Safely attempt to read *size* bytes from kernel space address
+ *             *unsafe_ptr* and store the data in *dst*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Copy a NUL terminated string from an unsafe user address
+ *             *unsafe_ptr* to *dst*. The *size* should include the
+ *             terminating NUL byte. In case the string length is smaller than
+ *             *size*, the target is not padded with further NUL bytes. If the
+ *             string length is larger than *size*, just *size*-1 bytes are
+ *             copied and the last byte is set to NUL.
+ *
+ *             On success, the length of the copied string is returned. This
+ *             makes this helper useful in tracing programs for reading
+ *             strings, and more importantly to get its length at runtime. See
+ *             the following snippet:
+ *
+ *             ::
+ *
+ *                     SEC("kprobe/sys_open")
+ *                     void bpf_sys_open(struct pt_regs *ctx)
+ *                     {
+ *                             char buf[PATHLEN]; // PATHLEN is defined to 256
+ *                             int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ *                                                               ctx->di);
+ *
+ *                             // Consume buf, for example push it to
+ *                             // userspace via bpf_perf_event_output(); we
+ *                             // can use res (the string length) as event
+ *                             // size, after checking its boundaries.
+ *                     }
+ *
+ *             In comparison, using **bpf_probe_read_user()** helper here
+ *             instead to read the string would require to estimate the length
+ *             at compile time, and would often result in copying more memory
+ *             than necessary.
+ *
+ *             Another useful use case is when parsing individual process
+ *             arguments or individual environment variables navigating
+ *             *current*\ **->mm->arg_start** and *current*\
+ *             **->mm->env_start**: using this helper and the return value,
+ *             one can quickly iterate at the right offset of the memory area.
+ *     Return
+ *             On success, the strictly positive length of the string,
+ *             including the trailing NUL character. On error, a negative
+ *             value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ *             to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ *     Return
+ *             On success, the strictly positive length of the string, including
+ *             the trailing NUL character. On error, a negative value.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2862,7 +2927,12 @@ union bpf_attr {
        FN(sk_storage_get),             \
        FN(sk_storage_delete),          \
        FN(send_signal),                \
-       FN(tcp_gen_syncookie),
+       FN(tcp_gen_syncookie),          \
+       FN(skb_output),                 \
+       FN(probe_read_user),            \
+       FN(probe_read_kernel),          \
+       FN(probe_read_user_str),        \
+       FN(probe_read_kernel_str),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
index 69df19a..a791a94 100644 (file)
@@ -286,7 +286,7 @@ struct dcbmsg {
  * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported
  * @DCB_CMD_SNUMTCS: set the number of traffic classes
  * @DCB_CMD_GBCN: set backward congestion notification configuration
- * @DCB_CMD_SBCN: get backward congestion notification configration.
+ * @DCB_CMD_SBCN: get backward congestion notification configuration.
  * @DCB_CMD_GAPP: get application protocol configuration
  * @DCB_CMD_SAPP: set application protocol configuration
  * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration
index 580b7a2..b558ea8 100644 (file)
@@ -421,6 +421,10 @@ enum devlink_attr {
 
        DEVLINK_ATTR_RELOAD_FAILED,                     /* u8 0 or 1 */
 
+       DEVLINK_ATTR_NETNS_FD,                  /* u32 */
+       DEVLINK_ATTR_NETNS_PID,                 /* u32 */
+       DEVLINK_ATTR_NETNS_ID,                  /* u32 */
+
        /* add new attributes above here, update the policy in devlink.c */
 
        __DEVLINK_ATTR_MAX,
index 8938b76..d459179 100644 (file)
@@ -1507,6 +1507,11 @@ enum ethtool_link_mode_bit_indices {
        ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT         = 66,
        ETHTOOL_LINK_MODE_100baseT1_Full_BIT             = 67,
        ETHTOOL_LINK_MODE_1000baseT1_Full_BIT            = 68,
+       ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT         = 69,
+       ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT         = 70,
+       ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71,
+       ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT         = 72,
+       ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT         = 73,
 
        /* must be last entry */
        __ETHTOOL_LINK_MODE_MASK_NBITS
@@ -1618,6 +1623,7 @@ enum ethtool_link_mode_bit_indices {
 #define SPEED_56000            56000
 #define SPEED_100000           100000
 #define SPEED_200000           200000
+#define SPEED_400000           400000
 
 #define SPEED_UNKNOWN          -1
 
index 065408e..852f234 100644 (file)
@@ -13,6 +13,7 @@ enum {
        TCA_STATS_RATE_EST64,
        TCA_STATS_PAD,
        TCA_STATS_BASIC_HW,
+       TCA_STATS_PKT64,
        __TCA_STATS_MAX,
 };
 #define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
@@ -26,10 +27,6 @@ struct gnet_stats_basic {
        __u64   bytes;
        __u32   packets;
 };
-struct gnet_stats_basic_packed {
-       __u64   bytes;
-       __u32   packets;
-} __attribute__ ((packed));
 
 /**
  * struct gnet_stats_rate_est - rate estimator
index 7fea0fd..4bf3334 100644 (file)
@@ -33,6 +33,7 @@
 #define        IFNAMSIZ        16
 #endif /* __UAPI_DEF_IF_IFNAMSIZ */
 #define        IFALIASZ        256
+#define        ALTIFNAMSIZ     128
 #include <linux/hdlc/ioctl.h>
 
 /* For glibc compatibility. An empty enum does not compile. */
index 4a8c02c..8aec876 100644 (file)
@@ -167,6 +167,8 @@ enum {
        IFLA_NEW_IFINDEX,
        IFLA_MIN_MTU,
        IFLA_MAX_MTU,
+       IFLA_PROP_LIST,
+       IFLA_ALT_IFNAME, /* Alternative ifname */
        __IFLA_MAX
 };
 
index ed8881a..81fed16 100644 (file)
@@ -144,12 +144,14 @@ enum nft_list_attributes {
  * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
  * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
  * @NFTA_HOOK_DEV: netdevice name (NLA_STRING)
+ * @NFTA_HOOK_DEVS: list of netdevices (NLA_NESTED)
  */
 enum nft_hook_attributes {
        NFTA_HOOK_UNSPEC,
        NFTA_HOOK_HOOKNUM,
        NFTA_HOOK_PRIORITY,
        NFTA_HOOK_DEV,
+       NFTA_HOOK_DEVS,
        __NFTA_HOOK_MAX
 };
 #define NFTA_HOOK_MAX          (__NFTA_HOOK_MAX - 1)
index a2a0927..bbf5af2 100644 (file)
@@ -199,7 +199,7 @@ struct arpt_get_entries {
 /* Helper functions */
 static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e)
 {
-       return (void *)e + e->target_offset;
+       return (struct xt_entry_target *)((char *)e + e->target_offset);
 }
 
 /*
index 8076c94..a494cf4 100644 (file)
@@ -194,7 +194,7 @@ struct ebt_entry {
 static __inline__ struct ebt_entry_target *
 ebt_get_target(struct ebt_entry *e)
 {
-       return (void *)e + e->target_offset;
+       return (struct ebt_entry_target *)((char *)e + e->target_offset);
 }
 
 /* {g,s}etsockopt numbers */
index 6aaeb14..50c7fee 100644 (file)
@@ -222,7 +222,7 @@ struct ipt_get_entries {
 static __inline__ struct xt_entry_target *
 ipt_get_target(struct ipt_entry *e)
 {
-       return (void *)e + e->target_offset;
+       return (struct xt_entry_target *)((char *)e + e->target_offset);
 }
 
 /*
index 031d0a4..d9e364f 100644 (file)
@@ -262,7 +262,7 @@ struct ip6t_get_entries {
 static __inline__ struct xt_entry_target *
 ip6t_get_target(struct ip6t_entry *e)
 {
-       return (void *)e + e->target_offset;
+       return (struct xt_entry_target *)((char *)e + e->target_offset);
 }
 
 /*
index beee59c..64135ab 100644 (file)
  *     set of BSSID,frequency parameters is used (i.e., either the enforcing
  *     %NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict
  *     %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT).
+ *     Driver shall not modify the IEs specified through %NL80211_ATTR_IE if
+ *     %NL80211_ATTR_MAC is included. However, if %NL80211_ATTR_MAC_HINT is
+ *     included, these IEs through %NL80211_ATTR_IE are specified by the user
+ *     space based on the best possible BSS selected. Thus, if the driver ends
+ *     up selecting a different BSS, it can modify these IEs accordingly (e.g.
+ *     userspace asks the driver to perform PMKSA caching with BSS1 and the
+ *     driver ends up selecting BSS2 with different PMKSA cache entry; RSNIE
+ *     has to get updated with the apt PMKID).
  *     %NL80211_ATTR_PREV_BSSID can be used to request a reassociation within
  *     the ESS in case the device is already associated and an association with
  *     a different BSS is desired.
index a6aa466..c6ad22f 100644 (file)
@@ -16,9 +16,14 @@ enum {
        TCA_ACT_STATS,
        TCA_ACT_PAD,
        TCA_ACT_COOKIE,
+       TCA_ACT_FLAGS,
        __TCA_ACT_MAX
 };
 
+#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
+                                        * actions stats.
+                                        */
+
 #define TCA_ACT_MAX __TCA_ACT_MAX
 #define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
 #define TCA_ACT_MAX_PRIO 32
index ce2a623..1418a83 100644 (file)
@@ -164,6 +164,13 @@ enum {
        RTM_GETNEXTHOP,
 #define RTM_GETNEXTHOP RTM_GETNEXTHOP
 
+       RTM_NEWLINKPROP = 108,
+#define RTM_NEWLINKPROP        RTM_NEWLINKPROP
+       RTM_DELLINKPROP,
+#define RTM_DELLINKPROP        RTM_DELLINKPROP
+       RTM_GETLINKPROP,
+#define RTM_GETLINKPROP        RTM_GETLINKPROP
+
        __RTM_MAX,
 #define RTM_MAX                (((__RTM_MAX + 3) & ~3) - 1)
 };
index 6d5b164..6bce7f9 100644 (file)
@@ -449,6 +449,16 @@ struct sctp_send_failed {
        __u8 ssf_data[0];
 };
 
+/* Notification delivered when an SCTP send fails; newer variant of
+ * struct sctp_send_failed that carries a struct sctp_sndinfo
+ * (presumably replacing the older sndrcvinfo payload — confirm against
+ * the sibling struct, which is truncated in this view).
+ */
+struct sctp_send_failed_event {
+	__u16 ssf_type;
+	__u16 ssf_flags;
+	__u32 ssf_length;	/* total length of the notification */
+	__u32 ssf_error;	/* protocol error cause of the failure */
+	struct sctp_sndinfo ssfe_info;
+	sctp_assoc_t ssf_assoc_id;
+	__u8 ssf_data[0];	/* undelivered payload follows the header */
+};
+
 /*
  *   ssf_flags: 16 bits (unsigned integer)
  *
@@ -605,6 +615,7 @@ struct sctp_event_subscribe {
        __u8 sctp_stream_reset_event;
        __u8 sctp_assoc_reset_event;
        __u8 sctp_stream_change_event;
+       __u8 sctp_send_failure_event_event;
 };
 
 /*
@@ -632,6 +643,7 @@ union sctp_notification {
        struct sctp_stream_reset_event sn_strreset_event;
        struct sctp_assoc_reset_event sn_assocreset_event;
        struct sctp_stream_change_event sn_strchange_event;
+       struct sctp_send_failed_event sn_send_failed_event;
 };
 
 /* Section 5.3.1
@@ -667,7 +679,9 @@ enum sctp_sn_type {
 #define SCTP_ASSOC_RESET_EVENT         SCTP_ASSOC_RESET_EVENT
        SCTP_STREAM_CHANGE_EVENT,
 #define SCTP_STREAM_CHANGE_EVENT       SCTP_STREAM_CHANGE_EVENT
-       SCTP_SN_TYPE_MAX        = SCTP_STREAM_CHANGE_EVENT,
+       SCTP_SEND_FAILED_EVENT,
+#define SCTP_SEND_FAILED_EVENT         SCTP_SEND_FAILED_EVENT
+       SCTP_SN_TYPE_MAX        = SCTP_SEND_FAILED_EVENT,
 #define SCTP_SN_TYPE_MAX               SCTP_SN_TYPE_MAX
 };
 
index 549a31c..7eee233 100644 (file)
@@ -323,4 +323,21 @@ enum
        __LINUX_MIB_XFRMMAX
 };
 
+/* linux TLS mib definitions */
+enum
+{
+       LINUX_MIB_TLSNUM = 0,
+       LINUX_MIB_TLSCURRTXSW,                  /* TlsCurrTxSw */
+       LINUX_MIB_TLSCURRRXSW,                  /* TlsCurrRxSw */
+       LINUX_MIB_TLSCURRTXDEVICE,              /* TlsCurrTxDevice */
+       LINUX_MIB_TLSCURRRXDEVICE,              /* TlsCurrRxDevice */
+       LINUX_MIB_TLSTXSW,                      /* TlsTxSw */
+       LINUX_MIB_TLSRXSW,                      /* TlsRxSw */
+       LINUX_MIB_TLSTXDEVICE,                  /* TlsTxDevice */
+       LINUX_MIB_TLSRXDEVICE,                  /* TlsRxDevice */
+       LINUX_MIB_TLSDECRYPTERROR,              /* TlsDecryptError */
+       LINUX_MIB_TLSRXDEVICERESYNC,            /* TlsRxDeviceResync */
+       __LINUX_MIB_TLSMAX
+};
+
 #endif /* _LINUX_SNMP_H */
index 81e6979..74af1f7 100644 (file)
@@ -155,6 +155,14 @@ enum {
        TCP_QUEUES_NR,
 };
 
+/* why fastopen failed from client perspective */
+enum tcp_fastopen_client_fail {
+       TFO_STATUS_UNSPEC, /* catch-all */
+       TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */
+       TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */
+       TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */
+};
+
 /* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS    1
 #define TCPI_OPT_SACK          2
@@ -211,7 +219,7 @@ struct tcp_info {
        __u8    tcpi_backoff;
        __u8    tcpi_options;
        __u8    tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
-       __u8    tcpi_delivery_rate_app_limited:1;
+       __u8    tcpi_delivery_rate_app_limited:1, tcpi_fastopen_client_fail:2;
 
        __u32   tcpi_rto;
        __u32   tcpi_ato;
index 7df026e..76421b8 100644 (file)
@@ -191,6 +191,7 @@ struct sockaddr_tipc {
 #define TIPC_GROUP_JOIN         135     /* Takes struct tipc_group_req* */
 #define TIPC_GROUP_LEAVE        136     /* No argument */
 #define TIPC_SOCK_RECVQ_USED    137     /* Default: none (read only) */
+#define TIPC_NODELAY            138     /* Default: false */
 
 /*
  * Flag values
index 4955e1a..4dfc056 100644 (file)
@@ -309,7 +309,7 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
        tlv_ptr->tlv_len  = htons(tlv_len);
        if (len && data) {
                memcpy(TLV_DATA(tlv_ptr), data, len);
-               memset(TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len);
+               memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len);
        }
        return TLV_SPACE(len);
 }
@@ -409,7 +409,7 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags,
        tcm_hdr->tcm_flags = htons(flags);
        if (data_len && data) {
                memcpy(TCM_DATA(msg), data, data_len);
-               memset(TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len);
+               memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len);
        }
        return TCM_SPACE(data_len);
 }
index 4c4e24c..559f42e 100644 (file)
@@ -169,7 +169,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p,
 {
        vr->num = num;
        vr->desc = p;
-       vr->avail = p + num*sizeof(struct vring_desc);
+       vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc));
        vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16)
                + align-1) & ~(align - 1));
 }
index 9634ecf..af9cda8 100644 (file)
@@ -212,6 +212,7 @@ static int match_dev_by_label(struct device *dev, const void *data)
  *        a colon.
  *     9) PARTLABEL=<name> with name being the GPT partition label.
  *        MSDOS partitions do not support labels!
+ *     10) /dev/cifs represents Root_CIFS (0xfe)
  *
 *	If name doesn't fall into the categories above, we return (0,0).
  *     block_class is used to check if something is a disk name. If the disk
@@ -268,6 +269,9 @@ dev_t name_to_dev_t(const char *name)
        res = Root_NFS;
        if (strcmp(name, "nfs") == 0)
                goto done;
+       res = Root_CIFS;
+       if (strcmp(name, "cifs") == 0)
+               goto done;
        res = Root_RAM0;
        if (strcmp(name, "ram") == 0)
                goto done;
@@ -501,6 +505,42 @@ static int __init mount_nfs_root(void)
 }
 #endif
 
+#ifdef CONFIG_CIFS_ROOT
+
+extern int cifs_root_data(char **dev, char **opts);
+
+#define CIFSROOT_TIMEOUT_MIN   5
+#define CIFSROOT_TIMEOUT_MAX   30
+#define CIFSROOT_RETRY_MAX     5
+
+/*
+ * Try to mount the root filesystem over SMB/CIFS.
+ *
+ * cifs_root_data() supplies the device string and mount options
+ * (configured elsewhere, e.g. on the kernel command line); if it
+ * reports an error, there is nothing to do and we return 0.
+ * The mount is retried up to CIFSROOT_RETRY_MAX times with an
+ * exponential backoff that starts at CIFSROOT_TIMEOUT_MIN seconds
+ * and is capped at CIFSROOT_TIMEOUT_MAX seconds.
+ *
+ * Returns 1 if the root was mounted, 0 otherwise (caller falls back
+ * to the next root-mount method).
+ */
+static int __init mount_cifs_root(void)
+{
+	char *root_dev, *root_data;
+	unsigned int timeout;
+	int try, err;
+
+	err = cifs_root_data(&root_dev, &root_data);
+	if (err != 0)
+		return 0;
+
+	timeout = CIFSROOT_TIMEOUT_MIN;
+	for (try = 1; ; try++) {
+		err = do_mount_root(root_dev, "cifs", root_mountflags,
+				    root_data);
+		if (err == 0)
+			return 1;
+		if (try > CIFSROOT_RETRY_MAX)
+			break;
+
+		/* exponential backoff, capped at CIFSROOT_TIMEOUT_MAX */
+		ssleep(timeout);
+		timeout <<= 1;
+		if (timeout > CIFSROOT_TIMEOUT_MAX)
+			timeout = CIFSROOT_TIMEOUT_MAX;
+	}
+	return 0;
+}
+#endif
+
 #if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
 void __init change_floppy(char *fmt, ...)
 {
@@ -542,6 +582,15 @@ void __init mount_root(void)
                ROOT_DEV = Root_FD0;
        }
 #endif
+#ifdef CONFIG_CIFS_ROOT
+       if (ROOT_DEV == Root_CIFS) {
+               if (mount_cifs_root())
+                       return;
+
+               printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying floppy.\n");
+               ROOT_DEV = Root_FD0;
+       }
+#endif
 #ifdef CONFIG_BLK_DEV_FD
        if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
                /* rd_doload is 2 for a dual initrd/ramload setup */
index 29c7c06..128d896 100644 (file)
@@ -336,16 +336,6 @@ static bool btf_type_is_fwd(const struct btf_type *t)
        return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
 }
 
-static bool btf_type_is_func(const struct btf_type *t)
-{
-       return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
-}
-
-static bool btf_type_is_func_proto(const struct btf_type *t)
-{
-       return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
-}
-
 static bool btf_type_nosize(const struct btf_type *t)
 {
        return btf_type_is_void(t) || btf_type_is_fwd(t) ||
@@ -377,16 +367,6 @@ static bool btf_type_is_array(const struct btf_type *t)
        return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
 }
 
-static bool btf_type_is_ptr(const struct btf_type *t)
-{
-       return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
-}
-
-static bool btf_type_is_int(const struct btf_type *t)
-{
-       return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
-}
-
 static bool btf_type_is_var(const struct btf_type *t)
 {
        return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
@@ -698,6 +678,13 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
        if (!bpf_verifier_log_needed(log))
                return;
 
+       /* btf verifier prints all types it is processing via
+        * btf_verifier_log_type(..., fmt = NULL).
+        * Skip those prints for in-kernel BTF verification.
+        */
+       if (log->level == BPF_LOG_KERNEL && !fmt)
+               return;
+
        __btf_verifier_log(log, "[%u] %s %s%s",
                           env->log_type_id,
                           btf_kind_str[kind],
@@ -735,6 +722,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
        if (!bpf_verifier_log_needed(log))
                return;
 
+       if (log->level == BPF_LOG_KERNEL && !fmt)
+               return;
        /* The CHECK_META phase already did a btf dump.
         *
         * If member is logged again, it must hit an error in
@@ -777,6 +766,8 @@ static void btf_verifier_log_vsi(struct btf_verifier_env *env,
 
        if (!bpf_verifier_log_needed(log))
                return;
+       if (log->level == BPF_LOG_KERNEL && !fmt)
+               return;
        if (env->phase != CHECK_META)
                btf_verifier_log_type(env, datasec_type, NULL);
 
@@ -802,6 +793,8 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env,
        if (!bpf_verifier_log_needed(log))
                return;
 
+       if (log->level == BPF_LOG_KERNEL)
+               return;
        hdr = &btf->hdr;
        __btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
        __btf_verifier_log(log, "version: %u\n", hdr->version);
@@ -2405,7 +2398,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
                        return -EINVAL;
                }
 
-
+               if (env->log.level == BPF_LOG_KERNEL)
+                       continue;
                btf_verifier_log(env, "\t%s val=%d\n",
                                 __btf_name_by_offset(btf, enums[i].name_off),
                                 enums[i].val);
@@ -3367,6 +3361,292 @@ errout:
        return ERR_PTR(err);
 }
 
+extern char __weak _binary__btf_vmlinux_bin_start[];
+extern char __weak _binary__btf_vmlinux_bin_end[];
+
+/*
+ * Parse the BTF blob for the running kernel that the build embeds
+ * between _binary__btf_vmlinux_bin_start/_end.
+ *
+ * Runs the same header/string-section/metadata validation as
+ * userspace-supplied BTF, but with log level BPF_LOG_KERNEL so the
+ * verifier log suppresses the per-type dump (see
+ * __btf_verifier_log_type() above).
+ *
+ * Returns the parsed struct btf with refcount 1, or an ERR_PTR on
+ * allocation or validation failure.
+ */
+struct btf *btf_parse_vmlinux(void)
+{
+	struct btf_verifier_env *env = NULL;
+	struct bpf_verifier_log *log;
+	struct btf *btf = NULL;
+	int err;
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+	if (!env)
+		return ERR_PTR(-ENOMEM);
+
+	log = &env->log;
+	log->level = BPF_LOG_KERNEL;
+
+	btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+	if (!btf) {
+		err = -ENOMEM;
+		goto errout;
+	}
+	env->btf = btf;
+
+	/* data points into the kernel image, not a copy: nothing to free */
+	btf->data = _binary__btf_vmlinux_bin_start;
+	btf->data_size = _binary__btf_vmlinux_bin_end -
+		_binary__btf_vmlinux_bin_start;
+
+	err = btf_parse_hdr(env);
+	if (err)
+		goto errout;
+
+	btf->nohdr_data = btf->data + btf->hdr.hdr_len;
+
+	err = btf_parse_str_sec(env);
+	if (err)
+		goto errout;
+
+	err = btf_check_all_metas(env);
+	if (err)
+		goto errout;
+
+	btf_verifier_env_free(env);
+	refcount_set(&btf->refcnt, 1);
+	return btf;
+
+errout:
+	btf_verifier_env_free(env);
+	if (btf) {
+		kvfree(btf->types);
+		kfree(btf);
+	}
+	return ERR_PTR(err);
+}
+
+extern struct btf *btf_vmlinux;
+
+/*
+ * Validate a BPF program's access into its raw-tracepoint context,
+ * using the vmlinux BTF description of the attach function's prototype.
+ *
+ * The context is modelled as an array of 8-byte slots, one per
+ * argument of the attach function (hence the off / 8 indexing and the
+ * off % 8 alignment check). Scalar (int) arguments and void pointers
+ * are allowed as plain loads; pointers to structs are flagged as
+ * PTR_TO_BTF_ID with the pointee's BTF type id recorded in *info* so
+ * the verifier can police further dereferences.
+ *
+ * Returns true if the access at *off*/*size* is permitted, false (with
+ * an explanation in the verifier log) otherwise.
+ */
+bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+		    const struct bpf_prog *prog,
+		    struct bpf_insn_access_aux *info)
+{
+	const struct btf_type *t = prog->aux->attach_func_proto;
+	const char *tname = prog->aux->attach_func_name;
+	struct bpf_verifier_log *log = info->log;
+	const struct btf_param *args;
+	u32 nr_args, arg;
+
+	/* every argument occupies one 8-byte slot in the ctx */
+	if (off % 8) {
+		bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
+			tname, off);
+		return false;
+	}
+	arg = off / 8;
+	args = (const struct btf_param *)(t + 1);
+	nr_args = btf_type_vlen(t);
+	if (prog->aux->attach_btf_trace) {
+		/* skip first 'void *__data' argument in btf_trace_##name typedef */
+		args++;
+		nr_args--;
+	}
+	if (arg >= nr_args) {
+		bpf_log(log, "func '%s' doesn't have %d-th argument\n",
+			tname, arg);
+		return false;
+	}
+
+	t = btf_type_by_id(btf_vmlinux, args[arg].type);
+	/* skip modifiers */
+	while (btf_type_is_modifier(t))
+		t = btf_type_by_id(btf_vmlinux, t->type);
+	if (btf_type_is_int(t))
+		/* accessing a scalar */
+		return true;
+	if (!btf_type_is_ptr(t)) {
+		bpf_log(log,
+			"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
+			tname, arg,
+			__btf_name_by_offset(btf_vmlinux, t->name_off),
+			btf_kind_str[BTF_INFO_KIND(t->info)]);
+		return false;
+	}
+	if (t->type == 0)
+		/* This is a pointer to void.
+		 * It is the same as scalar from the verifier safety pov.
+		 * No further pointer walking is allowed.
+		 */
+		return true;
+
+	/* this is a pointer to another type */
+	info->reg_type = PTR_TO_BTF_ID;
+	info->btf_id = t->type;
+
+	t = btf_type_by_id(btf_vmlinux, t->type);
+	/* skip modifiers */
+	while (btf_type_is_modifier(t))
+		t = btf_type_by_id(btf_vmlinux, t->type);
+	if (!btf_type_is_struct(t)) {
+		bpf_log(log,
+			"func '%s' arg%d type %s is not a struct\n",
+			tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]);
+		return false;
+	}
+	bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
+		tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
+		__btf_name_by_offset(btf_vmlinux, t->name_off));
+	return true;
+}
+
+int btf_struct_access(struct bpf_verifier_log *log,
+                     const struct btf_type *t, int off, int size,
+                     enum bpf_access_type atype,
+                     u32 *next_btf_id)
+{
+       const struct btf_member *member;
+       const struct btf_type *mtype;
+       const char *tname, *mname;
+       int i, moff = 0, msize;
+
+again:
+       tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+       if (!btf_type_is_struct(t)) {
+               bpf_log(log, "Type '%s' is not a struct", tname);
+               return -EINVAL;
+       }
+
+       for_each_member(i, t, member) {
+               /* offset of the field in bits */
+               moff = btf_member_bit_offset(t, member);
+
+               if (btf_member_bitfield_size(t, member))
+                       /* bitfields are not supported yet */
+                       continue;
+
+               if (off + size <= moff / 8)
+                       /* won't find anything, field is already too far */
+                       break;
+
+               /* type of the field */
+               mtype = btf_type_by_id(btf_vmlinux, member->type);
+               mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
+
+               /* skip modifiers */
+               while (btf_type_is_modifier(mtype))
+                       mtype = btf_type_by_id(btf_vmlinux, mtype->type);
+
+               if (btf_type_is_array(mtype))
+                       /* array deref is not supported yet */
+                       continue;
+
+               if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) {
+                       bpf_log(log, "field %s doesn't have size\n", mname);
+                       return -EFAULT;
+               }
+               if (btf_type_is_ptr(mtype))
+                       msize = 8;
+               else
+                       msize = mtype->size;
+               if (off >= moff / 8 + msize)
+                       /* no overlap with member, keep iterating */
+                       continue;
+               /* the 'off' we're looking for is either equal to start
+                * of this field or inside of this struct
+                */
+               if (btf_type_is_struct(mtype)) {
+                       /* our field must be inside that union or struct */
+                       t = mtype;
+
+                       /* adjust offset we're looking for */
+                       off -= moff / 8;
+                       goto again;
+               }
+               if (msize != size) {
+                       /* field access size doesn't match */
+                       bpf_log(log,
+                               "cannot access %d bytes in struct %s field %s that has size %d\n",
+                               size, tname, mname, msize);
+                       return -EACCES;
+               }
+
+               if (btf_type_is_ptr(mtype)) {
+                       const struct btf_type *stype;
+
+                       stype = btf_type_by_id(btf_vmlinux, mtype->type);
+                       /* skip modifiers */
+                       while (btf_type_is_modifier(stype))
+                               stype = btf_type_by_id(btf_vmlinux, stype->type);
+                       if (btf_type_is_struct(stype)) {
+                               *next_btf_id = mtype->type;
+                               return PTR_TO_BTF_ID;
+                       }
+               }
+               /* all other fields are treated as scalars */
+               return SCALAR_VALUE;
+       }
+       bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
+       return -EINVAL;
+}
+
+u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
+{
+       char fnname[KSYM_SYMBOL_LEN + 4] = "btf_";
+       const struct btf_param *args;
+       const struct btf_type *t;
+       const char *tname, *sym;
+       u32 btf_id, i;
+
+       if (IS_ERR(btf_vmlinux)) {
+               bpf_log(log, "btf_vmlinux is malformed\n");
+               return -EINVAL;
+       }
+
+       sym = kallsyms_lookup((long)fn, NULL, NULL, NULL, fnname + 4);
+       if (!sym) {
+               bpf_log(log, "kernel doesn't have kallsyms\n");
+               return -EFAULT;
+       }
+
+       for (i = 1; i <= btf_vmlinux->nr_types; i++) {
+               t = btf_type_by_id(btf_vmlinux, i);
+               if (BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF)
+                       continue;
+               tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+               if (!strcmp(tname, fnname))
+                       break;
+       }
+       if (i > btf_vmlinux->nr_types) {
+               bpf_log(log, "helper %s type is not found\n", fnname);
+               return -ENOENT;
+       }
+
+       t = btf_type_by_id(btf_vmlinux, t->type);
+       if (!btf_type_is_ptr(t))
+               return -EFAULT;
+       t = btf_type_by_id(btf_vmlinux, t->type);
+       if (!btf_type_is_func_proto(t))
+               return -EFAULT;
+
+       args = (const struct btf_param *)(t + 1);
+       if (arg >= btf_type_vlen(t)) {
+               bpf_log(log, "bpf helper %s doesn't have %d-th argument\n",
+                       fnname, arg);
+               return -EINVAL;
+       }
+
+       t = btf_type_by_id(btf_vmlinux, args[arg].type);
+       if (!btf_type_is_ptr(t) || !t->type) {
+               /* anything but the pointer to struct is a helper config bug */
+               bpf_log(log, "ARG_PTR_TO_BTF is misconfigured\n");
+               return -EFAULT;
+       }
+       btf_id = t->type;
+       t = btf_type_by_id(btf_vmlinux, t->type);
+       /* skip modifiers */
+       while (btf_type_is_modifier(t)) {
+               btf_id = t->type;
+               t = btf_type_by_id(btf_vmlinux, t->type);
+       }
+       if (!btf_type_is_struct(t)) {
+               bpf_log(log, "ARG_PTR_TO_BTF is not a struct\n");
+               return -EFAULT;
+       }
+       bpf_log(log, "helper %s arg%d has btf_id %d struct %s\n", fnname + 4,
+               arg, btf_id, __btf_name_by_offset(btf_vmlinux, t->name_off));
+       return btf_id;
+}
+
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
                       struct seq_file *m)
 {
index ef0e1e3..97e37d8 100644 (file)
@@ -30,7 +30,7 @@
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
 #include <linux/perf_event.h>
-
+#include <linux/extable.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -668,9 +668,6 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
 {
        struct latch_tree_node *n;
 
-       if (!bpf_jit_kallsyms_enabled())
-               return NULL;
-
        n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
        return n ?
               container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
@@ -712,6 +709,24 @@ bool is_bpf_text_address(unsigned long addr)
        return ret;
 }
 
+const struct exception_table_entry *search_bpf_extables(unsigned long addr)
+{
+       const struct exception_table_entry *e = NULL;
+       struct bpf_prog *prog;
+
+       rcu_read_lock();
+       prog = bpf_prog_kallsyms_find(addr);
+       if (!prog)
+               goto out;
+       if (!prog->aux->num_exentries)
+               goto out;
+
+       e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
+out:
+       rcu_read_unlock();
+       return e;
+}
+
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
                    char *sym)
 {
@@ -1291,6 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
 }
 
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
+u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+{
+       memset(dst, 0, size);
+       return -EFAULT;
+}
+
 /**
  *     __bpf_prog_run - run eBPF program on a given context
  *     @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -1310,6 +1331,10 @@ static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u6
                /* Non-UAPI available opcodes. */
                [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
                [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+               [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
+               [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
+               [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
+               [BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
        };
 #undef BPF_INSN_3_LBL
 #undef BPF_INSN_2_LBL
@@ -1542,6 +1567,16 @@ out:
        LDST(W,  u32)
        LDST(DW, u64)
 #undef LDST
+#define LDX_PROBE(SIZEOP, SIZE)                                                        \
+       LDX_PROBE_MEM_##SIZEOP:                                                 \
+               bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC);    \
+               CONT;
+       LDX_PROBE(B,  1)
+       LDX_PROBE(H,  2)
+       LDX_PROBE(W,  4)
+       LDX_PROBE(DW, 8)
+#undef LDX_PROBE
+
        STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
                atomic_add((u32) SRC, (atomic_t *)(unsigned long)
                           (DST + insn->off));
index 052580c..173e983 100644 (file)
@@ -287,7 +287,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
        bool irq_work_busy = false;
        struct stack_map_irq_work *work = NULL;
 
-       if (in_nmi()) {
+       if (irqs_disabled()) {
                work = this_cpu_ptr(&up_read_work);
                if (work->irq_work.flags & IRQ_WORK_BUSY)
                        /* cannot queue more up_read, fallback */
@@ -295,8 +295,9 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
        }
 
        /*
-        * We cannot do up_read() in nmi context. To do build_id lookup
-        * in nmi context, we need to run up_read() in irq_work. We use
+        * We cannot do up_read() when the irq is disabled, because of
+        * risk to deadlock with rq_lock. To do build_id lookup when the
+        * irqs are disabled, we need to run up_read() in irq_work. We use
         * a percpu variable to do the irq_work. If the irq_work is
         * already used by another lookup, we fall back to report ips.
         *
index 0937719..6d9ce95 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/timekeeping.h>
 #include <linux/ctype.h>
 #include <linux/nospec.h>
+#include <uapi/linux/btf.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
                           (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -1573,9 +1574,21 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
 }
 
 static int
-bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
-                               enum bpf_attach_type expected_attach_type)
+bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
+                          enum bpf_attach_type expected_attach_type,
+                          u32 btf_id)
 {
+       switch (prog_type) {
+       case BPF_PROG_TYPE_TRACING:
+               if (btf_id > BTF_MAX_TYPE)
+                       return -EINVAL;
+               break;
+       default:
+               if (btf_id)
+                       return -EINVAL;
+               break;
+       }
+
        switch (prog_type) {
        case BPF_PROG_TYPE_CGROUP_SOCK:
                switch (expected_attach_type) {
@@ -1622,7 +1635,7 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define        BPF_PROG_LOAD_LAST_FIELD line_info_cnt
+#define        BPF_PROG_LOAD_LAST_FIELD attach_btf_id
 
 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 {
@@ -1664,7 +1677,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
                return -EPERM;
 
        bpf_prog_load_fixup_attach_type(attr);
-       if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
+       if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
+                                      attr->attach_btf_id))
                return -EINVAL;
 
        /* plain bpf_prog allocation */
@@ -1673,6 +1687,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
                return -ENOMEM;
 
        prog->expected_attach_type = attr->expected_attach_type;
+       prog->aux->attach_btf_id = attr->attach_btf_id;
 
        prog->aux->offload_requested = !!attr->prog_ifindex;
 
@@ -1815,17 +1830,50 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
        struct bpf_raw_tracepoint *raw_tp;
        struct bpf_raw_event_map *btp;
        struct bpf_prog *prog;
-       char tp_name[128];
+       const char *tp_name;
+       char buf[128];
        int tp_fd, err;
 
-       if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
-                             sizeof(tp_name) - 1) < 0)
-               return -EFAULT;
-       tp_name[sizeof(tp_name) - 1] = 0;
+       if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
+               return -EINVAL;
+
+       prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
+       if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+           prog->type != BPF_PROG_TYPE_TRACING &&
+           prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
+               err = -EINVAL;
+               goto out_put_prog;
+       }
+
+       if (prog->type == BPF_PROG_TYPE_TRACING) {
+               if (attr->raw_tracepoint.name) {
+                       /* raw_tp name should not be specified in raw_tp
+                        * programs that were verified via in-kernel BTF info
+                        */
+                       err = -EINVAL;
+                       goto out_put_prog;
+               }
+               /* raw_tp name is taken from type name instead */
+               tp_name = prog->aux->attach_func_name;
+       } else {
+               if (strncpy_from_user(buf,
+                                     u64_to_user_ptr(attr->raw_tracepoint.name),
+                                     sizeof(buf) - 1) < 0) {
+                       err = -EFAULT;
+                       goto out_put_prog;
+               }
+               buf[sizeof(buf) - 1] = 0;
+               tp_name = buf;
+       }
 
        btp = bpf_get_raw_tracepoint(tp_name);
-       if (!btp)
-               return -ENOENT;
+       if (!btp) {
+               err = -ENOENT;
+               goto out_put_prog;
+       }
 
        raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
        if (!raw_tp) {
@@ -1833,38 +1881,27 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
                goto out_put_btp;
        }
        raw_tp->btp = btp;
-
-       prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
-       if (IS_ERR(prog)) {
-               err = PTR_ERR(prog);
-               goto out_free_tp;
-       }
-       if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
-           prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
-               err = -EINVAL;
-               goto out_put_prog;
-       }
+       raw_tp->prog = prog;
 
        err = bpf_probe_register(raw_tp->btp, prog);
        if (err)
-               goto out_put_prog;
+               goto out_free_tp;
 
-       raw_tp->prog = prog;
        tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
                                 O_CLOEXEC);
        if (tp_fd < 0) {
                bpf_probe_unregister(raw_tp->btp, prog);
                err = tp_fd;
-               goto out_put_prog;
+               goto out_free_tp;
        }
        return tp_fd;
 
-out_put_prog:
-       bpf_prog_put(prog);
 out_free_tp:
        kfree(raw_tp);
 out_put_btp:
        bpf_put_raw_tracepoint(btp);
+out_put_prog:
+       bpf_prog_put(prog);
        return err;
 }
 
index ffc3e53..2f23749 100644 (file)
@@ -205,8 +205,11 @@ struct bpf_call_arg_meta {
        u64 msize_umax_value;
        int ref_obj_id;
        int func_id;
+       u32 btf_id;
 };
 
+struct btf *btf_vmlinux;
+
 static DEFINE_MUTEX(bpf_verifier_lock);
 
 static const struct bpf_line_info *
@@ -243,6 +246,10 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
        n = min(log->len_total - log->len_used - 1, n);
        log->kbuf[n] = '\0';
 
+       if (log->level == BPF_LOG_KERNEL) {
+               pr_err("BPF:%s\n", log->kbuf);
+               return;
+       }
        if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
                log->len_used += n;
        else
@@ -280,6 +287,19 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
        va_end(args);
 }
 
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+                           const char *fmt, ...)
+{
+       va_list args;
+
+       if (!bpf_verifier_log_needed(log))
+               return;
+
+       va_start(args, fmt);
+       bpf_verifier_vlog(log, fmt, args);
+       va_end(args);
+}
+
 static const char *ltrim(const char *s)
 {
        while (isspace(*s))
@@ -400,6 +420,7 @@ static const char * const reg_type_str[] = {
        [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
        [PTR_TO_TP_BUFFER]      = "tp_buffer",
        [PTR_TO_XDP_SOCK]       = "xdp_sock",
+       [PTR_TO_BTF_ID]         = "ptr_",
 };
 
 static char slot_type_char[] = {
@@ -430,6 +451,12 @@ static struct bpf_func_state *func(struct bpf_verifier_env *env,
        return cur->frame[reg->frameno];
 }
 
+const char *kernel_type_name(u32 id)
+{
+       return btf_name_by_offset(btf_vmlinux,
+                                 btf_type_by_id(btf_vmlinux, id)->name_off);
+}
+
 static void print_verifier_state(struct bpf_verifier_env *env,
                                 const struct bpf_func_state *state)
 {
@@ -454,6 +481,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
                        /* reg->off should be 0 for SCALAR_VALUE */
                        verbose(env, "%lld", reg->var_off.value + reg->off);
                } else {
+                       if (t == PTR_TO_BTF_ID)
+                               verbose(env, "%s", kernel_type_name(reg->btf_id));
                        verbose(env, "(id=%d", reg->id);
                        if (reg_type_may_be_refcounted_or_null(t))
                                verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
@@ -2331,10 +2360,12 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 
 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
-                           enum bpf_access_type t, enum bpf_reg_type *reg_type)
+                           enum bpf_access_type t, enum bpf_reg_type *reg_type,
+                           u32 *btf_id)
 {
        struct bpf_insn_access_aux info = {
                .reg_type = *reg_type,
+               .log = &env->log,
        };
 
        if (env->ops->is_valid_access &&
@@ -2348,7 +2379,10 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
                 */
                *reg_type = info.reg_type;
 
-               env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+               if (*reg_type == PTR_TO_BTF_ID)
+                       *btf_id = info.btf_id;
+               else
+                       env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
                /* remember the offset of last byte accessed in ctx */
                if (env->prog->aux->max_ctx_offset < off + size)
                        env->prog->aux->max_ctx_offset = off + size;
@@ -2739,6 +2773,88 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
        reg->smax_value = reg->umax_value;
 }
 
+static bool bpf_map_is_rdonly(const struct bpf_map *map)
+{
+       return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+}
+
+static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
+{
+       void *ptr;
+       u64 addr;
+       int err;
+
+       err = map->ops->map_direct_value_addr(map, &addr, off);
+       if (err)
+               return err;
+       ptr = (void *)(long)addr + off;
+
+       switch (size) {
+       case sizeof(u8):
+               *val = (u64)*(u8 *)ptr;
+               break;
+       case sizeof(u16):
+               *val = (u64)*(u16 *)ptr;
+               break;
+       case sizeof(u32):
+               *val = (u64)*(u32 *)ptr;
+               break;
+       case sizeof(u64):
+               *val = *(u64 *)ptr;
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
+                                  struct bpf_reg_state *regs,
+                                  int regno, int off, int size,
+                                  enum bpf_access_type atype,
+                                  int value_regno)
+{
+       struct bpf_reg_state *reg = regs + regno;
+       const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
+       const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+       u32 btf_id;
+       int ret;
+
+       if (atype != BPF_READ) {
+               verbose(env, "only read is supported\n");
+               return -EACCES;
+       }
+
+       if (off < 0) {
+               verbose(env,
+                       "R%d is ptr_%s invalid negative access: off=%d\n",
+                       regno, tname, off);
+               return -EACCES;
+       }
+       if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+               char tn_buf[48];
+
+               tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+               verbose(env,
+                       "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
+                       regno, tname, off, tn_buf);
+               return -EACCES;
+       }
+
+       ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
+       if (ret < 0)
+               return ret;
+
+       if (ret == SCALAR_VALUE) {
+               mark_reg_unknown(env, regs, value_regno);
+               return 0;
+       }
+       mark_reg_known_zero(env, regs, value_regno);
+       regs[value_regno].type = PTR_TO_BTF_ID;
+       regs[value_regno].btf_id = btf_id;
+       return 0;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -2776,11 +2892,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                if (err)
                        return err;
                err = check_map_access(env, regno, off, size, false);
-               if (!err && t == BPF_READ && value_regno >= 0)
-                       mark_reg_unknown(env, regs, value_regno);
+               if (!err && t == BPF_READ && value_regno >= 0) {
+                       struct bpf_map *map = reg->map_ptr;
+
+                       /* if map is read-only, track its contents as scalars */
+                       if (tnum_is_const(reg->var_off) &&
+                           bpf_map_is_rdonly(map) &&
+                           map->ops->map_direct_value_addr) {
+                               int map_off = off + reg->var_off.value;
+                               u64 val = 0;
 
+                               err = bpf_map_direct_read(map, map_off, size,
+                                                         &val);
+                               if (err)
+                                       return err;
+
+                               regs[value_regno].type = SCALAR_VALUE;
+                               __mark_reg_known(&regs[value_regno], val);
+                       } else {
+                               mark_reg_unknown(env, regs, value_regno);
+                       }
+               }
        } else if (reg->type == PTR_TO_CTX) {
                enum bpf_reg_type reg_type = SCALAR_VALUE;
+               u32 btf_id = 0;
 
                if (t == BPF_WRITE && value_regno >= 0 &&
                    is_pointer_value(env, value_regno)) {
@@ -2792,7 +2927,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                if (err < 0)
                        return err;
 
-               err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
+               err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
+               if (err)
+                       verbose_linfo(env, insn_idx, "; ");
                if (!err && t == BPF_READ && value_regno >= 0) {
                        /* ctx access returns either a scalar, or a
                         * PTR_TO_PACKET[_META,_END]. In the latter
@@ -2811,6 +2948,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                                 * a sub-register.
                                 */
                                regs[value_regno].subreg_def = DEF_NOT_SUBREG;
+                               if (reg_type == PTR_TO_BTF_ID)
+                                       regs[value_regno].btf_id = btf_id;
                        }
                        regs[value_regno].type = reg_type;
                }
@@ -2870,6 +3009,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                err = check_tp_buffer_access(env, reg, regno, off, size);
                if (!err && t == BPF_READ && value_regno >= 0)
                        mark_reg_unknown(env, regs, value_regno);
+       } else if (reg->type == PTR_TO_BTF_ID) {
+               err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
+                                             value_regno);
        } else {
                verbose(env, "R%d invalid mem access '%s'\n", regno,
                        reg_type_str[reg->type]);
@@ -3298,6 +3440,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                expected_type = PTR_TO_SOCKET;
                if (type != expected_type)
                        goto err_type;
+       } else if (arg_type == ARG_PTR_TO_BTF_ID) {
+               expected_type = PTR_TO_BTF_ID;
+               if (type != expected_type)
+                       goto err_type;
+               if (reg->btf_id != meta->btf_id) {
+                       verbose(env, "Helper has type %s got %s in R%d\n",
+                               kernel_type_name(meta->btf_id),
+                               kernel_type_name(reg->btf_id), regno);
+
+                       return -EACCES;
+               }
+               if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
+                       verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
+                               regno);
+                       return -EACCES;
+               }
        } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
                if (meta->func_id == BPF_FUNC_spin_lock) {
                        if (process_spin_lock(env, regno, true))
@@ -3445,6 +3603,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
        case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
                if (func_id != BPF_FUNC_perf_event_read &&
                    func_id != BPF_FUNC_perf_event_output &&
+                   func_id != BPF_FUNC_skb_output &&
                    func_id != BPF_FUNC_perf_event_read_value)
                        goto error;
                break;
@@ -3532,6 +3691,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
        case BPF_FUNC_perf_event_read:
        case BPF_FUNC_perf_event_output:
        case BPF_FUNC_perf_event_read_value:
+       case BPF_FUNC_skb_output:
                if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
                        goto error;
                break;
@@ -3986,21 +4146,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 
        meta.func_id = func_id;
        /* check args */
-       err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
-       if (err)
-               return err;
-       err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
-       if (err)
-               return err;
-       err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
-       if (err)
-               return err;
-       err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
-       if (err)
-               return err;
-       err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
-       if (err)
-               return err;
+       for (i = 0; i < 5; i++) {
+               if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) {
+                       if (!fn->btf_id[i])
+                               fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i);
+                       meta.btf_id = fn->btf_id[i];
+               }
+               err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
+               if (err)
+                       return err;
+       }
 
        err = record_func_map(env, &meta, func_id, insn_idx);
        if (err)
@@ -6124,6 +6279,11 @@ static int check_return_code(struct bpf_verifier_env *env)
        case BPF_PROG_TYPE_CGROUP_SYSCTL:
        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
                break;
+       case BPF_PROG_TYPE_RAW_TRACEPOINT:
+               if (!env->prog->aux->attach_btf_id)
+                       return 0;
+               range = tnum_const(0);
+               break;
        default:
                return 0;
        }
@@ -7440,6 +7600,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
        case PTR_TO_TCP_SOCK:
        case PTR_TO_TCP_SOCK_OR_NULL:
        case PTR_TO_XDP_SOCK:
+       case PTR_TO_BTF_ID:
                return false;
        default:
                return true;
@@ -8581,6 +8742,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                case PTR_TO_XDP_SOCK:
                        convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
                        break;
+               case PTR_TO_BTF_ID:
+                       if (type == BPF_WRITE) {
+                               verbose(env, "Writes through BTF pointers are not allowed\n");
+                               return -EINVAL;
+                       }
+                       insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
+                       env->prog->aux->num_exentries++;
+                       continue;
                default:
                        continue;
                }
@@ -9208,6 +9377,66 @@ static void print_verification_stats(struct bpf_verifier_env *env)
                env->peak_states, env->longest_mark_read_walk);
 }
 
+static int check_attach_btf_id(struct bpf_verifier_env *env)
+{
+       struct bpf_prog *prog = env->prog;
+       u32 btf_id = prog->aux->attach_btf_id;
+       const char prefix[] = "btf_trace_";
+       const struct btf_type *t;
+       const char *tname;
+
+       if (prog->type != BPF_PROG_TYPE_TRACING)
+               return 0;
+
+       if (!btf_id) {
+               verbose(env, "Tracing programs must provide btf_id\n");
+               return -EINVAL;
+       }
+       t = btf_type_by_id(btf_vmlinux, btf_id);
+       if (!t) {
+               verbose(env, "attach_btf_id %u is invalid\n", btf_id);
+               return -EINVAL;
+       }
+       tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+       if (!tname) {
+               verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
+               return -EINVAL;
+       }
+
+       switch (prog->expected_attach_type) {
+       case BPF_TRACE_RAW_TP:
+               if (!btf_type_is_typedef(t)) {
+                       verbose(env, "attach_btf_id %u is not a typedef\n",
+                               btf_id);
+                       return -EINVAL;
+               }
+               if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
+                       verbose(env, "attach_btf_id %u points to wrong type name %s\n",
+                               btf_id, tname);
+                       return -EINVAL;
+               }
+               tname += sizeof(prefix) - 1;
+               t = btf_type_by_id(btf_vmlinux, t->type);
+               if (!btf_type_is_ptr(t))
+                       /* should never happen in valid vmlinux build */
+                       return -EINVAL;
+               t = btf_type_by_id(btf_vmlinux, t->type);
+               if (!btf_type_is_func_proto(t))
+                       /* should never happen in valid vmlinux build */
+                       return -EINVAL;
+
+               /* remember two read only pointers that are valid for
+                * the life time of the kernel
+                */
+               prog->aux->attach_func_name = tname;
+               prog->aux->attach_func_proto = t;
+               prog->aux->attach_btf_trace = true;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
              union bpf_attr __user *uattr)
 {
@@ -9241,6 +9470,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
        env->ops = bpf_verifier_ops[env->prog->type];
        is_priv = capable(CAP_SYS_ADMIN);
 
+       if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+               mutex_lock(&bpf_verifier_lock);
+               if (!btf_vmlinux)
+                       btf_vmlinux = btf_parse_vmlinux();
+               mutex_unlock(&bpf_verifier_lock);
+       }
+
        /* grab the mutex to protect few globals used by verifier */
        if (!is_priv)
                mutex_lock(&bpf_verifier_lock);
@@ -9260,6 +9496,17 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
                        goto err_unlock;
        }
 
+       if (IS_ERR(btf_vmlinux)) {
+               /* Either gcc or pahole or kernel are broken. */
+               verbose(env, "in-kernel BTF is malformed\n");
+               ret = PTR_ERR(btf_vmlinux);
+               goto skip_full_check;
+       }
+
+       ret = check_attach_btf_id(env);
+       if (ret)
+               goto skip_full_check;
+
        env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
                env->strict_alignment = true;
index 82a1ffe..da16c30 100644 (file)
@@ -9,13 +9,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-struct xsk_map {
-       struct bpf_map map;
-       struct xdp_sock **xsk_map;
-       struct list_head __percpu *flush_list;
-       spinlock_t lock; /* Synchronize map updates */
-};
-
 int xsk_map_inc(struct xsk_map *map)
 {
        struct bpf_map *m = &map->map;
@@ -80,9 +73,10 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
 
 static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 {
+       struct bpf_map_memory mem;
+       int cpu, err, numa_node;
        struct xsk_map *m;
-       int cpu, err;
-       u64 cost;
+       u64 cost, size;
 
        if (!capable(CAP_NET_ADMIN))
                return ERR_PTR(-EPERM);
@@ -92,44 +86,35 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
            attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
                return ERR_PTR(-EINVAL);
 
-       m = kzalloc(sizeof(*m), GFP_USER);
-       if (!m)
+       numa_node = bpf_map_attr_numa_node(attr);
+       size = struct_size(m, xsk_map, attr->max_entries);
+       cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
+
+       err = bpf_map_charge_init(&mem, cost);
+       if (err < 0)
+               return ERR_PTR(err);
+
+       m = bpf_map_area_alloc(size, numa_node);
+       if (!m) {
+               bpf_map_charge_finish(&mem);
                return ERR_PTR(-ENOMEM);
+       }
 
        bpf_map_init_from_attr(&m->map, attr);
+       bpf_map_charge_move(&m->map.memory, &mem);
        spin_lock_init(&m->lock);
 
-       cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
-       cost += sizeof(struct list_head) * num_possible_cpus();
-
-       /* Notice returns -EPERM on if map size is larger than memlock limit */
-       err = bpf_map_charge_init(&m->map.memory, cost);
-       if (err)
-               goto free_m;
-
-       err = -ENOMEM;
-
        m->flush_list = alloc_percpu(struct list_head);
-       if (!m->flush_list)
-               goto free_charge;
+       if (!m->flush_list) {
+               bpf_map_charge_finish(&m->map.memory);
+               bpf_map_area_free(m);
+               return ERR_PTR(-ENOMEM);
+       }
 
        for_each_possible_cpu(cpu)
                INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
 
-       m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
-                                       sizeof(struct xdp_sock *),
-                                       m->map.numa_node);
-       if (!m->xsk_map)
-               goto free_percpu;
        return &m->map;
-
-free_percpu:
-       free_percpu(m->flush_list);
-free_charge:
-       bpf_map_charge_finish(&m->map.memory);
-free_m:
-       kfree(m);
-       return ERR_PTR(err);
 }
 
 static void xsk_map_free(struct bpf_map *map)
@@ -139,8 +124,7 @@ static void xsk_map_free(struct bpf_map *map)
        bpf_clear_redirect_map(map);
        synchronize_net();
        free_percpu(m->flush_list);
-       bpf_map_area_free(m->xsk_map);
-       kfree(m);
+       bpf_map_area_free(m);
 }
 
 static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -160,45 +144,20 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
        return 0;
 }
 
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-       struct xsk_map *m = container_of(map, struct xsk_map, map);
-       struct xdp_sock *xs;
-
-       if (key >= map->max_entries)
-               return NULL;
-
-       xs = READ_ONCE(m->xsk_map[key]);
-       return xs;
-}
-
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-                      struct xdp_sock *xs)
-{
-       struct xsk_map *m = container_of(map, struct xsk_map, map);
-       struct list_head *flush_list = this_cpu_ptr(m->flush_list);
-       int err;
-
-       err = xsk_rcv(xs, xdp);
-       if (err)
-               return err;
-
-       if (!xs->flush_node.prev)
-               list_add(&xs->flush_node, flush_list);
-
-       return 0;
-}
-
-void __xsk_map_flush(struct bpf_map *map)
+static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
-       struct xsk_map *m = container_of(map, struct xsk_map, map);
-       struct list_head *flush_list = this_cpu_ptr(m->flush_list);
-       struct xdp_sock *xs, *tmp;
-
-       list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
-               xsk_flush(xs);
-               __list_del_clearprev(&xs->flush_node);
-       }
+       const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
+       struct bpf_insn *insn = insn_buf;
+
+       *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+       *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+       *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
+       *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
+       *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
+       *insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
+       *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+       *insn++ = BPF_MOV64_IMM(ret, 0);
+       return insn - insn_buf;
 }
 
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
@@ -312,6 +271,7 @@ const struct bpf_map_ops xsk_map_ops = {
        .map_free = xsk_map_free,
        .map_get_next_key = xsk_map_get_next_key,
        .map_lookup_elem = xsk_map_lookup_elem,
+       .map_gen_lookup = xsk_map_gen_lookup,
        .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
        .map_update_elem = xsk_map_update_elem,
        .map_delete_elem = xsk_map_delete_elem,
index f6c9406..f6920a1 100644 (file)
@@ -56,6 +56,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
        e = search_kernel_exception_table(addr);
        if (!e)
                e = search_module_extables(addr);
+       if (!e)
+               e = search_bpf_extables(addr);
        return e;
 }
 
index 44bd08f..ffc91d4 100644 (file)
@@ -138,24 +138,140 @@ static const struct bpf_func_proto bpf_override_return_proto = {
 };
 #endif
 
-BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
+BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
+          const void __user *, unsafe_ptr)
 {
-       int ret;
+       int ret = probe_user_read(dst, unsafe_ptr, size);
 
-       ret = security_locked_down(LOCKDOWN_BPF_READ);
-       if (ret < 0)
-               goto out;
+       if (unlikely(ret < 0))
+               memset(dst, 0, size);
+
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_user_proto = {
+       .func           = bpf_probe_read_user,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg3_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
+          const void __user *, unsafe_ptr)
+{
+       int ret = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
+
+       if (unlikely(ret < 0))
+               memset(dst, 0, size);
+
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
+       .func           = bpf_probe_read_user_str,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg3_type      = ARG_ANYTHING,
+};
 
-       ret = probe_kernel_read(dst, unsafe_ptr, size);
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
+                            const bool compat)
+{
+       int ret = security_locked_down(LOCKDOWN_BPF_READ);
+
+       if (unlikely(ret < 0))
+               goto out;
+       ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
+             probe_kernel_read_strict(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
 out:
                memset(dst, 0, size);
+       return ret;
+}
+
+BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
+          const void *, unsafe_ptr)
+{
+       return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
+}
+
+static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
+       .func           = bpf_probe_read_kernel,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg3_type      = ARG_ANYTHING,
+};
 
+BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
+          const void *, unsafe_ptr)
+{
+       return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
+}
+
+static const struct bpf_func_proto bpf_probe_read_compat_proto = {
+       .func           = bpf_probe_read_compat,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg3_type      = ARG_ANYTHING,
+};
+
+static __always_inline int
+bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
+                                const bool compat)
+{
+       int ret = security_locked_down(LOCKDOWN_BPF_READ);
+
+       if (unlikely(ret < 0))
+               goto out;
+       /*
+        * The strncpy_from_unsafe_*() call will likely not fill the entire
+        * buffer, but that's okay in this circumstance as we're probing
+        * arbitrary memory anyway similar to bpf_probe_read_*() and might
+        * as well probe the stack. Thus, memory is explicitly cleared
+        * only in error case, so that improper users ignoring return
+        * code altogether don't copy garbage; otherwise length of string
+        * is returned that can be used for bpf_perf_event_output() et al.
+        */
+       ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
+             strncpy_from_unsafe_strict(dst, unsafe_ptr, size);
+       if (unlikely(ret < 0))
+out:
+               memset(dst, 0, size);
        return ret;
 }
 
-static const struct bpf_func_proto bpf_probe_read_proto = {
-       .func           = bpf_probe_read,
+BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
+          const void *, unsafe_ptr)
+{
+       return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
+}
+
+static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
+       .func           = bpf_probe_read_kernel_str,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg3_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
+          const void *, unsafe_ptr)
+{
+       return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
+}
+
+static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
+       .func           = bpf_probe_read_compat_str,
        .gpl_only       = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
@@ -163,7 +279,7 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
        .arg3_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
           u32, size)
 {
        /*
@@ -186,10 +302,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
                return -EPERM;
        if (unlikely(!nmi_uaccess_okay()))
                return -EPERM;
-       if (!access_ok(unsafe_ptr, size))
-               return -EPERM;
 
-       return probe_kernel_write(unsafe_ptr, src, size);
+       return probe_user_write(unsafe_ptr, src, size);
 }
 
 static const struct bpf_func_proto bpf_probe_write_user_proto = {
@@ -585,41 +699,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
-          const void *, unsafe_ptr)
-{
-       int ret;
-
-       ret = security_locked_down(LOCKDOWN_BPF_READ);
-       if (ret < 0)
-               goto out;
-
-       /*
-        * The strncpy_from_unsafe() call will likely not fill the entire
-        * buffer, but that's okay in this circumstance as we're probing
-        * arbitrary memory anyway similar to bpf_probe_read() and might
-        * as well probe the stack. Thus, memory is explicitly cleared
-        * only in error case, so that improper users ignoring return
-        * code altogether don't copy garbage; otherwise length of string
-        * is returned that can be used for bpf_perf_event_output() et al.
-        */
-       ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
-       if (unlikely(ret < 0))
-out:
-               memset(dst, 0, size);
-
-       return ret;
-}
-
-static const struct bpf_func_proto bpf_probe_read_str_proto = {
-       .func           = bpf_probe_read_str,
-       .gpl_only       = true,
-       .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
-       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
-       .arg3_type      = ARG_ANYTHING,
-};
-
 struct send_signal_irq_work {
        struct irq_work irq_work;
        struct task_struct *task;
@@ -699,8 +778,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_map_pop_elem_proto;
        case BPF_FUNC_map_peek_elem:
                return &bpf_map_peek_elem_proto;
-       case BPF_FUNC_probe_read:
-               return &bpf_probe_read_proto;
        case BPF_FUNC_ktime_get_ns:
                return &bpf_ktime_get_ns_proto;
        case BPF_FUNC_tail_call:
@@ -727,8 +804,18 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_current_task_under_cgroup_proto;
        case BPF_FUNC_get_prandom_u32:
                return &bpf_get_prandom_u32_proto;
+       case BPF_FUNC_probe_read_user:
+               return &bpf_probe_read_user_proto;
+       case BPF_FUNC_probe_read_kernel:
+               return &bpf_probe_read_kernel_proto;
+       case BPF_FUNC_probe_read:
+               return &bpf_probe_read_compat_proto;
+       case BPF_FUNC_probe_read_user_str:
+               return &bpf_probe_read_user_str_proto;
+       case BPF_FUNC_probe_read_kernel_str:
+               return &bpf_probe_read_kernel_str_proto;
        case BPF_FUNC_probe_read_str:
-               return &bpf_probe_read_str_proto;
+               return &bpf_probe_read_compat_str_proto;
 #ifdef CONFIG_CGROUPS
        case BPF_FUNC_get_current_cgroup_id:
                return &bpf_get_current_cgroup_id_proto;
@@ -995,6 +1082,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+extern const struct bpf_func_proto bpf_skb_output_proto;
+
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
 {
@@ -1062,13 +1151,25 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        }
 }
 
+static const struct bpf_func_proto *
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+       switch (func_id) {
+#ifdef CONFIG_NET
+       case BPF_FUNC_skb_output:
+               return &bpf_skb_output_proto;
+#endif
+       default:
+               return raw_tp_prog_func_proto(func_id, prog);
+       }
+}
+
 static bool raw_tp_prog_is_valid_access(int off, int size,
                                        enum bpf_access_type type,
                                        const struct bpf_prog *prog,
                                        struct bpf_insn_access_aux *info)
 {
-       /* largest tracepoint in the kernel has 12 args */
-       if (off < 0 || off >= sizeof(__u64) * 12)
+       if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
                return false;
        if (type != BPF_READ)
                return false;
@@ -1077,6 +1178,20 @@ static bool raw_tp_prog_is_valid_access(int off, int size,
        return true;
 }
 
+static bool tracing_prog_is_valid_access(int off, int size,
+                                        enum bpf_access_type type,
+                                        const struct bpf_prog *prog,
+                                        struct bpf_insn_access_aux *info)
+{
+       if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
+               return false;
+       if (type != BPF_READ)
+               return false;
+       if (off % size != 0)
+               return false;
+       return btf_ctx_access(off, size, type, prog, info);
+}
+
 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
        .get_func_proto  = raw_tp_prog_func_proto,
        .is_valid_access = raw_tp_prog_is_valid_access,
@@ -1085,6 +1200,14 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 };
 
+const struct bpf_verifier_ops tracing_verifier_ops = {
+       .get_func_proto  = tracing_prog_func_proto,
+       .is_valid_access = tracing_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops tracing_prog_ops = {
+};
+
 static bool raw_tp_writable_prog_is_valid_access(int off, int size,
                                                 enum bpf_access_type type,
                                                 const struct bpf_prog *prog,
index 5ef3ecc..cecb230 100644 (file)
@@ -6859,34 +6859,128 @@ err_page0:
        return NULL;
 }
 
-static __init int test_skb_segment(void)
+static __init struct sk_buff *build_test_skb_linear_no_head_frag(void)
 {
+       unsigned int alloc_size = 2000;
+       unsigned int headroom = 102, doffset = 72, data_size = 1308;
+       struct sk_buff *skb[2];
+       int i;
+
+       /* skbs linked in a frag_list, both with linear data, with head_frag=0
+        * (data allocated by kmalloc), both have tcp data of 1308 bytes
+        * (total payload is 2616 bytes).
+        * Data offset is 72 bytes (40 ipv6 hdr, 32 tcp hdr). Some headroom.
+        */
+       for (i = 0; i < 2; i++) {
+               skb[i] = alloc_skb(alloc_size, GFP_KERNEL);
+               if (!skb[i]) {
+                       if (i == 0)
+                               goto err_skb0;
+                       else
+                               goto err_skb1;
+               }
+
+               skb[i]->protocol = htons(ETH_P_IPV6);
+               skb_reserve(skb[i], headroom);
+               skb_put(skb[i], doffset + data_size);
+               skb_reset_network_header(skb[i]);
+               if (i == 0)
+                       skb_reset_mac_header(skb[i]);
+               else
+                       skb_set_mac_header(skb[i], -ETH_HLEN);
+               __skb_pull(skb[i], doffset);
+       }
+
+       /* setup shinfo.
+        * mimic bpf_skb_proto_4_to_6, which resets gso_segs and assigns a
+        * reduced gso_size.
+        */
+       skb_shinfo(skb[0])->gso_size = 1288;
+       skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV6 | SKB_GSO_DODGY;
+       skb_shinfo(skb[0])->gso_segs = 0;
+       skb_shinfo(skb[0])->frag_list = skb[1];
+
+       /* adjust skb[0]'s len */
+       skb[0]->len += skb[1]->len;
+       skb[0]->data_len += skb[1]->len;
+       skb[0]->truesize += skb[1]->truesize;
+
+       return skb[0];
+
+err_skb1:
+       kfree_skb(skb[0]);
+err_skb0:
+       return NULL;
+}
+
+struct skb_segment_test {
+       const char *descr;
+       struct sk_buff *(*build_skb)(void);
        netdev_features_t features;
+};
+
+static struct skb_segment_test skb_segment_tests[] __initconst = {
+       {
+               .descr = "gso_with_rx_frags",
+               .build_skb = build_test_skb,
+               .features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+                           NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM
+       },
+       {
+               .descr = "gso_linear_no_head_frag",
+               .build_skb = build_test_skb_linear_no_head_frag,
+               .features = NETIF_F_SG | NETIF_F_FRAGLIST |
+                           NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO |
+                           NETIF_F_LLTX_BIT | NETIF_F_GRO |
+                           NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
+                           NETIF_F_HW_VLAN_STAG_TX_BIT
+       }
+};
+
+static __init int test_skb_segment_single(const struct skb_segment_test *test)
+{
        struct sk_buff *skb, *segs;
        int ret = -1;
 
-       features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
-                  NETIF_F_IPV6_CSUM;
-       features |= NETIF_F_RXCSUM;
-       skb = build_test_skb();
+       skb = test->build_skb();
        if (!skb) {
                pr_info("%s: failed to build_test_skb", __func__);
                goto done;
        }
 
-       segs = skb_segment(skb, features);
+       segs = skb_segment(skb, test->features);
        if (!IS_ERR(segs)) {
                kfree_skb_list(segs);
                ret = 0;
-               pr_info("%s: success in skb_segment!", __func__);
-       } else {
-               pr_info("%s: failed in skb_segment!", __func__);
        }
        kfree_skb(skb);
 done:
        return ret;
 }
 
+static __init int test_skb_segment(void)
+{
+       int i, err_cnt = 0, pass_cnt = 0;
+
+       for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) {
+               const struct skb_segment_test *test = &skb_segment_tests[i];
+
+               pr_info("#%d %s ", i, test->descr);
+
+               if (test_skb_segment_single(test)) {
+                       pr_cont("FAIL\n");
+                       err_cnt++;
+               } else {
+                       pr_cont("PASS\n");
+                       pass_cnt++;
+               }
+       }
+
+       pr_info("%s: Summary: %d PASSED, %d FAILED\n", __func__,
+               pass_cnt, err_cnt);
+       return err_cnt ? -EINVAL : 0;
+}
+
 static __init int test_bpf(void)
 {
        int i, err_cnt = 0, pass_cnt = 0;
index d065736..3ca8d97 100644 (file)
@@ -18,6 +18,18 @@ probe_read_common(void *dst, const void __user *src, size_t size)
        return ret ? -EFAULT : 0;
 }
 
+static __always_inline long
+probe_write_common(void __user *dst, const void *src, size_t size)
+{
+       long ret;
+
+       pagefault_disable();
+       ret = __copy_to_user_inatomic(dst, src, size);
+       pagefault_enable();
+
+       return ret ? -EFAULT : 0;
+}
+
 /**
  * probe_kernel_read(): safely attempt to read from a kernel-space location
  * @dst: pointer to the buffer that shall take the data
@@ -31,11 +43,20 @@ probe_read_common(void *dst, const void __user *src, size_t size)
  * do_page_fault() doesn't attempt to take mmap_sem.  This makes
  * probe_kernel_read() suitable for use within regions where the caller
  * already holds mmap_sem, or other locks which nest inside mmap_sem.
+ *
+ * probe_kernel_read_strict() is the same as probe_kernel_read() except for
+ * the case where architectures have non-overlapping user and kernel address
+ * ranges: probe_kernel_read_strict() will additionally return -EFAULT for
+ * probing memory on a user address range where probe_user_read() is supposed
+ * to be used instead.
  */
 
 long __weak probe_kernel_read(void *dst, const void *src, size_t size)
     __attribute__((alias("__probe_kernel_read")));
 
+long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size)
+    __attribute__((alias("__probe_kernel_read")));
+
 long __probe_kernel_read(void *dst, const void *src, size_t size)
 {
        long ret;
@@ -85,6 +106,7 @@ EXPORT_SYMBOL_GPL(probe_user_read);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
+
 long __weak probe_kernel_write(void *dst, const void *src, size_t size)
     __attribute__((alias("__probe_kernel_write")));
 
@@ -94,15 +116,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
        mm_segment_t old_fs = get_fs();
 
        set_fs(KERNEL_DS);
-       pagefault_disable();
-       ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
-       pagefault_enable();
+       ret = probe_write_common((__force void __user *)dst, src, size);
        set_fs(old_fs);
 
-       return ret ? -EFAULT : 0;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(probe_kernel_write);
 
+/**
+ * probe_user_write(): safely attempt to write to a user-space location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+
+long __weak probe_user_write(void __user *dst, const void *src, size_t size)
+    __attribute__((alias("__probe_user_write")));
+
+long __probe_user_write(void __user *dst, const void *src, size_t size)
+{
+       long ret = -EFAULT;
+       mm_segment_t old_fs = get_fs();
+
+       set_fs(USER_DS);
+       if (access_ok(dst, size))
+               ret = probe_write_common(dst, src, size);
+       set_fs(old_fs);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(probe_user_write);
 
 /**
  * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
@@ -120,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_kernel_write);
  *
  * If @count is smaller than the length of the string, copies @count-1 bytes,
  * sets the last byte of @dst buffer to NUL and returns @count.
+ *
+ * strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except
+ * for the case where architectures have non-overlapping user and kernel address
+ * ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for
+ * probing memory on a user address range where strncpy_from_unsafe_user() is
+ * supposed to be used instead.
  */
-long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
+
+long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
+    __attribute__((alias("__strncpy_from_unsafe")));
+
+long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+                                      long count)
+    __attribute__((alias("__strncpy_from_unsafe")));
+
+long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
 {
        mm_segment_t old_fs = get_fs();
        const void *src = unsafe_addr;
index ad5b0ac..7ff92dd 100644 (file)
@@ -934,6 +934,14 @@ static void hci_req_directed_advertising(struct hci_request *req,
                        return;
 
                memset(&cp, 0, sizeof(cp));
+
+               /* Some controllers might reject command if intervals are not
+                * within range for undirected advertising.
+                * BCM20702A0 is known to be affected by this.
+                */
+               cp.min_interval = cpu_to_le16(0x0020);
+               cp.max_interval = cpu_to_le16(0x0020);
+
                cp.type = LE_ADV_DIRECT_IND;
                cp.own_address_type = own_addr_type;
                cp.direct_addr_type = conn->dst_type;
index 04bc793..0cc9ce9 100644 (file)
@@ -842,8 +842,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
        if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
                struct hci_cp_le_write_def_data_len cp;
 
-               cp.tx_len = hdev->le_max_tx_len;
-               cp.tx_time = hdev->le_max_tx_time;
+               cp.tx_len = cpu_to_le16(hdev->le_max_tx_len);
+               cp.tx_time = cpu_to_le16(hdev->le_max_tx_time);
                hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp);
        }
 
@@ -4440,7 +4440,14 @@ static void hci_rx_work(struct work_struct *work)
                        hci_send_to_sock(hdev, skb);
                }
 
-               if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
+               /* If the device has been opened in HCI_USER_CHANNEL,
+                * the userspace has exclusive access to device.
+                * When device is HCI_INIT, we still need to process
+                * the data packets to the driver in order
+                * to complete its setup().
+                */
+               if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+                   !test_bit(HCI_INIT, &hdev->flags)) {
                        kfree_skb(skb);
                        continue;
                }
index 26e8cfa..6b42be4 100644 (file)
@@ -502,15 +502,12 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
                     const bdaddr_t *bdaddr)
 {
        struct l2cap_chan *chan = hdev->smp_data;
-       struct smp_dev *smp;
        u8 hash[3];
        int err;
 
        if (!chan || !chan->data)
                return false;
 
-       smp = chan->data;
-
        BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
 
        err = smp_ah(irk, &bdaddr->b[3], hash);
@@ -523,14 +520,11 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
 int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa)
 {
        struct l2cap_chan *chan = hdev->smp_data;
-       struct smp_dev *smp;
        int err;
 
        if (!chan || !chan->data)
                return -EOPNOTSUPP;
 
-       smp = chan->data;
-
        get_random_bytes(&rpa->b[3], 3);
 
        rpa->b[5] &= 0x3f;      /* Clear two most significant bits */
index 1153bbc..0be4497 100644 (file)
@@ -218,10 +218,18 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 
        if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
                           FIELD_SIZEOF(struct __sk_buff, cb),
+                          offsetof(struct __sk_buff, tstamp)))
+               return -EINVAL;
+
+       /* tstamp is allowed */
+
+       if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
+                          FIELD_SIZEOF(struct __sk_buff, tstamp),
                           sizeof(struct __sk_buff)))
                return -EINVAL;
 
        skb->priority = __skb->priority;
+       skb->tstamp = __skb->tstamp;
        memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
 
        return 0;
@@ -235,6 +243,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
                return;
 
        __skb->priority = skb->priority;
+       __skb->tstamp = skb->tstamp;
        memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
 }
 
index b1d3248..4877a0d 100644 (file)
@@ -75,8 +75,9 @@ static inline unsigned long hold_time(const struct net_bridge *br)
 static inline int has_expired(const struct net_bridge *br,
                                  const struct net_bridge_fdb_entry *fdb)
 {
-       return !fdb->is_static && !fdb->added_by_external_learn &&
-               time_before_eq(fdb->updated + hold_time(br), jiffies);
+       return !test_bit(BR_FDB_STATIC, &fdb->flags) &&
+              !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags) &&
+              time_before_eq(fdb->updated + hold_time(br), jiffies);
 }
 
 static void fdb_rcu_free(struct rcu_head *head)
@@ -197,7 +198,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
 {
        trace_fdb_delete(br, f);
 
-       if (f->is_static)
+       if (test_bit(BR_FDB_STATIC, &f->flags))
                fdb_del_hw_addr(br, f->key.addr.addr);
 
        hlist_del_init_rcu(&f->fdb_node);
@@ -224,7 +225,7 @@ static void fdb_delete_local(struct net_bridge *br,
                if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
                    (!vid || br_vlan_find(vg, vid))) {
                        f->dst = op;
-                       f->added_by_user = 0;
+                       clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
                        return;
                }
        }
@@ -235,7 +236,7 @@ static void fdb_delete_local(struct net_bridge *br,
        if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
            (!vid || (v && br_vlan_should_use(v)))) {
                f->dst = NULL;
-               f->added_by_user = 0;
+               clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
                return;
        }
 
@@ -250,7 +251,8 @@ void br_fdb_find_delete_local(struct net_bridge *br,
 
        spin_lock_bh(&br->hash_lock);
        f = br_fdb_find(br, addr, vid);
-       if (f && f->is_local && !f->added_by_user && f->dst == p)
+       if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+           !test_bit(BR_FDB_ADDED_BY_USER, &f->flags) && f->dst == p)
                fdb_delete_local(br, p, f);
        spin_unlock_bh(&br->hash_lock);
 }
@@ -265,7 +267,8 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
        spin_lock_bh(&br->hash_lock);
        vg = nbp_vlan_group(p);
        hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
-               if (f->dst == p && f->is_local && !f->added_by_user) {
+               if (f->dst == p && test_bit(BR_FDB_LOCAL, &f->flags) &&
+                   !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) {
                        /* delete old one */
                        fdb_delete_local(br, p, f);
 
@@ -306,7 +309,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 
        /* If old entry was unassociated with any port, then delete it. */
        f = br_fdb_find(br, br->dev->dev_addr, 0);
-       if (f && f->is_local && !f->dst && !f->added_by_user)
+       if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+           !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
                fdb_delete_local(br, NULL, f);
 
        fdb_insert(br, NULL, newaddr, 0);
@@ -321,7 +325,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
                if (!br_vlan_should_use(v))
                        continue;
                f = br_fdb_find(br, br->dev->dev_addr, v->vid);
-               if (f && f->is_local && !f->dst && !f->added_by_user)
+               if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+                   !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
                        fdb_delete_local(br, NULL, f);
                fdb_insert(br, NULL, newaddr, v->vid);
        }
@@ -346,7 +351,8 @@ void br_fdb_cleanup(struct work_struct *work)
        hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
                unsigned long this_timer;
 
-               if (f->is_static || f->added_by_external_learn)
+               if (test_bit(BR_FDB_STATIC, &f->flags) ||
+                   test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags))
                        continue;
                this_timer = f->updated + delay;
                if (time_after(this_timer, now)) {
@@ -373,7 +379,7 @@ void br_fdb_flush(struct net_bridge *br)
 
        spin_lock_bh(&br->hash_lock);
        hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
-               if (!f->is_static)
+               if (!test_bit(BR_FDB_STATIC, &f->flags))
                        fdb_delete(br, f, true);
        }
        spin_unlock_bh(&br->hash_lock);
@@ -397,10 +403,11 @@ void br_fdb_delete_by_port(struct net_bridge *br,
                        continue;
 
                if (!do_all)
-                       if (f->is_static || (vid && f->key.vlan_id != vid))
+                       if (test_bit(BR_FDB_STATIC, &f->flags) ||
+                           (vid && f->key.vlan_id != vid))
                                continue;
 
-               if (f->is_local)
+               if (test_bit(BR_FDB_LOCAL, &f->flags))
                        fdb_delete_local(br, p, f);
                else
                        fdb_delete(br, f, true);
@@ -469,8 +476,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
                fe->port_no = f->dst->port_no;
                fe->port_hi = f->dst->port_no >> 8;
 
-               fe->is_local = f->is_local;
-               if (!f->is_static)
+               fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags);
+               if (!test_bit(BR_FDB_STATIC, &f->flags))
                        fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
                ++fe;
                ++num;
@@ -484,8 +491,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
                                               struct net_bridge_port *source,
                                               const unsigned char *addr,
                                               __u16 vid,
-                                              unsigned char is_local,
-                                              unsigned char is_static)
+                                              unsigned long flags)
 {
        struct net_bridge_fdb_entry *fdb;
 
@@ -494,12 +500,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
                memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
                fdb->dst = source;
                fdb->key.vlan_id = vid;
-               fdb->is_local = is_local;
-               fdb->is_static = is_static;
-               fdb->added_by_user = 0;
-               fdb->added_by_external_learn = 0;
-               fdb->offloaded = 0;
-               fdb->is_sticky = 0;
+               fdb->flags = flags;
                fdb->updated = fdb->used = jiffies;
                if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
                                                  &fdb->rhnode,
@@ -526,14 +527,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                /* it is okay to have multiple ports with same
                 * address, just use the first one.
                 */
-               if (fdb->is_local)
+               if (test_bit(BR_FDB_LOCAL, &fdb->flags))
                        return 0;
                br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
                       source ? source->dev->name : br->dev->name, addr, vid);
                fdb_delete(br, fdb, true);
        }
 
-       fdb = fdb_create(br, source, addr, vid, 1, 1);
+       fdb = fdb_create(br, source, addr, vid,
+                        BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
        if (!fdb)
                return -ENOMEM;
 
@@ -555,7 +557,7 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 }
 
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
-                  const unsigned char *addr, u16 vid, bool added_by_user)
+                  const unsigned char *addr, u16 vid, unsigned long flags)
 {
        struct net_bridge_fdb_entry *fdb;
        bool fdb_modified = false;
@@ -564,15 +566,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
        if (hold_time(br) == 0)
                return;
 
-       /* ignore packets unless we are using this port */
-       if (!(source->state == BR_STATE_LEARNING ||
-             source->state == BR_STATE_FORWARDING))
-               return;
-
        fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
        if (likely(fdb)) {
                /* attempt to update an entry for a local interface */
-               if (unlikely(fdb->is_local)) {
+               if (unlikely(test_bit(BR_FDB_LOCAL, &fdb->flags))) {
                        if (net_ratelimit())
                                br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n",
                                        source->dev->name, addr, vid);
@@ -580,30 +577,30 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                        unsigned long now = jiffies;
 
                        /* fastpath: update of existing entry */
-                       if (unlikely(source != fdb->dst && !fdb->is_sticky)) {
+                       if (unlikely(source != fdb->dst &&
+                                    !test_bit(BR_FDB_STICKY, &fdb->flags))) {
                                fdb->dst = source;
                                fdb_modified = true;
                                /* Take over HW learned entry */
-                               if (unlikely(fdb->added_by_external_learn))
-                                       fdb->added_by_external_learn = 0;
+                               if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN,
+                                                     &fdb->flags)))
+                                       clear_bit(BR_FDB_ADDED_BY_EXT_LEARN,
+                                                 &fdb->flags);
                        }
                        if (now != fdb->updated)
                                fdb->updated = now;
-                       if (unlikely(added_by_user))
-                               fdb->added_by_user = 1;
+                       if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
+                               set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
                        if (unlikely(fdb_modified)) {
-                               trace_br_fdb_update(br, source, addr, vid, added_by_user);
+                               trace_br_fdb_update(br, source, addr, vid, flags);
                                fdb_notify(br, fdb, RTM_NEWNEIGH, true);
                        }
                }
        } else {
                spin_lock(&br->hash_lock);
-               fdb = fdb_create(br, source, addr, vid, 0, 0);
+               fdb = fdb_create(br, source, addr, vid, flags);
                if (fdb) {
-                       if (unlikely(added_by_user))
-                               fdb->added_by_user = 1;
-                       trace_br_fdb_update(br, source, addr, vid,
-                                           added_by_user);
+                       trace_br_fdb_update(br, source, addr, vid, flags);
                        fdb_notify(br, fdb, RTM_NEWNEIGH, true);
                }
                /* else  we lose race and someone else inserts
@@ -616,9 +613,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 static int fdb_to_nud(const struct net_bridge *br,
                      const struct net_bridge_fdb_entry *fdb)
 {
-       if (fdb->is_local)
+       if (test_bit(BR_FDB_LOCAL, &fdb->flags))
                return NUD_PERMANENT;
-       else if (fdb->is_static)
+       else if (test_bit(BR_FDB_STATIC, &fdb->flags))
                return NUD_NOARP;
        else if (has_expired(br, fdb))
                return NUD_STALE;
@@ -648,11 +645,11 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
        ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
        ndm->ndm_state   = fdb_to_nud(br, fdb);
 
-       if (fdb->offloaded)
+       if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
                ndm->ndm_flags |= NTF_OFFLOADED;
-       if (fdb->added_by_external_learn)
+       if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
                ndm->ndm_flags |= NTF_EXT_LEARNED;
-       if (fdb->is_sticky)
+       if (test_bit(BR_FDB_STICKY, &fdb->flags))
                ndm->ndm_flags |= NTF_STICKY;
 
        if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
@@ -799,7 +796,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
                         const u8 *addr, u16 state, u16 flags, u16 vid,
                         u8 ndm_flags)
 {
-       u8 is_sticky = !!(ndm_flags & NTF_STICKY);
+       bool is_sticky = !!(ndm_flags & NTF_STICKY);
        struct net_bridge_fdb_entry *fdb;
        bool modified = false;
 
@@ -823,7 +820,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
                if (!(flags & NLM_F_CREATE))
                        return -ENOENT;
 
-               fdb = fdb_create(br, source, addr, vid, 0, 0);
+               fdb = fdb_create(br, source, addr, vid, 0);
                if (!fdb)
                        return -ENOMEM;
 
@@ -840,34 +837,28 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 
        if (fdb_to_nud(br, fdb) != state) {
                if (state & NUD_PERMANENT) {
-                       fdb->is_local = 1;
-                       if (!fdb->is_static) {
-                               fdb->is_static = 1;
+                       set_bit(BR_FDB_LOCAL, &fdb->flags);
+                       if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
                                fdb_add_hw_addr(br, addr);
-                       }
                } else if (state & NUD_NOARP) {
-                       fdb->is_local = 0;
-                       if (!fdb->is_static) {
-                               fdb->is_static = 1;
+                       clear_bit(BR_FDB_LOCAL, &fdb->flags);
+                       if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
                                fdb_add_hw_addr(br, addr);
-                       }
                } else {
-                       fdb->is_local = 0;
-                       if (fdb->is_static) {
-                               fdb->is_static = 0;
+                       clear_bit(BR_FDB_LOCAL, &fdb->flags);
+                       if (test_and_clear_bit(BR_FDB_STATIC, &fdb->flags))
                                fdb_del_hw_addr(br, addr);
-                       }
                }
 
                modified = true;
        }
 
-       if (is_sticky != fdb->is_sticky) {
-               fdb->is_sticky = is_sticky;
+       if (is_sticky != test_bit(BR_FDB_STICKY, &fdb->flags)) {
+               change_bit(BR_FDB_STICKY, &fdb->flags);
                modified = true;
        }
 
-       fdb->added_by_user = 1;
+       set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
 
        fdb->used = jiffies;
        if (modified) {
@@ -890,9 +881,12 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
                                br->dev->name);
                        return -EINVAL;
                }
+               if (!nbp_state_should_learn(p))
+                       return 0;
+
                local_bh_disable();
                rcu_read_lock();
-               br_fdb_update(br, p, addr, vid, true);
+               br_fdb_update(br, p, addr, vid, BIT(BR_FDB_ADDED_BY_USER));
                rcu_read_unlock();
                local_bh_enable();
        } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
@@ -1064,7 +1058,7 @@ int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
        rcu_read_lock();
        hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
                /* We only care for static entries */
-               if (!f->is_static)
+               if (!test_bit(BR_FDB_STATIC, &f->flags))
                        continue;
                err = dev_uc_add(p->dev, f->key.addr.addr);
                if (err)
@@ -1078,7 +1072,7 @@ done:
 rollback:
        hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) {
                /* We only care for static entries */
-               if (!tmp->is_static)
+               if (!test_bit(BR_FDB_STATIC, &tmp->flags))
                        continue;
                if (tmp == f)
                        break;
@@ -1097,7 +1091,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
        rcu_read_lock();
        hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
                /* We only care for static entries */
-               if (!f->is_static)
+               if (!test_bit(BR_FDB_STATIC, &f->flags))
                        continue;
 
                dev_uc_del(p->dev, f->key.addr.addr);
@@ -1119,14 +1113,15 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 
        fdb = br_fdb_find(br, addr, vid);
        if (!fdb) {
-               fdb = fdb_create(br, p, addr, vid, 0, 0);
+               unsigned long flags = BIT(BR_FDB_ADDED_BY_EXT_LEARN);
+
+               if (swdev_notify)
+                       flags |= BIT(BR_FDB_ADDED_BY_USER);
+               fdb = fdb_create(br, p, addr, vid, flags);
                if (!fdb) {
                        err = -ENOMEM;
                        goto err_unlock;
                }
-               if (swdev_notify)
-                       fdb->added_by_user = 1;
-               fdb->added_by_external_learn = 1;
                fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
        } else {
                fdb->updated = jiffies;
@@ -1136,17 +1131,17 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
                        modified = true;
                }
 
-               if (fdb->added_by_external_learn) {
+               if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
                        /* Refresh entry */
                        fdb->used = jiffies;
-               } else if (!fdb->added_by_user) {
+               } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) {
                        /* Take over SW learned entry */
-                       fdb->added_by_external_learn = 1;
+                       set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags);
                        modified = true;
                }
 
                if (swdev_notify)
-                       fdb->added_by_user = 1;
+                       set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
 
                if (modified)
                        fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
@@ -1168,7 +1163,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
        spin_lock_bh(&br->hash_lock);
 
        fdb = br_fdb_find(br, addr, vid);
-       if (fdb && fdb->added_by_external_learn)
+       if (fdb && test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
                fdb_delete(br, fdb, swdev_notify);
        else
                err = -ENOENT;
@@ -1186,8 +1181,8 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
        spin_lock_bh(&br->hash_lock);
 
        fdb = br_fdb_find(br, addr, vid);
-       if (fdb)
-               fdb->offloaded = offloaded;
+       if (fdb && offloaded != test_bit(BR_FDB_OFFLOADED, &fdb->flags))
+               change_bit(BR_FDB_OFFLOADED, &fdb->flags);
 
        spin_unlock_bh(&br->hash_lock);
 }
@@ -1206,7 +1201,7 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
        spin_lock_bh(&p->br->hash_lock);
        hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) {
                if (f->dst == p && f->key.vlan_id == vid)
-                       f->offloaded = 0;
+                       clear_bit(BR_FDB_OFFLOADED, &f->flags);
        }
        spin_unlock_bh(&p->br->hash_lock);
 }
index 09b1dd8..8944ceb 100644 (file)
@@ -88,7 +88,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        /* insert into forwarding database after filtering to avoid spoofing */
        br = p->br;
        if (p->flags & BR_LEARNING)
-               br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
+               br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
 
        local_rcv = !!(br->dev->flags & IFF_PROMISC);
        if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -151,7 +151,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        if (dst) {
                unsigned long now = jiffies;
 
-               if (dst->is_local)
+               if (test_bit(BR_FDB_LOCAL, &dst->flags))
                        return br_pass_frame_up(skb);
 
                if (now != dst->used)
@@ -182,9 +182,10 @@ static void __br_handle_local_finish(struct sk_buff *skb)
 
        /* check if vlan is allowed, to avoid spoofing */
        if ((p->flags & BR_LEARNING) &&
+           nbp_state_should_learn(p) &&
            !br_opt_get(p->br, BROPT_NO_LL_LEARN) &&
            br_should_learn(p, skb, &vid))
-               br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
+               br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, 0);
 }
 
 /* note: already called with rcu_read_lock */
index ce2ab14..36b0367 100644 (file)
@@ -172,6 +172,16 @@ struct net_bridge_vlan_group {
        u16                             pvid;
 };
 
+/* bridge fdb flags */
+enum {
+       BR_FDB_LOCAL,
+       BR_FDB_STATIC,
+       BR_FDB_STICKY,
+       BR_FDB_ADDED_BY_USER,
+       BR_FDB_ADDED_BY_EXT_LEARN,
+       BR_FDB_OFFLOADED,
+};
+
 struct net_bridge_fdb_key {
        mac_addr addr;
        u16 vlan_id;
@@ -183,12 +193,7 @@ struct net_bridge_fdb_entry {
 
        struct net_bridge_fdb_key       key;
        struct hlist_node               fdb_node;
-       unsigned char                   is_local:1,
-                                       is_static:1,
-                                       is_sticky:1,
-                                       added_by_user:1,
-                                       added_by_external_learn:1,
-                                       offloaded:1;
+       unsigned long                   flags;
 
        /* write-heavy members should not affect lookups */
        unsigned long                   updated ____cacheline_aligned_in_smp;
@@ -495,6 +500,11 @@ static inline bool br_vlan_should_use(const struct net_bridge_vlan *v)
        return true;
 }
 
+static inline bool nbp_state_should_learn(const struct net_bridge_port *p)
+{
+       return p->state == BR_STATE_LEARNING || p->state == BR_STATE_FORWARDING;
+}
+
 static inline int br_opt_get(const struct net_bridge *br,
                             enum net_bridge_opts opt)
 {
@@ -566,7 +576,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
 int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                  const unsigned char *addr, u16 vid);
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
-                  const unsigned char *addr, u16 vid, bool added_by_user);
+                  const unsigned char *addr, u16 vid, unsigned long flags);
 
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
                  struct net_device *dev, const unsigned char *addr, u16 vid);
index 921310d..015209b 100644 (file)
@@ -129,15 +129,19 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
                br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
                                                fdb->key.vlan_id,
                                                fdb->dst->dev,
-                                               fdb->added_by_user,
-                                               fdb->offloaded);
+                                               test_bit(BR_FDB_ADDED_BY_USER,
+                                                        &fdb->flags),
+                                               test_bit(BR_FDB_OFFLOADED,
+                                                        &fdb->flags));
                break;
        case RTM_NEWNEIGH:
                br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
                                                fdb->key.vlan_id,
                                                fdb->dst->dev,
-                                               fdb->added_by_user,
-                                               fdb->offloaded);
+                                               test_bit(BR_FDB_ADDED_BY_USER,
+                                                        &fdb->flags),
+                                               test_bit(BR_FDB_OFFLOADED,
+                                                        &fdb->flags));
                break;
        }
 }
index eb83051..b7532a7 100644 (file)
@@ -13,11 +13,11 @@ menuconfig CAIF
        with its modems. It is accessed from user space as sockets (PF_CAIF).
 
        Say Y (or M) here if you build for a phone product (e.g. Android or
-       MeeGo ) that uses CAIF as transport, if unsure say N.
+       MeeGo) that uses CAIF as transport. If unsure say N.
 
        If you select to build it as module then CAIF_NETDEV also needs to be
-       built as modules. You will also need to say yes to any CAIF physical
-       devices that your platform requires.
+       built as a module. You will also need to say Y (or M) to any CAIF
+       physical devices that your platform requires.
 
        See Documentation/networking/caif for a further explanation on how to
        use and configure CAIF.
@@ -37,7 +37,7 @@ config CAIF_NETDEV
        default CAIF
        ---help---
        Say Y if you will be using a CAIF based GPRS network device.
-       This can be either built-in or a loadable module,
+       This can be either built-in or a loadable module.
        If you select to build it as a built-in then the main CAIF device must
        also be a built-in.
        If unsure say Y.
@@ -48,7 +48,7 @@ config CAIF_USB
        default n
        ---help---
        Say Y if you are using CAIF over USB CDC NCM.
-       This can be either built-in or a loadable module,
+       This can be either built-in or a loadable module.
        If you select to build it as a built-in then the main CAIF device must
        also be a built-in.
        If unsure say N.
index 99ac84f..bb15800 100644 (file)
@@ -229,6 +229,122 @@ static inline void rps_unlock(struct softnet_data *sd)
 #endif
 }
 
+static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
+                                                      const char *name)
+{
+       struct netdev_name_node *name_node;
+
+       name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
+       if (!name_node)
+               return NULL;
+       INIT_HLIST_NODE(&name_node->hlist);
+       name_node->dev = dev;
+       name_node->name = name;
+       return name_node;
+}
+
+static struct netdev_name_node *
+netdev_name_node_head_alloc(struct net_device *dev)
+{
+       struct netdev_name_node *name_node;
+
+       name_node = netdev_name_node_alloc(dev, dev->name);
+       if (!name_node)
+               return NULL;
+       INIT_LIST_HEAD(&name_node->list);
+       return name_node;
+}
+
+static void netdev_name_node_free(struct netdev_name_node *name_node)
+{
+       kfree(name_node);
+}
+
+static void netdev_name_node_add(struct net *net,
+                                struct netdev_name_node *name_node)
+{
+       hlist_add_head_rcu(&name_node->hlist,
+                          dev_name_hash(net, name_node->name));
+}
+
+static void netdev_name_node_del(struct netdev_name_node *name_node)
+{
+       hlist_del_rcu(&name_node->hlist);
+}
+
+static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
+                                                       const char *name)
+{
+       struct hlist_head *head = dev_name_hash(net, name);
+       struct netdev_name_node *name_node;
+
+       hlist_for_each_entry(name_node, head, hlist)
+               if (!strcmp(name_node->name, name))
+                       return name_node;
+       return NULL;
+}
+
+static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
+                                                           const char *name)
+{
+       struct hlist_head *head = dev_name_hash(net, name);
+       struct netdev_name_node *name_node;
+
+       hlist_for_each_entry_rcu(name_node, head, hlist)
+               if (!strcmp(name_node->name, name))
+                       return name_node;
+       return NULL;
+}
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name)
+{
+       struct netdev_name_node *name_node;
+       struct net *net = dev_net(dev);
+
+       name_node = netdev_name_node_lookup(net, name);
+       if (name_node)
+               return -EEXIST;
+       name_node = netdev_name_node_alloc(dev, name);
+       if (!name_node)
+               return -ENOMEM;
+       netdev_name_node_add(net, name_node);
+       /* The node that holds dev->name acts as a head of per-device list. */
+       list_add_tail(&name_node->list, &dev->name_node->list);
+
+       return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_create);
+
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+{
+       list_del(&name_node->list);
+       netdev_name_node_del(name_node);
+       kfree(name_node->name);
+       netdev_name_node_free(name_node);
+}
+
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
+{
+       struct netdev_name_node *name_node;
+       struct net *net = dev_net(dev);
+
+       name_node = netdev_name_node_lookup(net, name);
+       if (!name_node)
+               return -ENOENT;
+       __netdev_name_node_alt_destroy(name_node);
+
+       return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_destroy);
+
+static void netdev_name_node_alt_flush(struct net_device *dev)
+{
+       struct netdev_name_node *name_node, *tmp;
+
+       list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
+               __netdev_name_node_alt_destroy(name_node);
+}
+
 /* Device list insertion */
 static void list_netdevice(struct net_device *dev)
 {
@@ -238,7 +354,7 @@ static void list_netdevice(struct net_device *dev)
 
        write_lock_bh(&dev_base_lock);
        list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
-       hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+       netdev_name_node_add(net, dev->name_node);
        hlist_add_head_rcu(&dev->index_hlist,
                           dev_index_hash(net, dev->ifindex));
        write_unlock_bh(&dev_base_lock);
@@ -256,7 +372,7 @@ static void unlist_netdevice(struct net_device *dev)
        /* Unlink dev from the device chain */
        write_lock_bh(&dev_base_lock);
        list_del_rcu(&dev->dev_list);
-       hlist_del_rcu(&dev->name_hlist);
+       netdev_name_node_del(dev->name_node);
        hlist_del_rcu(&dev->index_hlist);
        write_unlock_bh(&dev_base_lock);
 
@@ -652,14 +768,10 @@ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
-       struct net_device *dev;
-       struct hlist_head *head = dev_name_hash(net, name);
+       struct netdev_name_node *node_name;
 
-       hlist_for_each_entry(dev, head, name_hlist)
-               if (!strncmp(dev->name, name, IFNAMSIZ))
-                       return dev;
-
-       return NULL;
+       node_name = netdev_name_node_lookup(net, name);
+       return node_name ? node_name->dev : NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_name);
 
@@ -677,14 +789,10 @@ EXPORT_SYMBOL(__dev_get_by_name);
 
 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
 {
-       struct net_device *dev;
-       struct hlist_head *head = dev_name_hash(net, name);
-
-       hlist_for_each_entry_rcu(dev, head, name_hlist)
-               if (!strncmp(dev->name, name, IFNAMSIZ))
-                       return dev;
+       struct netdev_name_node *node_name;
 
-       return NULL;
+       node_name = netdev_name_node_lookup_rcu(net, name);
+       return node_name ? node_name->dev : NULL;
 }
 EXPORT_SYMBOL(dev_get_by_name_rcu);
 
@@ -1060,8 +1168,8 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
-int dev_get_valid_name(struct net *net, struct net_device *dev,
-                      const char *name)
+static int dev_get_valid_name(struct net *net, struct net_device *dev,
+                             const char *name)
 {
        BUG_ON(!net);
 
@@ -1077,7 +1185,6 @@ int dev_get_valid_name(struct net *net, struct net_device *dev,
 
        return 0;
 }
-EXPORT_SYMBOL(dev_get_valid_name);
 
 /**
  *     dev_change_name - change name of a device
@@ -1151,13 +1258,13 @@ rollback:
        netdev_adjacent_rename_links(dev, oldname);
 
        write_lock_bh(&dev_base_lock);
-       hlist_del_rcu(&dev->name_hlist);
+       netdev_name_node_del(dev->name_node);
        write_unlock_bh(&dev_base_lock);
 
        synchronize_rcu();
 
        write_lock_bh(&dev_base_lock);
-       hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+       netdev_name_node_add(net, dev->name_node);
        write_unlock_bh(&dev_base_lock);
 
        ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@ -1536,6 +1643,62 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
        return nb->notifier_call(nb, val, &info);
 }
 
+static int call_netdevice_register_notifiers(struct notifier_block *nb,
+                                            struct net_device *dev)
+{
+       int err;
+
+       err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
+       err = notifier_to_errno(err);
+       if (err)
+               return err;
+
+       if (!(dev->flags & IFF_UP))
+               return 0;
+
+       call_netdevice_notifier(nb, NETDEV_UP, dev);
+       return 0;
+}
+
+static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
+                                               struct net_device *dev)
+{
+       if (dev->flags & IFF_UP) {
+               call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+                                       dev);
+               call_netdevice_notifier(nb, NETDEV_DOWN, dev);
+       }
+       call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
+}
+
+static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
+                                                struct net *net)
+{
+       struct net_device *dev;
+       int err;
+
+       for_each_netdev(net, dev) {
+               err = call_netdevice_register_notifiers(nb, dev);
+               if (err)
+                       goto rollback;
+       }
+       return 0;
+
+rollback:
+       for_each_netdev_continue_reverse(net, dev)
+               call_netdevice_unregister_notifiers(nb, dev);
+       return err;
+}
+
+static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
+                                                   struct net *net)
+{
+       struct net_device *dev;
+
+       for_each_netdev(net, dev)
+               call_netdevice_unregister_notifiers(nb, dev);
+}
+
 static int dev_boot_phase = 1;
 
 /**
@@ -1554,8 +1717,6 @@ static int dev_boot_phase = 1;
 
 int register_netdevice_notifier(struct notifier_block *nb)
 {
-       struct net_device *dev;
-       struct net_device *last;
        struct net *net;
        int err;
 
@@ -1568,17 +1729,9 @@ int register_netdevice_notifier(struct notifier_block *nb)
        if (dev_boot_phase)
                goto unlock;
        for_each_net(net) {
-               for_each_netdev(net, dev) {
-                       err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
-                       err = notifier_to_errno(err);
-                       if (err)
-                               goto rollback;
-
-                       if (!(dev->flags & IFF_UP))
-                               continue;
-
-                       call_netdevice_notifier(nb, NETDEV_UP, dev);
-               }
+               err = call_netdevice_register_net_notifiers(nb, net);
+               if (err)
+                       goto rollback;
        }
 
 unlock:
@@ -1587,22 +1740,9 @@ unlock:
        return err;
 
 rollback:
-       last = dev;
-       for_each_net(net) {
-               for_each_netdev(net, dev) {
-                       if (dev == last)
-                               goto outroll;
+       for_each_net_continue_reverse(net)
+               call_netdevice_unregister_net_notifiers(nb, net);
 
-                       if (dev->flags & IFF_UP) {
-                               call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
-                                                       dev);
-                               call_netdevice_notifier(nb, NETDEV_DOWN, dev);
-                       }
-                       call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
-               }
-       }
-
-outroll:
        raw_notifier_chain_unregister(&netdev_chain, nb);
        goto unlock;
 }
@@ -1652,6 +1792,80 @@ unlock:
 }
 EXPORT_SYMBOL(unregister_netdevice_notifier);
 
+/**
+ * register_netdevice_notifier_net - register a per-netns network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Register a notifier to be called when network device events occur.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
+ *
+ * When registered all registration and up events are replayed
+ * to the new notifier to allow device to have a race free
+ * view of the network device list.
+ */
+
+int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
+{
+       int err;
+
+       rtnl_lock();
+       err = raw_notifier_chain_register(&net->netdev_chain, nb);
+       if (err)
+               goto unlock;
+       if (dev_boot_phase)
+               goto unlock;
+
+       err = call_netdevice_register_net_notifiers(nb, net);
+       if (err)
+               goto chain_unregister;
+
+unlock:
+       rtnl_unlock();
+       return err;
+
+chain_unregister:
+       raw_notifier_chain_unregister(&net->netdev_chain, nb);
+       goto unlock;
+}
+EXPORT_SYMBOL(register_netdevice_notifier_net);
+
+/**
+ * unregister_netdevice_notifier_net - unregister a per-netns
+ *                                     network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Unregister a notifier previously registered by
+ * register_netdevice_notifier_net(). The notifier is unlinked from the
+ * kernel structures and may then be reused. A negative errno code
+ * is returned on a failure.
+ *
+ * After unregistering unregister and down device events are synthesized
+ * for all devices on the device list to the removed notifier to remove
+ * the need for special case cleanup code.
+ */
+
+int unregister_netdevice_notifier_net(struct net *net,
+                                     struct notifier_block *nb)
+{
+       int err;
+
+       rtnl_lock();
+       err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
+       if (err)
+               goto unlock;
+
+       call_netdevice_unregister_net_notifiers(nb, net);
+
+unlock:
+       rtnl_unlock();
+       return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+
 /**
  *     call_netdevice_notifiers_info - call all network notifier blocks
  *     @val: value passed unmodified to notifier function
@@ -1664,7 +1878,18 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 static int call_netdevice_notifiers_info(unsigned long val,
                                         struct netdev_notifier_info *info)
 {
+       struct net *net = dev_net(info->dev);
+       int ret;
+
        ASSERT_RTNL();
+
+       /* Run per-netns notifier block chain first, then run the global one.
+        * Hopefully, one day, the global one is going to be removed after
+        * all notifier block registrators get converted to be per-netns.
+        */
+       ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
+       if (ret & NOTIFY_STOP_MASK)
+               return ret;
        return raw_notifier_call_chain(&netdev_chain, val, info);
 }
 
@@ -2690,7 +2915,7 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
 void netif_schedule_queue(struct netdev_queue *txq)
 {
        rcu_read_lock();
-       if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
+       if (!netif_xmit_stopped(txq)) {
                struct Qdisc *q = rcu_dereference(txq->qdisc);
 
                __netif_schedule(q);
@@ -2858,12 +3083,9 @@ int skb_checksum_help(struct sk_buff *skb)
        offset += skb->csum_offset;
        BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
 
-       if (skb_cloned(skb) &&
-           !skb_clone_writable(skb, offset + sizeof(__sum16))) {
-               ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-               if (ret)
-                       goto out;
-       }
+       ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
+       if (ret)
+               goto out;
 
        *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
 out_set_summed:
@@ -2898,12 +3120,11 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
                ret = -EINVAL;
                goto out;
        }
-       if (skb_cloned(skb) &&
-           !skb_clone_writable(skb, offset + sizeof(__le32))) {
-               ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-               if (ret)
-                       goto out;
-       }
+
+       ret = skb_ensure_writable(skb, offset + sizeof(__le32));
+       if (ret)
+               goto out;
+
        crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
                                                  skb->len - start, ~(__u32)0,
                                                  crc32c_csum_stub));
@@ -5582,6 +5803,26 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
 }
 EXPORT_SYMBOL(gro_find_complete_by_type);
 
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+       if (!napi->rx_count)
+               return;
+       netif_receive_skb_list_internal(&napi->rx_list);
+       INIT_LIST_HEAD(&napi->rx_list);
+       napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+       list_add_tail(&skb->list, &napi->rx_list);
+       if (++napi->rx_count >= gro_normal_batch)
+               gro_normal_list(napi);
+}
+
 static void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
        skb_dst_drop(skb);
@@ -5589,12 +5830,13 @@ static void napi_skb_free_stolen_head(struct sk_buff *skb)
        kmem_cache_free(skbuff_head_cache, skb);
 }
 
-static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(struct napi_struct *napi,
+                                   struct sk_buff *skb,
+                                   gro_result_t ret)
 {
        switch (ret) {
        case GRO_NORMAL:
-               if (netif_receive_skb_internal(skb))
-                       ret = GRO_DROP;
+               gro_normal_one(napi, skb);
                break;
 
        case GRO_DROP:
@@ -5626,7 +5868,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
        skb_gro_reset_offset(skb);
 
-       ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+       ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
        trace_napi_gro_receive_exit(ret);
 
        return ret;
@@ -5672,26 +5914,6 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
-       if (!napi->rx_count)
-               return;
-       netif_receive_skb_list_internal(&napi->rx_list);
-       INIT_LIST_HEAD(&napi->rx_list);
-       napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing.  If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
-       list_add_tail(&skb->list, &napi->rx_list);
-       if (++napi->rx_count >= gro_normal_batch)
-               gro_normal_list(napi);
-}
-
 static gro_result_t napi_frags_finish(struct napi_struct *napi,
                                      struct sk_buff *skb,
                                      gro_result_t ret)
@@ -8532,6 +8754,9 @@ static void rollback_registered_many(struct list_head *head)
                dev_uc_flush(dev);
                dev_mc_flush(dev);
 
+               netdev_name_node_alt_flush(dev);
+               netdev_name_node_free(dev->name_node);
+
                if (dev->netdev_ops->ndo_uninit)
                        dev->netdev_ops->ndo_uninit(dev);
 
@@ -9011,6 +9236,11 @@ int register_netdevice(struct net_device *dev)
        if (ret < 0)
                goto out;
 
+       ret = -ENOMEM;
+       dev->name_node = netdev_name_node_head_alloc(dev);
+       if (!dev->name_node)
+               goto out;
+
        /* Init, if this function is available */
        if (dev->netdev_ops->ndo_init) {
                ret = dev->netdev_ops->ndo_init(dev);
@@ -9132,6 +9362,8 @@ out:
        return ret;
 
 err_uninit:
+       if (dev->name_node)
+               netdev_name_node_free(dev->name_node);
        if (dev->netdev_ops->ndo_uninit)
                dev->netdev_ops->ndo_uninit(dev);
        if (dev->priv_destructor)
@@ -9946,6 +10178,8 @@ static int __net_init netdev_init(struct net *net)
        if (net->dev_index_head == NULL)
                goto err_idx;
 
+       RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
+
        return 0;
 
 err_idx:
index f80151e..97e9a22 100644 (file)
@@ -95,16 +95,25 @@ static LIST_HEAD(devlink_list);
  */
 static DEFINE_MUTEX(devlink_mutex);
 
-static struct net *devlink_net(const struct devlink *devlink)
+struct net *devlink_net(const struct devlink *devlink)
 {
        return read_pnet(&devlink->_net);
 }
+EXPORT_SYMBOL_GPL(devlink_net);
 
-static void devlink_net_set(struct devlink *devlink, struct net *net)
+static void __devlink_net_set(struct devlink *devlink, struct net *net)
 {
        write_pnet(&devlink->_net, net);
 }
 
+void devlink_net_set(struct devlink *devlink, struct net *net)
+{
+       if (WARN_ON(devlink->registered))
+               return;
+       __devlink_net_set(devlink, net);
+}
+EXPORT_SYMBOL_GPL(devlink_net_set);
+
 static struct devlink *devlink_get_from_attrs(struct net *net,
                                              struct nlattr **attrs)
 {
@@ -434,8 +443,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
 {
        struct devlink *devlink;
 
-       devlink = devlink_get_from_info(info);
-       if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+       /* When devlink changes netns, it would not be found
+        * by devlink_get_from_info(). So try if it is stored first.
+        */
+       if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
+               devlink = info->user_ptr[0];
+       } else {
+               devlink = devlink_get_from_info(info);
+               WARN_ON(IS_ERR(devlink));
+       }
+       if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
                mutex_unlock(&devlink->lock);
        mutex_unlock(&devlink_mutex);
 }
@@ -1035,7 +1052,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
        struct devlink_sb *devlink_sb;
        int start = cb->args[0];
        int idx = 0;
-       int err;
+       int err = 0;
 
        mutex_lock(&devlink_mutex);
        list_for_each_entry(devlink, &devlink_list, list) {
@@ -1058,6 +1075,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
 out:
        mutex_unlock(&devlink_mutex);
 
+       if (err != -EMSGSIZE)
+               return err;
+
        cb->args[0] = idx;
        return msg->len;
 }
@@ -1233,7 +1253,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
        struct devlink_sb *devlink_sb;
        int start = cb->args[0];
        int idx = 0;
-       int err;
+       int err = 0;
 
        mutex_lock(&devlink_mutex);
        list_for_each_entry(devlink, &devlink_list, list) {
@@ -1256,6 +1276,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
 out:
        mutex_unlock(&devlink_mutex);
 
+       if (err != -EMSGSIZE)
+               return err;
+
        cb->args[0] = idx;
        return msg->len;
 }
@@ -1460,7 +1483,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
        struct devlink_sb *devlink_sb;
        int start = cb->args[0];
        int idx = 0;
-       int err;
+       int err = 0;
 
        mutex_lock(&devlink_mutex);
        list_for_each_entry(devlink, &devlink_list, list) {
@@ -1485,6 +1508,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
 out:
        mutex_unlock(&devlink_mutex);
 
+       if (err != -EMSGSIZE)
+               return err;
+
        cb->args[0] = idx;
        return msg->len;
 }
@@ -2674,6 +2700,72 @@ devlink_resources_validate(struct devlink *devlink,
        return err;
 }
 
+static struct net *devlink_netns_get(struct sk_buff *skb,
+                                    struct genl_info *info)
+{
+       struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+       struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+       struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+       struct net *net;
+
+       if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+               NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (netns_pid_attr) {
+               net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+       } else if (netns_fd_attr) {
+               net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+       } else if (netns_id_attr) {
+               net = get_net_ns_by_id(sock_net(skb->sk),
+                                      nla_get_u32(netns_id_attr));
+               if (!net)
+                       net = ERR_PTR(-EINVAL);
+       } else {
+               WARN_ON(1);
+               net = ERR_PTR(-EINVAL);
+       }
+       if (IS_ERR(net)) {
+               NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
+               return ERR_PTR(-EINVAL);
+       }
+       if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+               put_net(net);
+               return ERR_PTR(-EPERM);
+       }
+       return net;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+                                unsigned int port_index,
+                                struct devlink_param_item *param_item,
+                                enum devlink_command cmd);
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+                                       struct net *dest_net)
+{
+       struct devlink_param_item *param_item;
+
+       /* Userspace needs to be notified about devlink objects
+        * removed from original and entering new network namespace.
+        * The rest of the devlink objects are re-created during
+        * reload process so the notifications are generated separately.
+        */
+
+       list_for_each_entry(param_item, &devlink->param_list, list)
+               devlink_param_notify(devlink, 0, param_item,
+                                    DEVLINK_CMD_PARAM_DEL);
+       devlink_notify(devlink, DEVLINK_CMD_DEL);
+
+       __devlink_net_set(devlink, dest_net);
+
+       devlink_notify(devlink, DEVLINK_CMD_NEW);
+       list_for_each_entry(param_item, &devlink->param_list, list)
+               devlink_param_notify(devlink, 0, param_item,
+                                    DEVLINK_CMD_PARAM_NEW);
+}
+
 static bool devlink_reload_supported(struct devlink *devlink)
 {
        return devlink->ops->reload_down && devlink->ops->reload_up;
@@ -2694,9 +2786,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
 
+static int devlink_reload(struct devlink *devlink, struct net *dest_net,
+                         struct netlink_ext_ack *extack)
+{
+       int err;
+
+       err = devlink->ops->reload_down(devlink, !!dest_net, extack);
+       if (err)
+               return err;
+
+       if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
+               devlink_reload_netns_change(devlink, dest_net);
+
+       err = devlink->ops->reload_up(devlink, extack);
+       devlink_reload_failed_set(devlink, !!err);
+       return err;
+}
+
 static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 {
        struct devlink *devlink = info->user_ptr[0];
+       struct net *dest_net = NULL;
        int err;
 
        if (!devlink_reload_supported(devlink))
@@ -2707,11 +2817,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
                NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
                return err;
        }
-       err = devlink->ops->reload_down(devlink, info->extack);
-       if (err)
-               return err;
-       err = devlink->ops->reload_up(devlink, info->extack);
-       devlink_reload_failed_set(devlink, !!err);
+
+       if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+           info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+           info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+               dest_net = devlink_netns_get(skb, info);
+               if (IS_ERR(dest_net))
+                       return PTR_ERR(dest_net);
+       }
+
+       err = devlink_reload(devlink, dest_net, info->extack);
+
+       if (dest_net)
+               put_net(dest_net);
+
        return err;
 }
 
@@ -3155,7 +3274,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        int start = cb->args[0];
        int idx = 0;
-       int err;
+       int err = 0;
 
        mutex_lock(&devlink_mutex);
        list_for_each_entry(devlink, &devlink_list, list) {
@@ -3183,6 +3302,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 out:
        mutex_unlock(&devlink_mutex);
 
+       if (err != -EMSGSIZE)
+               return err;
+
        cb->args[0] = idx;
        return msg->len;
 }
@@ -3411,7 +3533,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        int start = cb->args[0];
        int idx = 0;
-       int err;
+       int err = 0;
 
        mutex_lock(&devlink_mutex);
        list_for_each_entry(devlink, &devlink_list, list) {
@@ -3444,6 +3566,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 out:
        mutex_unlock(&devlink_mutex);
 
+       if (err != -EMSGSIZE)
+               return err;
+
        cb->args[0] = idx;
        return msg->len;
 }
@@ -3818,29 +3943,19 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
 static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
                                             struct netlink_callback *cb)
 {
+       const struct genl_dumpit_info *info = genl_dumpit_info(cb);
        u64 ret_offset, start_offset, end_offset = 0;
+       struct nlattr **attrs = info->attrs;
        struct devlink_region *region;
        struct nlattr *chunks_attr;
        const char *region_name;
        struct devlink *devlink;
-       struct nlattr **attrs;
        bool dump = true;
        void *hdr;
        int err;
 
        start_offset = *((u64 *)&cb->args[0]);
 
-       attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
-       if (!attrs)
-               return -ENOMEM;
-
-       err = nlmsg_parse_deprecated(cb->nlh,
-                                    GENL_HDRLEN + devlink_nl_family.hdrsize,
-                                    attrs, DEVLINK_ATTR_MAX,
-                                    devlink_nl_family.policy, cb->extack);
-       if (err)
-               goto out_free;
-
        mutex_lock(&devlink_mutex);
        devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
        if (IS_ERR(devlink)) {
@@ -3917,7 +4032,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
        genlmsg_end(skb, hdr);
        mutex_unlock(&devlink->lock);
        mutex_unlock(&devlink_mutex);
-       kfree(attrs);
 
        return skb->len;
 
@@ -3927,8 +4041,6 @@ out_unlock:
        mutex_unlock(&devlink->lock);
 out_dev:
        mutex_unlock(&devlink_mutex);
-out_free:
-       kfree(attrs);
        return err;
 }
 
@@ -4066,7 +4178,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        int start = cb->args[0];
        int idx = 0;
-       int err;
+       int err = 0;
 
        mutex_lock(&devlink_mutex);
        list_for_each_entry(devlink, &devlink_list, list) {
@@ -4094,6 +4206,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
        }
        mutex_unlock(&devlink_mutex);
 
+       if (err != -EMSGSIZE)
+               return err;
+
        cb->args[0] = idx;
        return msg->len;
 }
@@ -4732,14 +4847,17 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
 
 static int
 devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
-                               void *priv_ctx)
+                               void *priv_ctx, struct netlink_ext_ack *extack)
 {
        int err;
 
+       if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
+               return 0;
+
        if (!reporter->ops->recover)
                return -EOPNOTSUPP;
 
-       err = reporter->ops->recover(reporter, priv_ctx);
+       err = reporter->ops->recover(reporter, priv_ctx, extack);
        if (err)
                return err;
 
@@ -4760,7 +4878,8 @@ devlink_health_dump_clear(struct devlink_health_reporter *reporter)
 }
 
 static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
-                                 void *priv_ctx)
+                                 void *priv_ctx,
+                                 struct netlink_ext_ack *extack)
 {
        int err;
 
@@ -4781,7 +4900,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
                goto dump_err;
 
        err = reporter->ops->dump(reporter, reporter->dump_fmsg,
-                                 priv_ctx);
+                                 priv_ctx, extack);
        if (err)
                goto dump_err;
 
@@ -4828,11 +4947,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
 
        mutex_lock(&reporter->dump_lock);
        /* store current dump of current error, for later analysis */
-       devlink_health_do_dump(reporter, priv_ctx);
+       devlink_health_do_dump(reporter, priv_ctx, NULL);
        mutex_unlock(&reporter->dump_lock);
 
        if (reporter->auto_recover)
-               return devlink_health_reporter_recover(reporter, priv_ctx);
+               return devlink_health_reporter_recover(reporter,
+                                                      priv_ctx, NULL);
 
        return 0;
 }
@@ -4867,21 +4987,10 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
 static struct devlink_health_reporter *
 devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
 {
+       const struct genl_dumpit_info *info = genl_dumpit_info(cb);
        struct devlink_health_reporter *reporter;
+       struct nlattr **attrs = info->attrs;
        struct devlink *devlink;
-       struct nlattr **attrs;
-       int err;
-
-       attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
-       if (!attrs)
-               return NULL;
-
-       err = nlmsg_parse_deprecated(cb->nlh,
-                                    GENL_HDRLEN + devlink_nl_family.hdrsize,
-                                    attrs, DEVLINK_ATTR_MAX,
-                                    devlink_nl_family.policy, cb->extack);
-       if (err)
-               goto free;
 
        mutex_lock(&devlink_mutex);
        devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
@@ -4890,12 +4999,9 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
 
        reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
        mutex_unlock(&devlink_mutex);
-       kfree(attrs);
        return reporter;
 unlock:
        mutex_unlock(&devlink_mutex);
-free:
-       kfree(attrs);
        return NULL;
 }
 
@@ -5084,7 +5190,7 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
        if (!reporter)
                return -EINVAL;
 
-       err = devlink_health_reporter_recover(reporter, NULL);
+       err = devlink_health_reporter_recover(reporter, NULL, info->extack);
 
        devlink_health_reporter_put(reporter);
        return err;
@@ -5117,7 +5223,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
        if (err)
                goto out;
 
-       err = reporter->ops->diagnose(reporter, fmsg);
+       err = reporter->ops->diagnose(reporter, fmsg, info->extack);
        if (err)
                goto out;
 
@@ -5152,7 +5258,7 @@ devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
        }
        mutex_lock(&reporter->dump_lock);
        if (!start) {
-               err = devlink_health_do_dump(reporter, NULL);
+               err = devlink_health_do_dump(reporter, NULL, cb->extack);
                if (err)
                        goto unlock;
                cb->args[1] = reporter->dump_ts;
@@ -5793,6 +5899,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
        [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
        [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
        [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
+       [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -6023,7 +6132,8 @@ static const struct genl_ops devlink_nl_ops[] = {
        },
        {
                .cmd = DEVLINK_CMD_REGION_READ,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                .dumpit = devlink_nl_cmd_region_read_dumpit,
                .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
@@ -6071,7 +6181,8 @@ static const struct genl_ops devlink_nl_ops[] = {
        },
        {
                .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
                .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
@@ -6155,7 +6266,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
        if (!devlink)
                return NULL;
        devlink->ops = ops;
-       devlink_net_set(devlink, &init_net);
+       __devlink_net_set(devlink, &init_net);
        INIT_LIST_HEAD(&devlink->port_list);
        INIT_LIST_HEAD(&devlink->sb_list);
        INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
@@ -6181,6 +6292,7 @@ int devlink_register(struct devlink *devlink, struct device *dev)
 {
        mutex_lock(&devlink_mutex);
        devlink->dev = dev;
+       devlink->registered = true;
        list_add_tail(&devlink->list, &devlink_list);
        devlink_notify(devlink, DEVLINK_CMD_NEW);
        mutex_unlock(&devlink_mutex);
@@ -8060,9 +8172,43 @@ int devlink_compat_switch_id_get(struct net_device *dev,
        return 0;
 }
 
+static void __net_exit devlink_pernet_pre_exit(struct net *net)
+{
+       struct devlink *devlink;
+       int err;
+
+       /* In case network namespace is getting destroyed, reload
+        * all devlink instances from this namespace into init_net.
+        */
+       mutex_lock(&devlink_mutex);
+       list_for_each_entry(devlink, &devlink_list, list) {
+               if (net_eq(devlink_net(devlink), net)) {
+                       if (WARN_ON(!devlink_reload_supported(devlink)))
+                               continue;
+                       err = devlink_reload(devlink, &init_net, NULL);
+                       if (err)
+                               pr_warn("Failed to reload devlink instance into init_net\n");
+               }
+       }
+       mutex_unlock(&devlink_mutex);
+}
+
+static struct pernet_operations devlink_pernet_ops __net_initdata = {
+       .pre_exit = devlink_pernet_pre_exit,
+};
+
 static int __init devlink_init(void)
 {
-       return genl_register_family(&devlink_nl_family);
+       int err;
+
+       err = genl_register_family(&devlink_nl_family);
+       if (err)
+               goto out;
+       err = register_pernet_subsys(&devlink_pernet_ops);
+
+out:
+       WARN_ON(err);
+       return err;
 }
 
 subsys_initcall(devlink_init);
index 470a606..fc96259 100644 (file)
@@ -12,17 +12,15 @@ static unsigned int fib_notifier_net_id;
 
 struct fib_notifier_net {
        struct list_head fib_notifier_ops;
+       struct atomic_notifier_head fib_chain;
 };
 
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
+int call_fib_notifier(struct notifier_block *nb,
                      enum fib_event_type event_type,
                      struct fib_notifier_info *info)
 {
        int err;
 
-       info->net = net;
        err = nb->notifier_call(nb, event_type, info);
        return notifier_to_errno(err);
 }
@@ -31,106 +29,100 @@ EXPORT_SYMBOL(call_fib_notifier);
 int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
                       struct fib_notifier_info *info)
 {
+       struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
        int err;
 
-       info->net = net;
-       err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+       err = atomic_notifier_call_chain(&fn_net->fib_chain, event_type, info);
        return notifier_to_errno(err);
 }
 EXPORT_SYMBOL(call_fib_notifiers);
 
-static unsigned int fib_seq_sum(void)
+static unsigned int fib_seq_sum(struct net *net)
 {
-       struct fib_notifier_net *fn_net;
+       struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
        struct fib_notifier_ops *ops;
        unsigned int fib_seq = 0;
-       struct net *net;
 
        rtnl_lock();
-       down_read(&net_rwsem);
-       for_each_net(net) {
-               fn_net = net_generic(net, fib_notifier_net_id);
-               rcu_read_lock();
-               list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
-                       if (!try_module_get(ops->owner))
-                               continue;
-                       fib_seq += ops->fib_seq_read(net);
-                       module_put(ops->owner);
-               }
-               rcu_read_unlock();
+       rcu_read_lock();
+       list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
+               if (!try_module_get(ops->owner))
+                       continue;
+               fib_seq += ops->fib_seq_read(net);
+               module_put(ops->owner);
        }
-       up_read(&net_rwsem);
+       rcu_read_unlock();
        rtnl_unlock();
 
        return fib_seq;
 }
 
-static int fib_net_dump(struct net *net, struct notifier_block *nb)
+static int fib_net_dump(struct net *net, struct notifier_block *nb,
+                       struct netlink_ext_ack *extack)
 {
        struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
        struct fib_notifier_ops *ops;
+       int err = 0;
 
+       rcu_read_lock();
        list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
-               int err;
-
                if (!try_module_get(ops->owner))
                        continue;
-               err = ops->fib_dump(net, nb);
+               err = ops->fib_dump(net, nb, extack);
                module_put(ops->owner);
                if (err)
-                       return err;
+                       goto unlock;
        }
 
-       return 0;
+unlock:
+       rcu_read_unlock();
+
+       return err;
 }
 
-static bool fib_dump_is_consistent(struct notifier_block *nb,
+static bool fib_dump_is_consistent(struct net *net, struct notifier_block *nb,
                                   void (*cb)(struct notifier_block *nb),
                                   unsigned int fib_seq)
 {
-       atomic_notifier_chain_register(&fib_chain, nb);
-       if (fib_seq == fib_seq_sum())
+       struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+       atomic_notifier_chain_register(&fn_net->fib_chain, nb);
+       if (fib_seq == fib_seq_sum(net))
                return true;
-       atomic_notifier_chain_unregister(&fib_chain, nb);
+       atomic_notifier_chain_unregister(&fn_net->fib_chain, nb);
        if (cb)
                cb(nb);
        return false;
 }
 
 #define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
-                         void (*cb)(struct notifier_block *nb))
+int register_fib_notifier(struct net *net, struct notifier_block *nb,
+                         void (*cb)(struct notifier_block *nb),
+                         struct netlink_ext_ack *extack)
 {
        int retries = 0;
        int err;
 
        do {
-               unsigned int fib_seq = fib_seq_sum();
-               struct net *net;
-
-               rcu_read_lock();
-               for_each_net_rcu(net) {
-                       err = fib_net_dump(net, nb);
-                       if (err)
-                               goto err_fib_net_dump;
-               }
-               rcu_read_unlock();
-
-               if (fib_dump_is_consistent(nb, cb, fib_seq))
+               unsigned int fib_seq = fib_seq_sum(net);
+
+               err = fib_net_dump(net, nb, extack);
+               if (err)
+                       return err;
+
+               if (fib_dump_is_consistent(net, nb, cb, fib_seq))
                        return 0;
        } while (++retries < FIB_DUMP_MAX_RETRIES);
 
        return -EBUSY;
-
-err_fib_net_dump:
-       rcu_read_unlock();
-       return err;
 }
 EXPORT_SYMBOL(register_fib_notifier);
 
-int unregister_fib_notifier(struct notifier_block *nb)
+int unregister_fib_notifier(struct net *net, struct notifier_block *nb)
 {
-       return atomic_notifier_chain_unregister(&fib_chain, nb);
+       struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+       return atomic_notifier_chain_unregister(&fn_net->fib_chain, nb);
 }
 EXPORT_SYMBOL(unregister_fib_notifier);
 
@@ -181,6 +173,7 @@ static int __net_init fib_notifier_net_init(struct net *net)
        struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
 
        INIT_LIST_HEAD(&fn_net->fib_notifier_ops);
+       ATOMIC_INIT_NOTIFIER_HEAD(&fn_net->fib_chain);
        return 0;
 }
 
index dd220ce..3e7e152 100644 (file)
@@ -321,16 +321,18 @@ out:
 }
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib_rule_notifier(struct notifier_block *nb,
                                  enum fib_event_type event_type,
-                                 struct fib_rule *rule, int family)
+                                 struct fib_rule *rule, int family,
+                                 struct netlink_ext_ack *extack)
 {
        struct fib_rule_notifier_info info = {
                .info.family = family,
+               .info.extack = extack,
                .rule = rule,
        };
 
-       return call_fib_notifier(nb, net, event_type, &info.info);
+       return call_fib_notifier(nb, event_type, &info.info);
 }
 
 static int call_fib_rule_notifiers(struct net *net,
@@ -350,20 +352,25 @@ static int call_fib_rule_notifiers(struct net *net,
 }
 
 /* Called with rcu_read_lock() */
-int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
+                  struct netlink_ext_ack *extack)
 {
        struct fib_rules_ops *ops;
        struct fib_rule *rule;
+       int err = 0;
 
        ops = lookup_rules_ops(net, family);
        if (!ops)
                return -EAFNOSUPPORT;
-       list_for_each_entry_rcu(rule, &ops->rules_list, list)
-               call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
-                                      family);
+       list_for_each_entry_rcu(rule, &ops->rules_list, list) {
+               err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD,
+                                            rule, family, extack);
+               if (err)
+                       break;
+       }
        rules_ops_put(ops);
 
-       return 0;
+       return err;
 }
 EXPORT_SYMBOL_GPL(fib_rules_dump);
 
index 3fed575..fc303ab 100644 (file)
@@ -2245,7 +2245,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
         * account for the headroom.
         */
        bytes_sg_total = start - offset + bytes;
-       if (!msg->sg.copy[i] && bytes_sg_total <= len)
+       if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
                goto out;
 
        /* At this point we need to linearize multiple scatterlist
@@ -2450,7 +2450,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
        /* Place newly allocated data buffer */
        sk_mem_charge(msg->sk, len);
        msg->sg.size += len;
-       msg->sg.copy[new] = false;
+       __clear_bit(new, &msg->sg.copy);
        sg_set_page(&msg->sg.data[new], page, len + copy, 0);
        if (rsge.length) {
                get_page(sg_page(&rsge));
@@ -3798,7 +3798,7 @@ BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
-       if (unlikely(skb_size > skb->len))
+       if (unlikely(!skb || skb_size > skb->len))
                return -EFAULT;
 
        return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
@@ -3816,6 +3816,19 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+static u32 bpf_skb_output_btf_ids[5];
+const struct bpf_func_proto bpf_skb_output_proto = {
+       .func           = bpf_skb_event_output,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
+       .btf_id         = bpf_skb_output_btf_ids,
+};
+
 static unsigned short bpf_tunnel_key_af(u64 flags)
 {
        return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
index 68eda10..ca87165 100644 (file)
@@ -114,19 +114,50 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
 {
        struct bpf_prog *attached;
        struct net *net;
+       int ret = 0;
 
        net = current->nsproxy->net_ns;
        mutex_lock(&flow_dissector_mutex);
+
+       if (net == &init_net) {
+               /* BPF flow dissector in the root namespace overrides
+                * any per-net-namespace one. When attaching to root,
+                * make sure we don't have any BPF program attached
+                * to the non-root namespaces.
+                */
+               struct net *ns;
+
+               for_each_net(ns) {
+                       if (ns == &init_net)
+                               continue;
+                       if (rcu_access_pointer(ns->flow_dissector_prog)) {
+                               ret = -EEXIST;
+                               goto out;
+                       }
+               }
+       } else {
+               /* Make sure root flow dissector is not attached
+                * when attaching to the non-root namespace.
+                */
+               if (rcu_access_pointer(init_net.flow_dissector_prog)) {
+                       ret = -EEXIST;
+                       goto out;
+               }
+       }
+
        attached = rcu_dereference_protected(net->flow_dissector_prog,
                                             lockdep_is_held(&flow_dissector_mutex));
-       if (attached) {
-               /* Only one BPF program can be attached at a time */
-               mutex_unlock(&flow_dissector_mutex);
-               return -EEXIST;
+       if (attached == prog) {
+               /* The same program cannot be attached twice */
+               ret = -EINVAL;
+               goto out;
        }
        rcu_assign_pointer(net->flow_dissector_prog, prog);
+       if (attached)
+               bpf_prog_put(attached);
+out:
        mutex_unlock(&flow_dissector_mutex);
-       return 0;
+       return ret;
 }
 
 int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
@@ -147,27 +178,6 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
        mutex_unlock(&flow_dissector_mutex);
        return 0;
 }
-/**
- * skb_flow_get_be16 - extract be16 entity
- * @skb: sk_buff to extract from
- * @poff: offset to extract at
- * @data: raw buffer pointer to the packet
- * @hlen: packet header length
- *
- * The function will try to retrieve a be32 entity at
- * offset poff
- */
-static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
-                               void *data, int hlen)
-{
-       __be16 *u, _u;
-
-       u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
-       if (u)
-               return *u;
-
-       return 0;
-}
 
 /**
  * __skb_flow_get_ports - extract the upper layer ports and return them
@@ -203,6 +213,72 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 }
 EXPORT_SYMBOL(__skb_flow_get_ports);
 
+static bool icmp_has_id(u8 type)
+{
+       switch (type) {
+       case ICMP_ECHO:
+       case ICMP_ECHOREPLY:
+       case ICMP_TIMESTAMP:
+       case ICMP_TIMESTAMPREPLY:
+       case ICMPV6_ECHO_REQUEST:
+       case ICMPV6_ECHO_REPLY:
+               return true;
+       }
+
+       return false;
+}
+
+/**
+ * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
+ * @skb: sk_buff to extract from
+ * @key_icmp: struct flow_dissector_key_icmp to fill
+ * @data: raw buffer pointer to the packet
+ * @toff: offset to extract at
+ * @hlen: packet header length
+ */
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+                          struct flow_dissector_key_icmp *key_icmp,
+                          void *data, int thoff, int hlen)
+{
+       struct icmphdr *ih, _ih;
+
+       ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
+       if (!ih)
+               return;
+
+       key_icmp->type = ih->type;
+       key_icmp->code = ih->code;
+
+       /* As we use 0 to signal that the Id field is not present,
+        * avoid confusion with packets without such field
+        */
+       if (icmp_has_id(ih->type))
+               key_icmp->id = ih->un.echo.id ? : 1;
+       else
+               key_icmp->id = 0;
+}
+EXPORT_SYMBOL(skb_flow_get_icmp_tci);
+
+/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
+ * using skb_flow_get_icmp_tci().
+ */
+static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
+                                   struct flow_dissector *flow_dissector,
+                                   void *target_container,
+                                   void *data, int thoff, int hlen)
+{
+       struct flow_dissector_key_icmp *key_icmp;
+
+       if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
+               return;
+
+       key_icmp = skb_flow_dissector_target(flow_dissector,
+                                            FLOW_DISSECTOR_KEY_ICMP,
+                                            target_container);
+
+       skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
+}
+
 void skb_flow_dissect_meta(const struct sk_buff *skb,
                           struct flow_dissector *flow_dissector,
                           void *target_container)
@@ -853,7 +929,6 @@ bool __skb_flow_dissect(const struct net *net,
        struct flow_dissector_key_basic *key_basic;
        struct flow_dissector_key_addrs *key_addrs;
        struct flow_dissector_key_ports *key_ports;
-       struct flow_dissector_key_icmp *key_icmp;
        struct flow_dissector_key_tags *key_tags;
        struct flow_dissector_key_vlan *key_vlan;
        struct bpf_prog *attached = NULL;
@@ -910,7 +985,10 @@ bool __skb_flow_dissect(const struct net *net,
        WARN_ON_ONCE(!net);
        if (net) {
                rcu_read_lock();
-               attached = rcu_dereference(net->flow_dissector_prog);
+               attached = rcu_dereference(init_net.flow_dissector_prog);
+
+               if (!attached)
+                       attached = rcu_dereference(net->flow_dissector_prog);
 
                if (attached) {
                        struct bpf_flow_keys flow_keys;
@@ -1295,6 +1373,12 @@ ip_proto_again:
                                       data, nhoff, hlen);
                break;
 
+       case IPPROTO_ICMP:
+       case IPPROTO_ICMPV6:
+               __skb_flow_dissect_icmp(skb, flow_dissector, target_container,
+                                       data, nhoff, hlen);
+               break;
+
        default:
                break;
        }
@@ -1308,14 +1392,6 @@ ip_proto_again:
                                                        data, hlen);
        }
 
-       if (dissector_uses_key(flow_dissector,
-                              FLOW_DISSECTOR_KEY_ICMP)) {
-               key_icmp = skb_flow_dissector_target(flow_dissector,
-                                                    FLOW_DISSECTOR_KEY_ICMP,
-                                                    target_container);
-               key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
-       }
-
        /* Process result of IP proto processing */
        switch (fdret) {
        case FLOW_DISSECT_RET_PROTO_AGAIN:
@@ -1365,8 +1441,8 @@ static const void *flow_keys_hash_start(const struct flow_keys *flow)
 static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
 {
        size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
-       BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
-                    sizeof(*flow) - sizeof(flow->addrs));
+
+       BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
 
        switch (flow->control.addr_type) {
        case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
@@ -1412,6 +1488,9 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
 }
 EXPORT_SYMBOL(flow_get_u32_dst);
 
+/* Sort the source and destination IP (and the ports if the IP are the same),
+ * to have consistent hash within the two directions
+ */
 static inline void __flow_hash_consistentify(struct flow_keys *keys)
 {
        int addr_diff, i;
index 36888f5..1d653fb 100644 (file)
@@ -123,8 +123,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
        for_each_possible_cpu(i) {
                struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
                unsigned int start;
-               u64 bytes;
-               u32 packets;
+               u64 bytes, packets;
 
                do {
                        start = u64_stats_fetch_begin_irq(&bcpu->syncp);
@@ -176,12 +175,17 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
 
        if (d->tail) {
                struct gnet_stats_basic sb;
+               int res;
 
                memset(&sb, 0, sizeof(sb));
                sb.bytes = bstats.bytes;
                sb.packets = bstats.packets;
-               return gnet_stats_copy(d, type, &sb, sizeof(sb),
-                                      TCA_STATS_PAD);
+               res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
+               if (res < 0 || sb.packets == bstats.packets)
+                       return res;
+               /* emit 64bit stats only if needed */
+               return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
+                                      sizeof(bstats.packets), TCA_STATS_PAD);
        }
        return 0;
 }
index 3634793..6bbd06f 100644 (file)
@@ -20,8 +20,8 @@ static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff
        struct hlist_head *h;
        unsigned int count = 0, offset = get_offset(*pos);
 
-       h = &net->dev_name_head[get_bucket(*pos)];
-       hlist_for_each_entry_rcu(dev, h, name_hlist) {
+       h = &net->dev_index_head[get_bucket(*pos)];
+       hlist_for_each_entry_rcu(dev, h, index_hlist) {
                if (++count == offset)
                        return dev;
        }
index 48b1e42..294bfcf 100644 (file)
@@ -3404,7 +3404,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
        HARD_TX_LOCK(odev, txq, smp_processor_id());
 
        if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) {
-               ret = NETDEV_TX_BUSY;
                pkt_dev->last_ok = 0;
                goto unlock;
        }
index c81cd80..000eddb 100644 (file)
@@ -980,6 +980,19 @@ static size_t rtnl_xdp_size(void)
        return xdp_size;
 }
 
+static size_t rtnl_prop_list_size(const struct net_device *dev)
+{
+       struct netdev_name_node *name_node;
+       size_t size;
+
+       if (list_empty(&dev->name_node->list))
+               return 0;
+       size = nla_total_size(0);
+       list_for_each_entry(name_node, &dev->name_node->list, list)
+               size += nla_total_size(ALTIFNAMSIZ);
+       return size;
+}
+
 static noinline size_t if_nlmsg_size(const struct net_device *dev,
                                     u32 ext_filter_mask)
 {
@@ -1027,6 +1040,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
               + nla_total_size(4)  /* IFLA_CARRIER_DOWN_COUNT */
               + nla_total_size(4)  /* IFLA_MIN_MTU */
               + nla_total_size(4)  /* IFLA_MAX_MTU */
+              + rtnl_prop_list_size(dev)
               + 0;
 }
 
@@ -1584,6 +1598,42 @@ static int rtnl_fill_link_af(struct sk_buff *skb,
        return 0;
 }
 
+static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
+                                const struct net_device *dev)
+{
+       struct netdev_name_node *name_node;
+       int count = 0;
+
+       list_for_each_entry(name_node, &dev->name_node->list, list) {
+               if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name))
+                       return -EMSGSIZE;
+               count++;
+       }
+       return count;
+}
+
+static int rtnl_fill_prop_list(struct sk_buff *skb,
+                              const struct net_device *dev)
+{
+       struct nlattr *prop_list;
+       int ret;
+
+       prop_list = nla_nest_start(skb, IFLA_PROP_LIST);
+       if (!prop_list)
+               return -EMSGSIZE;
+
+       ret = rtnl_fill_alt_ifnames(skb, dev);
+       if (ret <= 0)
+               goto nest_cancel;
+
+       nla_nest_end(skb, prop_list);
+       return 0;
+
+nest_cancel:
+       nla_nest_cancel(skb, prop_list);
+       return ret;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb,
                            struct net_device *dev, struct net *src_net,
                            int type, u32 pid, u32 seq, u32 change,
@@ -1697,6 +1747,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
                goto nla_put_failure_rcu;
        rcu_read_unlock();
 
+       if (rtnl_fill_prop_list(skb, dev))
+               goto nla_put_failure;
+
        nlmsg_end(skb, nlh);
        return 0;
 
@@ -1750,6 +1803,9 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
        [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
        [IFLA_MIN_MTU]          = { .type = NLA_U32 },
        [IFLA_MAX_MTU]          = { .type = NLA_U32 },
+       [IFLA_PROP_LIST]        = { .type = NLA_NESTED },
+       [IFLA_ALT_IFNAME]       = { .type = NLA_STRING,
+                                   .len = ALTIFNAMSIZ - 1 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2723,6 +2779,26 @@ errout:
        return err;
 }
 
+static struct net_device *rtnl_dev_get(struct net *net,
+                                      struct nlattr *ifname_attr,
+                                      struct nlattr *altifname_attr,
+                                      char *ifname)
+{
+       char buffer[ALTIFNAMSIZ];
+
+       if (!ifname) {
+               ifname = buffer;
+               if (ifname_attr)
+                       nla_strlcpy(ifname, ifname_attr, IFNAMSIZ);
+               else if (altifname_attr)
+                       nla_strlcpy(ifname, altifname_attr, ALTIFNAMSIZ);
+               else
+                       return NULL;
+       }
+
+       return __dev_get_by_name(net, ifname);
+}
+
 static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
                        struct netlink_ext_ack *extack)
 {
@@ -2751,8 +2827,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
        ifm = nlmsg_data(nlh);
        if (ifm->ifi_index > 0)
                dev = __dev_get_by_index(net, ifm->ifi_index);
-       else if (tb[IFLA_IFNAME])
-               dev = __dev_get_by_name(net, ifname);
+       else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+               dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
        else
                goto errout;
 
@@ -2825,7 +2901,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct net *tgt_net = net;
        struct net_device *dev = NULL;
        struct ifinfomsg *ifm;
-       char ifname[IFNAMSIZ];
        struct nlattr *tb[IFLA_MAX+1];
        int err;
        int netnsid = -1;
@@ -2839,9 +2914,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (err < 0)
                return err;
 
-       if (tb[IFLA_IFNAME])
-               nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
-
        if (tb[IFLA_TARGET_NETNSID]) {
                netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
                tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
@@ -2853,8 +2925,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
        ifm = nlmsg_data(nlh);
        if (ifm->ifi_index > 0)
                dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
-       else if (tb[IFLA_IFNAME])
-               dev = __dev_get_by_name(tgt_net, ifname);
+       else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+               dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
+                                  tb[IFLA_ALT_IFNAME], NULL);
        else if (tb[IFLA_GROUP])
                err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
        else
@@ -3025,12 +3098,10 @@ replay:
        ifm = nlmsg_data(nlh);
        if (ifm->ifi_index > 0)
                dev = __dev_get_by_index(net, ifm->ifi_index);
-       else {
-               if (ifname[0])
-                       dev = __dev_get_by_name(net, ifname);
-               else
-                       dev = NULL;
-       }
+       else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+               dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
+       else
+               dev = NULL;
 
        if (dev) {
                master_dev = netdev_master_upper_dev_get(dev);
@@ -3292,6 +3363,7 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb,
 
                switch (i) {
                case IFLA_IFNAME:
+               case IFLA_ALT_IFNAME:
                case IFLA_EXT_MASK:
                case IFLA_TARGET_NETNSID:
                        break;
@@ -3310,7 +3382,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct net *net = sock_net(skb->sk);
        struct net *tgt_net = net;
        struct ifinfomsg *ifm;
-       char ifname[IFNAMSIZ];
        struct nlattr *tb[IFLA_MAX+1];
        struct net_device *dev = NULL;
        struct sk_buff *nskb;
@@ -3333,9 +3404,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
                        return PTR_ERR(tgt_net);
        }
 
-       if (tb[IFLA_IFNAME])
-               nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
-
        if (tb[IFLA_EXT_MASK])
                ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 
@@ -3343,8 +3411,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
        ifm = nlmsg_data(nlh);
        if (ifm->ifi_index > 0)
                dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
-       else if (tb[IFLA_IFNAME])
-               dev = __dev_get_by_name(tgt_net, ifname);
+       else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+               dev = rtnl_dev_get(tgt_net, tb[IFLA_IFNAME],
+                                  tb[IFLA_ALT_IFNAME], NULL);
        else
                goto out;
 
@@ -3374,6 +3443,100 @@ out:
        return err;
 }
 
+static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
+                          bool *changed, struct netlink_ext_ack *extack)
+{
+       char *alt_ifname;
+       int err;
+
+       err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
+       if (err)
+               return err;
+
+       alt_ifname = nla_data(attr);
+       if (cmd == RTM_NEWLINKPROP) {
+               alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
+               if (!alt_ifname)
+                       return -ENOMEM;
+               err = netdev_name_node_alt_create(dev, alt_ifname);
+               if (err) {
+                       kfree(alt_ifname);
+                       return err;
+               }
+       } else if (cmd == RTM_DELLINKPROP) {
+               err = netdev_name_node_alt_destroy(dev, alt_ifname);
+               if (err)
+                       return err;
+       } else {
+               WARN_ON(1);
+               return 0;
+       }
+
+       *changed = true;
+       return 0;
+}
+
+static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
+                        struct netlink_ext_ack *extack)
+{
+       struct net *net = sock_net(skb->sk);
+       struct nlattr *tb[IFLA_MAX + 1];
+       struct net_device *dev;
+       struct ifinfomsg *ifm;
+       bool changed = false;
+       struct nlattr *attr;
+       int err, rem;
+
+       err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
+       if (err)
+               return err;
+
+       err = rtnl_ensure_unique_netns(tb, extack, true);
+       if (err)
+               return err;
+
+       ifm = nlmsg_data(nlh);
+       if (ifm->ifi_index > 0)
+               dev = __dev_get_by_index(net, ifm->ifi_index);
+       else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+               dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
+                                  tb[IFLA_ALT_IFNAME], NULL);
+       else
+               return -EINVAL;
+
+       if (!dev)
+               return -ENODEV;
+
+       if (!tb[IFLA_PROP_LIST])
+               return 0;
+
+       nla_for_each_nested(attr, tb[IFLA_PROP_LIST], rem) {
+               switch (nla_type(attr)) {
+               case IFLA_ALT_IFNAME:
+                       err = rtnl_alt_ifname(cmd, dev, attr, &changed, extack);
+                       if (err)
+                               return err;
+                       break;
+               }
+       }
+
+       if (changed)
+               netdev_state_change(dev);
+       return 0;
+}
+
+static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
+                           struct netlink_ext_ack *extack)
+{
+       return rtnl_linkprop(RTM_NEWLINKPROP, skb, nlh, extack);
+}
+
+static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
+                           struct netlink_ext_ack *extack)
+{
+       return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack);
+}
+
 static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
        struct net *net = sock_net(skb->sk);
@@ -5332,6 +5495,9 @@ void __init rtnetlink_init(void)
        rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
        rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
 
+       rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0);
+       rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
+
        rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
        rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
        rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
index ac78a57..71787f7 100644 (file)
@@ -333,7 +333,6 @@ EXPORT_SYMBOL(__sk_backlog_rcv);
 static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
        struct __kernel_sock_timeval tv;
-       int size;
 
        if (timeo == MAX_SCHEDULE_TIMEOUT) {
                tv.tv_sec = 0;
@@ -354,13 +353,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
                old_tv.tv_sec = tv.tv_sec;
                old_tv.tv_usec = tv.tv_usec;
                *(struct __kernel_old_timeval *)optval = old_tv;
-               size = sizeof(old_tv);
-       } else {
-               *(struct __kernel_sock_timeval *)optval = tv;
-               size = sizeof(tv);
+               return sizeof(old_tv);
        }
 
-       return size;
+       *(struct __kernel_sock_timeval *)optval = tv;
+       return sizeof(tv);
 }
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
@@ -687,7 +684,8 @@ out:
        return ret;
 }
 
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
+static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
+                                    int valbool)
 {
        if (valbool)
                sock_set_flag(sk, bit);
@@ -3015,7 +3013,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp,
                return -ENOENT;
        if (ts.tv_sec == 0) {
                ktime_t kt = ktime_get_real();
-               sock_write_timestamp(sk, kt);;
+               sock_write_timestamp(sk, kt);
                ts = ktime_to_timespec64(kt);
        }
 
@@ -3042,7 +3040,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp,
 }
 EXPORT_SYMBOL(sock_gettstamp);
 
-void sock_enable_timestamp(struct sock *sk, int flag)
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
 {
        if (!sock_flag(sk, flag)) {
                unsigned long previous_flags = sk->sk_flags;
index d7bf62f..20781ad 100644 (file)
@@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
 
 /* XDP RX runs under NAPI protection, and in different delivery error
  * scenarios (e.g. queue full), it is possible to return the xdp_frame
- * while still leveraging this protection.  The @napi_direct boolian
+ * while still leveraging this protection.  The @napi_direct boolean
  * is used for those calls sites.  Thus, allowing for faster recycling
  * of xdp_frames/pages in those cases.
  */
index 43120a3..17281fe 100644 (file)
@@ -246,7 +246,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 #ifdef CONFIG_PM_SLEEP
 static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
 {
-       return dsa_is_user_port(ds, p) && ds->ports[p].slave;
+       const struct dsa_port *dp = dsa_to_port(ds, p);
+
+       return dp->type == DSA_PORT_TYPE_USER && dp->slave;
 }
 
 int dsa_switch_suspend(struct dsa_switch *ds)
@@ -258,7 +260,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
                if (!dsa_is_port_initialized(ds, i))
                        continue;
 
-               ret = dsa_slave_suspend(ds->ports[i].slave);
+               ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave);
                if (ret)
                        return ret;
        }
@@ -285,7 +287,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
                if (!dsa_is_port_initialized(ds, i))
                        continue;
 
-               ret = dsa_slave_resume(ds->ports[i].slave);
+               ret = dsa_slave_resume(dsa_to_port(ds, i)->slave);
                if (ret)
                        return ret;
        }
@@ -329,6 +331,91 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
 
+/* Generic devlink "param get" callback for DSA switches: recover the
+ * struct dsa_switch from the devlink private data and forward the
+ * request to the switch driver's devlink_param_get op.
+ *
+ * Returns -EOPNOTSUPP if the driver does not implement the op,
+ * otherwise whatever the driver op returns.
+ */
+int dsa_devlink_param_get(struct devlink *dl, u32 id,
+                         struct devlink_param_gset_ctx *ctx)
+{
+       struct dsa_devlink_priv *dl_priv;
+       struct dsa_switch *ds;
+
+       dl_priv = devlink_priv(dl);
+       ds = dl_priv->ds;
+
+       if (!ds->ops->devlink_param_get)
+               return -EOPNOTSUPP;
+
+       return ds->ops->devlink_param_get(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
+
+/* Generic devlink "param set" callback for DSA switches: mirror of
+ * dsa_devlink_param_get(), dispatching to the driver's
+ * devlink_param_set op or returning -EOPNOTSUPP when absent.
+ */
+int dsa_devlink_param_set(struct devlink *dl, u32 id,
+                         struct devlink_param_gset_ctx *ctx)
+{
+       struct dsa_devlink_priv *dl_priv;
+       struct dsa_switch *ds;
+
+       dl_priv = devlink_priv(dl);
+       ds = dl_priv->ds;
+
+       if (!ds->ops->devlink_param_set)
+               return -EOPNOTSUPP;
+
+       return ds->ops->devlink_param_set(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_set);
+
+/* Register a driver-supplied array of devlink parameters against the
+ * switch's devlink instance.  Thin wrapper so DSA drivers need not
+ * touch ds->devlink directly.
+ */
+int dsa_devlink_params_register(struct dsa_switch *ds,
+                               const struct devlink_param *params,
+                               size_t params_count)
+{
+       return devlink_params_register(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_register);
+
+/* Unregister parameters previously added with
+ * dsa_devlink_params_register(); must be passed the same array.
+ */
+void dsa_devlink_params_unregister(struct dsa_switch *ds,
+                                  const struct devlink_param *params,
+                                  size_t params_count)
+{
+       devlink_params_unregister(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_unregister);
+
+/* Register a devlink resource on the switch's devlink instance.
+ * Parameters are forwarded unchanged to devlink_resource_register();
+ * see that function for the meaning of the id/size arguments.
+ */
+int dsa_devlink_resource_register(struct dsa_switch *ds,
+                                 const char *resource_name,
+                                 u64 resource_size,
+                                 u64 resource_id,
+                                 u64 parent_resource_id,
+                                 const struct devlink_resource_size_params *size_params)
+{
+       return devlink_resource_register(ds->devlink, resource_name,
+                                        resource_size, resource_id,
+                                        parent_resource_id,
+                                        size_params);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_register);
+
+/* Unregister all devlink resources of this switch (NULL = whole tree,
+ * per devlink_resources_unregister() semantics).
+ */
+void dsa_devlink_resources_unregister(struct dsa_switch *ds)
+{
+       devlink_resources_unregister(ds->devlink, NULL);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister);
+
+/* Attach an occupancy-get callback to a previously registered devlink
+ * resource.  occ_get_priv is passed back to occ_get verbatim.
+ */
+void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
+                                          u64 resource_id,
+                                          devlink_resource_occ_get_t *occ_get,
+                                          void *occ_get_priv)
+{
+       return devlink_resource_occ_get_register(ds->devlink, resource_id,
+                                                occ_get, occ_get_priv);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register);
+
+/* Detach the occupancy-get callback registered with
+ * dsa_devlink_resource_occ_get_register().
+ */
+void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
+                                            u64 resource_id)
+{
+       devlink_resource_occ_get_unregister(ds->devlink, resource_id);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
+
 static int __init dsa_init_module(void)
 {
        int rc;
index 716d265..9ef2caa 100644 (file)
@@ -45,6 +45,10 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
 
        dst->index = index;
 
+       INIT_LIST_HEAD(&dst->rtable);
+
+       INIT_LIST_HEAD(&dst->ports);
+
        INIT_LIST_HEAD(&dst->list);
        list_add_tail(&dst->list, &dsa_tree_list);
 
@@ -111,24 +115,38 @@ static bool dsa_port_is_user(struct dsa_port *dp)
 static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
                                                   struct device_node *dn)
 {
-       struct dsa_switch *ds;
        struct dsa_port *dp;
-       int device, port;
 
-       for (device = 0; device < DSA_MAX_SWITCHES; device++) {
-               ds = dst->ds[device];
-               if (!ds)
-                       continue;
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dp->dn == dn)
+                       return dp;
 
-               for (port = 0; port < ds->num_ports; port++) {
-                       dp = &ds->ports[port];
+       return NULL;
+}
 
-                       if (dp->dn == dn)
-                               return dp;
-               }
-       }
+struct dsa_link *dsa_link_touch(struct dsa_port *dp, struct dsa_port *link_dp)
+{
+       struct dsa_switch *ds = dp->ds;
+       struct dsa_switch_tree *dst;
+       struct dsa_link *dl;
 
-       return NULL;
+       dst = ds->dst;
+
+       list_for_each_entry(dl, &dst->rtable, list)
+               if (dl->dp == dp && dl->link_dp == link_dp)
+                       return dl;
+
+       dl = kzalloc(sizeof(*dl), GFP_KERNEL);
+       if (!dl)
+               return NULL;
+
+       dl->dp = dp;
+       dl->link_dp = link_dp;
+
+       INIT_LIST_HEAD(&dl->list);
+       list_add_tail(&dl->list, &dst->rtable);
+
+       return dl;
 }
 
 static bool dsa_port_setup_routing_table(struct dsa_port *dp)
@@ -138,6 +156,7 @@ static bool dsa_port_setup_routing_table(struct dsa_port *dp)
        struct device_node *dn = dp->dn;
        struct of_phandle_iterator it;
        struct dsa_port *link_dp;
+       struct dsa_link *dl;
        int err;
 
        of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
@@ -147,24 +166,22 @@ static bool dsa_port_setup_routing_table(struct dsa_port *dp)
                        return false;
                }
 
-               ds->rtable[link_dp->ds->index] = dp->index;
+               dl = dsa_link_touch(dp, link_dp);
+               if (!dl) {
+                       of_node_put(it.node);
+                       return false;
+               }
        }
 
        return true;
 }
 
-static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
 {
        bool complete = true;
        struct dsa_port *dp;
-       int i;
-
-       for (i = 0; i < DSA_MAX_SWITCHES; i++)
-               ds->rtable[i] = DSA_RTABLE_NONE;
-
-       for (i = 0; i < ds->num_ports; i++) {
-               dp = &ds->ports[i];
 
+       list_for_each_entry(dp, &dst->ports, list) {
                if (dsa_port_is_dsa(dp)) {
                        complete = dsa_port_setup_routing_table(dp);
                        if (!complete)
@@ -175,81 +192,42 @@ static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
        return complete;
 }
 
-static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
-{
-       struct dsa_switch *ds;
-       bool complete = true;
-       int device;
-
-       for (device = 0; device < DSA_MAX_SWITCHES; device++) {
-               ds = dst->ds[device];
-               if (!ds)
-                       continue;
-
-               complete = dsa_switch_setup_routing_table(ds);
-               if (!complete)
-                       break;
-       }
-
-       return complete;
-}
-
 static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
 {
-       struct dsa_switch *ds;
        struct dsa_port *dp;
-       int device, port;
 
-       for (device = 0; device < DSA_MAX_SWITCHES; device++) {
-               ds = dst->ds[device];
-               if (!ds)
-                       continue;
-
-               for (port = 0; port < ds->num_ports; port++) {
-                       dp = &ds->ports[port];
-
-                       if (dsa_port_is_cpu(dp))
-                               return dp;
-               }
-       }
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dsa_port_is_cpu(dp))
+                       return dp;
 
        return NULL;
 }
 
 static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
 {
-       struct dsa_switch *ds;
-       struct dsa_port *dp;
-       int device, port;
+       struct dsa_port *cpu_dp, *dp;
 
-       /* DSA currently only supports a single CPU port */
-       dst->cpu_dp = dsa_tree_find_first_cpu(dst);
-       if (!dst->cpu_dp) {
-               pr_warn("Tree has no master device\n");
+       cpu_dp = dsa_tree_find_first_cpu(dst);
+       if (!cpu_dp) {
+               pr_err("DSA: tree %d has no CPU port\n", dst->index);
                return -EINVAL;
        }
 
        /* Assign the default CPU port to all ports of the fabric */
-       for (device = 0; device < DSA_MAX_SWITCHES; device++) {
-               ds = dst->ds[device];
-               if (!ds)
-                       continue;
-
-               for (port = 0; port < ds->num_ports; port++) {
-                       dp = &ds->ports[port];
-
-                       if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
-                               dp->cpu_dp = dst->cpu_dp;
-               }
-       }
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+                       dp->cpu_dp = cpu_dp;
 
        return 0;
 }
 
 static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
 {
-       /* DSA currently only supports a single CPU port */
-       dst->cpu_dp = NULL;
+       struct dsa_port *dp;
+
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+                       dp->cpu_dp = NULL;
 }
 
 static int dsa_port_setup(struct dsa_port *dp)
@@ -265,6 +243,9 @@ static int dsa_port_setup(struct dsa_port *dp)
        bool dsa_port_enabled = false;
        int err = 0;
 
+       if (dp->setup)
+               return 0;
+
        switch (dp->type) {
        case DSA_PORT_TYPE_UNUSED:
                dsa_port_disable(dp);
@@ -333,14 +314,21 @@ static int dsa_port_setup(struct dsa_port *dp)
                dsa_port_link_unregister_of(dp);
        if (err && devlink_port_registered)
                devlink_port_unregister(dlp);
+       if (err)
+               return err;
 
-       return err;
+       dp->setup = true;
+
+       return 0;
 }
 
 static void dsa_port_teardown(struct dsa_port *dp)
 {
        struct devlink_port *dlp = &dp->devlink_port;
 
+       if (!dp->setup)
+               return;
+
        switch (dp->type) {
        case DSA_PORT_TYPE_UNUSED:
                break;
@@ -363,11 +351,17 @@ static void dsa_port_teardown(struct dsa_port *dp)
                }
                break;
        }
+
+       dp->setup = false;
 }
 
 static int dsa_switch_setup(struct dsa_switch *ds)
 {
-       int err = 0;
+       struct dsa_devlink_priv *dl_priv;
+       int err;
+
+       if (ds->setup)
+               return 0;
 
        /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
         * driver and before ops->setup() has run, since the switch drivers and
@@ -379,9 +373,11 @@ static int dsa_switch_setup(struct dsa_switch *ds)
        /* Add the switch to devlink before calling setup, so that setup can
         * add dpipe tables
         */
-       ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
+       ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
        if (!ds->devlink)
                return -ENOMEM;
+       dl_priv = devlink_priv(ds->devlink);
+       dl_priv->ds = ds;
 
        err = devlink_register(ds->devlink, ds->dev);
        if (err)
@@ -395,6 +391,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
        if (err < 0)
                goto unregister_notifier;
 
+       devlink_params_publish(ds->devlink);
+
        if (!ds->slave_mii_bus && ds->ops->phy_read) {
                ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
                if (!ds->slave_mii_bus) {
@@ -409,6 +407,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
                        goto unregister_notifier;
        }
 
+       ds->setup = true;
+
        return 0;
 
 unregister_notifier:
@@ -424,6 +424,9 @@ free_devlink:
 
 static void dsa_switch_teardown(struct dsa_switch *ds)
 {
+       if (!ds->setup)
+               return;
+
        if (ds->slave_mii_bus && ds->ops->phy_read)
                mdiobus_unregister(ds->slave_mii_bus);
 
@@ -438,95 +441,72 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
                ds->devlink = NULL;
        }
 
+       ds->setup = false;
 }
 
 static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 {
-       struct dsa_switch *ds;
        struct dsa_port *dp;
-       int device, port, i;
-       int err = 0;
-
-       for (device = 0; device < DSA_MAX_SWITCHES; device++) {
-               ds = dst->ds[device];
-               if (!ds)
-                       continue;
+       int err;
 
-               err = dsa_switch_setup(ds);
+       list_for_each_entry(dp, &dst->ports, list) {
+               err = dsa_switch_setup(dp->ds);
                if (err)
-                       goto switch_teardown;
-
-               for (port = 0; port < ds->num_ports; port++) {
-                       dp = &ds->ports[port];
+                       goto teardown;
+       }
 
-                       err = dsa_port_setup(dp);
-                       if (err)
-                               goto ports_teardown;
-               }
+       list_for_each_entry(dp, &dst->ports, list) {
+               err = dsa_port_setup(dp);
+               if (err)
+                       goto teardown;
        }
 
        return 0;
 
-ports_teardown:
-       for (i = 0; i < port; i++)
-               dsa_port_teardown(&ds->ports[i]);
+teardown:
+       list_for_each_entry(dp, &dst->ports, list)
+               dsa_port_teardown(dp);
 
-       dsa_switch_teardown(ds);
-
-switch_teardown:
-       for (i = 0; i < device; i++) {
-               ds = dst->ds[i];
-               if (!ds)
-                       continue;
-
-               for (port = 0; port < ds->num_ports; port++) {
-                       dp = &ds->ports[port];
-
-                       dsa_port_teardown(dp);
-               }
-
-               dsa_switch_teardown(ds);
-       }
+       list_for_each_entry(dp, &dst->ports, list)
+               dsa_switch_teardown(dp->ds);
 
        return err;
 }
 
 static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
 {
-       struct dsa_switch *ds;
        struct dsa_port *dp;
-       int device, port;
-
-       for (device = 0; device < DSA_MAX_SWITCHES; device++) {
-               ds = dst->ds[device];
-               if (!ds)
-                       continue;
 
-               for (port = 0; port < ds->num_ports; port++) {
-                       dp = &ds->ports[port];
+       list_for_each_entry(dp, &dst->ports, list)
+               dsa_port_teardown(dp);
 
-                       dsa_port_teardown(dp);
-               }
-
-               dsa_switch_teardown(ds);
-       }
+       list_for_each_entry(dp, &dst->ports, list)
+               dsa_switch_teardown(dp->ds);
 }
 
 static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 {
-       struct dsa_port *cpu_dp = dst->cpu_dp;
-       struct net_device *master = cpu_dp->master;
+       struct dsa_port *dp;
+       int err;
 
-       /* DSA currently supports a single pair of CPU port and master device */
-       return dsa_master_setup(master, cpu_dp);
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dsa_port_is_cpu(dp)) {
+                       err = dsa_master_setup(dp->master, dp);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
 }
 
 static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
 {
-       struct dsa_port *cpu_dp = dst->cpu_dp;
-       struct net_device *master = cpu_dp->master;
+       struct dsa_port *dp;
 
-       return dsa_master_teardown(master);
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dsa_port_is_cpu(dp))
+                       dsa_master_teardown(dp->master);
 }
 
 static int dsa_tree_setup(struct dsa_switch_tree *dst)
@@ -572,6 +552,8 @@ teardown_default_cpu:
 
 static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 {
+       struct dsa_link *dl, *next;
+
        if (!dst->setup)
                return;
 
@@ -581,39 +563,36 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 
        dsa_tree_teardown_default_cpu(dst);
 
+       list_for_each_entry_safe(dl, next, &dst->rtable, list) {
+               list_del(&dl->list);
+               kfree(dl);
+       }
+
        pr_info("DSA: tree %d torn down\n", dst->index);
 
        dst->setup = false;
 }
 
-static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
-                                  unsigned int index)
+static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
 {
-       dsa_tree_teardown(dst);
+       struct dsa_switch_tree *dst = ds->dst;
+       struct dsa_port *dp;
 
-       dst->ds[index] = NULL;
-       dsa_tree_put(dst);
-}
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dp->ds == ds && dp->index == index)
+                       return dp;
 
-static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
-                              struct dsa_switch *ds)
-{
-       unsigned int index = ds->index;
-       int err;
+       dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+       if (!dp)
+               return NULL;
 
-       if (dst->ds[index])
-               return -EBUSY;
+       dp->ds = ds;
+       dp->index = index;
 
-       dsa_tree_get(dst);
-       dst->ds[index] = ds;
+       INIT_LIST_HEAD(&dp->list);
+       list_add_tail(&dp->list, &dst->ports);
 
-       err = dsa_tree_setup(dst);
-       if (err) {
-               dst->ds[index] = NULL;
-               dsa_tree_put(dst);
-       }
-
-       return err;
+       return dp;
 }
 
 static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
@@ -708,7 +687,7 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
                        goto out_put_node;
                }
 
-               dp = &ds->ports[reg];
+               dp = dsa_to_port(ds, reg);
 
                err = dsa_port_parse_of(dp, port);
                if (err)
@@ -732,8 +711,6 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
                return sz;
 
        ds->index = m[1];
-       if (ds->index >= DSA_MAX_SWITCHES)
-               return -EINVAL;
 
        ds->dst = dsa_tree_touch(m[0]);
        if (!ds->dst)
@@ -742,6 +719,20 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
        return 0;
 }
 
+/* Ensure a struct dsa_port exists for every port index of the switch,
+ * creating missing ones through dsa_port_touch().
+ *
+ * Returns 0 on success or -ENOMEM if a port could not be allocated.
+ * Ports created by earlier iterations are left in place on failure;
+ * presumably they are reclaimed by the switch removal path — verify
+ * against dsa_switch_remove().
+ */
+static int dsa_switch_touch_ports(struct dsa_switch *ds)
+{
+       struct dsa_port *dp;
+       int port;
+
+       for (port = 0; port < ds->num_ports; port++) {
+               dp = dsa_port_touch(ds, port);
+               if (!dp)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+
 static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
 {
        int err;
@@ -750,6 +741,10 @@ static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
        if (err)
                return err;
 
+       err = dsa_switch_touch_ports(ds);
+       if (err)
+               return err;
+
        return dsa_switch_parse_ports_of(ds, dn);
 }
 
@@ -787,7 +782,7 @@ static int dsa_switch_parse_ports(struct dsa_switch *ds,
        for (i = 0; i < DSA_MAX_PORTS; i++) {
                name = cd->port_names[i];
                dev = cd->netdev[i];
-               dp = &ds->ports[i];
+               dp = dsa_to_port(ds, i);
 
                if (!name)
                        continue;
@@ -807,6 +802,8 @@ static int dsa_switch_parse_ports(struct dsa_switch *ds,
 
 static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
 {
+       int err;
+
        ds->cd = cd;
 
        /* We don't support interconnected switches nor multiple trees via
@@ -817,22 +814,29 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
        if (!ds->dst)
                return -ENOMEM;
 
-       return dsa_switch_parse_ports(ds, cd);
-}
-
-static int dsa_switch_add(struct dsa_switch *ds)
-{
-       struct dsa_switch_tree *dst = ds->dst;
+       err = dsa_switch_touch_ports(ds);
+       if (err)
+               return err;
 
-       return dsa_tree_add_switch(dst, ds);
+       return dsa_switch_parse_ports(ds, cd);
 }
 
 static int dsa_switch_probe(struct dsa_switch *ds)
 {
-       struct dsa_chip_data *pdata = ds->dev->platform_data;
-       struct device_node *np = ds->dev->of_node;
+       struct dsa_switch_tree *dst;
+       struct dsa_chip_data *pdata;
+       struct device_node *np;
        int err;
 
+       if (!ds->dev)
+               return -ENODEV;
+
+       pdata = ds->dev->platform_data;
+       np = ds->dev->of_node;
+
+       if (!ds->num_ports)
+               return -EINVAL;
+
        if (np)
                err = dsa_switch_parse_of(ds, np);
        else if (pdata)
@@ -843,29 +847,14 @@ static int dsa_switch_probe(struct dsa_switch *ds)
        if (err)
                return err;
 
-       return dsa_switch_add(ds);
-}
-
-struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
-{
-       struct dsa_switch *ds;
-       int i;
-
-       ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL);
-       if (!ds)
-               return NULL;
-
-       ds->dev = dev;
-       ds->num_ports = n;
-
-       for (i = 0; i < ds->num_ports; ++i) {
-               ds->ports[i].index = i;
-               ds->ports[i].ds = ds;
-       }
+       dst = ds->dst;
+       dsa_tree_get(dst);
+       err = dsa_tree_setup(dst);
+       if (err)
+               dsa_tree_put(dst);
 
-       return ds;
+       return err;
 }
-EXPORT_SYMBOL_GPL(dsa_switch_alloc);
 
 int dsa_register_switch(struct dsa_switch *ds)
 {
@@ -883,9 +872,16 @@ EXPORT_SYMBOL_GPL(dsa_register_switch);
 static void dsa_switch_remove(struct dsa_switch *ds)
 {
        struct dsa_switch_tree *dst = ds->dst;
-       unsigned int index = ds->index;
+       struct dsa_port *dp, *next;
 
-       dsa_tree_remove_switch(dst, index);
+       dsa_tree_teardown(dst);
+
+       list_for_each_entry_safe(dp, next, &dst->ports, list) {
+               list_del(&dp->list);
+               kfree(dp);
+       }
+
+       dsa_tree_put(dst);
 }
 
 void dsa_unregister_switch(struct dsa_switch *ds)
index 12f8c7e..53e7577 100644 (file)
@@ -104,25 +104,14 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 {
        struct dsa_port *cpu_dp = dev->dsa_ptr;
        struct dsa_switch_tree *dst = cpu_dp->dst;
-       struct dsa_switch *ds;
-       struct dsa_port *slave_port;
+       struct dsa_port *dp;
 
-       if (device < 0 || device >= DSA_MAX_SWITCHES)
-               return NULL;
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dp->ds->index == device && dp->index == port &&
+                   dp->type == DSA_PORT_TYPE_USER)
+                       return dp->slave;
 
-       ds = dst->ds[device];
-       if (!ds)
-               return NULL;
-
-       if (port < 0 || port >= ds->num_ports)
-               return NULL;
-
-       slave_port = &ds->ports[port];
-
-       if (unlikely(slave_port->type != DSA_PORT_TYPE_USER))
-               return NULL;
-
-       return slave_port->slave;
+       return NULL;
 }
 
 /* port.c */
index 9b54e5a..6e93c36 100644 (file)
@@ -561,7 +561,7 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
        struct dsa_switch *ds = dp->ds;
        struct phy_device *phydev;
        int port = dp->index;
-       int mode;
+       phy_interface_t mode;
        int err;
 
        err = of_phy_register_fixed_link(dn);
@@ -574,8 +574,8 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
 
        phydev = of_phy_find_device(dn);
 
-       mode = of_get_phy_mode(dn);
-       if (mode < 0)
+       err = of_get_phy_mode(dn, &mode);
+       if (err)
                mode = PHY_INTERFACE_MODE_NA;
        phydev->interface = mode;
 
@@ -593,10 +593,11 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
 {
        struct dsa_switch *ds = dp->ds;
        struct device_node *port_dn = dp->dn;
-       int mode, err;
+       phy_interface_t mode;
+       int err;
 
-       mode = of_get_phy_mode(port_dn);
-       if (mode < 0)
+       err = of_get_phy_mode(port_dn, &mode);
+       if (err)
                mode = PHY_INTERFACE_MODE_NA;
 
        dp->pl_config.dev = ds->dev;
index 028e65f..78ffc87 100644 (file)
@@ -789,6 +789,22 @@ static int dsa_slave_set_link_ksettings(struct net_device *dev,
        return phylink_ethtool_ksettings_set(dp->pl, cmd);
 }
 
+/* ethtool get_pauseparam op for DSA slave netdevs: delegate to the
+ * port's phylink instance.
+ */
+static void dsa_slave_get_pauseparam(struct net_device *dev,
+                                    struct ethtool_pauseparam *pause)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+
+       phylink_ethtool_get_pauseparam(dp->pl, pause);
+}
+
+/* ethtool set_pauseparam op for DSA slave netdevs: delegate to the
+ * port's phylink instance and return its result.
+ */
+static int dsa_slave_set_pauseparam(struct net_device *dev,
+                                   struct ethtool_pauseparam *pause)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+
+       return phylink_ethtool_set_pauseparam(dp->pl, pause);
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static int dsa_slave_netpoll_setup(struct net_device *dev,
                                   struct netpoll_info *ni)
@@ -1192,6 +1208,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
        .get_eee                = dsa_slave_get_eee,
        .get_link_ksettings     = dsa_slave_get_link_ksettings,
        .set_link_ksettings     = dsa_slave_set_link_ksettings,
+       .get_pauseparam         = dsa_slave_get_pauseparam,
+       .set_pauseparam         = dsa_slave_set_pauseparam,
        .get_rxnfc              = dsa_slave_get_rxnfc,
        .set_rxnfc              = dsa_slave_set_rxnfc,
        .get_ts_info            = dsa_slave_get_ts_info,
@@ -1295,11 +1313,12 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
        struct dsa_port *dp = dsa_slave_to_port(slave_dev);
        struct device_node *port_dn = dp->dn;
        struct dsa_switch *ds = dp->ds;
+       phy_interface_t mode;
        u32 phy_flags = 0;
-       int mode, ret;
+       int ret;
 
-       mode = of_get_phy_mode(port_dn);
-       if (mode < 0)
+       ret = of_get_phy_mode(port_dn, &mode);
+       if (ret)
                mode = PHY_INTERFACE_MODE_NA;
 
        dp->pl_config.dev = &slave_dev->dev;
index 6a96075..df4abe8 100644 (file)
@@ -20,7 +20,7 @@ static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
        int i;
 
        for (i = 0; i < ds->num_ports; ++i) {
-               struct dsa_port *dp = &ds->ports[i];
+               struct dsa_port *dp = dsa_to_port(ds, i);
 
                if (dp->ageing_time && dp->ageing_time < ageing_time)
                        ageing_time = dp->ageing_time;
@@ -98,7 +98,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
        if (unset_vlan_filtering) {
                struct switchdev_trans trans = {0};
 
-               err = dsa_port_vlan_filtering(&ds->ports[info->port],
+               err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
                                              false, &trans);
                if (err && err != EOPNOTSUPP)
                        return err;
index 9c1cc24..bc5cb91 100644 (file)
  *     Must be transmitted as zero and ignored on receive.
  *
  * SWITCH_ID - VID[8:6]:
- *     Index of switch within DSA tree. Must be between 0 and
- *     DSA_MAX_SWITCHES - 1.
+ *     Index of switch within DSA tree. Must be between 0 and 7.
  *
  * RSV - VID[5:4]:
  *     To be used for further expansion of PORT or for other purposes.
  *     Must be transmitted as zero and ignored on receive.
  *
  * PORT - VID[3:0]:
- *     Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1.
+ *     Index of switch port. Must be between 0 and 15.
  */
 
 #define DSA_8021Q_DIR_SHIFT            10
@@ -103,7 +102,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
        if (!dsa_is_user_port(ds, port))
                return 0;
 
-       slave = ds->ports[port].slave;
+       slave = dsa_to_port(ds, port)->slave;
 
        err = br_vlan_get_pvid(slave, &pvid);
        if (err < 0)
@@ -118,7 +117,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
                return err;
        }
 
-       return dsa_port_vid_add(&ds->ports[port], pvid, vinfo.flags);
+       return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags);
 }
 
 /* If @enabled is true, installs @vid with @flags into the switch port's HW
@@ -130,7 +129,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
 static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid,
                               u16 flags, bool enabled)
 {
-       struct dsa_port *dp = &ds->ports[port];
+       struct dsa_port *dp = dsa_to_port(ds, port);
        struct bridge_vlan_info vinfo;
        int err;
 
index ffcfcef..7c5a1aa 100644 (file)
@@ -236,21 +236,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
                               struct cfg802154_registered_device **rdev,
                               struct wpan_dev **wpan_dev)
 {
+       const struct genl_dumpit_info *info = genl_dumpit_info(cb);
        int err;
 
        rtnl_lock();
 
        if (!cb->args[0]) {
-               err = nlmsg_parse_deprecated(cb->nlh,
-                                            GENL_HDRLEN + nl802154_fam.hdrsize,
-                                            genl_family_attrbuf(&nl802154_fam),
-                                            nl802154_fam.maxattr,
-                                            nl802154_policy, NULL);
-               if (err)
-                       goto out_unlock;
-
                *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
-                                                           genl_family_attrbuf(&nl802154_fam));
+                                                           info->attrs);
                if (IS_ERR(*wpan_dev)) {
                        err = PTR_ERR(*wpan_dev);
                        goto out_unlock;
@@ -557,17 +550,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
                                        struct netlink_callback *cb,
                                        struct nl802154_dump_wpan_phy_state *state)
 {
-       struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
-       int ret = nlmsg_parse_deprecated(cb->nlh,
-                                        GENL_HDRLEN + nl802154_fam.hdrsize,
-                                        tb, nl802154_fam.maxattr,
-                                        nl802154_policy, NULL);
-
-       /* TODO check if we can handle error here,
-        * we have no backward compatibility
-        */
-       if (ret)
-               return 0;
+       const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+       struct nlattr **tb = info->attrs;
 
        if (tb[NL802154_ATTR_WPAN_PHY])
                state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]);
@@ -2203,7 +2187,8 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 static const struct genl_ops nl802154_ops[] = {
        {
                .cmd = NL802154_CMD_GET_WPAN_PHY,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                .doit = nl802154_get_wpan_phy,
                .dumpit = nl802154_dump_wpan_phy,
                .done = nl802154_dump_wpan_phy_done,
@@ -2343,7 +2328,8 @@ static const struct genl_ops nl802154_ops[] = {
        },
        {
                .cmd = NL802154_CMD_GET_SEC_KEY,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                /* TODO .doit by matching key id? */
                .dumpit = nl802154_dump_llsec_key,
                .flags = GENL_ADMIN_PERM,
@@ -2369,7 +2355,8 @@ static const struct genl_ops nl802154_ops[] = {
        /* TODO unique identifier must short+pan OR extended_addr */
        {
                .cmd = NL802154_CMD_GET_SEC_DEV,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                /* TODO .doit by matching extended_addr? */
                .dumpit = nl802154_dump_llsec_dev,
                .flags = GENL_ADMIN_PERM,
@@ -2395,7 +2382,8 @@ static const struct genl_ops nl802154_ops[] = {
        /* TODO remove complete devkey, put it as nested? */
        {
                .cmd = NL802154_CMD_GET_SEC_DEVKEY,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                /* TODO doit by matching ??? */
                .dumpit = nl802154_dump_llsec_devkey,
                .flags = GENL_ADMIN_PERM,
@@ -2420,7 +2408,8 @@ static const struct genl_ops nl802154_ops[] = {
        },
        {
                .cmd = NL802154_CMD_GET_SEC_LEVEL,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                /* TODO .doit by matching frame_type? */
                .dumpit = nl802154_dump_llsec_seclevel,
                .flags = GENL_ADMIN_PERM,
index b804ccb..0c28bd4 100644 (file)
@@ -9,12 +9,12 @@
 #include <net/netns/ipv4.h>
 #include <net/ip_fib.h>
 
-int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+int call_fib4_notifier(struct notifier_block *nb,
                       enum fib_event_type event_type,
                       struct fib_notifier_info *info)
 {
        info->family = AF_INET;
-       return call_fib_notifier(nb, net, event_type, info);
+       return call_fib_notifier(nb, event_type, info);
 }
 
 int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
@@ -34,17 +34,16 @@ static unsigned int fib4_seq_read(struct net *net)
        return net->ipv4.fib_seq + fib4_rules_seq_read(net);
 }
 
-static int fib4_dump(struct net *net, struct notifier_block *nb)
+static int fib4_dump(struct net *net, struct notifier_block *nb,
+                    struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = fib4_rules_dump(net, nb);
+       err = fib4_rules_dump(net, nb, extack);
        if (err)
                return err;
 
-       fib_notify(net, nb);
-
-       return 0;
+       return fib_notify(net, nb, extack);
 }
 
 static const struct fib_notifier_ops fib4_notifier_ops_template = {
index b43a7ba..f99e3ba 100644 (file)
@@ -65,9 +65,10 @@ bool fib4_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL_GPL(fib4_rule_default);
 
-int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+int fib4_rules_dump(struct net *net, struct notifier_block *nb,
+                   struct netlink_ext_ack *extack)
 {
-       return fib_rules_dump(net, nb, AF_INET);
+       return fib_rules_dump(net, nb, AF_INET, extack);
 }
 
 unsigned int fib4_rules_seq_read(struct net *net)
index 1ab2fb6..b9df9c0 100644 (file)
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
-static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib_entry_notifier(struct notifier_block *nb,
                                   enum fib_event_type event_type, u32 dst,
-                                  int dst_len, struct fib_alias *fa)
+                                  int dst_len, struct fib_alias *fa,
+                                  struct netlink_ext_ack *extack)
 {
        struct fib_entry_notifier_info info = {
+               .info.extack = extack,
                .dst = dst,
                .dst_len = dst_len,
                .fi = fa->fa_info,
@@ -86,7 +88,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
                .type = fa->fa_type,
                .tb_id = fa->tb_id,
        };
-       return call_fib4_notifier(nb, net, event_type, &info.info);
+       return call_fib4_notifier(nb, event_type, &info.info);
 }
 
 static int call_fib_entry_notifiers(struct net *net,
@@ -2015,10 +2017,12 @@ void fib_info_notify_update(struct net *net, struct nl_info *info)
        }
 }
 
-static void fib_leaf_notify(struct net *net, struct key_vector *l,
-                           struct fib_table *tb, struct notifier_block *nb)
+static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
+                          struct notifier_block *nb,
+                          struct netlink_ext_ack *extack)
 {
        struct fib_alias *fa;
+       int err;
 
        hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
                struct fib_info *fi = fa->fa_info;
@@ -2032,39 +2036,53 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
                if (tb->tb_id != fa->tb_id)
                        continue;
 
-               call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
-                                       KEYLENGTH - fa->fa_slen, fa);
+               err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key,
+                                             KEYLENGTH - fa->fa_slen,
+                                             fa, extack);
+               if (err)
+                       return err;
        }
+       return 0;
 }
 
-static void fib_table_notify(struct net *net, struct fib_table *tb,
-                            struct notifier_block *nb)
+static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
+                           struct netlink_ext_ack *extack)
 {
        struct trie *t = (struct trie *)tb->tb_data;
        struct key_vector *l, *tp = t->kv;
        t_key key = 0;
+       int err;
 
        while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
-               fib_leaf_notify(net, l, tb, nb);
+               err = fib_leaf_notify(l, tb, nb, extack);
+               if (err)
+                       return err;
 
                key = l->key + 1;
                /* stop in case of wrap around */
                if (key < l->key)
                        break;
        }
+       return 0;
 }
 
-void fib_notify(struct net *net, struct notifier_block *nb)
+int fib_notify(struct net *net, struct notifier_block *nb,
+              struct netlink_ext_ack *extack)
 {
        unsigned int h;
+       int err;
 
        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
                struct hlist_head *head = &net->ipv4.fib_table_hash[h];
                struct fib_table *tb;
 
-               hlist_for_each_entry_rcu(tb, head, tb_hlist)
-                       fib_table_notify(net, tb, nb);
+               hlist_for_each_entry_rcu(tb, head, tb_hlist) {
+                       err = fib_table_notify(tb, nb, extack);
+                       if (err)
+                               return err;
+               }
        }
+       return 0;
 }
 
 static void __trie_free_rcu(struct rcu_head *head)
index 4298aae..a72fbdf 100644 (file)
@@ -682,7 +682,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
                        dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
 
                if (dev)
-                       saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+                       saddr = inet_select_addr(dev, iph->saddr,
+                                                RT_SCOPE_LINK);
                else
                        saddr = 0;
                rcu_read_unlock();
index 480d0b2..3b9c7a2 100644 (file)
@@ -1563,7 +1563,7 @@ static int ip_mc_check_igmp_msg(struct sk_buff *skb)
        }
 }
 
-static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
+static __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
 {
        return skb_checksum_simple_validate(skb);
 }
index c59a78a..24a9512 100644 (file)
@@ -611,5 +611,6 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
                list_add_tail(&skb->list, &sublist);
        }
        /* dispatch final sublist */
-       ip_sublist_rcv(&sublist, curr_dev, curr_net);
+       if (!list_empty(&sublist))
+               ip_sublist_rcv(&sublist, curr_dev, curr_net);
 }
index 9bcca08..32e20b7 100644 (file)
@@ -1483,10 +1483,10 @@ static int __init ip_auto_config(void)
         * missing values.
         */
        if (ic_myaddr == NONE ||
-#ifdef CONFIG_ROOT_NFS
+#if defined(CONFIG_ROOT_NFS) || defined(CONFIG_CIFS_ROOT)
            (root_server_addr == NONE &&
             ic_servaddr == NONE &&
-            ROOT_DEV == Root_NFS) ||
+            (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) ||
 #endif
            ic_first_dev->next) {
 #ifdef IPCONFIG_DYNAMIC
@@ -1513,6 +1513,12 @@ static int __init ip_auto_config(void)
                                goto try_try_again;
                        }
 #endif
+#ifdef CONFIG_CIFS_ROOT
+                       if (ROOT_DEV == Root_CIFS) {
+                               pr_err("IP-Config: Retrying forever (CIFS root)...\n");
+                               goto try_try_again;
+                       }
+#endif
 
                        if (--retries) {
                                pr_err("IP-Config: Reopening network devices...\n");
index 716d547..440294b 100644 (file)
@@ -278,9 +278,10 @@ static void __net_exit ipmr_rules_exit(struct net *net)
        rtnl_unlock();
 }
 
-static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
+                          struct netlink_ext_ack *extack)
 {
-       return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
+       return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
 }
 
 static unsigned int ipmr_rules_seq_read(struct net *net)
@@ -336,7 +337,8 @@ static void __net_exit ipmr_rules_exit(struct net *net)
        rtnl_unlock();
 }
 
-static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
+                          struct netlink_ext_ack *extack)
 {
        return 0;
 }
@@ -3040,10 +3042,11 @@ static unsigned int ipmr_seq_read(struct net *net)
        return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
 }
 
-static int ipmr_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_dump(struct net *net, struct notifier_block *nb,
+                    struct netlink_ext_ack *extack)
 {
        return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
-                      ipmr_mr_table_iter, &mrt_lock);
+                      ipmr_mr_table_iter, &mrt_lock, extack);
 }
 
 static const struct fib_notifier_ops ipmr_notifier_ops_template = {
index ea48bd1..aa8738a 100644 (file)
@@ -386,15 +386,17 @@ EXPORT_SYMBOL(mr_rtm_dumproute);
 
 int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
            int (*rules_dump)(struct net *net,
-                             struct notifier_block *nb),
+                             struct notifier_block *nb,
+                             struct netlink_ext_ack *extack),
            struct mr_table *(*mr_iter)(struct net *net,
                                        struct mr_table *mrt),
-           rwlock_t *mrt_lock)
+           rwlock_t *mrt_lock,
+           struct netlink_ext_ack *extack)
 {
        struct mr_table *mrt;
        int err;
 
-       err = rules_dump(net, nb);
+       err = rules_dump(net, nb, extack);
        if (err)
                return err;
 
@@ -409,17 +411,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
                        if (!v->dev)
                                continue;
 
-                       mr_call_vif_notifier(nb, net, family,
-                                            FIB_EVENT_VIF_ADD,
-                                            v, vifi, mrt->id);
+                       err = mr_call_vif_notifier(nb, family,
+                                                  FIB_EVENT_VIF_ADD,
+                                                  v, vifi, mrt->id, extack);
+                       if (err)
+                               break;
                }
                read_unlock(mrt_lock);
 
+               if (err)
+                       return err;
+
                /* Notify on table MFC entries */
-               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
-                       mr_call_mfc_notifier(nb, net, family,
-                                            FIB_EVENT_ENTRY_ADD,
-                                            mfc, mrt->id);
+               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+                       err = mr_call_mfc_notifier(nb, family,
+                                                  FIB_EVENT_ENTRY_ADD,
+                                                  mfc, mrt->id, extack);
+                       if (err)
+                               return err;
+               }
        }
 
        return 0;
index 36a28d4..c94445b 100644 (file)
@@ -31,16 +31,8 @@ extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
        if (icmph == NULL)
                return 1;
 
-       switch (icmph->type) {
-       case ICMP_DEST_UNREACH:
-       case ICMP_SOURCE_QUENCH:
-       case ICMP_REDIRECT:
-       case ICMP_TIME_EXCEEDED:
-       case ICMP_PARAMETERPROB:
-               break;
-       default:
+       if (!icmp_is_err(icmph->type))
                return 1;
-       }
 
        inside_iph = skb_header_pointer(skb, outside_hdrlen +
                                        sizeof(struct icmphdr),
index 621f834..dcc4fa1 100644 (file)
@@ -1894,10 +1894,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
        if (!icmph)
                goto out;
 
-       if (icmph->type != ICMP_DEST_UNREACH &&
-           icmph->type != ICMP_REDIRECT &&
-           icmph->type != ICMP_TIME_EXCEEDED &&
-           icmph->type != ICMP_PARAMETERPROB)
+       if (!icmp_is_err(icmph->type))
                goto out;
 
        inner_iph = skb_header_pointer(skb,
index d8876f0..1dd2518 100644 (file)
@@ -1741,8 +1741,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
                                struct tcp_zerocopy_receive *zc)
 {
        unsigned long address = (unsigned long)zc->address;
+       u32 length = 0, seq, offset, zap_len;
        const skb_frag_t *frags = NULL;
-       u32 length = 0, seq, offset;
        struct vm_area_struct *vma;
        struct sk_buff *skb = NULL;
        struct tcp_sock *tp;
@@ -1769,12 +1769,12 @@ static int tcp_zerocopy_receive(struct sock *sk,
        seq = tp->copied_seq;
        inq = tcp_inq(sk);
        zc->length = min_t(u32, zc->length, inq);
-       zc->length &= ~(PAGE_SIZE - 1);
-       if (zc->length) {
-               zap_page_range(vma, address, zc->length);
+       zap_len = zc->length & ~(PAGE_SIZE - 1);
+       if (zap_len) {
+               zap_page_range(vma, address, zap_len);
                zc->recv_skip_hint = 0;
        } else {
-               zc->recv_skip_hint = inq;
+               zc->recv_skip_hint = zc->length;
        }
        ret = 0;
        while (length + PAGE_SIZE <= zc->length) {
@@ -2666,6 +2666,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        /* Clean up fastopen related fields */
        tcp_free_fastopen_req(tp);
        inet->defer_connect = 0;
+       tp->fastopen_client_fail = 0;
 
        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 
@@ -3305,6 +3306,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_reord_seen = tp->reord_seen;
        info->tcpi_rcv_ooopack = tp->rcv_ooopack;
        info->tcpi_snd_wnd = tp->snd_wnd;
+       info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
        unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
index a915ade..19ad958 100644 (file)
@@ -422,7 +422,10 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
                cookie->len = -1;
                return true;
        }
-       return cookie->len > 0;
+       if (cookie->len > 0)
+               return true;
+       tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE;
+       return false;
 }
 
 /* This function checks if we want to defer sending SYN until the first
index a2e52ad..88b987c 100644 (file)
@@ -5814,6 +5814,10 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
        tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
 
        if (data) { /* Retransmit unacked data in SYN */
+               if (tp->total_retrans)
+                       tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
+               else
+                       tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
                skb_rbtree_walk_from(data) {
                        if (__tcp_retransmit_skb(sk, data, 1))
                                break;
index 67b2dc7..899e100 100644 (file)
@@ -121,11 +121,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 #if IS_ENABLED(CONFIG_IPV6)
                if (tw->tw_family == AF_INET6) {
                        if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
-                           (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
-                            (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
+                           ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) ||
                            ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
-                           (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
-                            (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
+                           ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr))
                                loopback = true;
                } else
 #endif
index 34ccef1..98d8230 100644 (file)
@@ -5552,14 +5552,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
        nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
        if (!nla)
                goto nla_put_failure;
-
-       if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
-               goto nla_put_failure;
-
        read_lock_bh(&idev->lock);
        memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
        read_unlock_bh(&idev->lock);
 
+       if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+               goto nla_put_failure;
+
        return 0;
 
 nla_put_failure:
index 05f82ba..f87ae33 100644 (file)
@@ -7,12 +7,12 @@
 #include <net/netns/ipv6.h>
 #include <net/ip6_fib.h>
 
-int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+int call_fib6_notifier(struct notifier_block *nb,
                       enum fib_event_type event_type,
                       struct fib_notifier_info *info)
 {
        info->family = AF_INET6;
-       return call_fib_notifier(nb, net, event_type, info);
+       return call_fib_notifier(nb, event_type, info);
 }
 
 int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
@@ -27,15 +27,16 @@ static unsigned int fib6_seq_read(struct net *net)
        return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
 }
 
-static int fib6_dump(struct net *net, struct notifier_block *nb)
+static int fib6_dump(struct net *net, struct notifier_block *nb,
+                    struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = fib6_rules_dump(net, nb);
+       err = fib6_rules_dump(net, nb, extack);
        if (err)
                return err;
 
-       return fib6_tables_dump(net, nb);
+       return fib6_tables_dump(net, nb, extack);
 }
 
 static const struct fib_notifier_ops fib6_notifier_ops_template = {
index f9e8fe3..fafe556 100644 (file)
@@ -47,9 +47,10 @@ bool fib6_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL_GPL(fib6_rule_default);
 
-int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+int fib6_rules_dump(struct net *net, struct notifier_block *nb,
+                   struct netlink_ext_ack *extack)
 {
-       return fib_rules_dump(net, nb, AF_INET6);
+       return fib_rules_dump(net, nb, AF_INET6, extack);
 }
 
 unsigned int fib6_rules_seq_read(struct net *net)
index 62c9972..ef408a5 100644 (file)
@@ -516,13 +516,29 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 
        mip6_addr_swap(skb);
 
+       sk = icmpv6_xmit_lock(net);
+       if (!sk)
+               goto out_bh_enable;
+
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.daddr = hdr->saddr;
        if (force_saddr)
                saddr = force_saddr;
-       if (saddr)
+       if (saddr) {
                fl6.saddr = *saddr;
+       } else {
+               /* select a more meaningful saddr from input if */
+               struct net_device *in_netdev;
+
+               in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
+               if (in_netdev) {
+                       ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
+                                          inet6_sk(sk)->srcprefs,
+                                          &fl6.saddr);
+                       dev_put(in_netdev);
+               }
+       }
        fl6.flowi6_mark = mark;
        fl6.flowi6_oif = iif;
        fl6.fl6_icmp_type = type;
@@ -531,10 +547,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
-       sk = icmpv6_xmit_lock(net);
-       if (!sk)
-               goto out_bh_enable;
-
        sk->sk_mark = mark;
        np = inet6_sk(sk);
 
index 6e2af41..f66bc2a 100644 (file)
@@ -357,15 +357,17 @@ unsigned int fib6_tables_seq_read(struct net *net)
        return fib_seq;
 }
 
-static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib6_entry_notifier(struct notifier_block *nb,
                                    enum fib_event_type event_type,
-                                   struct fib6_info *rt)
+                                   struct fib6_info *rt,
+                                   struct netlink_ext_ack *extack)
 {
        struct fib6_entry_notifier_info info = {
+               .info.extack = extack,
                .rt = rt,
        };
 
-       return call_fib6_notifier(nb, net, event_type, &info.info);
+       return call_fib6_notifier(nb, event_type, &info.info);
 }
 
 int call_fib6_entry_notifiers(struct net *net,
@@ -401,40 +403,51 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
 struct fib6_dump_arg {
        struct net *net;
        struct notifier_block *nb;
+       struct netlink_ext_ack *extack;
 };
 
-static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
+static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
 {
        if (rt == arg->net->ipv6.fib6_null_entry)
-               return;
-       call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+               return 0;
+       return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD,
+                                       rt, arg->extack);
 }
 
 static int fib6_node_dump(struct fib6_walker *w)
 {
        struct fib6_info *rt;
+       int err = 0;
 
-       for_each_fib6_walker_rt(w)
-               fib6_rt_dump(rt, w->args);
+       for_each_fib6_walker_rt(w) {
+               err = fib6_rt_dump(rt, w->args);
+               if (err)
+                       break;
+       }
        w->leaf = NULL;
-       return 0;
+       return err;
 }
 
-static void fib6_table_dump(struct net *net, struct fib6_table *tb,
-                           struct fib6_walker *w)
+static int fib6_table_dump(struct net *net, struct fib6_table *tb,
+                          struct fib6_walker *w)
 {
+       int err;
+
        w->root = &tb->tb6_root;
        spin_lock_bh(&tb->tb6_lock);
-       fib6_walk(net, w);
+       err = fib6_walk(net, w);
        spin_unlock_bh(&tb->tb6_lock);
+       return err;
 }
 
 /* Called with rcu_read_lock() */
-int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+int fib6_tables_dump(struct net *net, struct notifier_block *nb,
+                    struct netlink_ext_ack *extack)
 {
        struct fib6_dump_arg arg;
        struct fib6_walker *w;
        unsigned int h;
+       int err = 0;
 
        w = kzalloc(sizeof(*w), GFP_ATOMIC);
        if (!w)
@@ -443,19 +456,24 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
        w->func = fib6_node_dump;
        arg.net = net;
        arg.nb = nb;
+       arg.extack = extack;
        w->args = &arg;
 
        for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
                struct hlist_head *head = &net->ipv6.fib_table_hash[h];
                struct fib6_table *tb;
 
-               hlist_for_each_entry_rcu(tb, head, tb6_hlist)
-                       fib6_table_dump(net, tb, w);
+               hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+                       err = fib6_table_dump(net, tb, w);
+                       if (err < 0)
+                               goto out;
+               }
        }
 
+out:
        kfree(w);
 
-       return 0;
+       return err;
 }
 
 static int fib6_dump_node(struct fib6_walker *w)
index 3d71c7d..ef7f707 100644 (file)
@@ -325,7 +325,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
                list_add_tail(&skb->list, &sublist);
        }
        /* dispatch final sublist */
-       ip6_sublist_rcv(&sublist, curr_dev, curr_net);
+       if (!list_empty(&sublist))
+               ip6_sublist_rcv(&sublist, curr_dev, curr_net);
 }
 
 INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
index 857a89a..bfa49ff 100644 (file)
@@ -265,9 +265,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
        rtnl_unlock();
 }
 
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+                           struct netlink_ext_ack *extack)
 {
-       return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+       return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
 }
 
 static unsigned int ip6mr_rules_seq_read(struct net *net)
@@ -324,7 +325,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
        rtnl_unlock();
 }
 
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+                           struct netlink_ext_ack *extack)
 {
        return 0;
 }
@@ -1256,10 +1258,11 @@ static unsigned int ip6mr_seq_read(struct net *net)
        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
 }
 
-static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_dump(struct net *net, struct notifier_block *nb,
+                     struct netlink_ext_ack *extack)
 {
        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
-                      ip6mr_mr_table_iter, &mrt_lock);
+                      ip6mr_mr_table_iter, &mrt_lock, extack);
 }
 
 static struct notifier_block ip6_mr_notifier = {
index 34d51cd..6bac68f 100644 (file)
@@ -150,4 +150,4 @@ EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
-MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
+MODULE_DESCRIPTION("Netfilter IPv6 transparent proxy support");
index a63ff85..bf2dac4 100644 (file)
@@ -1475,11 +1475,11 @@ static u32 rt6_exception_hash(const struct in6_addr *dst,
        u32 val;
 
        net_get_random_once(&seed, sizeof(seed));
-       val = jhash(dst, sizeof(*dst), seed);
+       val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed);
 
 #ifdef CONFIG_IPV6_SUBTREES
        if (src)
-               val = jhash(src, sizeof(*src), val);
+               val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val);
 #endif
        return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
 }
@@ -2291,10 +2291,7 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
        if (!icmph)
                goto out;
 
-       if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
-           icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
-           icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
-           icmph->icmp6_type != ICMPV6_PARAMPROB)
+       if (!icmpv6_is_err(icmph->icmp6_type))
                goto out;
 
        inner_iph = skb_header_pointer(skb,
index b11883d..33da6f7 100644 (file)
@@ -485,7 +485,14 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 
        params.ssn = sta->tid_seq[tid] >> 4;
        ret = drv_ampdu_action(local, sdata, &params);
-       if (ret) {
+       if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
+               /*
+                * We didn't send the request yet, so don't need to check
+                * here if we already got a response, just mark as driver
+                * ready immediately.
+                */
+               set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
+       } else if (ret) {
                ht_dbg(sdata,
                       "BA request denied - HW unavailable for %pM tid %d\n",
                       sta->sta.addr, tid);
index 0a6ff01..d407449 100644 (file)
@@ -538,7 +538,6 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
 {
        struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
        struct cfg80211_bss *cbss;
-       int err, changed = 0;
 
        sdata_assert_lock(sdata);
 
@@ -560,13 +559,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
        ifibss->chandef = sdata->csa_chandef;
 
        /* generate the beacon */
-       err = ieee80211_ibss_csa_beacon(sdata, NULL);
-       if (err < 0)
-               return err;
-
-       changed |= err;
-
-       return changed;
+       return ieee80211_ibss_csa_beacon(sdata, NULL);
 }
 
 void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata)
index ee86c33..86bc469 100644 (file)
@@ -70,7 +70,7 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
 }
 
 /* return current EMWA throughput */
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg)
 {
        int usecs;
 
@@ -79,13 +79,13 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
                usecs = 1000000;
 
        /* reset thr. below 10% success */
-       if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100))
+       if (mr->stats.prob_avg < MINSTREL_FRAC(10, 100))
                return 0;
 
-       if (prob_ewma > MINSTREL_FRAC(90, 100))
+       if (prob_avg > MINSTREL_FRAC(90, 100))
                return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs));
        else
-               return MINSTREL_TRUNC(100000 * (prob_ewma / usecs));
+               return MINSTREL_TRUNC(100000 * (prob_avg / usecs));
 }
 
 /* find & sort topmost throughput rates */
@@ -98,8 +98,8 @@ minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
 
        for (j = MAX_THR_RATES; j > 0; --j) {
                tmp_mrs = &mi->r[tp_list[j - 1]].stats;
-               if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <=
-                   minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))
+               if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_avg) <=
+                   minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_avg))
                        break;
        }
 
@@ -157,20 +157,24 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
 * Recalculate statistics and counters of a given rate
 */
 void
-minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs)
+minstrel_calc_rate_stats(struct minstrel_priv *mp,
+                        struct minstrel_rate_stats *mrs)
 {
        unsigned int cur_prob;
 
        if (unlikely(mrs->attempts > 0)) {
                mrs->sample_skipped = 0;
                cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts);
-               if (unlikely(!mrs->att_hist)) {
-                       mrs->prob_ewma = cur_prob;
+               if (mp->new_avg) {
+                       minstrel_filter_avg_add(&mrs->prob_avg,
+                                               &mrs->prob_avg_1, cur_prob);
+               } else if (unlikely(!mrs->att_hist)) {
+                       mrs->prob_avg = cur_prob;
                } else {
                        /*update exponential weighted moving avarage */
-                       mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma,
-                                                      cur_prob,
-                                                      EWMA_LEVEL);
+                       mrs->prob_avg = minstrel_ewma(mrs->prob_avg,
+                                                     cur_prob,
+                                                     EWMA_LEVEL);
                }
                mrs->att_hist += mrs->attempts;
                mrs->succ_hist += mrs->success;
@@ -200,12 +204,12 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
                struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats;
 
                /* Update statistics of success probability per rate */
-               minstrel_calc_rate_stats(mrs);
+               minstrel_calc_rate_stats(mp, mrs);
 
                /* Sample less often below the 10% chance of success.
                 * Sample less often above the 95% chance of success. */
-               if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
-                   mrs->prob_ewma < MINSTREL_FRAC(10, 100)) {
+               if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
+                   mrs->prob_avg < MINSTREL_FRAC(10, 100)) {
                        mr->adjusted_retry_count = mrs->retry_count >> 1;
                        if (mr->adjusted_retry_count > 2)
                                mr->adjusted_retry_count = 2;
@@ -225,14 +229,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
                 * choose the maximum throughput rate as max_prob_rate
                 * (2) if all success probabilities < 95%, the rate with
                 * highest success probability is chosen as max_prob_rate */
-               if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) {
-                       tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma);
+               if (mrs->prob_avg >= MINSTREL_FRAC(95, 100)) {
+                       tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_avg);
                        tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate],
-                                                         tmp_mrs->prob_ewma);
+                                                         tmp_mrs->prob_avg);
                        if (tmp_cur_tp >= tmp_prob_tp)
                                tmp_prob_rate = i;
                } else {
-                       if (mrs->prob_ewma >= tmp_mrs->prob_ewma)
+                       if (mrs->prob_avg >= tmp_mrs->prob_avg)
                                tmp_prob_rate = i;
                }
        }
@@ -290,7 +294,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
                mi->sample_deferred--;
 
        if (time_after(jiffies, mi->last_stats_update +
-                               (mp->update_interval * HZ) / 1000))
+                               mp->update_interval / (mp->new_avg ? 2 : 1)))
                minstrel_update_stats(mp, mi);
 }
 
@@ -422,7 +426,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
         * has a probability of >95%, we shouldn't be attempting
         * to use it, as this only wastes precious airtime */
        if (!mrr_capable &&
-          (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100)))
+          (mi->r[ndx].stats.prob_avg > MINSTREL_FRAC(95, 100)))
                return;
 
        mi->prev_sample = true;
@@ -573,7 +577,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
         * computing cur_tp
         */
        tmp_mrs = &mi->r[idx].stats;
-       tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
+       tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_avg) * 10;
        tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
 
        return tmp_cur_tp;
index 51d8b2c..dbb43bc 100644 (file)
 /* number of highest throughput rates to consider*/
 #define MAX_THR_RATES 4
 
+/*
+ * Coefficients for moving average with noise filter (period=16),
+ * scaled by 10 bits
+ *
+ * a1 = exp(-pi * sqrt(2) / period)
+ * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period)
+ * coeff3 = -sqr(a1)
+ * coeff1 = 1 - coeff2 - coeff3
+ */
+#define MINSTREL_AVG_COEFF1            (MINSTREL_FRAC(1, 1) - \
+                                        MINSTREL_AVG_COEFF2 - \
+                                        MINSTREL_AVG_COEFF3)
+#define MINSTREL_AVG_COEFF2            0x00001499
+#define MINSTREL_AVG_COEFF3            -0x0000092e
+
 /*
  * Perform EWMA (Exponentially Weighted Moving Average) calculation
  */
@@ -32,6 +47,37 @@ minstrel_ewma(int old, int new, int weight)
        return old + incr;
 }
 
+static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in)
+{
+       s32 out_1 = *prev_1;
+       s32 out_2 = *prev_2;
+       s32 val;
+
+       if (!in)
+               in += 1;
+
+       if (!out_1) {
+               val = out_1 = in;
+               goto out;
+       }
+
+       val = MINSTREL_AVG_COEFF1 * in;
+       val += MINSTREL_AVG_COEFF2 * out_1;
+       val += MINSTREL_AVG_COEFF3 * out_2;
+       val >>= MINSTREL_SCALE;
+
+       if (val > 1 << MINSTREL_SCALE)
+               val = 1 << MINSTREL_SCALE;
+       if (val < 0)
+               val = 1;
+
+out:
+       *prev_2 = out_1;
+       *prev_1 = val;
+
+       return val;
+}
+
 struct minstrel_rate_stats {
        /* current / last sampling period attempts/success counters */
        u16 attempts, last_attempts;
@@ -40,8 +86,9 @@ struct minstrel_rate_stats {
        /* total attempts/success counters */
        u32 att_hist, succ_hist;
 
-       /* prob_ewma - exponential weighted moving average of prob */
-       u16 prob_ewma;
+       /* prob_avg - moving average of prob */
+       u16 prob_avg;
+       u16 prob_avg_1;
 
        /* maximum retry counts */
        u8 retry_count;
@@ -95,6 +142,7 @@ struct minstrel_sta_info {
 struct minstrel_priv {
        struct ieee80211_hw *hw;
        bool has_mrr;
+       bool new_avg;
        u32 sample_switch;
        unsigned int cw_min;
        unsigned int cw_max;
@@ -126,8 +174,9 @@ extern const struct rate_control_ops mac80211_minstrel;
 void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
 
 /* Recalculate success probabilities and counters for a given rate using EWMA */
-void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs);
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma);
+void minstrel_calc_rate_stats(struct minstrel_priv *mp,
+                             struct minstrel_rate_stats *mrs);
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg);
 
 /* debugfs */
 int minstrel_stats_open(struct inode *inode, struct file *file);
index c8afd85..9b8e0da 100644 (file)
@@ -90,8 +90,8 @@ minstrel_stats_open(struct inode *inode, struct file *file)
                p += sprintf(p, "%6u ", mr->perfect_tx_time);
 
                tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
-               tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
-               eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+               tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
+               eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
 
                p += sprintf(p, "%4u.%1u    %4u.%1u     %3u.%1u"
                                "     %3u   %3u %-3u   "
@@ -147,8 +147,8 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file)
                p += sprintf(p, "%u,",mr->perfect_tx_time);
 
                tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
-               tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
-               eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+               tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
+               eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
 
                p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u,"
                                "%llu,%llu,%d,%d\n",
index 0ef2633..694a319 100644 (file)
@@ -346,12 +346,12 @@ minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi)
  */
 int
 minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
-                      int prob_ewma)
+                      int prob_avg)
 {
        unsigned int nsecs = 0;
 
        /* do not account throughput if sucess prob is below 10% */
-       if (prob_ewma < MINSTREL_FRAC(10, 100))
+       if (prob_avg < MINSTREL_FRAC(10, 100))
                return 0;
 
        if (group != MINSTREL_CCK_GROUP)
@@ -365,11 +365,11 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
         * account for collision related packet error rate fluctuation
         * (prob is scaled - see MINSTREL_FRAC above)
         */
-       if (prob_ewma > MINSTREL_FRAC(90, 100))
+       if (prob_avg > MINSTREL_FRAC(90, 100))
                return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000)
                                                                      / nsecs));
        else
-               return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs));
+               return MINSTREL_TRUNC(100000 * ((prob_avg * 1000) / nsecs));
 }
 
 /*
@@ -389,13 +389,13 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index,
 
        cur_group = index / MCS_GROUP_RATES;
        cur_idx = index  % MCS_GROUP_RATES;
-       cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma;
+       cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg;
        cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob);
 
        do {
                tmp_group = tp_list[j - 1] / MCS_GROUP_RATES;
                tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES;
-               tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+               tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
                tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx,
                                                    tmp_prob);
                if (cur_tp_avg < tmp_tp_avg ||
@@ -432,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
 
        tmp_group = mi->max_prob_rate / MCS_GROUP_RATES;
        tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES;
-       tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+       tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
        tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
 
        /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from
@@ -444,11 +444,11 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
 
        max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
        max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
-       max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+       max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg;
 
-       if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
+       if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) {
                cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
-                                                   mrs->prob_ewma);
+                                                   mrs->prob_avg);
                if (cur_tp_avg > tmp_tp_avg)
                        mi->max_prob_rate = index;
 
@@ -458,9 +458,9 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
                if (cur_tp_avg > max_gpr_tp_avg)
                        mg->max_group_prob_rate = index;
        } else {
-               if (mrs->prob_ewma > tmp_prob)
+               if (mrs->prob_avg > tmp_prob)
                        mi->max_prob_rate = index;
-               if (mrs->prob_ewma > max_gpr_prob)
+               if (mrs->prob_avg > max_gpr_prob)
                        mg->max_group_prob_rate = index;
        }
 }
@@ -482,12 +482,12 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
 
        tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES;
        tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES;
-       tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+       tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
        tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
 
        tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES;
        tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES;
-       tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+       tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
        tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
 
        if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) {
@@ -518,7 +518,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
                        continue;
 
                tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
-               tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma;
+               tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg;
 
                if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) &&
                   (minstrel_mcs_groups[group].streams < tmp_max_streams)) {
@@ -623,7 +623,7 @@ minstrel_ht_rate_sample_switch(struct minstrel_priv *mp,
         * If that fails, look again for a rate that is at least as fast
         */
        mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
-       faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100);
+       faster_rate = mrs->prob_avg > MINSTREL_FRAC(75, 100);
        minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate);
        if (!n_rates && faster_rate)
                minstrel_ht_find_probe_rates(mi, rates, &n_rates, false);
@@ -737,8 +737,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 
                        mrs = &mg->rates[i];
                        mrs->retry_updated = false;
-                       minstrel_calc_rate_stats(mrs);
-                       cur_prob = mrs->prob_ewma;
+                       minstrel_calc_rate_stats(mp, mrs);
+                       cur_prob = mrs->prob_avg;
 
                        if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0)
                                continue;
@@ -773,6 +773,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 
        /* try to sample all available rates during each interval */
        mi->sample_count *= 8;
+       if (mp->new_avg)
+               mi->sample_count /= 2;
 
        if (sample)
                minstrel_ht_rate_sample_switch(mp, mi);
@@ -889,6 +891,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
        struct ieee80211_tx_rate *ar = info->status.rates;
        struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL;
        struct minstrel_priv *mp = priv;
+       u32 update_interval = mp->update_interval / 2;
        bool last, update = false;
        bool sample_status = false;
        int i;
@@ -943,6 +946,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
 
        switch (mi->sample_mode) {
        case MINSTREL_SAMPLE_IDLE:
+               if (mp->new_avg &&
+                   (mp->hw->max_rates > 1 ||
+                    mi->total_packets_cur < SAMPLE_SWITCH_THR))
+                       update_interval /= 2;
                break;
 
        case MINSTREL_SAMPLE_ACTIVE:
@@ -970,23 +977,20 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
                 */
                rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
                if (rate->attempts > 30 &&
-                   MINSTREL_FRAC(rate->success, rate->attempts) <
-                   MINSTREL_FRAC(20, 100)) {
+                   rate->success < rate->attempts / 4) {
                        minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
                        update = true;
                }
 
                rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
                if (rate2->attempts > 30 &&
-                   MINSTREL_FRAC(rate2->success, rate2->attempts) <
-                   MINSTREL_FRAC(20, 100)) {
+                   rate2->success < rate2->attempts / 4) {
                        minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
                        update = true;
                }
        }
 
-       if (time_after(jiffies, mi->last_stats_update +
-                               (mp->update_interval / 2 * HZ) / 1000)) {
+       if (time_after(jiffies, mi->last_stats_update + update_interval)) {
                update = true;
                minstrel_ht_update_stats(mp, mi, true);
        }
@@ -1008,7 +1012,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
        unsigned int overhead = 0, overhead_rtscts = 0;
 
        mrs = minstrel_get_ratestats(mi, index);
-       if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) {
+       if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) {
                mrs->retry_count = 1;
                mrs->retry_count_rtscts = 1;
                return;
@@ -1065,7 +1069,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
        if (!mrs->retry_updated)
                minstrel_calc_retransmit(mp, mi, index);
 
-       if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) {
+       if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) {
                ratetbl->rate[offset].count = 2;
                ratetbl->rate[offset].count_rts = 2;
                ratetbl->rate[offset].count_cts = 2;
@@ -1099,11 +1103,11 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 }
 
 static inline int
-minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate)
+minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate)
 {
        int group = rate / MCS_GROUP_RATES;
        rate %= MCS_GROUP_RATES;
-       return mi->groups[group].rates[rate].prob_ewma;
+       return mi->groups[group].rates[rate].prob_avg;
 }
 
 static int
@@ -1115,7 +1119,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
        unsigned int duration;
 
        /* Disable A-MSDU if max_prob_rate is bad */
-       if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100))
+       if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100))
                return 1;
 
        duration = g->duration[rate];
@@ -1138,7 +1142,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
         * data packet size
         */
        if (duration > MCS_DURATION(1, 0, 260) ||
-           (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) <
+           (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) <
             MINSTREL_FRAC(75, 100)))
                return 3200;
 
@@ -1243,7 +1247,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
         * rate, to avoid wasting airtime.
         */
        sample_dur = minstrel_get_duration(sample_idx);
-       if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
+       if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
            minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur)
                return -1;
 
@@ -1666,7 +1670,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
                mp->has_mrr = true;
 
        mp->hw = hw;
-       mp->update_interval = 100;
+       mp->update_interval = HZ / 10;
+       mp->new_avg = true;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
        mp->fixed_rate_idx = (u32) -1;
@@ -1674,6 +1679,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
                           &mp->fixed_rate_idx);
        debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir,
                           &mp->sample_switch);
+       debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir,
+                          &mp->new_avg);
 #endif
 
        minstrel_ht_init_cck_rates(mp);
@@ -1698,7 +1705,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
 
        i = mi->max_tp_rate[0] / MCS_GROUP_RATES;
        j = mi->max_tp_rate[0] % MCS_GROUP_RATES;
-       prob = mi->groups[i].rates[j].prob_ewma;
+       prob = mi->groups[i].rates[j].prob_avg;
 
        /* convert tp_avg from pkt per second in kbps */
        tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
index f938701..53ea3c2 100644 (file)
@@ -119,6 +119,6 @@ struct minstrel_ht_sta_priv {
 
 void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
 int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
-                          int prob_ewma);
+                          int prob_avg);
 
 #endif
index 5a6e9f3..bebb719 100644 (file)
@@ -98,8 +98,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
                p += sprintf(p, "%6u  ", tx_time);
 
                tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
-               tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
-               eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+               tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg);
+               eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
 
                p += sprintf(p, "%4u.%1u    %4u.%1u     %3u.%1u"
                                "     %3u   %3u %-3u   "
@@ -243,8 +243,8 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
                p += sprintf(p, "%u,", tx_time);
 
                tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
-               tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
-               eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+               tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg);
+               eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
 
                p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,"
                                "%u,%llu,%llu,",
index 1fa4227..938c10f 100644 (file)
@@ -1617,7 +1617,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
 
 static bool ieee80211_tx_frags(struct ieee80211_local *local,
                               struct ieee80211_vif *vif,
-                              struct ieee80211_sta *sta,
+                              struct sta_info *sta,
                               struct sk_buff_head *skbs,
                               bool txpending)
 {
@@ -1679,7 +1679,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
                spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
                info->control.vif = vif;
-               control.sta = sta;
+               control.sta = sta ? &sta->sta : NULL;
 
                __skb_unlink(skb, skbs);
                drv_tx(local, &control, skb);
@@ -1698,7 +1698,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
        struct ieee80211_tx_info *info;
        struct ieee80211_sub_if_data *sdata;
        struct ieee80211_vif *vif;
-       struct ieee80211_sta *pubsta;
        struct sk_buff *skb;
        bool result = true;
        __le16 fc;
@@ -1713,11 +1712,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
        if (sta && !sta->uploaded)
                sta = NULL;
 
-       if (sta)
-               pubsta = &sta->sta;
-       else
-               pubsta = NULL;
-
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_MONITOR:
                if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
@@ -1744,8 +1738,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
                break;
        }
 
-       result = ieee80211_tx_frags(local, vif, pubsta, skbs,
-                                   txpending);
+       result = ieee80211_tx_frags(local, vif, sta, skbs, txpending);
 
        ieee80211_tpt_led_trig_tx(local, fc, led_len);
 
@@ -3529,7 +3522,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
                                     struct ieee80211_sub_if_data, u.ap);
 
        __skb_queue_tail(&tx.skbs, skb);
-       ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
+       ieee80211_tx_frags(local, &sdata->vif, sta, &tx.skbs, false);
        return true;
 }
 
index 5d5bdf4..78f046e 100644 (file)
@@ -536,6 +536,26 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 }
 EXPORT_SYMBOL(nf_hook_slow);
 
+void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
+                      const struct nf_hook_entries *e)
+{
+       struct sk_buff *skb, *next;
+       struct list_head sublist;
+       int ret;
+
+       INIT_LIST_HEAD(&sublist);
+
+       list_for_each_entry_safe(skb, next, head, list) {
+               skb_list_del_init(skb);
+               ret = nf_hook_slow(skb, state, e, 0);
+               if (ret == 1)
+                       list_add_tail(&skb->list, &sublist);
+       }
+       /* Put passed packets back on main list */
+       list_splice(&sublist, head);
+}
+EXPORT_SYMBOL(nf_hook_slow_list);
+
 /* This needs to be compiled in any case to avoid dependencies between the
  * nfnetlink_queue code and nf_conntrack.
  */
index 063df74..1abd6f0 100644 (file)
@@ -192,7 +192,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 }
 
 #ifndef IP_SET_BITMAP_STORED_TIMEOUT
-static inline bool
+static bool
 mtype_is_filled(const struct mtype_elem *x)
 {
        return true;
index 11ff9d4..abe8f77 100644 (file)
@@ -55,7 +55,7 @@ struct bitmap_ip_adt_elem {
        u16 id;
 };
 
-static inline u32
+static u32
 ip_to_id(const struct bitmap_ip *m, u32 ip)
 {
        return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
@@ -63,33 +63,33 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
 
 /* Common functions */
 
-static inline int
+static int
 bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
                  struct bitmap_ip *map, size_t dsize)
 {
        return !!test_bit(e->id, map->members);
 }
 
-static inline int
+static int
 bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
 {
        return !!test_bit(id, map->members);
 }
 
-static inline int
+static int
 bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
                 u32 flags, size_t dsize)
 {
        return !!test_bit(e->id, map->members);
 }
 
-static inline int
+static int
 bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
 {
        return !test_and_clear_bit(e->id, map->members);
 }
 
-static inline int
+static int
 bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
                  size_t dsize)
 {
@@ -97,7 +97,7 @@ bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
                        htonl(map->first_ip + id * map->hosts));
 }
 
-static inline int
+static int
 bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
 {
        return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
@@ -237,6 +237,18 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
        return true;
 }
 
+static u32
+range_to_mask(u32 from, u32 to, u8 *bits)
+{
+       u32 mask = 0xFFFFFFFE;
+
+       *bits = 32;
+       while (--(*bits) > 0 && mask && (to & mask) != from)
+               mask <<= 1;
+
+       return mask;
+}
+
 static int
 bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
                 u32 flags)
index 1d4e633..b618713 100644 (file)
@@ -65,7 +65,7 @@ struct bitmap_ipmac_elem {
        unsigned char filled;
 } __aligned(__alignof__(u64));
 
-static inline u32
+static u32
 ip_to_id(const struct bitmap_ipmac *m, u32 ip)
 {
        return ip - m->first_ip;
@@ -79,7 +79,7 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
 
 /* Common functions */
 
-static inline int
+static int
 bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
                     const struct bitmap_ipmac *map, size_t dsize)
 {
@@ -94,7 +94,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
        return -EAGAIN;
 }
 
-static inline int
+static int
 bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
 {
        const struct bitmap_ipmac_elem *elem;
@@ -106,13 +106,13 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
        return elem->filled == MAC_FILLED;
 }
 
-static inline int
+static int
 bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
 {
        return elem->filled == MAC_FILLED;
 }
 
-static inline int
+static int
 bitmap_ipmac_add_timeout(unsigned long *timeout,
                         const struct bitmap_ipmac_adt_elem *e,
                         const struct ip_set_ext *ext, struct ip_set *set,
@@ -139,7 +139,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
        return 0;
 }
 
-static inline int
+static int
 bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
                    struct bitmap_ipmac *map, u32 flags, size_t dsize)
 {
@@ -177,14 +177,14 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
        return IPSET_ADD_STORE_PLAIN_TIMEOUT;
 }
 
-static inline int
+static int
 bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
                    struct bitmap_ipmac *map)
 {
        return !test_and_clear_bit(e->id, map->members);
 }
 
-static inline int
+static int
 bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
                     u32 id, size_t dsize)
 {
@@ -197,7 +197,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
                nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
 }
 
-static inline int
+static int
 bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
 {
        return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
index 704a0dd..23d6095 100644 (file)
@@ -46,7 +46,7 @@ struct bitmap_port_adt_elem {
        u16 id;
 };
 
-static inline u16
+static u16
 port_to_id(const struct bitmap_port *m, u16 port)
 {
        return port - m->first_port;
@@ -54,34 +54,34 @@ port_to_id(const struct bitmap_port *m, u16 port)
 
 /* Common functions */
 
-static inline int
+static int
 bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
                    const struct bitmap_port *map, size_t dsize)
 {
        return !!test_bit(e->id, map->members);
 }
 
-static inline int
+static int
 bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
 {
        return !!test_bit(id, map->members);
 }
 
-static inline int
+static int
 bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
                   struct bitmap_port *map, u32 flags, size_t dsize)
 {
        return !!test_bit(e->id, map->members);
 }
 
-static inline int
+static int
 bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
                   struct bitmap_port *map)
 {
        return !test_and_clear_bit(e->id, map->members);
 }
 
-static inline int
+static int
 bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
                    size_t dsize)
 {
@@ -89,13 +89,40 @@ bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
                             htons(map->first_port + id));
 }
 
-static inline int
+static int
 bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
 {
        return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
               nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
 }
 
+static bool
+ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+{
+       bool ret;
+       u8 proto;
+
+       switch (pf) {
+       case NFPROTO_IPV4:
+               ret = ip_set_get_ip4_port(skb, src, port, &proto);
+               break;
+       case NFPROTO_IPV6:
+               ret = ip_set_get_ip6_port(skb, src, port, &proto);
+               break;
+       default:
+               return false;
+       }
+       if (!ret)
+               return ret;
+       switch (proto) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+               return true;
+       default:
+               return false;
+       }
+}
+
 static int
 bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
                 const struct xt_action_param *par,
index e64d5f9..35cf59e 100644 (file)
@@ -35,7 +35,7 @@ struct ip_set_net {
 
 static unsigned int ip_set_net_id __read_mostly;
 
-static inline struct ip_set_net *ip_set_pernet(struct net *net)
+static struct ip_set_net *ip_set_pernet(struct net *net)
 {
        return net_generic(net, ip_set_net_id);
 }
@@ -67,13 +67,13 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
  * serialized by ip_set_type_mutex.
  */
 
-static inline void
+static void
 ip_set_type_lock(void)
 {
        mutex_lock(&ip_set_type_mutex);
 }
 
-static inline void
+static void
 ip_set_type_unlock(void)
 {
        mutex_unlock(&ip_set_type_mutex);
@@ -277,7 +277,7 @@ ip_set_free(void *members)
 }
 EXPORT_SYMBOL_GPL(ip_set_free);
 
-static inline bool
+static bool
 flag_nested(const struct nlattr *nla)
 {
        return nla->nla_type & NLA_F_NESTED;
@@ -325,6 +325,83 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
+static u32
+ip_set_timeout_get(const unsigned long *timeout)
+{
+       u32 t;
+
+       if (*timeout == IPSET_ELEM_PERMANENT)
+               return 0;
+
+       t = jiffies_to_msecs(*timeout - jiffies) / MSEC_PER_SEC;
+       /* Zero value in userspace means no timeout */
+       return t == 0 ? 1 : t;
+}
+
+static char *
+ip_set_comment_uget(struct nlattr *tb)
+{
+       return nla_data(tb);
+}
+
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
+void
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
+                   const struct ip_set_ext *ext)
+{
+       struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
+       size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+       if (unlikely(c)) {
+               set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+               kfree_rcu(c, rcu);
+               rcu_assign_pointer(comment->c, NULL);
+       }
+       if (!len)
+               return;
+       if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+               len = IPSET_MAX_COMMENT_SIZE;
+       c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+       if (unlikely(!c))
+               return;
+       strlcpy(c->str, ext->comment, len + 1);
+       set->ext_size += sizeof(*c) + strlen(c->str) + 1;
+       rcu_assign_pointer(comment->c, c);
+}
+EXPORT_SYMBOL_GPL(ip_set_init_comment);
+
+/* Used only when dumping a set, protected by rcu_read_lock() */
+static int
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
+{
+       struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
+
+       if (!c)
+               return 0;
+       return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
+}
+
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
+static void
+ip_set_comment_free(struct ip_set *set, void *ptr)
+{
+       struct ip_set_comment *comment = ptr;
+       struct ip_set_comment_rcu *c;
+
+       c = rcu_dereference_protected(comment->c, 1);
+       if (unlikely(!c))
+               return;
+       set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+       kfree_rcu(c, rcu);
+       rcu_assign_pointer(comment->c, NULL);
+}
+
 typedef void (*destroyer)(struct ip_set *, void *);
 /* ipset data extension types, in size order */
 
@@ -351,12 +428,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
                .flag    = IPSET_FLAG_WITH_COMMENT,
                .len     = sizeof(struct ip_set_comment),
                .align   = __alignof__(struct ip_set_comment),
-               .destroy = (destroyer) ip_set_comment_free,
+               .destroy = ip_set_comment_free,
        },
 };
 EXPORT_SYMBOL_GPL(ip_set_extensions);
 
-static inline bool
+static bool
 add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
 {
        return ip_set_extensions[id].flag ?
@@ -446,6 +523,46 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
 
+static u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+       return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+       return (u64)atomic64_read(&(counter)->packets);
+}
+
+static bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+       return nla_put_net64(skb, IPSET_ATTR_BYTES,
+                            cpu_to_be64(ip_set_get_bytes(counter)),
+                            IPSET_ATTR_PAD) ||
+              nla_put_net64(skb, IPSET_ATTR_PACKETS,
+                            cpu_to_be64(ip_set_get_packets(counter)),
+                            IPSET_ATTR_PAD);
+}
+
+static bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+       /* Send nonzero parameters only */
+       return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+               nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+                             cpu_to_be64((u64)skbinfo->skbmark << 32 |
+                                         skbinfo->skbmarkmask),
+                             IPSET_ATTR_PAD)) ||
+              (skbinfo->skbprio &&
+               nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+                             cpu_to_be32(skbinfo->skbprio))) ||
+              (skbinfo->skbqueue &&
+               nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+                             cpu_to_be16(skbinfo->skbqueue)));
+}
+
 int
 ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
                      const void *e, bool active)
@@ -471,6 +588,55 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 }
 EXPORT_SYMBOL_GPL(ip_set_put_extensions);
 
+static bool
+ip_set_match_counter(u64 counter, u64 match, u8 op)
+{
+       switch (op) {
+       case IPSET_COUNTER_NONE:
+               return true;
+       case IPSET_COUNTER_EQ:
+               return counter == match;
+       case IPSET_COUNTER_NE:
+               return counter != match;
+       case IPSET_COUNTER_LT:
+               return counter < match;
+       case IPSET_COUNTER_GT:
+               return counter > match;
+       }
+       return false;
+}
+
+static void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+       atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+       atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static void
+ip_set_update_counter(struct ip_set_counter *counter,
+                     const struct ip_set_ext *ext, u32 flags)
+{
+       if (ext->packets != ULLONG_MAX &&
+           !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+               ip_set_add_bytes(ext->bytes, counter);
+               ip_set_add_packets(ext->packets, counter);
+       }
+}
+
+static void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+                  const struct ip_set_ext *ext,
+                  struct ip_set_ext *mext, u32 flags)
+{
+       mext->skbinfo = *skbinfo;
+}
+
 bool
 ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
                        struct ip_set_ext *mext, u32 flags, void *data)
@@ -506,7 +672,7 @@ EXPORT_SYMBOL_GPL(ip_set_match_extensions);
  * The set behind an index may change by swapping only, from userspace.
  */
 
-static inline void
+static void
 __ip_set_get(struct ip_set *set)
 {
        write_lock_bh(&ip_set_ref_lock);
@@ -514,7 +680,7 @@ __ip_set_get(struct ip_set *set)
        write_unlock_bh(&ip_set_ref_lock);
 }
 
-static inline void
+static void
 __ip_set_put(struct ip_set *set)
 {
        write_lock_bh(&ip_set_ref_lock);
@@ -526,7 +692,7 @@ __ip_set_put(struct ip_set *set)
 /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
  * a separate reference counter
  */
-static inline void
+static void
 __ip_set_put_netlink(struct ip_set *set)
 {
        write_lock_bh(&ip_set_ref_lock);
@@ -541,7 +707,7 @@ __ip_set_put_netlink(struct ip_set *set)
  * so it can't be destroyed (or changed) under our foot.
  */
 
-static inline struct ip_set *
+static struct ip_set *
 ip_set_rcu_get(struct net *net, ip_set_id_t index)
 {
        struct ip_set *set;
@@ -670,7 +836,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
  *
  */
 
-static inline void
+static void
 __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
 {
        struct ip_set *set;
@@ -1252,6 +1418,30 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 #define DUMP_TYPE(arg)         (((u32)(arg)) & 0x0000FFFF)
 #define DUMP_FLAGS(arg)                (((u32)(arg)) >> 16)
 
+int
+ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
+{
+       u32 cadt_flags = 0;
+
+       if (SET_WITH_TIMEOUT(set))
+               if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+                                          htonl(set->timeout))))
+                       return -EMSGSIZE;
+       if (SET_WITH_COUNTER(set))
+               cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
+       if (SET_WITH_COMMENT(set))
+               cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+       if (SET_WITH_SKBINFO(set))
+               cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
+       if (SET_WITH_FORCEADD(set))
+               cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
+
+       if (!cadt_flags)
+               return 0;
+       return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+}
+EXPORT_SYMBOL_GPL(ip_set_put_flags);
+
 static int
 ip_set_dump_done(struct netlink_callback *cb)
 {
index 2b8f959..36615eb 100644 (file)
@@ -148,31 +148,3 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
 #endif
-
-bool
-ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
-{
-       bool ret;
-       u8 proto;
-
-       switch (pf) {
-       case NFPROTO_IPV4:
-               ret = ip_set_get_ip4_port(skb, src, port, &proto);
-               break;
-       case NFPROTO_IPV6:
-               ret = ip_set_get_ip6_port(skb, src, port, &proto);
-               break;
-       default:
-               return false;
-       }
-       if (!ret)
-               return ret;
-       switch (proto) {
-       case IPPROTO_TCP:
-       case IPPROTO_UDP:
-               return true;
-       default:
-               return false;
-       }
-}
-EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
index d098d87..7480ce5 100644 (file)
@@ -39,7 +39,7 @@
 #ifdef IP_SET_HASH_WITH_MULTI
 #define AHASH_MAX(h)                   ((h)->ahash_max)
 
-static inline u8
+static u8
 tune_ahash_max(u8 curr, u32 multi)
 {
        u32 n;
@@ -909,7 +909,7 @@ out:
        return ret;
 }
 
-static inline int
+static int
 mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
                 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 {
index f4432d9..5d6d68e 100644 (file)
@@ -44,7 +44,7 @@ struct hash_ip4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ip4_data_equal(const struct hash_ip4_elem *e1,
                    const struct hash_ip4_elem *e2,
                    u32 *multi)
@@ -63,7 +63,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
 {
        next->ip = e->ip;
@@ -171,7 +171,7 @@ struct hash_ip6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
                    const struct hash_ip6_elem *ip2,
                    u32 *multi)
@@ -179,7 +179,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
        return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
 }
 
-static inline void
+static void
 hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
 {
        ip6_netmask(ip, prefix);
@@ -196,7 +196,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
 {
 }
index 24d8f4d..e28cd72 100644 (file)
@@ -47,7 +47,7 @@ struct hash_ipmac4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
                       const struct hash_ipmac4_elem *e2,
                       u32 *multi)
@@ -67,7 +67,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
                      const struct hash_ipmac4_elem *e)
 {
@@ -154,7 +154,7 @@ struct hash_ipmac6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
                       const struct hash_ipmac6_elem *e2,
                       u32 *multi)
@@ -175,7 +175,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
                      const struct hash_ipmac6_elem *e)
 {
index 7a1734a..aba1df6 100644 (file)
@@ -42,7 +42,7 @@ struct hash_ipmark4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
                        const struct hash_ipmark4_elem *ip2,
                        u32 *multi)
@@ -64,7 +64,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
                       const struct hash_ipmark4_elem *d)
 {
@@ -165,7 +165,7 @@ struct hash_ipmark6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
                        const struct hash_ipmark6_elem *ip2,
                        u32 *multi)
@@ -187,7 +187,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
                       const struct hash_ipmark6_elem *d)
 {
index 32e2406..1ff2287 100644 (file)
@@ -47,7 +47,7 @@ struct hash_ipport4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
                        const struct hash_ipport4_elem *ip2,
                        u32 *multi)
@@ -71,7 +71,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipport4_data_next(struct hash_ipport4_elem *next,
                       const struct hash_ipport4_elem *d)
 {
@@ -202,7 +202,7 @@ struct hash_ipport6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
                        const struct hash_ipport6_elem *ip2,
                        u32 *multi)
@@ -226,7 +226,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipport6_data_next(struct hash_ipport6_elem *next,
                       const struct hash_ipport6_elem *d)
 {
index 15d4193..fa88afd 100644 (file)
@@ -46,7 +46,7 @@ struct hash_ipportip4_elem {
        u8 padding;
 };
 
-static inline bool
+static bool
 hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
                          const struct hash_ipportip4_elem *ip2,
                          u32 *multi)
@@ -72,7 +72,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
                         const struct hash_ipportip4_elem *d)
 {
@@ -210,7 +210,7 @@ struct hash_ipportip6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
                          const struct hash_ipportip6_elem *ip2,
                          u32 *multi)
@@ -236,7 +236,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
                         const struct hash_ipportip6_elem *d)
 {
index 7a4d7af..eef6ecf 100644 (file)
@@ -59,7 +59,7 @@ struct hash_ipportnet4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
                           const struct hash_ipportnet4_elem *ip2,
                           u32 *multi)
@@ -71,25 +71,25 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
               ip1->proto == ip2->proto;
 }
 
-static inline int
+static int
 hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
 {
        elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
-static inline void
+static void
 hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
 {
        elem->ip2 &= ip_set_netmask(cidr);
@@ -116,7 +116,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
                          const struct hash_ipportnet4_elem *d)
 {
@@ -308,7 +308,7 @@ struct hash_ipportnet6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
                           const struct hash_ipportnet6_elem *ip2,
                           u32 *multi)
@@ -320,25 +320,25 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
               ip1->proto == ip2->proto;
 }
 
-static inline int
+static int
 hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
 {
        elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
-static inline void
+static void
 hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
 {
        ip6_netmask(&elem->ip2, cidr);
@@ -365,7 +365,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
                          const struct hash_ipportnet6_elem *d)
 {
index d94c585..0b61593 100644 (file)
@@ -37,7 +37,7 @@ struct hash_mac4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_mac4_data_equal(const struct hash_mac4_elem *e1,
                     const struct hash_mac4_elem *e2,
                     u32 *multi)
@@ -45,7 +45,7 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
        return ether_addr_equal(e1->ether, e2->ether);
 }
 
-static inline bool
+static bool
 hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
 {
        if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
@@ -56,7 +56,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_mac4_data_next(struct hash_mac4_elem *next,
                    const struct hash_mac4_elem *e)
 {
index c259cbc..86133fa 100644 (file)
@@ -47,7 +47,7 @@ struct hash_net4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_net4_data_equal(const struct hash_net4_elem *ip1,
                     const struct hash_net4_elem *ip2,
                     u32 *multi)
@@ -56,25 +56,25 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,
               ip1->cidr == ip2->cidr;
 }
 
-static inline int
+static int
 hash_net4_do_data_match(const struct hash_net4_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
 {
        elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
 }
 
-static inline void
+static void
 hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
 {
        elem->ip &= ip_set_netmask(cidr);
@@ -97,7 +97,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_net4_data_next(struct hash_net4_elem *next,
                    const struct hash_net4_elem *d)
 {
@@ -212,7 +212,7 @@ struct hash_net6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_net6_data_equal(const struct hash_net6_elem *ip1,
                     const struct hash_net6_elem *ip2,
                     u32 *multi)
@@ -221,25 +221,25 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
               ip1->cidr == ip2->cidr;
 }
 
-static inline int
+static int
 hash_net6_do_data_match(const struct hash_net6_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
 {
        elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
 }
 
-static inline void
+static void
 hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
 {
        ip6_netmask(&elem->ip, cidr);
@@ -262,7 +262,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_net6_data_next(struct hash_net6_elem *next,
                    const struct hash_net6_elem *d)
 {
index 87b29f9..1a04e09 100644 (file)
@@ -62,7 +62,7 @@ struct hash_netiface4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
                          const struct hash_netiface4_elem *ip2,
                          u32 *multi)
@@ -74,25 +74,25 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
               strcmp(ip1->iface, ip2->iface) == 0;
 }
 
-static inline int
+static int
 hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
 {
        elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
 }
 
-static inline void
+static void
 hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
 {
        elem->ip &= ip_set_netmask(cidr);
@@ -119,7 +119,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netiface4_data_next(struct hash_netiface4_elem *next,
                         const struct hash_netiface4_elem *d)
 {
@@ -285,7 +285,7 @@ struct hash_netiface6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
                          const struct hash_netiface6_elem *ip2,
                          u32 *multi)
@@ -297,25 +297,25 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
               strcmp(ip1->iface, ip2->iface) == 0;
 }
 
-static inline int
+static int
 hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
 {
        elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
 }
 
-static inline void
+static void
 hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr)
 {
        ip6_netmask(&elem->ip, cidr);
@@ -342,7 +342,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netiface6_data_next(struct hash_netiface6_elem *next,
                         const struct hash_netiface6_elem *d)
 {
index a3ae69b..bcb6d0b 100644 (file)
@@ -52,7 +52,7 @@ struct hash_netnet4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
                        const struct hash_netnet4_elem *ip2,
                        u32 *multi)
@@ -61,32 +61,32 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
               ip1->ccmp == ip2->ccmp;
 }
 
-static inline int
+static int
 hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
 {
        elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
 }
 
-static inline void
+static void
 hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
                             struct hash_netnet4_elem *orig)
 {
        elem->ip[1] = orig->ip[1];
 }
 
-static inline void
+static void
 hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
 {
        if (inner) {
@@ -117,7 +117,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netnet4_data_next(struct hash_netnet4_elem *next,
                       const struct hash_netnet4_elem *d)
 {
@@ -282,7 +282,7 @@ struct hash_netnet6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
                        const struct hash_netnet6_elem *ip2,
                        u32 *multi)
@@ -292,32 +292,32 @@ hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
               ip1->ccmp == ip2->ccmp;
 }
 
-static inline int
+static int
 hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
 {
        elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
 }
 
-static inline void
+static void
 hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
                             struct hash_netnet6_elem *orig)
 {
        elem->ip[1] = orig->ip[1];
 }
 
-static inline void
+static void
 hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
 {
        if (inner) {
@@ -348,7 +348,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netnet6_data_next(struct hash_netnet6_elem *next,
                       const struct hash_netnet6_elem *d)
 {
index 799f227..34448df 100644 (file)
@@ -57,7 +57,7 @@ struct hash_netport4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
                         const struct hash_netport4_elem *ip2,
                         u32 *multi)
@@ -68,25 +68,25 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
               ip1->cidr == ip2->cidr;
 }
 
-static inline int
+static int
 hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
 {
        elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
-static inline void
+static void
 hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
 {
        elem->ip &= ip_set_netmask(cidr);
@@ -112,7 +112,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netport4_data_next(struct hash_netport4_elem *next,
                        const struct hash_netport4_elem *d)
 {
@@ -270,7 +270,7 @@ struct hash_netport6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
                         const struct hash_netport6_elem *ip2,
                         u32 *multi)
@@ -281,25 +281,25 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
               ip1->cidr == ip2->cidr;
 }
 
-static inline int
+static int
 hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
 {
        elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
-static inline void
+static void
 hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
 {
        ip6_netmask(&elem->ip, cidr);
@@ -325,7 +325,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netport6_data_next(struct hash_netport6_elem *next,
                        const struct hash_netport6_elem *d)
 {
index a82b70e..934c171 100644 (file)
@@ -56,7 +56,7 @@ struct hash_netportnet4_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
                            const struct hash_netportnet4_elem *ip2,
                            u32 *multi)
@@ -67,32 +67,32 @@ hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
               ip1->proto == ip2->proto;
 }
 
-static inline int
+static int
 hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
 {
        elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
-static inline void
+static void
 hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
                                 struct hash_netportnet4_elem *orig)
 {
        elem->ip[1] = orig->ip[1];
 }
 
-static inline void
+static void
 hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
                              u8 cidr, bool inner)
 {
@@ -126,7 +126,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
                           const struct hash_netportnet4_elem *d)
 {
@@ -331,7 +331,7 @@ struct hash_netportnet6_elem {
 
 /* Common functions */
 
-static inline bool
+static bool
 hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
                            const struct hash_netportnet6_elem *ip2,
                            u32 *multi)
@@ -343,32 +343,32 @@ hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
               ip1->proto == ip2->proto;
 }
 
-static inline int
+static int
 hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
 {
        return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
-static inline void
+static void
 hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
 {
        elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
-static inline void
+static void
 hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
 {
        swap(*flags, elem->nomatch);
 }
 
-static inline void
+static void
 hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
                                 struct hash_netportnet6_elem *orig)
 {
        elem->ip[1] = orig->ip[1];
 }
 
-static inline void
+static void
 hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
                              u8 cidr, bool inner)
 {
@@ -402,7 +402,7 @@ nla_put_failure:
        return true;
 }
 
-static inline void
+static void
 hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
                           const struct hash_netportnet6_elem *d)
 {
index 67ac501..cd747c0 100644 (file)
@@ -149,7 +149,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
        kfree(e);
 }
 
-static inline void
+static void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
        struct list_set *map = set->data;
@@ -160,7 +160,7 @@ list_set_del(struct ip_set *set, struct set_elem *e)
        call_rcu(&e->rcu, __list_set_del_rcu);
 }
 
-static inline void
+static void
 list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
 {
        struct list_set *map = set->data;
index 8b80ab7..512259f 100644 (file)
@@ -2402,18 +2402,22 @@ estimator_fail:
        return -ENOMEM;
 }
 
-static void __net_exit __ip_vs_cleanup(struct net *net)
+static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
 {
-       struct netns_ipvs *ipvs = net_ipvs(net);
-
-       ip_vs_service_net_cleanup(ipvs);        /* ip_vs_flush() with locks */
-       ip_vs_conn_net_cleanup(ipvs);
-       ip_vs_app_net_cleanup(ipvs);
-       ip_vs_protocol_net_cleanup(ipvs);
-       ip_vs_control_net_cleanup(ipvs);
-       ip_vs_estimator_net_cleanup(ipvs);
-       IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
-       net->ipvs = NULL;
+       struct netns_ipvs *ipvs;
+       struct net *net;
+
+       ip_vs_service_nets_cleanup(net_list);   /* ip_vs_flush() with locks */
+       list_for_each_entry(net, net_list, exit_list) {
+               ipvs = net_ipvs(net);
+               ip_vs_conn_net_cleanup(ipvs);
+               ip_vs_app_net_cleanup(ipvs);
+               ip_vs_protocol_net_cleanup(ipvs);
+               ip_vs_control_net_cleanup(ipvs);
+               ip_vs_estimator_net_cleanup(ipvs);
+               IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
+               net->ipvs = NULL;
+       }
 }
 
 static int __net_init __ip_vs_dev_init(struct net *net)
@@ -2429,27 +2433,32 @@ hook_fail:
        return ret;
 }
 
-static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
 {
-       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct netns_ipvs *ipvs;
+       struct net *net;
+
        EnterFunction(2);
-       nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
-       ipvs->enable = 0;       /* Disable packet reception */
-       smp_wmb();
-       ip_vs_sync_net_cleanup(ipvs);
+       list_for_each_entry(net, net_list, exit_list) {
+               ipvs = net_ipvs(net);
+               nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+               ipvs->enable = 0;       /* Disable packet reception */
+               smp_wmb();
+               ip_vs_sync_net_cleanup(ipvs);
+       }
        LeaveFunction(2);
 }
 
 static struct pernet_operations ipvs_core_ops = {
        .init = __ip_vs_init,
-       .exit = __ip_vs_cleanup,
+       .exit_batch = __ip_vs_cleanup_batch,
        .id   = &ip_vs_net_id,
        .size = sizeof(struct netns_ipvs),
 };
 
 static struct pernet_operations ipvs_core_dev_ops = {
        .init = __ip_vs_dev_init,
-       .exit = __ip_vs_dev_cleanup,
+       .exit_batch = __ip_vs_dev_cleanup_batch,
 };
 
 /*
index 3cccc88..3be7398 100644 (file)
@@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
 
 /*
  *     Delete service by {netns} in the service table.
- *     Called by __ip_vs_cleanup()
+ *     Called by __ip_vs_batch_cleanup()
  */
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
+void ip_vs_service_nets_cleanup(struct list_head *net_list)
 {
+       struct netns_ipvs *ipvs;
+       struct net *net;
+
        EnterFunction(2);
        /* Check for "full" addressed entries */
        mutex_lock(&__ip_vs_mutex);
-       ip_vs_flush(ipvs, true);
+       list_for_each_entry(net, net_list, exit_list) {
+               ipvs = net_ipvs(net);
+               ip_vs_flush(ipvs, true);
+       }
        mutex_unlock(&__ip_vs_mutex);
        LeaveFunction(2);
 }
index 78b074c..c03066f 100644 (file)
@@ -5,7 +5,7 @@
  * Authors:     Raducu Deaconu <rhadoo_io@yahoo.com>
  *
  * Scheduler implements "overflow" loadbalancing according to number of active
- * connections , will keep all conections to the node with the highest weight
+ * connections , will keep all connections to the node with the highest weight
  * and overflow to the next node if the number of connections exceeds the node's
  * weight.
  * Note that this scheduler might not be suitable for UDP because it only uses
index 888d306..b1e300f 100644 (file)
@@ -407,12 +407,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
                goto err_put;
 
        skb_dst_drop(skb);
-       if (noref) {
-               if (!local)
-                       skb_dst_set_noref(skb, &rt->dst);
-               else
-                       skb_dst_set(skb, dst_clone(&rt->dst));
-       } else
+       if (noref)
+               skb_dst_set_noref(skb, &rt->dst);
+       else
                skb_dst_set(skb, &rt->dst);
 
        return local;
@@ -574,12 +571,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
                goto err_put;
 
        skb_dst_drop(skb);
-       if (noref) {
-               if (!local)
-                       skb_dst_set_noref(skb, &rt->dst);
-               else
-                       skb_dst_set(skb, dst_clone(&rt->dst));
-       } else
+       if (noref)
+               skb_dst_set_noref(skb, &rt->dst);
+       else
                skb_dst_set(skb, &rt->dst);
 
        return local;
index 5cd610b..0af1898 100644 (file)
@@ -573,7 +573,6 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
 void nf_ct_tmpl_free(struct nf_conn *tmpl)
 {
        nf_ct_ext_destroy(tmpl);
-       nf_ct_ext_free(tmpl);
 
        if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
                kfree((char *)tmpl - tmpl->proto.tmpl_padto);
@@ -1417,7 +1416,6 @@ void nf_conntrack_free(struct nf_conn *ct)
        WARN_ON(atomic_read(&ct->ct_general.use) != 0);
 
        nf_ct_ext_destroy(ct);
-       nf_ct_ext_free(ct);
        kmem_cache_free(nf_conntrack_cachep, ct);
        smp_mb__before_atomic();
        atomic_dec(&net->ct.count);
index 6fba74b..7956c9f 100644 (file)
@@ -30,6 +30,7 @@
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
 #define ECACHE_RETRY_WAIT (HZ/10)
+#define ECACHE_STACK_ALLOC (256 / sizeof(void *))
 
 enum retry_state {
        STATE_CONGESTED,
@@ -39,11 +40,11 @@ enum retry_state {
 
 static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
 {
-       struct nf_conn *refs[16];
+       struct nf_conn *refs[ECACHE_STACK_ALLOC];
+       enum retry_state ret = STATE_DONE;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
        unsigned int evicted = 0;
-       enum retry_state ret = STATE_DONE;
 
        spin_lock(&pcpu->lock);
 
@@ -54,10 +55,22 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
                if (!nf_ct_is_confirmed(ct))
                        continue;
 
+               /* This ecache access is safe because the ct is on the
+                * pcpu dying list and we hold the spinlock -- the entry
+                * cannot be free'd until after the lock is released.
+                *
+                * This is true even if ct has a refcount of 0: the
+                * cpu that is about to free the entry must remove it
+                * from the dying list and needs the lock to do so.
+                */
                e = nf_ct_ecache_find(ct);
                if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
                        continue;
 
+               /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
+                * the worker owns this entry: the ct will remain valid
+                * until the worker puts its ct reference.
+                */
                if (nf_conntrack_event(IPCT_DESTROY, ct)) {
                        ret = STATE_CONGESTED;
                        break;
@@ -189,15 +202,15 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
        if (notify == NULL)
                goto out_unlock;
 
+       if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
+               goto out_unlock;
+
        e = nf_ct_ecache_find(ct);
        if (e == NULL)
                goto out_unlock;
 
        events = xchg(&e->cache, 0);
 
-       if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
-               goto out_unlock;
-
        /* We make a copy of the missed event cache without taking
         * the lock, thus we may send missed events twice. However,
         * this does not harm and it happens very rarely. */
index d4ed1e1..c24e5b6 100644 (file)
@@ -34,21 +34,24 @@ void nf_ct_ext_destroy(struct nf_conn *ct)
                        t->destroy(ct);
                rcu_read_unlock();
        }
+
+       kfree(ct->ext);
 }
 EXPORT_SYMBOL(nf_ct_ext_destroy);
 
 void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
        unsigned int newlen, newoff, oldlen, alloc;
-       struct nf_ct_ext *old, *new;
        struct nf_ct_ext_type *t;
+       struct nf_ct_ext *new;
 
        /* Conntrack must not be confirmed to avoid races on reallocation. */
        WARN_ON(nf_ct_is_confirmed(ct));
 
-       old = ct->ext;
 
-       if (old) {
+       if (ct->ext) {
+               const struct nf_ct_ext *old = ct->ext;
+
                if (__nf_ct_ext_exist(old, id))
                        return NULL;
                oldlen = old->len;
@@ -68,22 +71,18 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
        rcu_read_unlock();
 
        alloc = max(newlen, NF_CT_EXT_PREALLOC);
-       kmemleak_not_leak(old);
-       new = __krealloc(old, alloc, gfp);
+       new = krealloc(ct->ext, alloc, gfp);
        if (!new)
                return NULL;
 
-       if (!old) {
+       if (!ct->ext)
                memset(new->offset, 0, sizeof(new->offset));
-               ct->ext = new;
-       } else if (new != old) {
-               kfree_rcu(old, rcu);
-               rcu_assign_pointer(ct->ext, new);
-       }
 
        new->offset[id] = newoff;
        new->len = newlen;
        memset((void *)new + newoff, 0, newlen - newoff);
+
+       ct->ext = new;
        return (void *)new + newoff;
 }
 EXPORT_SYMBOL(nf_ct_ext_add);
index e2d13cd..d8d33ef 100644 (file)
@@ -506,9 +506,45 @@ nla_put_failure:
        return -1;
 }
 
+/* all these functions access ct->ext. Caller must either hold a reference
+ * on ct or prevent its deletion by holding either the bucket spinlock or
+ * pcpu dying list lock.
+ */
+static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+                                 struct nf_conn *ct, u32 type)
+{
+       if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
+           ctnetlink_dump_timestamp(skb, ct) < 0 ||
+           ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+           ctnetlink_dump_labels(skb, ct) < 0 ||
+           ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
+           ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+               return -1;
+
+       return 0;
+}
+
+static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+{
+       if (ctnetlink_dump_status(skb, ct) < 0 ||
+           ctnetlink_dump_mark(skb, ct) < 0 ||
+           ctnetlink_dump_secctx(skb, ct) < 0 ||
+           ctnetlink_dump_id(skb, ct) < 0 ||
+           ctnetlink_dump_use(skb, ct) < 0 ||
+           ctnetlink_dump_master(skb, ct) < 0)
+               return -1;
+
+       if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
+           (ctnetlink_dump_timeout(skb, ct) < 0 ||
+            ctnetlink_dump_protoinfo(skb, ct) < 0))
+               return -1;
+
+       return 0;
+}
+
 static int
 ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
-                   struct nf_conn *ct)
+                   struct nf_conn *ct, bool extinfo)
 {
        const struct nf_conntrack_zone *zone;
        struct nlmsghdr *nlh;
@@ -552,23 +588,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
                                   NF_CT_DEFAULT_ZONE_DIR) < 0)
                goto nla_put_failure;
 
-       if (ctnetlink_dump_status(skb, ct) < 0 ||
-           ctnetlink_dump_acct(skb, ct, type) < 0 ||
-           ctnetlink_dump_timestamp(skb, ct) < 0 ||
-           ctnetlink_dump_helpinfo(skb, ct) < 0 ||
-           ctnetlink_dump_mark(skb, ct) < 0 ||
-           ctnetlink_dump_secctx(skb, ct) < 0 ||
-           ctnetlink_dump_labels(skb, ct) < 0 ||
-           ctnetlink_dump_id(skb, ct) < 0 ||
-           ctnetlink_dump_use(skb, ct) < 0 ||
-           ctnetlink_dump_master(skb, ct) < 0 ||
-           ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
-           ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+       if (ctnetlink_dump_info(skb, ct) < 0)
                goto nla_put_failure;
-
-       if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
-           (ctnetlink_dump_timeout(skb, ct) < 0 ||
-            ctnetlink_dump_protoinfo(skb, ct) < 0))
+       if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0)
                goto nla_put_failure;
 
        nlmsg_end(skb, nlh);
@@ -953,13 +975,11 @@ restart:
                        if (!ctnetlink_filter_match(ct, cb->data))
                                continue;
 
-                       rcu_read_lock();
                        res =
                        ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
                                            NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-                                           ct);
-                       rcu_read_unlock();
+                                           ct, true);
                        if (res < 0) {
                                nf_conntrack_get(&ct->ct_general);
                                cb->args[1] = (unsigned long)ct;
@@ -1364,10 +1384,8 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
                return -ENOMEM;
        }
 
-       rcu_read_lock();
        err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
-                                 NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
-       rcu_read_unlock();
+                                 NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
        nf_ct_put(ct);
        if (err <= 0)
                goto free;
@@ -1429,12 +1447,18 @@ restart:
                                        continue;
                                cb->args[1] = 0;
                        }
-                       rcu_read_lock();
+
+                       /* We can't dump extension info for the unconfirmed
+                        * list because unconfirmed conntracks can have
+                        * ct->ext reallocated (and thus freed).
+                        *
+                        * In the dying list case ct->ext can't be free'd
+                        * until after we drop pcpu->lock.
+                        */
                        res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
                                                  cb->nlh->nlmsg_seq,
                                                  NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-                                                 ct);
-                       rcu_read_unlock();
+                                                 ct, dying ? true : false);
                        if (res < 0) {
                                if (!atomic_inc_not_zero(&ct->ct_general.use))
                                        continue;
index 097deba..c2e3dff 100644 (file)
@@ -235,11 +235,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
        }
 
        /* Need to track icmp error message? */
-       if (icmph->type != ICMP_DEST_UNREACH &&
-           icmph->type != ICMP_SOURCE_QUENCH &&
-           icmph->type != ICMP_TIME_EXCEEDED &&
-           icmph->type != ICMP_PARAMETERPROB &&
-           icmph->type != ICMP_REDIRECT)
+       if (!icmp_is_err(icmph->type))
                return NF_ACCEPT;
 
        memset(&outer_daddr, 0, sizeof(outer_daddr));
index d481f9b..13f0941 100644 (file)
@@ -151,11 +151,64 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
        }
 }
 
+static int nft_netdev_register_hooks(struct net *net,
+                                    struct list_head *hook_list)
+{
+       struct nft_hook *hook;
+       int err, j;
+
+       j = 0;
+       list_for_each_entry(hook, hook_list, list) {
+               err = nf_register_net_hook(net, &hook->ops);
+               if (err < 0)
+                       goto err_register;
+
+               j++;
+       }
+       return 0;
+
+err_register:
+       list_for_each_entry(hook, hook_list, list) {
+               if (j-- <= 0)
+                       break;
+
+               nf_unregister_net_hook(net, &hook->ops);
+       }
+       return err;
+}
+
+static void nft_netdev_unregister_hooks(struct net *net,
+                                       struct list_head *hook_list)
+{
+       struct nft_hook *hook;
+
+       list_for_each_entry(hook, hook_list, list)
+               nf_unregister_net_hook(net, &hook->ops);
+}
+
+static int nft_register_basechain_hooks(struct net *net, int family,
+                                       struct nft_base_chain *basechain)
+{
+       if (family == NFPROTO_NETDEV)
+               return nft_netdev_register_hooks(net, &basechain->hook_list);
+
+       return nf_register_net_hook(net, &basechain->ops);
+}
+
+static void nft_unregister_basechain_hooks(struct net *net, int family,
+                                          struct nft_base_chain *basechain)
+{
+       if (family == NFPROTO_NETDEV)
+               nft_netdev_unregister_hooks(net, &basechain->hook_list);
+       else
+               nf_unregister_net_hook(net, &basechain->ops);
+}
+
 static int nf_tables_register_hook(struct net *net,
                                   const struct nft_table *table,
                                   struct nft_chain *chain)
 {
-       const struct nft_base_chain *basechain;
+       struct nft_base_chain *basechain;
        const struct nf_hook_ops *ops;
 
        if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -168,14 +221,14 @@ static int nf_tables_register_hook(struct net *net,
        if (basechain->type->ops_register)
                return basechain->type->ops_register(net, ops);
 
-       return nf_register_net_hook(net, ops);
+       return nft_register_basechain_hooks(net, table->family, basechain);
 }
 
 static void nf_tables_unregister_hook(struct net *net,
                                      const struct nft_table *table,
                                      struct nft_chain *chain)
 {
-       const struct nft_base_chain *basechain;
+       struct nft_base_chain *basechain;
        const struct nf_hook_ops *ops;
 
        if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -187,7 +240,7 @@ static void nf_tables_unregister_hook(struct net *net,
        if (basechain->type->ops_unregister)
                return basechain->type->ops_unregister(net, ops);
 
-       nf_unregister_net_hook(net, ops);
+       nft_unregister_basechain_hooks(net, table->family, basechain);
 }
 
 static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -742,7 +795,8 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
                if (cnt && i++ == cnt)
                        break;
 
-               nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+               nft_unregister_basechain_hooks(net, table->family,
+                                              nft_base_chain(chain));
        }
 }
 
@@ -757,14 +811,16 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table)
                if (!nft_is_base_chain(chain))
                        continue;
 
-               err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+               err = nft_register_basechain_hooks(net, table->family,
+                                                  nft_base_chain(chain));
                if (err < 0)
-                       goto err;
+                       goto err_register_hooks;
 
                i++;
        }
        return 0;
-err:
+
+err_register_hooks:
        if (i)
                nft_table_disable(net, table, i);
        return err;
@@ -1225,6 +1281,46 @@ nla_put_failure:
        return -ENOSPC;
 }
 
+static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+                                  const struct nft_base_chain *basechain)
+{
+       const struct nf_hook_ops *ops = &basechain->ops;
+       struct nft_hook *hook, *first = NULL;
+       struct nlattr *nest, *nest_devs;
+       int n = 0;
+
+       nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
+       if (nest == NULL)
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+               goto nla_put_failure;
+
+       if (family == NFPROTO_NETDEV) {
+               nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+               list_for_each_entry(hook, &basechain->hook_list, list) {
+                       if (!first)
+                               first = hook;
+
+                       if (nla_put_string(skb, NFTA_DEVICE_NAME,
+                                          hook->ops.dev->name))
+                               goto nla_put_failure;
+                       n++;
+               }
+               nla_nest_end(skb, nest_devs);
+
+               if (n == 1 &&
+                   nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
+                       goto nla_put_failure;
+       }
+       nla_nest_end(skb, nest);
+
+       return 0;
+nla_put_failure:
+       return -1;
+}
+
 static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
                                     u32 portid, u32 seq, int event, u32 flags,
                                     int family, const struct nft_table *table,
@@ -1253,21 +1349,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 
        if (nft_is_base_chain(chain)) {
                const struct nft_base_chain *basechain = nft_base_chain(chain);
-               const struct nf_hook_ops *ops = &basechain->ops;
                struct nft_stats __percpu *stats;
-               struct nlattr *nest;
 
-               nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
-               if (nest == NULL)
-                       goto nla_put_failure;
-               if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
-                       goto nla_put_failure;
-               if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+               if (nft_dump_basechain_hook(skb, family, basechain))
                        goto nla_put_failure;
-               if (basechain->dev_name[0] &&
-                   nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
-                       goto nla_put_failure;
-               nla_nest_end(skb, nest);
 
                if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
                                 htonl(basechain->policy)))
@@ -1485,6 +1570,7 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
 static void nf_tables_chain_destroy(struct nft_ctx *ctx)
 {
        struct nft_chain *chain = ctx->chain;
+       struct nft_hook *hook, *next;
 
        if (WARN_ON(chain->use > 0))
                return;
@@ -1495,6 +1581,13 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
        if (nft_is_base_chain(chain)) {
                struct nft_base_chain *basechain = nft_base_chain(chain);
 
+               if (ctx->family == NFPROTO_NETDEV) {
+                       list_for_each_entry_safe(hook, next,
+                                                &basechain->hook_list, list) {
+                               list_del_rcu(&hook->list);
+                               kfree_rcu(hook, rcu);
+                       }
+               }
                module_put(basechain->type->owner);
                if (rcu_access_pointer(basechain->stats)) {
                        static_branch_dec(&nft_counters_enabled);
@@ -1508,13 +1601,125 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
        }
 }
 
+static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
+                                             const struct nlattr *attr)
+{
+       struct net_device *dev;
+       char ifname[IFNAMSIZ];
+       struct nft_hook *hook;
+       int err;
+
+       hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+       if (!hook) {
+               err = -ENOMEM;
+               goto err_hook_alloc;
+       }
+
+       nla_strlcpy(ifname, attr, IFNAMSIZ);
+       dev = __dev_get_by_name(net, ifname);
+       if (!dev) {
+               err = -ENOENT;
+               goto err_hook_dev;
+       }
+       hook->ops.dev = dev;
+
+       return hook;
+
+err_hook_dev:
+       kfree(hook);
+err_hook_alloc:
+       return ERR_PTR(err);
+}
+
+static bool nft_hook_list_find(struct list_head *hook_list,
+                              const struct nft_hook *this)
+{
+       struct nft_hook *hook;
+
+       list_for_each_entry(hook, hook_list, list) {
+               if (this->ops.dev == hook->ops.dev)
+                       return true;
+       }
+
+       return false;
+}
+
+static int nf_tables_parse_netdev_hooks(struct net *net,
+                                       const struct nlattr *attr,
+                                       struct list_head *hook_list)
+{
+       struct nft_hook *hook, *next;
+       const struct nlattr *tmp;
+       int rem, n = 0, err;
+
+       nla_for_each_nested(tmp, attr, rem) {
+               if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+                       err = -EINVAL;
+                       goto err_hook;
+               }
+
+               hook = nft_netdev_hook_alloc(net, tmp);
+               if (IS_ERR(hook)) {
+                       err = PTR_ERR(hook);
+                       goto err_hook;
+               }
+               if (nft_hook_list_find(hook_list, hook)) {
+                       err = -EEXIST;
+                       goto err_hook;
+               }
+               list_add_tail(&hook->list, hook_list);
+               n++;
+
+               if (n == NFT_NETDEVICE_MAX) {
+                       err = -EFBIG;
+                       goto err_hook;
+               }
+       }
+       if (!n)
+               return -EINVAL;
+
+       return 0;
+
+err_hook:
+       list_for_each_entry_safe(hook, next, hook_list, list) {
+               list_del(&hook->list);
+               kfree(hook);
+       }
+       return err;
+}
+
 struct nft_chain_hook {
        u32                             num;
        s32                             priority;
        const struct nft_chain_type     *type;
-       struct net_device               *dev;
+       struct list_head                list;
 };
 
+static int nft_chain_parse_netdev(struct net *net,
+                                 struct nlattr *tb[],
+                                 struct list_head *hook_list)
+{
+       struct nft_hook *hook;
+       int err;
+
+       if (tb[NFTA_HOOK_DEV]) {
+               hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
+               if (IS_ERR(hook))
+                       return PTR_ERR(hook);
+
+               list_add_tail(&hook->list, hook_list);
+       } else if (tb[NFTA_HOOK_DEVS]) {
+               err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
+                                                  hook_list);
+               if (err < 0)
+                       return err;
+       } else {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int nft_chain_parse_hook(struct net *net,
                                const struct nlattr * const nla[],
                                struct nft_chain_hook *hook, u8 family,
@@ -1522,7 +1727,6 @@ static int nft_chain_parse_hook(struct net *net,
 {
        struct nlattr *ha[NFTA_HOOK_MAX + 1];
        const struct nft_chain_type *type;
-       struct net_device *dev;
        int err;
 
        lockdep_assert_held(&net->nft.commit_mutex);
@@ -1560,23 +1764,14 @@ static int nft_chain_parse_hook(struct net *net,
 
        hook->type = type;
 
-       hook->dev = NULL;
+       INIT_LIST_HEAD(&hook->list);
        if (family == NFPROTO_NETDEV) {
-               char ifname[IFNAMSIZ];
-
-               if (!ha[NFTA_HOOK_DEV]) {
-                       module_put(type->owner);
-                       return -EOPNOTSUPP;
-               }
-
-               nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
-               dev = __dev_get_by_name(net, ifname);
-               if (!dev) {
+               err = nft_chain_parse_netdev(net, ha, &hook->list);
+               if (err < 0) {
                        module_put(type->owner);
-                       return -ENOENT;
+                       return err;
                }
-               hook->dev = dev;
-       } else if (ha[NFTA_HOOK_DEV]) {
+       } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
                module_put(type->owner);
                return -EOPNOTSUPP;
        }
@@ -1586,6 +1781,12 @@ static int nft_chain_parse_hook(struct net *net,
 
 static void nft_chain_release_hook(struct nft_chain_hook *hook)
 {
+       struct nft_hook *h, *next;
+
+       list_for_each_entry_safe(h, next, &hook->list, list) {
+               list_del(&h->list);
+               kfree(h);
+       }
        module_put(hook->type->owner);
 }
 
@@ -1610,6 +1811,49 @@ static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *cha
        return kvmalloc(alloc, GFP_KERNEL);
 }
 
+static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
+                                   const struct nft_chain_hook *hook,
+                                   struct nft_chain *chain)
+{
+       ops->pf         = family;
+       ops->hooknum    = hook->num;
+       ops->priority   = hook->priority;
+       ops->priv       = chain;
+       ops->hook       = hook->type->hooks[ops->hooknum];
+}
+
+static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
+                             struct nft_chain_hook *hook, u32 flags)
+{
+       struct nft_chain *chain;
+       struct nft_hook *h;
+
+       basechain->type = hook->type;
+       INIT_LIST_HEAD(&basechain->hook_list);
+       chain = &basechain->chain;
+
+       if (family == NFPROTO_NETDEV) {
+               list_splice_init(&hook->list, &basechain->hook_list);
+               list_for_each_entry(h, &basechain->hook_list, list)
+                       nft_basechain_hook_init(&h->ops, family, hook, chain);
+
+               basechain->ops.hooknum  = hook->num;
+               basechain->ops.priority = hook->priority;
+       } else {
+               nft_basechain_hook_init(&basechain->ops, family, hook, chain);
+       }
+
+       chain->flags |= NFT_BASE_CHAIN | flags;
+       basechain->policy = NF_ACCEPT;
+       if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+           nft_chain_offload_priority(basechain) < 0)
+               return -EOPNOTSUPP;
+
+       flow_block_init(&basechain->flow_block);
+
+       return 0;
+}
+
 static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                              u8 policy, u32 flags)
 {
@@ -1628,7 +1872,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
        if (nla[NFTA_CHAIN_HOOK]) {
                struct nft_chain_hook hook;
-               struct nf_hook_ops *ops;
 
                err = nft_chain_parse_hook(net, nla, &hook, family, true);
                if (err < 0)
@@ -1639,9 +1882,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                        nft_chain_release_hook(&hook);
                        return -ENOMEM;
                }
-
-               if (hook.dev != NULL)
-                       strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
+               chain = &basechain->chain;
 
                if (nla[NFTA_CHAIN_COUNTERS]) {
                        stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@ -1654,24 +1895,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                        static_branch_inc(&nft_counters_enabled);
                }
 
-               basechain->type = hook.type;
-               chain = &basechain->chain;
-
-               ops             = &basechain->ops;
-               ops->pf         = family;
-               ops->hooknum    = hook.num;
-               ops->priority   = hook.priority;
-               ops->priv       = chain;
-               ops->hook       = hook.type->hooks[ops->hooknum];
-               ops->dev        = hook.dev;
-
-               chain->flags |= NFT_BASE_CHAIN | flags;
-               basechain->policy = NF_ACCEPT;
-               if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
-                   nft_chain_offload_priority(basechain) < 0)
-                       return -EOPNOTSUPP;
-
-               flow_block_init(&basechain->flow_block);
+               err = nft_basechain_init(basechain, family, &hook, flags);
+               if (err < 0) {
+                       nft_chain_release_hook(&hook);
+                       kfree(basechain);
+                       return err;
+               }
        } else {
                chain = kzalloc(sizeof(*chain), GFP_KERNEL);
                if (chain == NULL)
@@ -1731,6 +1960,25 @@ err1:
        return err;
 }
 
+static bool nft_hook_list_equal(struct list_head *hook_list1,
+                               struct list_head *hook_list2)
+{
+       struct nft_hook *hook;
+       int n = 0, m = 0;
+
+       n = 0;
+       list_for_each_entry(hook, hook_list2, list) {
+               if (!nft_hook_list_find(hook_list1, hook))
+                       return false;
+
+               n++;
+       }
+       list_for_each_entry(hook, hook_list1, list)
+               m++;
+
+       return n == m;
+}
+
 static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
                              u32 flags)
 {
@@ -1762,12 +2010,19 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
                        return -EBUSY;
                }
 
-               ops = &basechain->ops;
-               if (ops->hooknum != hook.num ||
-                   ops->priority != hook.priority ||
-                   ops->dev != hook.dev) {
-                       nft_chain_release_hook(&hook);
-                       return -EBUSY;
+               if (ctx->family == NFPROTO_NETDEV) {
+                       if (!nft_hook_list_equal(&basechain->hook_list,
+                                                &hook.list)) {
+                               nft_chain_release_hook(&hook);
+                               return -EBUSY;
+                       }
+               } else {
+                       ops = &basechain->ops;
+                       if (ops->hooknum != hook.num ||
+                           ops->priority != hook.priority) {
+                               nft_chain_release_hook(&hook);
+                               return -EBUSY;
+                       }
                }
                nft_chain_release_hook(&hook);
        }
@@ -5628,43 +5883,6 @@ nft_flowtable_lookup_byhandle(const struct nft_table *table,
        return ERR_PTR(-ENOENT);
 }
 
-static int nf_tables_parse_devices(const struct nft_ctx *ctx,
-                                  const struct nlattr *attr,
-                                  struct net_device *dev_array[], int *len)
-{
-       const struct nlattr *tmp;
-       struct net_device *dev;
-       char ifname[IFNAMSIZ];
-       int rem, n = 0, err;
-
-       nla_for_each_nested(tmp, attr, rem) {
-               if (nla_type(tmp) != NFTA_DEVICE_NAME) {
-                       err = -EINVAL;
-                       goto err1;
-               }
-
-               nla_strlcpy(ifname, tmp, IFNAMSIZ);
-               dev = __dev_get_by_name(ctx->net, ifname);
-               if (!dev) {
-                       err = -ENOENT;
-                       goto err1;
-               }
-
-               dev_array[n++] = dev;
-               if (n == NFT_FLOWTABLE_DEVICE_MAX) {
-                       err = -EFBIG;
-                       goto err1;
-               }
-       }
-       if (!len)
-               return -EINVAL;
-
-       err = 0;
-err1:
-       *len = n;
-       return err;
-}
-
 static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
        [NFTA_FLOWTABLE_HOOK_NUM]       = { .type = NLA_U32 },
        [NFTA_FLOWTABLE_HOOK_PRIORITY]  = { .type = NLA_U32 },
@@ -5675,11 +5893,10 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
                                          const struct nlattr *attr,
                                          struct nft_flowtable *flowtable)
 {
-       struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
        struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
-       struct nf_hook_ops *ops;
+       struct nft_hook *hook;
        int hooknum, priority;
-       int err, n = 0, i;
+       int err;
 
        err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
                                          nft_flowtable_hook_policy, NULL);
@@ -5697,27 +5914,21 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 
        priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
 
-       err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
-                                     dev_array, &n);
+       err = nf_tables_parse_netdev_hooks(ctx->net,
+                                          tb[NFTA_FLOWTABLE_HOOK_DEVS],
+                                          &flowtable->hook_list);
        if (err < 0)
                return err;
 
-       ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
-       if (!ops)
-               return -ENOMEM;
-
-       flowtable->hooknum      = hooknum;
-       flowtable->priority     = priority;
-       flowtable->ops          = ops;
-       flowtable->ops_len      = n;
+       flowtable->hooknum              = hooknum;
+       flowtable->data.priority        = priority;
 
-       for (i = 0; i < n; i++) {
-               flowtable->ops[i].pf            = NFPROTO_NETDEV;
-               flowtable->ops[i].hooknum       = hooknum;
-               flowtable->ops[i].priority      = priority;
-               flowtable->ops[i].priv          = &flowtable->data;
-               flowtable->ops[i].hook          = flowtable->data.type->hook;
-               flowtable->ops[i].dev           = dev_array[i];
+       list_for_each_entry(hook, &flowtable->hook_list, list) {
+               hook->ops.pf            = NFPROTO_NETDEV;
+               hook->ops.hooknum       = hooknum;
+               hook->ops.priority      = priority;
+               hook->ops.priv          = &flowtable->data;
+               hook->ops.hook          = flowtable->data.type->hook;
        }
 
        return err;
@@ -5757,14 +5968,51 @@ nft_flowtable_type_get(struct net *net, u8 family)
 static void nft_unregister_flowtable_net_hooks(struct net *net,
                                               struct nft_flowtable *flowtable)
 {
-       int i;
+       struct nft_hook *hook;
 
-       for (i = 0; i < flowtable->ops_len; i++) {
-               if (!flowtable->ops[i].dev)
-                       continue;
+       list_for_each_entry(hook, &flowtable->hook_list, list)
+               nf_unregister_net_hook(net, &hook->ops);
+}
 
-               nf_unregister_net_hook(net, &flowtable->ops[i]);
+static int nft_register_flowtable_net_hooks(struct net *net,
+                                           struct nft_table *table,
+                                           struct nft_flowtable *flowtable)
+{
+       struct nft_hook *hook, *hook2, *next;
+       struct nft_flowtable *ft;
+       int err, i = 0;
+
+       list_for_each_entry(hook, &flowtable->hook_list, list) {
+               list_for_each_entry(ft, &table->flowtables, list) {
+                       list_for_each_entry(hook2, &ft->hook_list, list) {
+                               if (hook->ops.dev == hook2->ops.dev &&
+                                   hook->ops.pf == hook2->ops.pf) {
+                                       err = -EBUSY;
+                                       goto err_unregister_net_hooks;
+                               }
+                       }
+               }
+
+               err = nf_register_net_hook(net, &hook->ops);
+               if (err < 0)
+                       goto err_unregister_net_hooks;
+
+               i++;
        }
+
+       return 0;
+
+err_unregister_net_hooks:
+       list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+               if (i-- <= 0)
+                       break;
+
+               nf_unregister_net_hook(net, &hook->ops);
+               list_del_rcu(&hook->list);
+               kfree_rcu(hook, rcu);
+       }
+
+       return err;
 }
 
 static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
@@ -5775,12 +6023,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        const struct nf_flowtable_type *type;
-       struct nft_flowtable *flowtable, *ft;
        u8 genmask = nft_genmask_next(net);
        int family = nfmsg->nfgen_family;
+       struct nft_flowtable *flowtable;
+       struct nft_hook *hook, *next;
        struct nft_table *table;
        struct nft_ctx ctx;
-       int err, i, k;
+       int err;
 
        if (!nla[NFTA_FLOWTABLE_TABLE] ||
            !nla[NFTA_FLOWTABLE_NAME] ||
@@ -5819,6 +6068,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 
        flowtable->table = table;
        flowtable->handle = nf_tables_alloc_handle(table);
+       INIT_LIST_HEAD(&flowtable->hook_list);
 
        flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
        if (!flowtable->name) {
@@ -5842,43 +6092,24 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
        if (err < 0)
                goto err4;
 
-       for (i = 0; i < flowtable->ops_len; i++) {
-               if (!flowtable->ops[i].dev)
-                       continue;
-
-               list_for_each_entry(ft, &table->flowtables, list) {
-                       for (k = 0; k < ft->ops_len; k++) {
-                               if (!ft->ops[k].dev)
-                                       continue;
-
-                               if (flowtable->ops[i].dev == ft->ops[k].dev &&
-                                   flowtable->ops[i].pf == ft->ops[k].pf) {
-                                       err = -EBUSY;
-                                       goto err5;
-                               }
-                       }
-               }
-
-               err = nf_register_net_hook(net, &flowtable->ops[i]);
-               if (err < 0)
-                       goto err5;
-       }
+       err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
+       if (err < 0)
+               goto err4;
 
        err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
        if (err < 0)
-               goto err6;
+               goto err5;
 
        list_add_tail_rcu(&flowtable->list, &table->flowtables);
        table->use++;
 
        return 0;
-err6:
-       i = flowtable->ops_len;
 err5:
-       for (k = i - 1; k >= 0; k--)
-               nf_unregister_net_hook(net, &flowtable->ops[k]);
-
-       kfree(flowtable->ops);
+       list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+               nf_unregister_net_hook(net, &hook->ops);
+               list_del_rcu(&hook->list);
+               kfree_rcu(hook, rcu);
+       }
 err4:
        flowtable->data.type->free(&flowtable->data);
 err3:
@@ -5945,8 +6176,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 {
        struct nlattr *nest, *nest_devs;
        struct nfgenmsg *nfmsg;
+       struct nft_hook *hook;
        struct nlmsghdr *nlh;
-       int i;
 
        event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
@@ -5969,18 +6200,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
        if (!nest)
                goto nla_put_failure;
        if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
-           nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
+           nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority)))
                goto nla_put_failure;
 
        nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
        if (!nest_devs)
                goto nla_put_failure;
 
-       for (i = 0; i < flowtable->ops_len; i++) {
-               const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
-
-               if (dev &&
-                   nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
+       list_for_each_entry_rcu(hook, &flowtable->hook_list, list) {
+               if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
                        goto nla_put_failure;
        }
        nla_nest_end(skb, nest_devs);
@@ -6171,7 +6399,12 @@ err:
 
 static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 {
-       kfree(flowtable->ops);
+       struct nft_hook *hook, *next;
+
+       list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+               list_del_rcu(&hook->list);
+               kfree(hook);
+       }
        kfree(flowtable->name);
        flowtable->data.type->free(&flowtable->data);
        module_put(flowtable->data.type->owner);
@@ -6211,14 +6444,15 @@ nla_put_failure:
 static void nft_flowtable_event(unsigned long event, struct net_device *dev,
                                struct nft_flowtable *flowtable)
 {
-       int i;
+       struct nft_hook *hook;
 
-       for (i = 0; i < flowtable->ops_len; i++) {
-               if (flowtable->ops[i].dev != dev)
+       list_for_each_entry(hook, &flowtable->hook_list, list) {
+               if (hook->ops.dev != dev)
                        continue;
 
-               nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
-               flowtable->ops[i].dev = NULL;
+               nf_unregister_net_hook(dev_net(dev), &hook->ops);
+               list_del_rcu(&hook->list);
+               kfree_rcu(hook, rcu);
                break;
        }
 }
index ad783f4..93e27a6 100644 (file)
@@ -132,13 +132,13 @@ static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
        common->extack = extack;
 }
 
-static int nft_setup_cb_call(struct nft_base_chain *basechain,
-                            enum tc_setup_type type, void *type_data)
+static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
+                            struct list_head *cb_list)
 {
        struct flow_block_cb *block_cb;
        int err;
 
-       list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
+       list_for_each_entry(block_cb, cb_list, list) {
                err = block_cb->cb(type, type_data, block_cb->cb_priv);
                if (err < 0)
                        return err;
@@ -155,32 +155,44 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
        return 0;
 }
 
+static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
+                                      const struct nft_base_chain *basechain,
+                                      const struct nft_rule *rule,
+                                      const struct nft_flow_rule *flow,
+                                      enum flow_cls_command command)
+{
+       struct netlink_ext_ack extack;
+       __be16 proto = ETH_P_ALL;
+
+       memset(cls_flow, 0, sizeof(*cls_flow));
+
+       if (flow)
+               proto = flow->proto;
+
+       nft_flow_offload_common_init(&cls_flow->common, proto,
+                                    basechain->ops.priority, &extack);
+       cls_flow->command = command;
+       cls_flow->cookie = (unsigned long) rule;
+       if (flow)
+               cls_flow->rule = flow->rule;
+}
+
 static int nft_flow_offload_rule(struct nft_chain *chain,
                                 struct nft_rule *rule,
                                 struct nft_flow_rule *flow,
                                 enum flow_cls_command command)
 {
-       struct flow_cls_offload cls_flow = {};
+       struct flow_cls_offload cls_flow;
        struct nft_base_chain *basechain;
-       struct netlink_ext_ack extack;
-       __be16 proto = ETH_P_ALL;
 
        if (!nft_is_base_chain(chain))
                return -EOPNOTSUPP;
 
        basechain = nft_base_chain(chain);
+       nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, command);
 
-       if (flow)
-               proto = flow->proto;
-
-       nft_flow_offload_common_init(&cls_flow.common, proto,
-                                    basechain->ops.priority, &extack);
-       cls_flow.command = command;
-       cls_flow.cookie = (unsigned long) rule;
-       if (flow)
-               cls_flow.rule = flow->rule;
-
-       return nft_setup_cb_call(basechain, TC_SETUP_CLSFLOWER, &cls_flow);
+       return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
+                                &basechain->flow_block.cb_list);
 }
 
 static int nft_flow_offload_bind(struct flow_block_offload *bo,
@@ -194,6 +206,16 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
                                   struct nft_base_chain *basechain)
 {
        struct flow_block_cb *block_cb, *next;
+       struct flow_cls_offload cls_flow;
+       struct nft_chain *chain;
+       struct nft_rule *rule;
+
+       chain = &basechain->chain;
+       list_for_each_entry(rule, &chain->rules, list) {
+               nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
+                                          FLOW_CLS_DESTROY);
+               nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
+       }
 
        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                list_del(&block_cb->list);
@@ -224,20 +246,30 @@ static int nft_block_setup(struct nft_base_chain *basechain,
        return err;
 }
 
+static void nft_flow_block_offload_init(struct flow_block_offload *bo,
+                                       struct net *net,
+                                       enum flow_block_command cmd,
+                                       struct nft_base_chain *basechain,
+                                       struct netlink_ext_ack *extack)
+{
+       memset(bo, 0, sizeof(*bo));
+       bo->net         = net;
+       bo->block       = &basechain->flow_block;
+       bo->command     = cmd;
+       bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+       bo->extack      = extack;
+       INIT_LIST_HEAD(&bo->cb_list);
+}
+
 static int nft_block_offload_cmd(struct nft_base_chain *chain,
                                 struct net_device *dev,
                                 enum flow_block_command cmd)
 {
        struct netlink_ext_ack extack = {};
-       struct flow_block_offload bo = {};
+       struct flow_block_offload bo;
        int err;
 
-       bo.net = dev_net(dev);
-       bo.block = &chain->flow_block;
-       bo.command = cmd;
-       bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
-       bo.extack = &extack;
-       INIT_LIST_HEAD(&bo.cb_list);
+       nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
 
        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
        if (err < 0)
@@ -253,17 +285,12 @@ static void nft_indr_block_ing_cmd(struct net_device *dev,
                                   enum flow_block_command cmd)
 {
        struct netlink_ext_ack extack = {};
-       struct flow_block_offload bo = {};
+       struct flow_block_offload bo;
 
        if (!chain)
                return;
 
-       bo.net = dev_net(dev);
-       bo.block = &chain->flow_block;
-       bo.command = cmd;
-       bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
-       bo.extack = &extack;
-       INIT_LIST_HEAD(&bo.cb_list);
+       nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
 
        cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
 
@@ -274,15 +301,10 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
                                      struct net_device *dev,
                                      enum flow_block_command cmd)
 {
-       struct flow_block_offload bo = {};
        struct netlink_ext_ack extack = {};
+       struct flow_block_offload bo;
 
-       bo.net = dev_net(dev);
-       bo.block = &chain->flow_block;
-       bo.command = cmd;
-       bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
-       bo.extack = &extack;
-       INIT_LIST_HEAD(&bo.cb_list);
+       nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
 
        flow_indr_block_call(dev, &bo, cmd);
 
@@ -294,32 +316,73 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
 
 #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
 
-static int nft_flow_offload_chain(struct nft_chain *chain,
-                                 u8 *ppolicy,
+static int nft_chain_offload_cmd(struct nft_base_chain *basechain,
+                                struct net_device *dev,
+                                enum flow_block_command cmd)
+{
+       int err;
+
+       if (dev->netdev_ops->ndo_setup_tc)
+               err = nft_block_offload_cmd(basechain, dev, cmd);
+       else
+               err = nft_indr_block_offload_cmd(basechain, dev, cmd);
+
+       return err;
+}
+
+static int nft_flow_block_chain(struct nft_base_chain *basechain,
+                               const struct net_device *this_dev,
+                               enum flow_block_command cmd)
+{
+       struct net_device *dev;
+       struct nft_hook *hook;
+       int err, i = 0;
+
+       list_for_each_entry(hook, &basechain->hook_list, list) {
+               dev = hook->ops.dev;
+               if (this_dev && this_dev != dev)
+                       continue;
+
+               err = nft_chain_offload_cmd(basechain, dev, cmd);
+               if (err < 0 && cmd == FLOW_BLOCK_BIND) {
+                       if (!this_dev)
+                               goto err_flow_block;
+
+                       return err;
+               }
+               i++;
+       }
+
+       return 0;
+
+err_flow_block:
+       list_for_each_entry(hook, &basechain->hook_list, list) {
+               if (i-- <= 0)
+                       break;
+
+               dev = hook->ops.dev;
+               nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND);
+       }
+       return err;
+}
+
+static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
                                  enum flow_block_command cmd)
 {
        struct nft_base_chain *basechain;
-       struct net_device *dev;
        u8 policy;
 
        if (!nft_is_base_chain(chain))
                return -EOPNOTSUPP;
 
        basechain = nft_base_chain(chain);
-       dev = basechain->ops.dev;
-       if (!dev)
-               return -EOPNOTSUPP;
-
        policy = ppolicy ? *ppolicy : basechain->policy;
 
        /* Only default policy to accept is supported for now. */
        if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
                return -EOPNOTSUPP;
 
-       if (dev->netdev_ops->ndo_setup_tc)
-               return nft_block_offload_cmd(basechain, dev, cmd);
-       else
-               return nft_indr_block_offload_cmd(basechain, dev, cmd);
+       return nft_flow_block_chain(basechain, NULL, cmd);
 }
 
 int nft_flow_rule_offload_commit(struct net *net)
@@ -385,6 +448,7 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
 {
        struct nft_base_chain *basechain;
        struct net *net = dev_net(dev);
+       struct nft_hook *hook, *found;
        const struct nft_table *table;
        struct nft_chain *chain;
 
@@ -397,8 +461,16 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
                            !(chain->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;
 
+                       found = NULL;
                        basechain = nft_base_chain(chain);
-                       if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ))
+                       list_for_each_entry(hook, &basechain->hook_list, list) {
+                               if (hook->ops.dev != dev)
+                                       continue;
+
+                               found = hook;
+                               break;
+                       }
+                       if (!found)
                                continue;
 
                        return chain;
@@ -426,18 +498,6 @@ static void nft_indr_block_cb(struct net_device *dev,
        mutex_unlock(&net->nft.commit_mutex);
 }
 
-static void nft_offload_chain_clean(struct nft_chain *chain)
-{
-       struct nft_rule *rule;
-
-       list_for_each_entry(rule, &chain->rules, list) {
-               nft_flow_offload_rule(chain, rule,
-                                     NULL, FLOW_CLS_DESTROY);
-       }
-
-       nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
-}
-
 static int nft_offload_netdev_event(struct notifier_block *this,
                                    unsigned long event, void *ptr)
 {
@@ -448,7 +508,9 @@ static int nft_offload_netdev_event(struct notifier_block *this,
        mutex_lock(&net->nft.commit_mutex);
        chain = __nft_offload_get_chain(dev);
        if (chain)
-               nft_offload_chain_clean(chain);
+               nft_flow_block_chain(nft_base_chain(chain), dev,
+                                    FLOW_BLOCK_UNBIND);
+
        mutex_unlock(&net->nft.commit_mutex);
 
        return NOTIFY_DONE;
index b5d5d07..c78d01b 100644 (file)
@@ -287,28 +287,35 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
                             struct nft_ctx *ctx)
 {
        struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
+       struct nft_hook *hook, *found = NULL;
+       int n = 0;
 
-       switch (event) {
-       case NETDEV_UNREGISTER:
-               if (strcmp(basechain->dev_name, dev->name) != 0)
-                       return;
-
-               /* UNREGISTER events are also happpening on netns exit.
-                *
-                * Altough nf_tables core releases all tables/chains, only
-                * this event handler provides guarantee that
-                * basechain.ops->dev is still accessible, so we cannot
-                * skip exiting net namespaces.
-                */
-               __nft_release_basechain(ctx);
-               break;
-       case NETDEV_CHANGENAME:
-               if (dev->ifindex != basechain->ops.dev->ifindex)
-                       return;
+       if (event != NETDEV_UNREGISTER)
+               return;
 
-               strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
-               break;
+       list_for_each_entry(hook, &basechain->hook_list, list) {
+               if (hook->ops.dev == dev)
+                       found = hook;
+
+               n++;
        }
+       if (!found)
+               return;
+
+       if (n > 1) {
+               nf_unregister_net_hook(ctx->net, &found->ops);
+               list_del_rcu(&found->list);
+               kfree_rcu(found, rcu);
+               return;
+       }
+
+       /* UNREGISTER events are also happening on netns exit.
+        *
+        * Although nf_tables core releases all tables/chains, only this event
+        * handler provides guarantee that hook->ops.dev is still accessible,
+        * so we cannot skip exiting net namespaces.
+        */
+       __nft_release_basechain(ctx);
 }
 
 static int nf_tables_netdev_event(struct notifier_block *this,
index be7798a..713fb38 100644 (file)
@@ -239,11 +239,7 @@ static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
                return 0;
 
        /* Error message? */
-       if (icmph->type != ICMP_DEST_UNREACH &&
-           icmph->type != ICMP_SOURCE_QUENCH &&
-           icmph->type != ICMP_TIME_EXCEEDED &&
-           icmph->type != ICMP_PARAMETERPROB &&
-           icmph->type != ICMP_REDIRECT)
+       if (!icmp_is_err(icmph->type))
                return 0;
 
        *nhoff += iphsz + sizeof(_ih);
index efccd1a..0522b2b 100644 (file)
@@ -458,10 +458,63 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
 }
 EXPORT_SYMBOL(genlmsg_put);
 
+static struct genl_dumpit_info *genl_dumpit_info_alloc(void)
+{
+       return kmalloc(sizeof(struct genl_dumpit_info), GFP_KERNEL);
+}
+
+static void genl_dumpit_info_free(const struct genl_dumpit_info *info)
+{
+       kfree(info);
+}
+
+static struct nlattr **
+genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
+                               struct nlmsghdr *nlh,
+                               struct netlink_ext_ack *extack,
+                               const struct genl_ops *ops,
+                               int hdrlen,
+                               enum genl_validate_flags no_strict_flag,
+                               bool parallel)
+{
+       enum netlink_validation validate = ops->validate & no_strict_flag ?
+                                          NL_VALIDATE_LIBERAL :
+                                          NL_VALIDATE_STRICT;
+       struct nlattr **attrbuf;
+       int err;
+
+       if (!family->maxattr)
+               return NULL;
+
+       if (parallel) {
+               attrbuf = kmalloc_array(family->maxattr + 1,
+                                       sizeof(struct nlattr *), GFP_KERNEL);
+               if (!attrbuf)
+                       return ERR_PTR(-ENOMEM);
+       } else {
+               attrbuf = family->attrbuf;
+       }
+
+       err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
+                           family->policy, validate, extack);
+       if (err && parallel) {
+               kfree(attrbuf);
+               return ERR_PTR(err);
+       }
+       return attrbuf;
+}
+
+static void genl_family_rcv_msg_attrs_free(const struct genl_family *family,
+                                          struct nlattr **attrbuf,
+                                          bool parallel)
+{
+       if (parallel)
+               kfree(attrbuf);
+}
+
 static int genl_lock_start(struct netlink_callback *cb)
 {
-       /* our ops are always const - netlink API doesn't propagate that */
-       const struct genl_ops *ops = cb->data;
+       const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
        int rc = 0;
 
        if (ops->start) {
@@ -474,8 +527,7 @@ static int genl_lock_start(struct netlink_callback *cb)
 
 static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       /* our ops are always const - netlink API doesn't propagate that */
-       const struct genl_ops *ops = cb->data;
+       const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
        int rc;
 
        genl_lock();
@@ -486,8 +538,8 @@ static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 
 static int genl_lock_done(struct netlink_callback *cb)
 {
-       /* our ops are always const - netlink API doesn't propagate that */
-       const struct genl_ops *ops = cb->data;
+       const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+       const struct genl_ops *ops = info->ops;
        int rc = 0;
 
        if (ops->done) {
@@ -495,120 +547,111 @@ static int genl_lock_done(struct netlink_callback *cb)
                rc = ops->done(cb);
                genl_unlock();
        }
+       genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+       genl_dumpit_info_free(info);
        return rc;
 }
 
-static int genl_family_rcv_msg(const struct genl_family *family,
-                              struct sk_buff *skb,
-                              struct nlmsghdr *nlh,
-                              struct netlink_ext_ack *extack)
+static int genl_parallel_done(struct netlink_callback *cb)
 {
-       const struct genl_ops *ops;
-       struct net *net = sock_net(skb->sk);
-       struct genl_info info;
-       struct genlmsghdr *hdr = nlmsg_data(nlh);
-       struct nlattr **attrbuf;
-       int hdrlen, err;
+       const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+       const struct genl_ops *ops = info->ops;
+       int rc = 0;
 
-       /* this family doesn't exist in this netns */
-       if (!family->netnsok && !net_eq(net, &init_net))
-               return -ENOENT;
+       if (ops->done)
+               rc = ops->done(cb);
+       genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+       genl_dumpit_info_free(info);
+       return rc;
+}
 
-       hdrlen = GENL_HDRLEN + family->hdrsize;
-       if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
-               return -EINVAL;
+static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
+                                     struct sk_buff *skb,
+                                     struct nlmsghdr *nlh,
+                                     struct netlink_ext_ack *extack,
+                                     const struct genl_ops *ops,
+                                     int hdrlen, struct net *net)
+{
+       struct genl_dumpit_info *info;
+       struct nlattr **attrs = NULL;
+       int err;
 
-       ops = genl_get_cmd(hdr->cmd, family);
-       if (ops == NULL)
+       if (!ops->dumpit)
                return -EOPNOTSUPP;
 
-       if ((ops->flags & GENL_ADMIN_PERM) &&
-           !netlink_capable(skb, CAP_NET_ADMIN))
-               return -EPERM;
-
-       if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
-           !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
-               return -EPERM;
-
-       if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
-               int rc;
-
-               if (ops->dumpit == NULL)
-                       return -EOPNOTSUPP;
-
-               if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) {
-                       int hdrlen = GENL_HDRLEN + family->hdrsize;
-
-                       if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
-                               return -EINVAL;
+       if (ops->validate & GENL_DONT_VALIDATE_DUMP)
+               goto no_attrs;
 
-                       if (family->maxattr) {
-                               unsigned int validate = NL_VALIDATE_STRICT;
-
-                               if (ops->validate &
-                                   GENL_DONT_VALIDATE_DUMP_STRICT)
-                                       validate = NL_VALIDATE_LIBERAL;
-                               rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
-                                                   nlmsg_attrlen(nlh, hdrlen),
-                                                   family->maxattr,
-                                                   family->policy,
-                                                   validate, extack);
-                               if (rc)
-                                       return rc;
-                       }
-               }
+       if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+               return -EINVAL;
 
-               if (!family->parallel_ops) {
-                       struct netlink_dump_control c = {
-                               .module = family->module,
-                               /* we have const, but the netlink API doesn't */
-                               .data = (void *)ops,
-                               .start = genl_lock_start,
-                               .dump = genl_lock_dumpit,
-                               .done = genl_lock_done,
-                       };
+       attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
+                                               ops, hdrlen,
+                                               GENL_DONT_VALIDATE_DUMP_STRICT,
+                                               true);
+       if (IS_ERR(attrs))
+               return PTR_ERR(attrs);
+
+no_attrs:
+       /* Allocate dumpit info. It is going to be freed by done() callback. */
+       info = genl_dumpit_info_alloc();
+       if (!info) {
+               genl_family_rcv_msg_attrs_free(family, attrs, true);
+               return -ENOMEM;
+       }
 
-                       genl_unlock();
-                       rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
-                       genl_lock();
+       info->family = family;
+       info->ops = ops;
+       info->attrs = attrs;
 
-               } else {
-                       struct netlink_dump_control c = {
-                               .module = family->module,
-                               .start = ops->start,
-                               .dump = ops->dumpit,
-                               .done = ops->done,
-                       };
+       if (!family->parallel_ops) {
+               struct netlink_dump_control c = {
+                       .module = family->module,
+                       .data = info,
+                       .start = genl_lock_start,
+                       .dump = genl_lock_dumpit,
+                       .done = genl_lock_done,
+               };
 
-                       rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
-               }
+               genl_unlock();
+               err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+               genl_lock();
 
-               return rc;
+       } else {
+               struct netlink_dump_control c = {
+                       .module = family->module,
+                       .data = info,
+                       .start = ops->start,
+                       .dump = ops->dumpit,
+                       .done = genl_parallel_done,
+               };
+
+               err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
        }
 
-       if (ops->doit == NULL)
-               return -EOPNOTSUPP;
-
-       if (family->maxattr && family->parallel_ops) {
-               attrbuf = kmalloc_array(family->maxattr + 1,
-                                       sizeof(struct nlattr *),
-                                       GFP_KERNEL);
-               if (attrbuf == NULL)
-                       return -ENOMEM;
-       } else
-               attrbuf = family->attrbuf;
+       return err;
+}
 
-       if (attrbuf) {
-               enum netlink_validation validate = NL_VALIDATE_STRICT;
+static int genl_family_rcv_msg_doit(const struct genl_family *family,
+                                   struct sk_buff *skb,
+                                   struct nlmsghdr *nlh,
+                                   struct netlink_ext_ack *extack,
+                                   const struct genl_ops *ops,
+                                   int hdrlen, struct net *net)
+{
+       struct nlattr **attrbuf;
+       struct genl_info info;
+       int err;
 
-               if (ops->validate & GENL_DONT_VALIDATE_STRICT)
-                       validate = NL_VALIDATE_LIBERAL;
+       if (!ops->doit)
+               return -EOPNOTSUPP;
 
-               err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
-                                   family->policy, validate, extack);
-               if (err < 0)
-                       goto out;
-       }
+       attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
+                                                 ops, hdrlen,
+                                                 GENL_DONT_VALIDATE_STRICT,
+                                                 family->parallel_ops);
+       if (IS_ERR(attrbuf))
+               return PTR_ERR(attrbuf);
 
        info.snd_seq = nlh->nlmsg_seq;
        info.snd_portid = NETLINK_CB(skb).portid;
@@ -632,12 +675,49 @@ static int genl_family_rcv_msg(const struct genl_family *family,
                family->post_doit(ops, skb, &info);
 
 out:
-       if (family->parallel_ops)
-               kfree(attrbuf);
+       genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops);
 
        return err;
 }
 
+static int genl_family_rcv_msg(const struct genl_family *family,
+                              struct sk_buff *skb,
+                              struct nlmsghdr *nlh,
+                              struct netlink_ext_ack *extack)
+{
+       const struct genl_ops *ops;
+       struct net *net = sock_net(skb->sk);
+       struct genlmsghdr *hdr = nlmsg_data(nlh);
+       int hdrlen;
+
+       /* this family doesn't exist in this netns */
+       if (!family->netnsok && !net_eq(net, &init_net))
+               return -ENOENT;
+
+       hdrlen = GENL_HDRLEN + family->hdrsize;
+       if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+               return -EINVAL;
+
+       ops = genl_get_cmd(hdr->cmd, family);
+       if (ops == NULL)
+               return -EOPNOTSUPP;
+
+       if ((ops->flags & GENL_ADMIN_PERM) &&
+           !netlink_capable(skb, CAP_NET_ADMIN))
+               return -EPERM;
+
+       if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
+           !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+
+       if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP)
+               return genl_family_rcv_msg_dumpit(family, skb, nlh, extack,
+                                                 ops, hdrlen, net);
+       else
+               return genl_family_rcv_msg_doit(family, skb, nlh, extack,
+                                               ops, hdrlen, net);
+}
+
 static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
                        struct netlink_ext_ack *extack)
 {
@@ -1088,25 +1168,6 @@ problem:
 
 subsys_initcall(genl_init);
 
-/**
- * genl_family_attrbuf - return family's attrbuf
- * @family: the family
- *
- * Return the family's attrbuf, while validating that it's
- * actually valid to access it.
- *
- * You cannot use this function with a family that has parallel_ops
- * and you can only use it within (pre/post) doit/dumpit callbacks.
- */
-struct nlattr **genl_family_attrbuf(const struct genl_family *family)
-{
-       if (!WARN_ON(family->parallel_ops))
-               lockdep_assert_held(&genl_mutex);
-
-       return family->attrbuf;
-}
-EXPORT_SYMBOL(genl_family_attrbuf);
-
 static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
                         gfp_t flags)
 {
index 17e6ca6..fd9ad53 100644 (file)
@@ -102,22 +102,14 @@ nla_put_failure:
 
 static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
 {
-       struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family);
+       const struct genl_dumpit_info *info = genl_dumpit_info(cb);
        struct nfc_dev *dev;
-       int rc;
        u32 idx;
 
-       rc = nlmsg_parse_deprecated(cb->nlh,
-                                   GENL_HDRLEN + nfc_genl_family.hdrsize,
-                                   attrbuf, nfc_genl_family.maxattr,
-                                   nfc_genl_policy, NULL);
-       if (rc < 0)
-               return ERR_PTR(rc);
-
-       if (!attrbuf[NFC_ATTR_DEVICE_INDEX])
+       if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
                return ERR_PTR(-EINVAL);
 
-       idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]);
+       idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
 
        dev = nfc_get_device(idx);
        if (!dev)
@@ -1697,7 +1689,8 @@ static const struct genl_ops nfc_genl_ops[] = {
        },
        {
                .cmd = NFC_CMD_GET_TARGET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                .dumpit = nfc_genl_dump_targets,
                .done = nfc_genl_dump_targets_done,
        },
index 1c77f52..12936c1 100644 (file)
@@ -200,7 +200,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
        if (err)
                return err;
 
-       flow_key->mpls.top_lse = lse;
+       flow_key->mpls.lse[0] = lse;
        return 0;
 }
 
index 05249eb..df9c80b 100644 (file)
@@ -971,6 +971,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 
        ct = nf_ct_get(skb, &ctinfo);
        if (ct) {
+               bool add_helper = false;
+
                /* Packets starting a new connection must be NATted before the
                 * helper, so that the helper knows about the NAT.  We enforce
                 * this by delaying both NAT and helper calls for unconfirmed
@@ -988,16 +990,17 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
                }
 
                /* Userspace may decide to perform a ct lookup without a helper
-                * specified followed by a (recirculate and) commit with one.
-                * Therefore, for unconfirmed connections which we will commit,
-                * we need to attach the helper here.
+                * specified followed by a (recirculate and) commit with one,
+                * or attach a helper in a later commit.  Therefore, for
+                * connections which we will commit, we may need to attach
+                * the helper here.
                 */
-               if (!nf_ct_is_confirmed(ct) && info->commit &&
-                   info->helper && !nfct_help(ct)) {
+               if (info->commit && info->helper && !nfct_help(ct)) {
                        int err = __nf_ct_try_assign_helper(ct, info->ct,
                                                            GFP_ATOMIC);
                        if (err)
                                return err;
+                       add_helper = true;
 
                        /* helper installed, add seqadj if NAT is required */
                        if (info->nat && !nfct_seqadj(ct)) {
@@ -1007,11 +1010,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
                }
 
                /* Call the helper only if:
-                * - nf_conntrack_in() was executed above ("!cached") for a
-                *   confirmed connection, or
+                * - nf_conntrack_in() was executed above ("!cached") or a
+                *   helper was just attached ("add_helper") for a confirmed
+                *   connection, or
                 * - When committing an unconfirmed connection.
                 */
-               if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
+               if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
+                                             info->commit) &&
                    ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
                        return -EINVAL;
                }
index d8c364d..2088619 100644 (file)
@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
        stats = this_cpu_ptr(dp->stats_percpu);
 
        /* Look up flow. */
-       flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+       flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+                                        &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
 
@@ -1575,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
        return 0;
 }
 
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+       dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+       if (!dp->stats_percpu)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+       int i;
+
+       dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+                                 sizeof(struct hlist_head),
+                                 GFP_KERNEL);
+       if (!dp->ports)
+               return -ENOMEM;
+
+       for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+               INIT_HLIST_HEAD(&dp->ports[i]);
+
+       return 0;
+}
+
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
        struct nlattr **a = info->attrs;
@@ -1583,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        struct datapath *dp;
        struct vport *vport;
        struct ovs_net *ovs_net;
-       int err, i;
+       int err;
 
        err = -EINVAL;
        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1596,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
-               goto err_free_reply;
+               goto err_destroy_reply;
 
        ovs_dp_set_net(dp, sock_net(skb->sk));
 
        /* Allocate table. */
        err = ovs_flow_tbl_init(&dp->table);
        if (err)
-               goto err_free_dp;
+               goto err_destroy_dp;
 
-       dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
-       if (!dp->stats_percpu) {
-               err = -ENOMEM;
+       err = ovs_dp_stats_init(dp);
+       if (err)
                goto err_destroy_table;
-       }
-
-       dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
-                                 sizeof(struct hlist_head),
-                                 GFP_KERNEL);
-       if (!dp->ports) {
-               err = -ENOMEM;
-               goto err_destroy_percpu;
-       }
 
-       for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
-               INIT_HLIST_HEAD(&dp->ports[i]);
+       err = ovs_dp_vport_init(dp);
+       if (err)
+               goto err_destroy_stats;
 
        err = ovs_meters_init(dp);
        if (err)
-               goto err_destroy_ports_array;
+               goto err_destroy_ports;
 
        /* Set up our datapath device. */
        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1656,6 +1673,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
                                ovs_dp_reset_user_features(skb, info);
                }
 
+               ovs_unlock();
                goto err_destroy_meters;
        }
 
@@ -1672,17 +1690,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        return 0;
 
 err_destroy_meters:
-       ovs_unlock();
        ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
        kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
        free_percpu(dp->stats_percpu);
 err_destroy_table:
        ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
        kfree(dp);
-err_free_reply:
+err_destroy_reply:
        kfree_skb(reply);
 err:
        return err;
index 38147e6..9d375e7 100644 (file)
@@ -637,27 +637,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
                        memset(&key->ipv4, 0, sizeof(key->ipv4));
                }
        } else if (eth_p_mpls(key->eth.type)) {
-               size_t stack_len = MPLS_HLEN;
+               u8 label_count = 1;
 
+               memset(&key->mpls, 0, sizeof(key->mpls));
                skb_set_inner_network_header(skb, skb->mac_len);
                while (1) {
                        __be32 lse;
 
-                       error = check_header(skb, skb->mac_len + stack_len);
+                       error = check_header(skb, skb->mac_len +
+                                            label_count * MPLS_HLEN);
                        if (unlikely(error))
                                return 0;
 
                        memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
 
-                       if (stack_len == MPLS_HLEN)
-                               memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+                       if (label_count <= MPLS_LABEL_DEPTH)
+                               memcpy(&key->mpls.lse[label_count - 1], &lse,
+                                      MPLS_HLEN);
 
-                       skb_set_inner_network_header(skb, skb->mac_len + stack_len);
+                       skb_set_inner_network_header(skb, skb->mac_len +
+                                                    label_count * MPLS_HLEN);
                        if (lse & htonl(MPLS_LS_S_MASK))
                                break;
 
-                       stack_len += MPLS_HLEN;
+                       label_count++;
                }
+               if (label_count > MPLS_LABEL_DEPTH)
+                       label_count = MPLS_LABEL_DEPTH;
+
+               key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
        } else if (key->eth.type == htons(ETH_P_IPV6)) {
                int nh_len;             /* IPv6 Header + Extensions */
 
index b830d5f..fd8ed76 100644 (file)
@@ -30,6 +30,7 @@ enum sw_flow_mac_proto {
        MAC_PROTO_ETHERNET,
 };
 #define SW_FLOW_KEY_INVALID    0x80
+#define MPLS_LABEL_DEPTH       3
 
 /* Store options at the end of the array if they are less than the
  * maximum size. This allows us to get the benefits of variable length
@@ -84,9 +85,6 @@ struct sw_flow_key {
                                         * protocol.
                                         */
        union {
-               struct {
-                       __be32 top_lse; /* top label stack entry */
-               } mpls;
                struct {
                        u8     proto;   /* IP protocol or lower 8 bits of ARP opcode. */
                        u8     tos;         /* IP ToS. */
@@ -135,6 +133,11 @@ struct sw_flow_key {
                                } nd;
                        };
                } ipv6;
+               struct {
+                       u32 num_labels_mask;    /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+                       __be32 lse[MPLS_LABEL_DEPTH];     /* label stack entry  */
+               } mpls;
+
                struct ovs_key_nsh nsh;         /* network service header */
        };
        struct {
@@ -166,7 +169,6 @@ struct sw_flow_key_range {
 struct sw_flow_mask {
        int ref_count;
        struct rcu_head rcu;
-       struct list_head list;
        struct sw_flow_key_range range;
        struct sw_flow_key key;
 };
index d7559c6..65c2e34 100644 (file)
@@ -424,7 +424,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
        [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
                                     .next = ovs_tunnel_key_lens, },
-       [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
+       [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
        [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
        [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
@@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 
        if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
                const struct ovs_key_mpls *mpls_key;
+               u32 hdr_len;
+               u32 label_count, label_count_mask, i;
 
                mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
-               SW_FLOW_KEY_PUT(match, mpls.top_lse,
-                               mpls_key->mpls_lse, is_mask);
+               hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
+               label_count = hdr_len / sizeof(struct ovs_key_mpls);
+
+               if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
+                   hdr_len % sizeof(struct ovs_key_mpls))
+                       return -EINVAL;
+
+               label_count_mask =  GENMASK(label_count - 1, 0);
+
+               for (i = 0 ; i < label_count; i++)
+                       SW_FLOW_KEY_PUT(match, mpls.lse[i],
+                                       mpls_key[i].mpls_lse, is_mask);
+
+               SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
+                               label_count_mask, is_mask);
 
                attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
         }
@@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
                ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
                ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
        } else if (eth_p_mpls(swkey->eth.type)) {
+               u8 i, num_labels;
                struct ovs_key_mpls *mpls_key;
 
-               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+               num_labels = hweight_long(output->mpls.num_labels_mask);
+               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
+                                 num_labels * sizeof(*mpls_key));
                if (!nla)
                        goto nla_put_failure;
+
                mpls_key = nla_data(nla);
-               mpls_key->mpls_lse = output->mpls.top_lse;
+               for (i = 0; i < num_labels; i++)
+                       mpls_key[i].mpls_lse = output->mpls.lse[i];
        }
 
        if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -2406,13 +2426,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                  const struct sw_flow_key *key,
                                  struct sw_flow_actions **sfa,
-                                 __be16 eth_type, __be16 vlan_tci, bool log);
+                                 __be16 eth_type, __be16 vlan_tci,
+                                 u32 mpls_label_count, bool log);
 
 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
                                    const struct sw_flow_key *key,
                                    struct sw_flow_actions **sfa,
                                    __be16 eth_type, __be16 vlan_tci,
-                                   bool log, bool last)
+                                   u32 mpls_label_count, bool log, bool last)
 {
        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
        const struct nlattr *probability, *actions;
@@ -2463,7 +2484,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
                return err;
 
        err = __ovs_nla_copy_actions(net, actions, key, sfa,
-                                    eth_type, vlan_tci, log);
+                                    eth_type, vlan_tci, mpls_label_count, log);
 
        if (err)
                return err;
@@ -2478,7 +2499,7 @@ static int validate_and_copy_clone(struct net *net,
                                   const struct sw_flow_key *key,
                                   struct sw_flow_actions **sfa,
                                   __be16 eth_type, __be16 vlan_tci,
-                                  bool log, bool last)
+                                  u32 mpls_label_count, bool log, bool last)
 {
        int start, err;
        u32 exec;
@@ -2498,7 +2519,7 @@ static int validate_and_copy_clone(struct net *net,
                return err;
 
        err = __ovs_nla_copy_actions(net, attr, key, sfa,
-                                    eth_type, vlan_tci, log);
+                                    eth_type, vlan_tci, mpls_label_count, log);
        if (err)
                return err;
 
@@ -2864,6 +2885,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
                                           const struct sw_flow_key *key,
                                           struct sw_flow_actions **sfa,
                                           __be16 eth_type, __be16 vlan_tci,
+                                          u32 mpls_label_count,
                                           bool log, bool last)
 {
        const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
@@ -2912,7 +2934,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
                return nested_acts_start;
 
        err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
-                                    eth_type, vlan_tci, log);
+                                    eth_type, vlan_tci, mpls_label_count, log);
 
        if (err)
                return err;
@@ -2925,7 +2947,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
                return nested_acts_start;
 
        err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
-                                    eth_type, vlan_tci, log);
+                                    eth_type, vlan_tci, mpls_label_count, log);
 
        if (err)
                return err;
@@ -2952,7 +2974,8 @@ static int copy_action(const struct nlattr *from,
 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                  const struct sw_flow_key *key,
                                  struct sw_flow_actions **sfa,
-                                 __be16 eth_type, __be16 vlan_tci, bool log)
+                                 __be16 eth_type, __be16 vlan_tci,
+                                 u32 mpls_label_count, bool log)
 {
        u8 mac_proto = ovs_key_mac_proto(key);
        const struct nlattr *a;
@@ -3065,25 +3088,36 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                             !eth_p_mpls(eth_type)))
                                return -EINVAL;
                        eth_type = mpls->mpls_ethertype;
+                       mpls_label_count++;
                        break;
                }
 
-               case OVS_ACTION_ATTR_POP_MPLS:
+               case OVS_ACTION_ATTR_POP_MPLS: {
+                       __be16  proto;
                        if (vlan_tci & htons(VLAN_CFI_MASK) ||
                            !eth_p_mpls(eth_type))
                                return -EINVAL;
 
-                       /* Disallow subsequent L2.5+ set and mpls_pop actions
-                        * as there is no check here to ensure that the new
-                        * eth_type is valid and thus set actions could
-                        * write off the end of the packet or otherwise
-                        * corrupt it.
+                       /* Disallow subsequent L2.5+ set actions and mpls_pop
+                        * actions once the last MPLS label in the packet is
+                        * is popped as there is no check here to ensure that
+                        * the new eth type is valid and thus set actions could
+                        * write off the end of the packet or otherwise corrupt
+                        * it.
                         *
                         * Support for these actions is planned using packet
                         * recirculation.
                         */
-                       eth_type = htons(0);
+                       proto = nla_get_be16(a);
+                       mpls_label_count--;
+
+                       if (!eth_p_mpls(proto) || !mpls_label_count)
+                               eth_type = htons(0);
+                       else
+                               eth_type =  proto;
+
                        break;
+               }
 
                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key, sfa,
@@ -3106,6 +3140,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
                        err = validate_and_copy_sample(net, a, key, sfa,
                                                       eth_type, vlan_tci,
+                                                      mpls_label_count,
                                                       log, last);
                        if (err)
                                return err;
@@ -3176,6 +3211,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
                        err = validate_and_copy_clone(net, a, key, sfa,
                                                      eth_type, vlan_tci,
+                                                     mpls_label_count,
                                                      log, last);
                        if (err)
                                return err;
@@ -3188,8 +3224,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
                        err = validate_and_copy_check_pkt_len(net, a, key, sfa,
                                                              eth_type,
-                                                             vlan_tci, log,
-                                                             last);
+                                                             vlan_tci,
+                                                             mpls_label_count,
+                                                             log, last);
                        if (err)
                                return err;
                        skip_copy = true;
@@ -3219,14 +3256,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                         struct sw_flow_actions **sfa, bool log)
 {
        int err;
+       u32 mpls_label_count = 0;
 
        *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
        if (IS_ERR(*sfa))
                return PTR_ERR(*sfa);
 
+       if (eth_p_mpls(key->eth.type))
+               mpls_label_count = hweight_long(key->mpls.num_labels_mask);
+
        (*sfa)->orig_len = nla_len(attr);
        err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
-                                    key->eth.vlan.tci, log);
+                                    key->eth.vlan.tci, mpls_label_count, log);
        if (err)
                ovs_nla_free_flow_actions(*sfa);
 
index cf3582c..5904e93 100644 (file)
 #include <net/ndisc.h>
 
 #define TBL_MIN_BUCKETS                1024
+#define MASK_ARRAY_SIZE_MIN    16
 #define REHASH_INTERVAL                (10 * 60 * HZ)
 
+#define MC_HASH_SHIFT          8
+#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
 static struct kmem_cache *flow_cache;
 struct kmem_cache *flow_stats_cache __read_mostly;
 
@@ -164,14 +169,133 @@ static struct table_instance *table_instance_alloc(int new_size)
        return ti;
 }
 
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+       struct mask_array *new;
+
+       size = max(MASK_ARRAY_SIZE_MIN, size);
+       new = kzalloc(sizeof(struct mask_array) +
+                     sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+       if (!new)
+               return NULL;
+
+       new->count = 0;
+       new->max = size;
+
+       return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+       struct mask_array *old;
+       struct mask_array *new;
+
+       new = tbl_mask_array_alloc(size);
+       if (!new)
+               return -ENOMEM;
+
+       old = ovsl_dereference(tbl->mask_array);
+       if (old) {
+               int i;
+
+               for (i = 0; i < old->max; i++) {
+                       if (ovsl_dereference(old->masks[i]))
+                               new->masks[new->count++] = old->masks[i];
+               }
+       }
+
+       rcu_assign_pointer(tbl->mask_array, new);
+       kfree_rcu(old, rcu);
+
+       return 0;
+}
+
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+                                  struct sw_flow_mask *new)
+{
+       struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+       int err, ma_count = READ_ONCE(ma->count);
+
+       if (ma_count >= ma->max) {
+               err = tbl_mask_array_realloc(tbl, ma->max +
+                                             MASK_ARRAY_SIZE_MIN);
+               if (err)
+                       return err;
+
+               ma = ovsl_dereference(tbl->mask_array);
+       }
+
+       BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+       rcu_assign_pointer(ma->masks[ma_count], new);
+       WRITE_ONCE(ma->count, ma_count +1);
+
+       return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+                                   struct sw_flow_mask *mask)
+{
+       struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+       int i, ma_count = READ_ONCE(ma->count);
+
+       /* Remove the deleted mask pointers from the array */
+       for (i = 0; i < ma_count; i++) {
+               if (mask == ovsl_dereference(ma->masks[i]))
+                       goto found;
+       }
+
+       BUG();
+       return;
+
+found:
+       WRITE_ONCE(ma->count, ma_count -1);
+
+       rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+       RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+       kfree_rcu(mask, rcu);
+
+       /* Shrink the mask array if necessary. */
+       if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+           ma_count <= (ma->max / 3))
+               tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+       if (mask) {
+               /* ovs-lock is required to protect mask-refcount and
+                * mask list.
+                */
+               ASSERT_OVSL();
+               BUG_ON(!mask->ref_count);
+               mask->ref_count--;
+
+               if (!mask->ref_count)
+                       tbl_mask_array_del_mask(tbl, mask);
+       }
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
        struct table_instance *ti, *ufid_ti;
+       struct mask_array *ma;
 
-       ti = table_instance_alloc(TBL_MIN_BUCKETS);
+       table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+                                          MC_HASH_ENTRIES,
+                                          __alignof__(struct mask_cache_entry));
+       if (!table->mask_cache)
+               return -ENOMEM;
 
+       ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+       if (!ma)
+               goto free_mask_cache;
+
+       ti = table_instance_alloc(TBL_MIN_BUCKETS);
        if (!ti)
-               return -ENOMEM;
+               goto free_mask_array;
 
        ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
        if (!ufid_ti)
@@ -179,7 +303,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
 
        rcu_assign_pointer(table->ti, ti);
        rcu_assign_pointer(table->ufid_ti, ufid_ti);
-       INIT_LIST_HEAD(&table->mask_list);
+       rcu_assign_pointer(table->mask_array, ma);
        table->last_rehash = jiffies;
        table->count = 0;
        table->ufid_count = 0;
@@ -187,6 +311,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
 
 free_ti:
        __table_instance_destroy(ti);
+free_mask_array:
+       kfree(ma);
+free_mask_cache:
+       free_percpu(table->mask_cache);
        return -ENOMEM;
 }
 
@@ -197,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
        __table_instance_destroy(ti);
 }
 
-static void table_instance_destroy(struct table_instance *ti,
+static void table_instance_flow_free(struct flow_table *table,
+                                 struct table_instance *ti,
+                                 struct table_instance *ufid_ti,
+                                 struct sw_flow *flow,
+                                 bool count)
+{
+       hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+       if (count)
+               table->count--;
+
+       if (ovs_identifier_is_ufid(&flow->id)) {
+               hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+               if (count)
+                       table->ufid_count--;
+       }
+
+       flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+                                  struct table_instance *ti,
                                   struct table_instance *ufid_ti,
                                   bool deferred)
 {
@@ -214,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti,
                struct sw_flow *flow;
                struct hlist_head *head = &ti->buckets[i];
                struct hlist_node *n;
-               int ver = ti->node_ver;
-               int ufid_ver = ufid_ti->node_ver;
 
-               hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
-                       hlist_del_rcu(&flow->flow_table.node[ver]);
-                       if (ovs_identifier_is_ufid(&flow->id))
-                               hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+               hlist_for_each_entry_safe(flow, n, head,
+                                         flow_table.node[ti->node_ver]) {
+
+                       table_instance_flow_free(table, ti, ufid_ti,
+                                                flow, false);
                        ovs_flow_free(flow, deferred);
                }
        }
@@ -243,7 +391,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
        struct table_instance *ti = rcu_dereference_raw(table->ti);
        struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
 
-       table_instance_destroy(ti, ufid_ti, false);
+       free_percpu(table->mask_cache);
+       kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
+       table_instance_destroy(table, ti, ufid_ti, false);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -359,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
        flow_table->count = 0;
        flow_table->ufid_count = 0;
 
-       table_instance_destroy(old_ti, old_ufid_ti, true);
+       table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
        return 0;
 
 err_free_ti:
@@ -370,13 +520,10 @@ err_free_ti:
 static u32 flow_hash(const struct sw_flow_key *key,
                     const struct sw_flow_key_range *range)
 {
-       int key_start = range->start;
-       int key_end = range->end;
-       const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
-       int hash_u32s = (key_end - key_start) >> 2;
+       const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
 
        /* Make sure number of hash bytes are multiple of u32. */
-       BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+       int hash_u32s = range_n_bytes(range) >> 2;
 
        return jhash2(hash_key, hash_u32s, 0);
 }
@@ -425,7 +572,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
 
 static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
                                          const struct sw_flow_key *unmasked,
-                                         const struct sw_flow_mask *mask)
+                                         const struct sw_flow_mask *mask,
+                                         u32 *n_mask_hit)
 {
        struct sw_flow *flow;
        struct hlist_head *head;
@@ -435,6 +583,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
        ovs_flow_mask_key(&masked_key, unmasked, false, mask);
        hash = flow_hash(&masked_key, &mask->range);
        head = find_bucket(ti, hash);
+       (*n_mask_hit)++;
+
        hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
                if (flow->mask == mask && flow->flow_table.hash == hash &&
                    flow_cmp_masked_key(flow, &masked_key, &mask->range))
@@ -443,46 +593,147 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
        return NULL;
 }
 
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
-                                   const struct sw_flow_key *key,
-                                   u32 *n_mask_hit)
+/* Flow lookup does full lookup on flow table. It starts with
+ * mask from index passed in *index.
+ */
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+                                  struct table_instance *ti,
+                                  struct mask_array *ma,
+                                  const struct sw_flow_key *key,
+                                  u32 *n_mask_hit,
+                                  u32 *index)
 {
-       struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+       struct sw_flow *flow;
        struct sw_flow_mask *mask;
+       int i;
+
+       if (likely(*index < ma->max)) {
+               mask = rcu_dereference_ovsl(ma->masks[*index]);
+               if (mask) {
+                       flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+                       if (flow)
+                               return flow;
+               }
+       }
+
+       for (i = 0; i < ma->max; i++)  {
+
+               if (i == *index)
+                       continue;
+
+               mask = rcu_dereference_ovsl(ma->masks[i]);
+               if (unlikely(!mask))
+                       break;
+
+               flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+               if (flow) { /* Found */
+                       *index = i;
+                       return flow;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * mask_cache maps flow to probable mask. This cache is not tightly
+ * coupled cache, It means updates to  mask list can result in inconsistent
+ * cache entry in mask cache.
+ * This is per cpu cache and is divided in MC_HASH_SEGS segments.
+ * In case of a hash collision the entry is hashed in next segment.
+ * */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+                                         const struct sw_flow_key *key,
+                                         u32 skb_hash,
+                                         u32 *n_mask_hit)
+{
+       struct mask_array *ma = rcu_dereference(tbl->mask_array);
+       struct table_instance *ti = rcu_dereference(tbl->ti);
+       struct mask_cache_entry *entries, *ce;
        struct sw_flow *flow;
+       u32 hash;
+       int seg;
 
        *n_mask_hit = 0;
-       list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
-               (*n_mask_hit)++;
-               flow = masked_flow_lookup(ti, key, mask);
-               if (flow)  /* Found */
+       if (unlikely(!skb_hash)) {
+               u32 mask_index = 0;
+
+               return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
+       }
+
+       /* Pre and post recirulation flows usually have the same skb_hash
+        * value. To avoid hash collisions, rehash the 'skb_hash' with
+        * 'recirc_id'.  */
+       if (key->recirc_id)
+               skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+       ce = NULL;
+       hash = skb_hash;
+       entries = this_cpu_ptr(tbl->mask_cache);
+
+       /* Find the cache entry 'ce' to operate on. */
+       for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+               int index = hash & (MC_HASH_ENTRIES - 1);
+               struct mask_cache_entry *e;
+
+               e = &entries[index];
+               if (e->skb_hash == skb_hash) {
+                       flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+                                          &e->mask_index);
+                       if (!flow)
+                               e->skb_hash = 0;
                        return flow;
+               }
+
+               if (!ce || e->skb_hash < ce->skb_hash)
+                       ce = e;  /* A better replacement cache candidate. */
+
+               hash >>= MC_HASH_SHIFT;
        }
-       return NULL;
+
+       /* Cache miss, do full lookup. */
+       flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
+       if (flow)
+               ce->skb_hash = skb_hash;
+
+       return flow;
 }
 
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
                                    const struct sw_flow_key *key)
 {
+       struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+       struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
        u32 __always_unused n_mask_hit;
+       u32 index = 0;
 
-       return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+       return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
 }
 
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
                                          const struct sw_flow_match *match)
 {
-       struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
-       struct sw_flow_mask *mask;
-       struct sw_flow *flow;
+       struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+       int i;
 
        /* Always called under ovs-mutex. */
-       list_for_each_entry(mask, &tbl->mask_list, list) {
-               flow = masked_flow_lookup(ti, match->key, mask);
+       for (i = 0; i < ma->max; i++) {
+               struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+               u32 __always_unused n_mask_hit;
+               struct sw_flow_mask *mask;
+               struct sw_flow *flow;
+
+               mask = ovsl_dereference(ma->masks[i]);
+               if (!mask)
+                       continue;
+
+               flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
                if (flow && ovs_identifier_is_key(&flow->id) &&
-                   ovs_flow_cmp_unmasked_key(flow, match))
+                   ovs_flow_cmp_unmasked_key(flow, match)) {
                        return flow;
+               }
        }
+
        return NULL;
 }
 
@@ -528,13 +779,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
 
 int ovs_flow_tbl_num_masks(const struct flow_table *table)
 {
-       struct sw_flow_mask *mask;
-       int num = 0;
-
-       list_for_each_entry(mask, &table->mask_list, list)
-               num++;
-
-       return num;
+       struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+       return READ_ONCE(ma->count);
 }
 
 static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -543,24 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
        return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
 }
 
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
-       if (mask) {
-               /* ovs-lock is required to protect mask-refcount and
-                * mask list.
-                */
-               ASSERT_OVSL();
-               BUG_ON(!mask->ref_count);
-               mask->ref_count--;
-
-               if (!mask->ref_count) {
-                       list_del_rcu(&mask->list);
-                       kfree_rcu(mask, rcu);
-               }
-       }
-}
-
 /* Must be called with OVS mutex held. */
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
@@ -568,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
        struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
 
        BUG_ON(table->count == 0);
-       hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
-       table->count--;
-       if (ovs_identifier_is_ufid(&flow->id)) {
-               hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
-               table->ufid_count--;
-       }
-
-       /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
-        * accessible as long as the RCU read lock is held.
-        */
-       flow_mask_remove(table, flow->mask);
+       table_instance_flow_free(table, ti, ufid_ti, flow, true);
 }
 
 static struct sw_flow_mask *mask_alloc(void)
@@ -606,13 +824,16 @@ static bool mask_equal(const struct sw_flow_mask *a,
 static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
                                           const struct sw_flow_mask *mask)
 {
-       struct list_head *ml;
+       struct mask_array *ma;
+       int i;
+
+       ma = ovsl_dereference(tbl->mask_array);
+       for (i = 0; i < ma->max; i++) {
+               struct sw_flow_mask *t;
+               t = ovsl_dereference(ma->masks[i]);
 
-       list_for_each(ml, &tbl->mask_list) {
-               struct sw_flow_mask *m;
-               m = container_of(ml, struct sw_flow_mask, list);
-               if (mask_equal(mask, m))
-                       return m;
+               if (t && mask_equal(mask, t))
+                       return t;
        }
 
        return NULL;
@@ -623,6 +844,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
                            const struct sw_flow_mask *new)
 {
        struct sw_flow_mask *mask;
+
        mask = flow_mask_find(tbl, new);
        if (!mask) {
                /* Allocate a new mask if none exsits. */
@@ -631,7 +853,12 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
                        return -ENOMEM;
                mask->key = new->key;
                mask->range = new->range;
-               list_add_rcu(&mask->list, &tbl->mask_list);
+
+               /* Add mask to mask-list. */
+               if (tbl_mask_array_add_mask(tbl, mask)) {
+                       kfree(mask);
+                       return -ENOMEM;
+               }
        } else {
                BUG_ON(!mask->ref_count);
                mask->ref_count++;
index bc52045..8a5cea6 100644 (file)
 
 #include "flow.h"
 
+struct mask_cache_entry {
+       u32 skb_hash;
+       u32 mask_index;
+};
+
+struct mask_array {
+       struct rcu_head rcu;
+       int count, max;
+       struct sw_flow_mask __rcu *masks[];
+};
+
 struct table_instance {
        struct hlist_head *buckets;
        unsigned int n_buckets;
@@ -34,7 +45,8 @@ struct table_instance {
 struct flow_table {
        struct table_instance __rcu *ti;
        struct table_instance __rcu *ufid_ti;
-       struct list_head mask_list;
+       struct mask_cache_entry __percpu *mask_cache;
+       struct mask_array __rcu *mask_array;
        unsigned long last_rehash;
        unsigned int count;
        unsigned int ufid_count;
@@ -60,8 +72,9 @@ int  ovs_flow_tbl_num_masks(const struct flow_table *table);
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
                                       u32 *bucket, u32 *idx);
 struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
-                                   const struct sw_flow_key *,
-                                   u32 *n_mask_hit);
+                                         const struct sw_flow_key *,
+                                         u32 skb_hash,
+                                         u32 *n_mask_hit);
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
                                    const struct sw_flow_key *);
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
index e35869e..15ce9b6 100644 (file)
@@ -111,15 +111,11 @@ static __poll_t qrtr_tun_poll(struct file *filp, poll_table *wait)
 static int qrtr_tun_release(struct inode *inode, struct file *filp)
 {
        struct qrtr_tun *tun = filp->private_data;
-       struct sk_buff *skb;
 
        qrtr_endpoint_unregister(&tun->ep);
 
        /* Discard all SKBs */
-       while (!skb_queue_empty(&tun->queue)) {
-               skb = skb_dequeue(&tun->queue);
-               kfree_skb(skb);
-       }
+       skb_queue_purge(&tun->queue);
 
        kfree(tun);
 
index 9de2ae2..3fd5f40 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -30,6 +30,7 @@
  * SOFTWARE.
  *
  */
+#include <linux/dmapool.h>
 #include <linux/kernel.h>
 #include <linux/in.h>
 #include <linux/if.h>
@@ -107,6 +108,7 @@ static void rds_ib_dev_free(struct work_struct *work)
                rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
        if (rds_ibdev->pd)
                ib_dealloc_pd(rds_ibdev->pd);
+       dma_pool_destroy(rds_ibdev->rid_hdrs_pool);
 
        list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
                list_del(&i_ipaddr->list);
@@ -182,6 +184,12 @@ static void rds_ib_add_one(struct ib_device *device)
                rds_ibdev->pd = NULL;
                goto put_dev;
        }
+       rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name,
+                                                  device->dma_device,
+                                                  sizeof(struct rds_header),
+                                                  L1_CACHE_BYTES, 0);
+       if (!rds_ibdev->rid_hdrs_pool)
+               goto put_dev;
 
        rds_ibdev->mr_1m_pool =
                rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
index f2b558e..6e6f247 100644 (file)
@@ -165,8 +165,8 @@ struct rds_ib_connection {
        /* tx */
        struct rds_ib_work_ring i_send_ring;
        struct rm_data_op       *i_data_op;
-       struct rds_header       *i_send_hdrs;
-       dma_addr_t              i_send_hdrs_dma;
+       struct rds_header       **i_send_hdrs;
+       dma_addr_t              *i_send_hdrs_dma;
        struct rds_ib_send_work *i_sends;
        atomic_t                i_signaled_sends;
 
@@ -175,8 +175,8 @@ struct rds_ib_connection {
        struct rds_ib_work_ring i_recv_ring;
        struct rds_ib_incoming  *i_ibinc;
        u32                     i_recv_data_rem;
-       struct rds_header       *i_recv_hdrs;
-       dma_addr_t              i_recv_hdrs_dma;
+       struct rds_header       **i_recv_hdrs;
+       dma_addr_t              *i_recv_hdrs_dma;
        struct rds_ib_recv_work *i_recvs;
        u64                     i_ack_recv;     /* last ACK received */
        struct rds_ib_refill_cache i_cache_incs;
@@ -246,6 +246,7 @@ struct rds_ib_device {
        struct list_head        conn_list;
        struct ib_device        *dev;
        struct ib_pd            *pd;
+       struct dma_pool         *rid_hdrs_pool; /* RDS headers DMA pool */
        bool                    use_fastreg;
 
        unsigned int            max_mrs;
@@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
 void rds_ib_cm_connect_complete(struct rds_connection *conn,
                                struct rdma_cm_event *event);
-
+struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
+                                      struct dma_pool *pool,
+                                      dma_addr_t **dma_addrs, u32 num_hdrs);
+void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
+                      dma_addr_t *dma_addrs, u32 num_hdrs);
 
 #define rds_ib_conn_error(conn, fmt...) \
        __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
index 233f136..6b345c8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -30,6 +30,7 @@
  * SOFTWARE.
  *
  */
+#include <linux/dmapool.h>
 #include <linux/kernel.h>
 #include <linux/in.h>
 #include <linux/slab.h>
@@ -439,6 +440,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
        rds_ibdev->vector_load[index]--;
 }
 
+/* Allocate DMA coherent memory to be used to store struct rds_header for
+ * sending/receiving packets.  The pointers to the DMA memory and the
+ * associated DMA addresses are stored in two arrays.
+ *
+ * @ibdev: the IB device
+ * @pool: the DMA memory pool
+ * @dma_addrs: pointer to the array for storing DMA addresses
+ * @num_hdrs: number of headers to allocate
+ *
+ * It returns the pointer to the array storing the DMA memory pointers.  On
+ * error, NULL pointer is returned.
+ */
+struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
+                                      struct dma_pool *pool,
+                                      dma_addr_t **dma_addrs, u32 num_hdrs)
+{
+       struct rds_header **hdrs;
+       dma_addr_t *hdr_daddrs;
+       u32 i;
+
+       hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
+                            ibdev_to_node(ibdev));
+       if (!hdrs)
+               return NULL;
+
+       hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
+                                  ibdev_to_node(ibdev));
+       if (!hdr_daddrs) {
+               kvfree(hdrs);
+               return NULL;
+       }
+
+       for (i = 0; i < num_hdrs; i++) {
+               hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]);
+               if (!hdrs[i]) {
+                       rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i);
+                       return NULL;
+               }
+       }
+
+       *dma_addrs = hdr_daddrs;
+       return hdrs;
+}
+
+/* Free the DMA memory used to store struct rds_header.
+ *
+ * @pool: the DMA memory pool
+ * @hdrs: pointer to the array storing DMA memory pointers
+ * @dma_addrs: pointer to the array storing DMA addresses
+ * @num_hdrs: number of headers to free.
+ */
+void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
+                      dma_addr_t *dma_addrs, u32 num_hdrs)
+{
+       u32 i;
+
+       for (i = 0; i < num_hdrs; i++)
+               dma_pool_free(pool, hdrs[i], dma_addrs[i]);
+       kvfree(hdrs);
+       kvfree(dma_addrs);
+}
+
 /*
  * This needs to be very careful to not leave IS_ERR pointers around for
  * cleanup to trip over.
@@ -451,6 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        struct ib_cq_init_attr cq_attr = {};
        struct rds_ib_device *rds_ibdev;
        int ret, fr_queue_space;
+       struct dma_pool *pool;
 
        /*
         * It's normal to see a null device if an incoming connection races
@@ -541,31 +605,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
                goto recv_cq_out;
        }
 
-       ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
-                                          ic->i_send_ring.w_nr *
-                                               sizeof(struct rds_header),
-                                          &ic->i_send_hdrs_dma, GFP_KERNEL);
+       pool = rds_ibdev->rid_hdrs_pool;
+       ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma,
+                                            ic->i_send_ring.w_nr);
        if (!ic->i_send_hdrs) {
                ret = -ENOMEM;
-               rdsdebug("ib_dma_alloc_coherent send failed\n");
+               rdsdebug("DMA send hdrs alloc failed\n");
                goto qp_out;
        }
 
-       ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
-                                          ic->i_recv_ring.w_nr *
-                                               sizeof(struct rds_header),
-                                          &ic->i_recv_hdrs_dma, GFP_KERNEL);
+       ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma,
+                                            ic->i_recv_ring.w_nr);
        if (!ic->i_recv_hdrs) {
                ret = -ENOMEM;
-               rdsdebug("ib_dma_alloc_coherent recv failed\n");
+               rdsdebug("DMA recv hdrs alloc failed\n");
                goto send_hdrs_dma_out;
        }
 
-       ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
-                                      &ic->i_ack_dma, GFP_KERNEL);
+       ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL,
+                                   &ic->i_ack_dma);
        if (!ic->i_ack) {
                ret = -ENOMEM;
-               rdsdebug("ib_dma_alloc_coherent ack failed\n");
+               rdsdebug("DMA ack header alloc failed\n");
                goto recv_hdrs_dma_out;
        }
 
@@ -596,17 +657,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 
 sends_out:
        vfree(ic->i_sends);
+
 ack_dma_out:
-       ib_dma_free_coherent(dev, sizeof(struct rds_header),
-                            ic->i_ack, ic->i_ack_dma);
+       dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+       ic->i_ack = NULL;
+
 recv_hdrs_dma_out:
-       ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
-                                       sizeof(struct rds_header),
-                                       ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+       rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
+                         ic->i_recv_ring.w_nr);
+       ic->i_recv_hdrs = NULL;
+       ic->i_recv_hdrs_dma = NULL;
+
 send_hdrs_dma_out:
-       ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
-                                       sizeof(struct rds_header),
-                                       ic->i_send_hdrs, ic->i_send_hdrs_dma);
+       rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma,
+                         ic->i_send_ring.w_nr);
+       ic->i_send_hdrs = NULL;
+       ic->i_send_hdrs_dma = NULL;
+
 qp_out:
        rdma_destroy_qp(ic->i_cm_id);
 recv_cq_out:
@@ -984,8 +1051,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
                 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
 
        if (ic->i_cm_id) {
-               struct ib_device *dev = ic->i_cm_id->device;
-
                rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
                err = rdma_disconnect(ic->i_cm_id);
                if (err) {
@@ -1035,24 +1100,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
                        ib_destroy_cq(ic->i_recv_cq);
                }
 
-               /* then free the resources that ib callbacks use */
-               if (ic->i_send_hdrs)
-                       ib_dma_free_coherent(dev,
-                                          ic->i_send_ring.w_nr *
-                                               sizeof(struct rds_header),
-                                          ic->i_send_hdrs,
-                                          ic->i_send_hdrs_dma);
-
-               if (ic->i_recv_hdrs)
-                       ib_dma_free_coherent(dev,
-                                          ic->i_recv_ring.w_nr *
-                                               sizeof(struct rds_header),
-                                          ic->i_recv_hdrs,
-                                          ic->i_recv_hdrs_dma);
-
-               if (ic->i_ack)
-                       ib_dma_free_coherent(dev, sizeof(struct rds_header),
-                                            ic->i_ack, ic->i_ack_dma);
+               if (ic->rds_ibdev) {
+                       struct dma_pool *pool;
+
+                       pool = ic->rds_ibdev->rid_hdrs_pool;
+
+                       /* then free the resources that ib callbacks use */
+                       if (ic->i_send_hdrs) {
+                               rds_dma_hdrs_free(pool, ic->i_send_hdrs,
+                                                 ic->i_send_hdrs_dma,
+                                                 ic->i_send_ring.w_nr);
+                               ic->i_send_hdrs = NULL;
+                               ic->i_send_hdrs_dma = NULL;
+                       }
+
+                       if (ic->i_recv_hdrs) {
+                               rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
+                                                 ic->i_recv_hdrs_dma,
+                                                 ic->i_recv_ring.w_nr);
+                               ic->i_recv_hdrs = NULL;
+                               ic->i_recv_hdrs_dma = NULL;
+                       }
+
+                       if (ic->i_ack) {
+                               dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+                               ic->i_ack = NULL;
+                       }
+               } else {
+                       WARN_ON(ic->i_send_hdrs);
+                       WARN_ON(ic->i_send_hdrs_dma);
+                       WARN_ON(ic->i_recv_hdrs);
+                       WARN_ON(ic->i_recv_hdrs_dma);
+                       WARN_ON(ic->i_ack);
+               }
 
                if (ic->i_sends)
                        rds_ib_send_clear_ring(ic);
@@ -1071,9 +1151,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
                ic->i_pd = NULL;
                ic->i_send_cq = NULL;
                ic->i_recv_cq = NULL;
-               ic->i_send_hdrs = NULL;
-               ic->i_recv_hdrs = NULL;
-               ic->i_ack = NULL;
        }
        BUG_ON(ic->rds_ibdev);
 
index a0f99bb..694d411 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
                recv->r_wr.num_sge = RDS_IB_RECV_SGE;
 
                sge = &recv->r_sge[0];
-               sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
+               sge->addr = ic->i_recv_hdrs_dma[i];
                sge->length = sizeof(struct rds_header);
                sge->lkey = ic->i_pd->local_dma_lkey;
 
@@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
        WARN_ON(ret != 1);
 
        sge = &recv->r_sge[0];
-       sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
+       sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
        sge->length = sizeof(struct rds_header);
 
        sge = &recv->r_sge[1];
@@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
        }
        data_len -= sizeof(struct rds_header);
 
-       ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
+       ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
 
        /* Validate the checksum. */
        if (!rds_message_verify_checksum(ihdr)) {
@@ -993,10 +993,11 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
        } else {
                /* We expect errors as the qp is drained during shutdown */
                if (rds_conn_up(conn) || rds_conn_connecting(conn))
-                       rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
+                       rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
                                          &conn->c_laddr, &conn->c_faddr,
                                          conn->c_tos, wc->status,
-                                         ib_wc_status_msg(wc->status));
+                                         ib_wc_status_msg(wc->status),
+                                         wc->vendor_err);
        }
 
        /* rds_ib_process_recv() doesn't always consume the frag, and
index dfe6237..d1cc1d7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
                send->s_wr.ex.imm_data = 0;
 
                sge = &send->s_sge[0];
-               sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
+               sge->addr = ic->i_send_hdrs_dma[i];
+
                sge->length = sizeof(struct rds_header);
                sge->lkey = ic->i_pd->local_dma_lkey;
 
@@ -300,10 +301,10 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 
        /* We expect errors as the qp is drained during shutdown */
        if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
-               rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
+               rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
                                  &conn->c_laddr, &conn->c_faddr,
                                  conn->c_tos, wc->status,
-                                 ib_wc_status_msg(wc->status));
+                                 ib_wc_status_msg(wc->status), wc->vendor_err);
        }
 }
 
@@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                send->s_queued = jiffies;
                send->s_op = NULL;
 
-               send->s_sge[0].addr = ic->i_send_hdrs_dma
-                       + (pos * sizeof(struct rds_header));
+               send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
+
                send->s_sge[0].length = sizeof(struct rds_header);
 
-               memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
+               memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
+                      sizeof(struct rds_header));
+
 
                /* Set up the data, if present */
                if (i < work_alloc
@@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                         &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
 
                if (ic->i_flowctl && adv_credits) {
-                       struct rds_header *hdr = &ic->i_send_hdrs[pos];
+                       struct rds_header *hdr = ic->i_send_hdrs[pos];
 
                        /* add credit and redo the header checksum */
                        hdr->h_credit = adv_credits;
index 64830d8..452163e 100644 (file)
@@ -209,6 +209,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
  */
 struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
 {
+       const void *here = __builtin_return_address(0);
        struct rxrpc_peer *peer;
 
        _enter("");
@@ -230,6 +231,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
                        peer->cong_cwnd = 3;
                else
                        peer->cong_cwnd = 4;
+               trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
        }
 
        _leave(" = %p", peer);
index 69d4676..bda1ba2 100644 (file)
@@ -188,6 +188,8 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
                + nla_total_size(0) /* TCA_ACT_STATS nested */
                /* TCA_STATS_BASIC */
                + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+               /* TCA_STATS_PKT64 */
+               + nla_total_size_64bit(sizeof(u64))
                /* TCA_STATS_QUEUE */
                + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
                + nla_total_size(0) /* TCA_OPTIONS nested */
@@ -399,7 +401,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
 
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                   struct tc_action **a, const struct tc_action_ops *ops,
-                  int bind, bool cpustats)
+                  int bind, bool cpustats, u32 flags)
 {
        struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -427,6 +429,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
        p->tcfa_tm.install = jiffies;
        p->tcfa_tm.lastuse = jiffies;
        p->tcfa_tm.firstuse = 0;
+       p->tcfa_flags = flags;
        if (est) {
                err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
                                        &p->tcfa_rate_est,
@@ -451,6 +454,17 @@ err1:
 }
 EXPORT_SYMBOL(tcf_idr_create);
 
+int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
+                             struct nlattr *est, struct tc_action **a,
+                             const struct tc_action_ops *ops, int bind,
+                             u32 flags)
+{
+       /* Set cpustats according to actions flags. */
+       return tcf_idr_create(tn, index, est, a, ops, bind,
+                             !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS), flags);
+}
+EXPORT_SYMBOL(tcf_idr_create_from_flags);
+
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 {
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -773,6 +787,14 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
        }
        rcu_read_unlock();
 
+       if (a->tcfa_flags) {
+               struct nla_bitfield32 flags = { a->tcfa_flags,
+                                               a->tcfa_flags, };
+
+               if (nla_put(skb, TCA_ACT_FLAGS, sizeof(flags), &flags))
+                       goto nla_put_failure;
+       }
+
        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
@@ -831,12 +853,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
        return c;
 }
 
+static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
 static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
        [TCA_ACT_KIND]          = { .type = NLA_STRING },
        [TCA_ACT_INDEX]         = { .type = NLA_U32 },
        [TCA_ACT_COOKIE]        = { .type = NLA_BINARY,
                                    .len = TC_COOKIE_MAX_SIZE },
        [TCA_ACT_OPTIONS]       = { .type = NLA_NESTED },
+       [TCA_ACT_FLAGS]         = { .type = NLA_BITFIELD32,
+                                   .validation_data = &tca_act_flags_allowed },
 };
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -845,6 +870,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    bool rtnl_held,
                                    struct netlink_ext_ack *extack)
 {
+       struct nla_bitfield32 flags = { 0, 0 };
        struct tc_action *a;
        struct tc_action_ops *a_o;
        struct tc_cookie *cookie = NULL;
@@ -876,6 +902,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                goto err_out;
                        }
                }
+               if (tb[TCA_ACT_FLAGS])
+                       flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
        } else {
                if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
                        NL_SET_ERR_MSG(extack, "TC action name too long");
@@ -914,10 +942,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        /* backward compatibility for policer */
        if (name == NULL)
                err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-                               rtnl_held, tp, extack);
+                               rtnl_held, tp, flags.value, extack);
        else
                err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
-                               tp, extack);
+                               tp, flags.value, extack);
        if (err < 0)
                goto err_mod;
 
@@ -989,6 +1017,29 @@ err:
        return err;
 }
 
+void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
+                            bool drop, bool hw)
+{
+       if (a->cpu_bstats) {
+               _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+
+               if (drop)
+                       this_cpu_ptr(a->cpu_qstats)->drops += packets;
+
+               if (hw)
+                       _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+                                          bytes, packets);
+               return;
+       }
+
+       _bstats_update(&a->tcfa_bstats, bytes, packets);
+       if (drop)
+               a->tcfa_qstats.drops += packets;
+       if (hw)
+               _bstats_update(&a->tcfa_bstats_hw, bytes, packets);
+}
+EXPORT_SYMBOL(tcf_action_update_stats);
+
 int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
                          int compat_mode)
 {
index 04b7bd4..46f47e5 100644 (file)
@@ -275,7 +275,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act,
                        int replace, int bind, bool rtnl_held,
-                       struct tcf_proto *tp, struct netlink_ext_ack *extack)
+                       struct tcf_proto *tp, u32 flags,
+                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
        struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -303,7 +304,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
        ret = tcf_idr_check_alloc(tn, &index, act, bind);
        if (!ret) {
                ret = tcf_idr_create(tn, index, est, act,
-                                    &act_bpf_ops, bind, true);
+                                    &act_bpf_ops, bind, true, 0);
                if (ret < 0) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index 2b43cac..43a2430 100644 (file)
@@ -94,7 +94,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                             struct nlattr *est, struct tc_action **a,
                             int ovr, int bind, bool rtnl_held,
-                            struct tcf_proto *tp,
+                            struct tcf_proto *tp, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
@@ -121,7 +121,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
        ret = tcf_idr_check_alloc(tn, &index, a, bind);
        if (!ret) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_connmark_ops, bind, false);
+                                    &act_connmark_ops, bind, false, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index d3cfad8..16e67e1 100644 (file)
@@ -43,7 +43,7 @@ static struct tc_action_ops act_csum_ops;
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a, int ovr,
                         int bind, bool rtnl_held, struct tcf_proto *tp,
-                        struct netlink_ext_ack *extack)
+                        u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
        struct tcf_csum_params *params_new;
@@ -68,8 +68,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
        index = parm->index;
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (!err) {
-               ret = tcf_idr_create(tn, index, est, a,
-                                    &act_csum_ops, bind, true);
+               ret = tcf_idr_create_from_flags(tn, index, est, a,
+                                               &act_csum_ops, bind, flags);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
@@ -580,7 +580,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
        params = rcu_dereference_bh(p->params);
 
        tcf_lastuse_update(&p->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
+       tcf_action_update_bstats(&p->common, skb);
 
        action = READ_ONCE(p->tcf_action);
        if (unlikely(action == TC_ACT_SHOT))
@@ -624,7 +624,7 @@ out:
        return action;
 
 drop:
-       qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+       tcf_action_inc_drop_qstats(&p->common);
        action = TC_ACT_SHOT;
        goto out;
 }
index fcc4602..68d6af5 100644 (file)
@@ -465,11 +465,11 @@ out_push:
        skb_push_rcsum(skb, nh_ofs);
 
 out:
-       bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+       tcf_action_update_bstats(&c->common, skb);
        return retval;
 
 drop:
-       qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
+       tcf_action_inc_drop_qstats(&c->common);
        return TC_ACT_SHOT;
 }
 
@@ -656,7 +656,7 @@ static int tcf_ct_fill_params(struct net *net,
 static int tcf_ct_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a,
                       int replace, int bind, bool rtnl_held,
-                      struct tcf_proto *tp,
+                      struct tcf_proto *tp, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ct_net_id);
@@ -688,8 +688,8 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
                return err;
 
        if (!err) {
-               err = tcf_idr_create(tn, index, est, a,
-                                    &act_ct_ops, bind, true);
+               err = tcf_idr_create_from_flags(tn, index, est, a,
+                                               &act_ct_ops, bind, flags);
                if (err) {
                        tcf_idr_cleanup(tn, index);
                        return err;
@@ -905,11 +905,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
 {
        struct tcf_ct *c = to_ct(a);
 
-       _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-
-       if (hw)
-               _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
-                                  bytes, packets);
+       tcf_action_update_stats(a, bytes, packets, false, hw);
        c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
 }
 
index 0dbcfd1..b1e6010 100644 (file)
@@ -153,7 +153,7 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
 static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
                           int ovr, int bind, bool rtnl_held,
-                          struct tcf_proto *tp,
+                          struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
@@ -210,7 +210,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (!err) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_ctinfo_ops, bind, false);
+                                    &act_ctinfo_ops, bind, false, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index 324f1d1..4160657 100644 (file)
@@ -53,7 +53,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
                         int ovr, int bind, bool rtnl_held,
-                        struct tcf_proto *tp, struct netlink_ext_ack *extack)
+                        struct tcf_proto *tp, u32 flags,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
        struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -98,8 +99,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (!err) {
-               ret = tcf_idr_create(tn, index, est, a,
-                                    &act_gact_ops, bind, true);
+               ret = tcf_idr_create_from_flags(tn, index, est, a,
+                                               &act_gact_ops, bind, flags);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
@@ -161,9 +162,9 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
                action = gact_rand[ptype](gact);
        }
 #endif
-       bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
+       tcf_action_update_bstats(&gact->common, skb);
        if (action == TC_ACT_SHOT)
-               qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+               tcf_action_inc_drop_qstats(&gact->common);
 
        tcf_lastuse_update(&gact->tcf_tm);
 
@@ -177,15 +178,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
        int action = READ_ONCE(gact->tcf_action);
        struct tcf_t *tm = &gact->tcf_tm;
 
-       _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes,
-                          packets);
-       if (action == TC_ACT_SHOT)
-               this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
-
-       if (hw)
-               _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw),
-                                  bytes, packets);
-
+       tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT, hw);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
index 3a31e24..d562c88 100644 (file)
@@ -465,7 +465,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a,
                        int ovr, int bind, bool rtnl_held,
-                       struct tcf_proto *tp, struct netlink_ext_ack *extack)
+                       struct tcf_proto *tp, u32 flags,
+                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
        struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -522,7 +523,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a, &act_ife_ops,
-                                    bind, true);
+                                    bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        kfree(p);
index 214a03d..400a2cf 100644 (file)
@@ -95,7 +95,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
 static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
                          const struct tc_action_ops *ops, int ovr, int bind,
-                         struct tcf_proto *tp)
+                         struct tcf_proto *tp, u32 flags)
 {
        struct tc_action_net *tn = net_generic(net, id);
        struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -144,7 +144,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a, ops, bind,
-                                    false);
+                                    false, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
@@ -205,19 +205,19 @@ err1:
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a, int ovr,
                        int bind, bool rtnl_held, struct tcf_proto *tp,
-                       struct netlink_ext_ack *extack)
+                       u32 flags, struct netlink_ext_ack *extack)
 {
        return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
-                             bind, tp);
+                             bind, tp, flags);
 }
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a, int ovr,
                       int bind, bool unlocked, struct tcf_proto *tp,
-                      struct netlink_ext_ack *extack)
+                      u32 flags, struct netlink_ext_ack *extack)
 {
        return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
-                             bind, tp);
+                             bind, tp, flags);
 }
 
 static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
index 08923b2..b6e1b5b 100644 (file)
@@ -93,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
                           int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp,
-                          struct netlink_ext_ack *extack)
+                          u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
        struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -148,8 +148,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                        NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
                        return -EINVAL;
                }
-               ret = tcf_idr_create(tn, index, est, a,
-                                    &act_mirred_ops, bind, true);
+               ret = tcf_idr_create_from_flags(tn, index, est, a,
+                                               &act_mirred_ops, bind, flags);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
@@ -231,7 +231,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
        }
 
        tcf_lastuse_update(&m->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+       tcf_action_update_bstats(&m->common, skb);
 
        m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
        m_eaction = READ_ONCE(m->tcfm_eaction);
@@ -289,8 +289,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
                /* let's the caller reinsert the packet, if possible */
                if (use_reinsert) {
                        res->ingress = want_ingress;
-                       res->qstats = this_cpu_ptr(m->common.cpu_qstats);
-                       skb_tc_reinsert(skb, res);
+                       if (skb_tc_reinsert(skb, res))
+                               tcf_action_inc_overlimit_qstats(&m->common);
                        __this_cpu_dec(mirred_rec_level);
                        return TC_ACT_CONSUMED;
                }
@@ -303,7 +303,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 
        if (err) {
 out:
-               qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
+               tcf_action_inc_overlimit_qstats(&m->common);
                if (tcf_mirred_is_act_redirect(m_eaction))
                        retval = TC_ACT_SHOT;
        }
@@ -318,10 +318,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
        struct tcf_mirred *m = to_mirred(a);
        struct tcf_t *tm = &m->tcf_tm;
 
-       _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-       if (hw)
-               _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
-                                  bytes, packets);
+       tcf_action_update_stats(a, bytes, packets, false, hw);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
index 4cf6c55..4d8c822 100644 (file)
@@ -131,7 +131,8 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
 static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
                         int ovr, int bind, bool rtnl_held,
-                        struct tcf_proto *tp, struct netlink_ext_ack *extack)
+                        struct tcf_proto *tp, u32 flags,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mpls_net_id);
        struct nlattr *tb[TCA_MPLS_MAX + 1];
@@ -224,7 +225,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_mpls_ops, bind, true);
+                                    &act_mpls_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index ea4c535..855a6fa 100644 (file)
@@ -36,7 +36,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
                        struct tc_action **a, int ovr, int bind,
                        bool rtnl_held, struct tcf_proto *tp,
-                       struct netlink_ext_ack *extack)
+                       u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
        struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -61,7 +61,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (!err) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_nat_ops, bind, false);
+                                    &act_nat_ops, bind, false, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
@@ -206,9 +206,7 @@ static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
 
                icmph = (void *)(skb_network_header(skb) + ihl);
 
-               if ((icmph->type != ICMP_DEST_UNREACH) &&
-                   (icmph->type != ICMP_TIME_EXCEEDED) &&
-                   (icmph->type != ICMP_PARAMETERPROB))
+               if (!icmp_is_err(icmph->type))
                        break;
 
                if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
index cdfaa79..d5eff6a 100644 (file)
@@ -137,7 +137,8 @@ nla_failure:
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
                          int ovr, int bind, bool rtnl_held,
-                         struct tcf_proto *tp, struct netlink_ext_ack *extack)
+                         struct tcf_proto *tp, u32 flags,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
        struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -190,7 +191,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                        goto out_free;
                }
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_pedit_ops, bind, false);
+                                    &act_pedit_ops, bind, false, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        goto out_free;
index 89c04c5..d962715 100644 (file)
@@ -47,7 +47,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 static int tcf_police_init(struct net *net, struct nlattr *nla,
                               struct nlattr *est, struct tc_action **a,
                               int ovr, int bind, bool rtnl_held,
-                              struct tcf_proto *tp,
+                              struct tcf_proto *tp, u32 flags,
                               struct netlink_ext_ack *extack)
 {
        int ret = 0, tcfp_result = TC_ACT_OK, err, size;
@@ -87,7 +87,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, NULL, a,
-                                    &act_police_ops, bind, true);
+                                    &act_police_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
@@ -294,10 +294,7 @@ static void tcf_police_stats_update(struct tc_action *a,
        struct tcf_police *police = to_police(a);
        struct tcf_t *tm = &police->tcf_tm;
 
-       _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-       if (hw)
-               _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
-                                  bytes, packets);
+       tcf_action_update_stats(a, bytes, packets, false, hw);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
@@ -345,10 +342,7 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
            nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate))
                goto nla_put_failure;
 
-       t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
-       t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse);
-       t.firstuse = jiffies_to_clock_t(jiffies - police->tcf_tm.firstuse);
-       t.expires = jiffies_to_clock_t(police->tcf_tm.expires);
+       tcf_tm_dump(&t, &police->tcf_tm);
        if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
                goto nla_put_failure;
        spin_unlock_bh(&police->tcf_lock);
index 514456a..29b23bf 100644 (file)
@@ -36,7 +36,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a, int ovr,
                           int bind, bool rtnl_held, struct tcf_proto *tp,
-                          struct netlink_ext_ack *extack)
+                          u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
        struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -69,7 +69,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_sample_ops, bind, true);
+                                    &act_sample_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index 6120e56..9813ca4 100644 (file)
@@ -35,7 +35,7 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
         * Example if this was the 3rd packet and the string was "hello"
         * then it would look like "hello_3" (without quotes)
         */
-       pr_info("simple: %s_%d\n",
+       pr_info("simple: %s_%llu\n",
               (char *)d->tcfd_defdata, d->tcf_bstats.packets);
        spin_unlock(&d->tcf_lock);
        return d->tcf_action;
@@ -86,7 +86,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
                         int ovr, int bind, bool rtnl_held,
-                        struct tcf_proto *tp, struct netlink_ext_ack *extack)
+                        struct tcf_proto *tp, u32 flags,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
        struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -127,7 +128,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_simp_ops, bind, false);
+                                    &act_simp_ops, bind, false, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index 6a8d333..5f7ca7f 100644 (file)
@@ -86,7 +86,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                            struct nlattr *est, struct tc_action **a,
                            int ovr, int bind, bool rtnl_held,
-                           struct tcf_proto *tp,
+                           struct tcf_proto *tp, u32 act_flags,
                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
@@ -165,7 +165,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_skbedit_ops, bind, true);
+                                    &act_skbedit_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index 888437f..39e6d94 100644 (file)
@@ -79,7 +79,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
                           int ovr, int bind, bool rtnl_held,
-                          struct tcf_proto *tp,
+                          struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
@@ -143,7 +143,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
-                                    &act_skbmod_ops, bind, true);
+                                    &act_skbmod_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
index 2f83a79..cb34e5d 100644 (file)
@@ -31,7 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
        params = rcu_dereference_bh(t->params);
 
        tcf_lastuse_update(&t->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
+       tcf_action_update_bstats(&t->common, skb);
        action = READ_ONCE(t->tcf_action);
 
        switch (params->tcft_action) {
@@ -208,7 +208,7 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
                           int ovr, int bind, bool rtnl_held,
-                          struct tcf_proto *tp,
+                          struct tcf_proto *tp, u32 act_flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
@@ -347,8 +347,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        }
 
        if (!exists) {
-               ret = tcf_idr_create(tn, index, est, a,
-                                    &act_tunnel_key_ops, bind, true);
+               ret = tcf_idr_create_from_flags(tn, index, est, a,
+                                               &act_tunnel_key_ops, bind,
+                                               act_flags);
                if (ret) {
                        NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
                        goto release_tun_meta;
index 08aaf71..b6939ab 100644 (file)
@@ -29,7 +29,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
        u16 tci;
 
        tcf_lastuse_update(&v->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
+       tcf_action_update_bstats(&v->common, skb);
 
        /* Ensure 'data' points at mac_header prior calling vlan manipulating
         * functions.
@@ -88,7 +88,7 @@ out:
        return action;
 
 drop:
-       qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+       tcf_action_inc_drop_qstats(&v->common);
        return TC_ACT_SHOT;
 }
 
@@ -102,7 +102,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
                         int ovr, int bind, bool rtnl_held,
-                        struct tcf_proto *tp, struct netlink_ext_ack *extack)
+                        struct tcf_proto *tp, u32 flags,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
        struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -188,8 +189,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        action = parm->v_action;
 
        if (!exists) {
-               ret = tcf_idr_create(tn, index, est, a,
-                                    &act_vlan_ops, bind, true);
+               ret = tcf_idr_create_from_flags(tn, index, est, a,
+                                               &act_vlan_ops, bind, flags);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
@@ -307,10 +308,7 @@ static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets,
        struct tcf_vlan *v = to_vlan(a);
        struct tcf_t *tm = &v->tcf_tm;
 
-       _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-       if (hw)
-               _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
-                                  bytes, packets);
+       tcf_action_update_stats(a, bytes, packets, false, hw);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
index 98dd87c..b1c7e72 100644 (file)
@@ -530,8 +530,7 @@ begin:
                        fq_flow_set_throttled(q, f);
                        goto begin;
                }
-               if (time_next_packet &&
-                   (s64)(now - time_next_packet - q->ce_threshold) > 0) {
+               if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
                        INET_ECN_set_ce(skb);
                        q->stat_ce_mark++;
                }
index c261c0a..968519f 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
-#include <linux/jhash.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
index 8769b4b..8561e82 100644 (file)
@@ -382,13 +382,8 @@ void __qdisc_run(struct Qdisc *q)
        int packets;
 
        while (qdisc_restart(q, &packets)) {
-               /*
-                * Ordered by possible occurrence: Postpone processing if
-                * 1. we've exceeded packet quota
-                * 2. another process needs the CPU;
-                */
                quota -= packets;
-               if (quota <= 0 || need_resched()) {
+               if (quota <= 0) {
                        __netif_schedule(q);
                        break;
                }
@@ -1214,8 +1209,13 @@ void dev_deactivate_many(struct list_head *head)
 
        /* Wait for outstanding qdisc_run calls. */
        list_for_each_entry(dev, head, close_list) {
-               while (some_qdisc_is_busy(dev))
-                       yield();
+               while (some_qdisc_is_busy(dev)) {
+                       /* wait_event() would avoid this sleep-loop but would
+                        * require expensive checks in the fast paths of packet
+                        * processing which isn't worth it.
+                        */
+                       schedule_timeout_uninterruptible(1);
+               }
                /* The new qdisc is assigned at this point so we can safely
                 * unwind stale skb lists and qdisc statistics
                 */
index d2ffc9a..1ba893b 100644 (file)
@@ -429,6 +429,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
                changeover = 1 ;
 
        asoc->peer.primary_path = transport;
+       sctp_ulpevent_nofity_peer_addr_change(transport,
+                                             SCTP_ADDR_MADE_PRIM, 0);
 
        /* Set a default msg_name for events. */
        memcpy(&asoc->peer.primary_addr, &transport->ipaddr,
@@ -569,6 +571,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
 
        asoc->peer.transport_count--;
 
+       sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0);
        sctp_transport_free(peer);
 }
 
@@ -707,6 +710,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
        list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
        asoc->peer.transport_count++;
 
+       sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0);
+
        /* If we do not yet have a primary path, set one.  */
        if (!asoc->peer.primary_path) {
                sctp_assoc_set_primary(asoc, peer);
@@ -781,10 +786,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
                                  enum sctp_transport_cmd command,
                                  sctp_sn_error_t error)
 {
-       struct sctp_ulpevent *event;
-       struct sockaddr_storage addr;
-       int spc_state = 0;
        bool ulp_notify = true;
+       int spc_state = 0;
 
        /* Record the transition on the transport.  */
        switch (command) {
@@ -836,16 +839,9 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
        /* Generate and send a SCTP_PEER_ADDR_CHANGE notification
         * to the user.
         */
-       if (ulp_notify) {
-               memset(&addr, 0, sizeof(struct sockaddr_storage));
-               memcpy(&addr, &transport->ipaddr,
-                      transport->af_specific->sockaddr_len);
-
-               event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
-                                       0, spc_state, error, GFP_ATOMIC);
-               if (event)
-                       asoc->stream.si->enqueue_event(&asoc->ulpq, event);
-       }
+       if (ulp_notify)
+               sctp_ulpevent_nofity_peer_addr_change(transport,
+                                                     spc_state, error);
 
        /* Select new active and retran paths. */
        sctp_select_active_and_retran_path(asoc);
index cc0405c..cc3ce5d 100644 (file)
@@ -75,41 +75,39 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
        struct list_head *pos, *temp;
        struct sctp_chunk *chunk;
        struct sctp_ulpevent *ev;
-       int error = 0, notify;
-
-       /* If we failed, we may need to notify. */
-       notify = msg->send_failed ? -1 : 0;
+       int error, sent;
 
        /* Release all references. */
        list_for_each_safe(pos, temp, &msg->chunks) {
                list_del_init(pos);
                chunk = list_entry(pos, struct sctp_chunk, frag_list);
-               /* Check whether we _really_ need to notify. */
-               if (notify < 0) {
-                       asoc = chunk->asoc;
-                       if (msg->send_error)
-                               error = msg->send_error;
-                       else
-                               error = asoc->outqueue.error;
-
-                       notify = sctp_ulpevent_type_enabled(asoc->subscribe,
-                                                           SCTP_SEND_FAILED);
+
+               if (!msg->send_failed) {
+                       sctp_chunk_put(chunk);
+                       continue;
                }
 
-               /* Generate a SEND FAILED event only if enabled. */
-               if (notify > 0) {
-                       int sent;
-                       if (chunk->has_tsn)
-                               sent = SCTP_DATA_SENT;
-                       else
-                               sent = SCTP_DATA_UNSENT;
+               asoc = chunk->asoc;
+               error = msg->send_error ?: asoc->outqueue.error;
+               sent = chunk->has_tsn ? SCTP_DATA_SENT : SCTP_DATA_UNSENT;
 
+               if (sctp_ulpevent_type_enabled(asoc->subscribe,
+                                              SCTP_SEND_FAILED)) {
                        ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent,
                                                            error, GFP_ATOMIC);
                        if (ev)
                                asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
                }
 
+               if (sctp_ulpevent_type_enabled(asoc->subscribe,
+                                              SCTP_SEND_FAILED_EVENT)) {
+                       ev = sctp_ulpevent_make_send_failed_event(asoc, chunk,
+                                                                 sent, error,
+                                                                 GFP_ATOMIC);
+                       if (ev)
+                               asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+               }
+
                sctp_chunk_put(chunk);
        }
 
index e0cc1ed..c82dbdc 100644 (file)
@@ -238,7 +238,7 @@ fail:
  * When a destination address on a multi-homed peer encounters a change
  * an interface details event is sent.
  */
-struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
+static struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
        const struct sctp_association *asoc,
        const struct sockaddr_storage *aaddr,
        int flags, int state, int error, gfp_t gfp)
@@ -336,6 +336,22 @@ fail:
        return NULL;
 }
 
+void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+                                          int state, int error)
+{
+       struct sctp_association *asoc = transport->asoc;
+       struct sockaddr_storage addr;
+       struct sctp_ulpevent *event;
+
+       memset(&addr, 0, sizeof(struct sockaddr_storage));
+       memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len);
+
+       event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, state,
+                                                   error, GFP_ATOMIC);
+       if (event)
+               asoc->stream.si->enqueue_event(&asoc->ulpq, event);
+}
+
 /* Create and initialize an SCTP_REMOTE_ERROR notification.
  *
  * Note: This assumes that the chunk->skb->data already points to the
@@ -511,6 +527,45 @@ fail:
        return NULL;
 }
 
+struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event(
+       const struct sctp_association *asoc, struct sctp_chunk *chunk,
+       __u16 flags, __u32 error, gfp_t gfp)
+{
+       struct sctp_send_failed_event *ssf;
+       struct sctp_ulpevent *event;
+       struct sk_buff *skb;
+       int len;
+
+       skb = skb_copy_expand(chunk->skb, sizeof(*ssf), 0, gfp);
+       if (!skb)
+               return NULL;
+
+       len = ntohs(chunk->chunk_hdr->length);
+       len -= sctp_datachk_len(&asoc->stream);
+
+       skb_pull(skb, sctp_datachk_len(&asoc->stream));
+       event = sctp_skb2event(skb);
+       sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
+
+       ssf = skb_push(skb, sizeof(*ssf));
+       ssf->ssf_type = SCTP_SEND_FAILED_EVENT;
+       ssf->ssf_flags = flags;
+       ssf->ssf_length = sizeof(*ssf) + len;
+       skb_trim(skb, ssf->ssf_length);
+       ssf->ssf_error = error;
+
+       ssf->ssfe_info.snd_sid = chunk->sinfo.sinfo_stream;
+       ssf->ssfe_info.snd_ppid = chunk->sinfo.sinfo_ppid;
+       ssf->ssfe_info.snd_context = chunk->sinfo.sinfo_context;
+       ssf->ssfe_info.snd_assoc_id = chunk->sinfo.sinfo_assoc_id;
+       ssf->ssfe_info.snd_flags = chunk->chunk_hdr->flags;
+
+       sctp_ulpevent_set_owner(event, asoc);
+       ssf->ssf_assoc_id = sctp_assoc2id(asoc);
+
+       return event;
+}
+
 /* Create and initialize a SCTP_SHUTDOWN_EVENT notification.
  *
  * Socket Extensions for SCTP - draft-01
index 47946f4..b7d9fd2 100644 (file)
@@ -174,6 +174,7 @@ static int smc_release(struct socket *sock)
        if (!sk)
                goto out;
 
+       sock_hold(sk); /* sock_put below */
        smc = smc_sk(sk);
 
        /* cleanup for a dangling non-blocking connect */
@@ -196,6 +197,7 @@ static int smc_release(struct socket *sock)
        sock->sk = NULL;
        release_sock(sk);
 
+       sock_put(sk); /* sock_hold above */
        sock_put(sk); /* final sock_put */
 out:
        return rc;
@@ -977,12 +979,14 @@ void smc_close_non_accepted(struct sock *sk)
 {
        struct smc_sock *smc = smc_sk(sk);
 
+       sock_hold(sk); /* sock_put below */
        lock_sock(sk);
        if (!sk->sk_lingertime)
                /* wait for peer closing */
                sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
        __smc_release(smc);
        release_sock(sk);
+       sock_put(sk); /* sock_hold above */
        sock_put(sk); /* final sock_put */
 }
 
index 878313f..be11ba4 100644 (file)
@@ -188,6 +188,7 @@ struct smc_connection {
                                                 * 0 for SMC-R, 32 for SMC-D
                                                 */
        u64                     peer_token;     /* SMC-D token of peer */
+       u8                      killed : 1;     /* abnormal termination */
 };
 
 struct smc_sock {                              /* smc sock container */
index d0b0f4c..7dc07ec 100644 (file)
@@ -63,7 +63,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
        rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
                                     wr_rdma_buf,
                                     (struct smc_wr_tx_pend_priv **)pend);
-       if (!conn->alert_token_local)
+       if (conn->killed)
                /* abnormal termination */
                rc = -EPIPE;
        return rc;
@@ -328,7 +328,7 @@ static void smcd_cdc_rx_tsklet(unsigned long data)
        struct smcd_cdc_msg cdc;
        struct smc_sock *smc;
 
-       if (!conn)
+       if (!conn || conn->killed)
                return;
 
        data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
index fc06720..d34e5ad 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/sched/signal.h>
 
 #include <net/sock.h>
+#include <net/tcp.h>
 
 #include "smc.h"
 #include "smc_tx.h"
@@ -65,8 +66,9 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
 
                rc = sk_wait_event(sk, &timeout,
                                   !smc_tx_prepared_sends(&smc->conn) ||
-                                  (sk->sk_err == ECONNABORTED) ||
-                                  (sk->sk_err == ECONNRESET),
+                                  sk->sk_err == ECONNABORTED ||
+                                  sk->sk_err == ECONNRESET ||
+                                  smc->conn.killed,
                                   &wait);
                if (rc)
                        break;
@@ -95,11 +97,13 @@ static int smc_close_final(struct smc_connection *conn)
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
        else
                conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
+       if (conn->killed)
+               return -EPIPE;
 
        return smc_cdc_get_slot_and_msg_send(conn);
 }
 
-static int smc_close_abort(struct smc_connection *conn)
+int smc_close_abort(struct smc_connection *conn)
 {
        conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
 
@@ -109,19 +113,15 @@ static int smc_close_abort(struct smc_connection *conn)
 /* terminate smc socket abnormally - active abort
  * link group is terminated, i.e. RDMA communication no longer possible
  */
-static void smc_close_active_abort(struct smc_sock *smc)
+void smc_close_active_abort(struct smc_sock *smc)
 {
        struct sock *sk = &smc->sk;
-
-       struct smc_cdc_conn_state_flags *txflags =
-               &smc->conn.local_tx_ctrl.conn_state_flags;
+       bool release_clcsock = false;
 
        if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
                sk->sk_err = ECONNABORTED;
-               if (smc->clcsock && smc->clcsock->sk) {
-                       smc->clcsock->sk->sk_err = ECONNABORTED;
-                       smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
-               }
+               if (smc->clcsock && smc->clcsock->sk)
+                       tcp_abort(smc->clcsock->sk, ECONNABORTED);
        }
        switch (sk->sk_state) {
        case SMC_ACTIVE:
@@ -129,35 +129,29 @@ static void smc_close_active_abort(struct smc_sock *smc)
                release_sock(sk);
                cancel_delayed_work_sync(&smc->conn.tx_work);
                lock_sock(sk);
+               sk->sk_state = SMC_CLOSED;
                sock_put(sk); /* passive closing */
                break;
        case SMC_APPCLOSEWAIT1:
        case SMC_APPCLOSEWAIT2:
-               if (!smc_cdc_rxed_any_close(&smc->conn))
-                       sk->sk_state = SMC_PEERABORTWAIT;
-               else
-                       sk->sk_state = SMC_CLOSED;
                release_sock(sk);
                cancel_delayed_work_sync(&smc->conn.tx_work);
                lock_sock(sk);
+               sk->sk_state = SMC_CLOSED;
+               sock_put(sk); /* postponed passive closing */
                break;
        case SMC_PEERCLOSEWAIT1:
        case SMC_PEERCLOSEWAIT2:
-               if (!txflags->peer_conn_closed) {
-                       /* just SHUTDOWN_SEND done */
-                       sk->sk_state = SMC_PEERABORTWAIT;
-               } else {
-                       sk->sk_state = SMC_CLOSED;
-               }
+       case SMC_PEERFINCLOSEWAIT:
+               sk->sk_state = SMC_CLOSED;
+               smc_conn_free(&smc->conn);
+               release_clcsock = true;
                sock_put(sk); /* passive closing */
                break;
        case SMC_PROCESSABORT:
        case SMC_APPFINCLOSEWAIT:
                sk->sk_state = SMC_CLOSED;
                break;
-       case SMC_PEERFINCLOSEWAIT:
-               sock_put(sk); /* passive closing */
-               break;
        case SMC_INIT:
        case SMC_PEERABORTWAIT:
        case SMC_CLOSED:
@@ -166,6 +160,12 @@ static void smc_close_active_abort(struct smc_sock *smc)
 
        sock_set_flag(sk, SOCK_DEAD);
        sk->sk_state_change(sk);
+
+       if (release_clcsock) {
+               release_sock(sk);
+               smc_clcsock_release(smc);
+               lock_sock(sk);
+       }
 }
 
 static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -215,8 +215,6 @@ again:
                if (sk->sk_state == SMC_ACTIVE) {
                        /* send close request */
                        rc = smc_close_final(conn);
-                       if (rc)
-                               break;
                        sk->sk_state = SMC_PEERCLOSEWAIT1;
                } else {
                        /* peer event has changed the state */
@@ -229,8 +227,6 @@ again:
                    !smc_close_sent_any_close(conn)) {
                        /* just shutdown wr done, send close request */
                        rc = smc_close_final(conn);
-                       if (rc)
-                               break;
                }
                sk->sk_state = SMC_CLOSED;
                break;
@@ -246,8 +242,6 @@ again:
                        goto again;
                /* confirm close from peer */
                rc = smc_close_final(conn);
-               if (rc)
-                       break;
                if (smc_cdc_rxed_any_close(conn)) {
                        /* peer has closed the socket already */
                        sk->sk_state = SMC_CLOSED;
@@ -263,8 +257,6 @@ again:
                    !smc_close_sent_any_close(conn)) {
                        /* just shutdown wr done, send close request */
                        rc = smc_close_final(conn);
-                       if (rc)
-                               break;
                }
                /* peer sending PeerConnectionClosed will cause transition */
                break;
@@ -272,10 +264,12 @@ again:
                /* peer sending PeerConnectionClosed will cause transition */
                break;
        case SMC_PROCESSABORT:
-               smc_close_abort(conn);
+               rc = smc_close_abort(conn);
                sk->sk_state = SMC_CLOSED;
                break;
        case SMC_PEERABORTWAIT:
+               sk->sk_state = SMC_CLOSED;
+               break;
        case SMC_CLOSED:
                /* nothing to do, add tracing in future patch */
                break;
@@ -344,12 +338,6 @@ static void smc_close_passive_work(struct work_struct *work)
        lock_sock(sk);
        old_state = sk->sk_state;
 
-       if (!conn->alert_token_local) {
-               /* abnormal termination */
-               smc_close_active_abort(smc);
-               goto wakeup;
-       }
-
        rxflags = &conn->local_rx_ctrl.conn_state_flags;
        if (rxflags->peer_conn_abort) {
                /* peer has not received all data */
@@ -451,8 +439,6 @@ again:
                        goto again;
                /* send close wr request */
                rc = smc_close_wr(conn);
-               if (rc)
-                       break;
                sk->sk_state = SMC_PEERCLOSEWAIT1;
                break;
        case SMC_APPCLOSEWAIT1:
@@ -466,8 +452,6 @@ again:
                        goto again;
                /* confirm close from peer */
                rc = smc_close_wr(conn);
-               if (rc)
-                       break;
                sk->sk_state = SMC_APPCLOSEWAIT2;
                break;
        case SMC_APPCLOSEWAIT2:
index e0e3b5d..634fea2 100644 (file)
@@ -24,5 +24,7 @@ int smc_close_active(struct smc_sock *smc);
 int smc_close_shutdown_write(struct smc_sock *smc);
 void smc_close_init(struct smc_sock *smc);
 void smc_clcsock_release(struct smc_sock *smc);
+int smc_close_abort(struct smc_connection *conn);
+void smc_close_active_abort(struct smc_sock *smc);
 
 #endif /* SMC_CLOSE_H */
index 2ba97ff..0d92456 100644 (file)
@@ -42,20 +42,40 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc);
 
+/* return head of link group list and its lock for a given link group */
+static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
+                                                 spinlock_t **lgr_lock)
+{
+       if (lgr->is_smcd) {
+               *lgr_lock = &lgr->smcd->lgr_lock;
+               return &lgr->smcd->lgr_list;
+       }
+
+       *lgr_lock = &smc_lgr_list.lock;
+       return &smc_lgr_list.list;
+}
+
 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 {
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
-       mod_delayed_work(system_wq, &lgr->free_work,
-                        (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
-                        SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
+       if (!lgr->freeing && !lgr->freefast) {
+               mod_delayed_work(system_wq, &lgr->free_work,
+                                (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
+                                               SMC_LGR_FREE_DELAY_CLNT :
+                                               SMC_LGR_FREE_DELAY_SERV);
+       }
 }
 
 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
 {
-       mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
+       if (!lgr->freeing && !lgr->freefast) {
+               lgr->freefast = 1;
+               mod_delayed_work(system_wq, &lgr->free_work,
+                                SMC_LGR_FREE_DELAY_FAST);
+       }
 }
 
 /* Register connection's alert token in our lookup structure.
@@ -134,6 +154,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
                __smc_lgr_unregister_conn(conn);
        }
        write_unlock_bh(&lgr->conns_lock);
+       conn->lgr = NULL;
 }
 
 /* Send delete link, either as client to request the initiation
@@ -157,48 +178,62 @@ static void smc_lgr_free_work(struct work_struct *work)
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
+       spinlock_t *lgr_lock;
+       struct smc_link *lnk;
        bool conns;
 
-       spin_lock_bh(&smc_lgr_list.lock);
+       smc_lgr_list_head(lgr, &lgr_lock);
+       spin_lock_bh(lgr_lock);
+       if (lgr->freeing) {
+               spin_unlock_bh(lgr_lock);
+               return;
+       }
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
-               spin_unlock_bh(&smc_lgr_list.lock);
+               spin_unlock_bh(lgr_lock);
                return;
        }
-       if (!list_empty(&lgr->list))
-               list_del_init(&lgr->list); /* remove from smc_lgr_list */
-       spin_unlock_bh(&smc_lgr_list.lock);
+       list_del_init(&lgr->list); /* remove from smc_lgr_list */
 
+       lnk = &lgr->lnk[SMC_SINGLE_LINK];
        if (!lgr->is_smcd && !lgr->terminating) {
-               struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
-
                /* try to send del link msg, on error free lgr immediately */
                if (lnk->state == SMC_LNK_ACTIVE &&
                    !smc_link_send_delete(lnk)) {
                        /* reschedule in case we never receive a response */
                        smc_lgr_schedule_free_work(lgr);
+                       spin_unlock_bh(lgr_lock);
                        return;
                }
        }
+       lgr->freeing = 1; /* this instance does the freeing, no new schedule */
+       spin_unlock_bh(lgr_lock);
+       cancel_delayed_work(&lgr->free_work);
 
-       if (!delayed_work_pending(&lgr->free_work)) {
-               struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+       if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
+               smc_llc_link_inactive(lnk);
+       if (lgr->is_smcd)
+               smc_ism_signal_shutdown(lgr);
+       smc_lgr_free(lgr);
+}
 
-               if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
-                       smc_llc_link_inactive(lnk);
-               if (lgr->is_smcd)
-                       smc_ism_signal_shutdown(lgr);
-               smc_lgr_free(lgr);
-       }
+static void smc_lgr_terminate_work(struct work_struct *work)
+{
+       struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+                                                 terminate_work);
+
+       smc_lgr_terminate(lgr);
 }
 
 /* create a new SMC link group */
 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
        struct smc_link_group *lgr;
+       struct list_head *lgr_list;
        struct smc_link *lnk;
+       spinlock_t *lgr_lock;
        u8 rndvec[3];
        int rc = 0;
        int i;
@@ -217,6 +252,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
        }
        lgr->is_smcd = ini->is_smcd;
        lgr->sync_err = 0;
+       lgr->terminating = 0;
+       lgr->freefast = 0;
+       lgr->freeing = 0;
        lgr->vlan_id = ini->vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
        rwlock_init(&lgr->rmbs_lock);
@@ -228,13 +266,18 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
        smc_lgr_list.num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
+       INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
        lgr->conns_all = RB_ROOT;
        if (ini->is_smcd) {
                /* SMC-D specific settings */
+               get_device(&ini->ism_dev->dev);
                lgr->peer_gid = ini->ism_gid;
                lgr->smcd = ini->ism_dev;
+               lgr_list = &ini->ism_dev->lgr_list;
+               lgr_lock = &lgr->smcd->lgr_lock;
        } else {
                /* SMC-R specific settings */
+               get_device(&ini->ib_dev->ibdev->dev);
                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
                memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
                       SMC_SYSTEMID_LEN);
@@ -245,6 +288,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
                lnk->link_id = SMC_SINGLE_LINK;
                lnk->smcibdev = ini->ib_dev;
                lnk->ibport = ini->ib_port;
+               lgr_list = &smc_lgr_list.list;
+               lgr_lock = &smc_lgr_list.lock;
                lnk->path_mtu =
                        ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
                if (!ini->ib_dev->initialized)
@@ -274,9 +319,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
                        goto destroy_qp;
        }
        smc->conn.lgr = lgr;
-       spin_lock_bh(&smc_lgr_list.lock);
-       list_add(&lgr->list, &smc_lgr_list.list);
-       spin_unlock_bh(&smc_lgr_list.lock);
+       spin_lock_bh(lgr_lock);
+       list_add(&lgr->list, lgr_list);
+       spin_unlock_bh(lgr_lock);
        return 0;
 
 destroy_qp:
@@ -309,7 +354,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
                conn->sndbuf_desc->used = 0;
        if (conn->rmb_desc) {
                if (!conn->rmb_desc->regerr) {
-                       if (!lgr->is_smcd) {
+                       if (!lgr->is_smcd && !list_empty(&lgr->list)) {
                                /* unregister rmb with peer */
                                smc_llc_do_delete_rkey(
                                                &lgr->lnk[SMC_SINGLE_LINK],
@@ -340,9 +385,10 @@ void smc_conn_free(struct smc_connection *conn)
        } else {
                smc_cdc_tx_dismiss_slots(conn);
        }
-       smc_lgr_unregister_conn(conn);
-       smc_buf_unuse(conn, lgr);               /* allow buffer reuse */
-       conn->lgr = NULL;
+       if (!list_empty(&lgr->list)) {
+               smc_lgr_unregister_conn(conn);
+               smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+       }
 
        if (!lgr->conns_num)
                smc_lgr_schedule_free_work(lgr);
@@ -433,23 +479,50 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 static void smc_lgr_free(struct smc_link_group *lgr)
 {
        smc_lgr_free_bufs(lgr);
-       if (lgr->is_smcd)
+       if (lgr->is_smcd) {
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
-       else
+               put_device(&lgr->smcd->dev);
+       } else {
                smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+               put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
+       }
        kfree(lgr);
 }
 
 void smc_lgr_forget(struct smc_link_group *lgr)
 {
-       spin_lock_bh(&smc_lgr_list.lock);
+       struct list_head *lgr_list;
+       spinlock_t *lgr_lock;
+
+       lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+       spin_lock_bh(lgr_lock);
        /* do not use this link group for new connections */
-       if (!list_empty(&lgr->list))
-               list_del_init(&lgr->list);
-       spin_unlock_bh(&smc_lgr_list.lock);
+       if (!list_empty(lgr_list))
+               list_del_init(lgr_list);
+       spin_unlock_bh(lgr_lock);
+}
+
+static void smc_sk_wake_ups(struct smc_sock *smc)
+{
+       smc->sk.sk_write_space(&smc->sk);
+       smc->sk.sk_data_ready(&smc->sk);
+       smc->sk.sk_state_change(&smc->sk);
 }
 
-/* terminate linkgroup abnormally */
+/* kill a connection */
+static void smc_conn_kill(struct smc_connection *conn)
+{
+       struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+       smc_close_abort(conn);
+       conn->killed = 1;
+       smc_sk_wake_ups(smc);
+       smc_lgr_unregister_conn(conn);
+       smc->sk.sk_err = ECONNABORTED;
+       smc_close_active_abort(smc);
+}
+
+/* terminate link group */
 static void __smc_lgr_terminate(struct smc_link_group *lgr)
 {
        struct smc_connection *conn;
@@ -459,52 +532,65 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
        if (lgr->terminating)
                return; /* lgr already terminating */
        lgr->terminating = 1;
-       if (!list_empty(&lgr->list)) /* forget lgr */
-               list_del_init(&lgr->list);
        if (!lgr->is_smcd)
                smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 
-       write_lock_bh(&lgr->conns_lock);
+       /* kill remaining link group connections */
+       read_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
        while (node) {
+               read_unlock_bh(&lgr->conns_lock);
                conn = rb_entry(node, struct smc_connection, alert_node);
                smc = container_of(conn, struct smc_sock, conn);
-               sock_hold(&smc->sk); /* sock_put in close work */
-               conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
-               __smc_lgr_unregister_conn(conn);
-               conn->lgr = NULL;
-               write_unlock_bh(&lgr->conns_lock);
-               if (!schedule_work(&conn->close_work))
-                       sock_put(&smc->sk);
-               write_lock_bh(&lgr->conns_lock);
+               sock_hold(&smc->sk); /* sock_put below */
+               lock_sock(&smc->sk);
+               smc_conn_kill(conn);
+               release_sock(&smc->sk);
+               sock_put(&smc->sk); /* sock_hold above */
+               read_lock_bh(&lgr->conns_lock);
                node = rb_first(&lgr->conns_all);
        }
-       write_unlock_bh(&lgr->conns_lock);
+       read_unlock_bh(&lgr->conns_lock);
        if (!lgr->is_smcd)
                wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
-       smc_lgr_schedule_free_work(lgr);
+       smc_lgr_schedule_free_work_fast(lgr);
 }
 
+/* unlink and terminate link group */
 void smc_lgr_terminate(struct smc_link_group *lgr)
 {
-       spin_lock_bh(&smc_lgr_list.lock);
+       spinlock_t *lgr_lock;
+
+       smc_lgr_list_head(lgr, &lgr_lock);
+       spin_lock_bh(lgr_lock);
+       if (lgr->terminating) {
+               spin_unlock_bh(lgr_lock);
+               return; /* lgr already terminating */
+       }
+       list_del_init(&lgr->list);
+       spin_unlock_bh(lgr_lock);
        __smc_lgr_terminate(lgr);
-       spin_unlock_bh(&smc_lgr_list.lock);
 }
 
 /* Called when IB port is terminated */
 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 {
        struct smc_link_group *lgr, *l;
+       LIST_HEAD(lgr_free_list);
 
        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
                if (!lgr->is_smcd &&
                    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
                    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
-                       __smc_lgr_terminate(lgr);
+                       list_move(&lgr->list, &lgr_free_list);
        }
        spin_unlock_bh(&smc_lgr_list.lock);
+
+       list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
+               list_del_init(&lgr->list);
+               __smc_lgr_terminate(lgr);
+       }
 }
 
 /* Called when SMC-D device is terminated or peer is lost */
@@ -514,20 +600,19 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
        LIST_HEAD(lgr_free_list);
 
        /* run common cleanup function and build free list */
-       spin_lock_bh(&smc_lgr_list.lock);
-       list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-               if (lgr->is_smcd && lgr->smcd == dev &&
-                   (!peer_gid || lgr->peer_gid == peer_gid) &&
+       spin_lock_bh(&dev->lgr_lock);
+       list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
+               if ((!peer_gid || lgr->peer_gid == peer_gid) &&
                    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
-                       __smc_lgr_terminate(lgr);
                        list_move(&lgr->list, &lgr_free_list);
                }
        }
-       spin_unlock_bh(&smc_lgr_list.lock);
+       spin_unlock_bh(&dev->lgr_lock);
 
        /* cancel the regular free workers and actually free lgrs */
        list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
                list_del_init(&lgr->list);
+               __smc_lgr_terminate(lgr);
                cancel_delayed_work_sync(&lgr->free_work);
                if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
                        smc_ism_signal_shutdown(lgr);
@@ -607,10 +692,14 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
        struct smc_connection *conn = &smc->conn;
+       struct list_head *lgr_list;
        struct smc_link_group *lgr;
        enum smc_lgr_role role;
+       spinlock_t *lgr_lock;
        int rc = 0;
 
+       lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
+       lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
        ini->cln_first_contact = SMC_FIRST_CONTACT;
        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        if (role == SMC_CLNT && ini->srv_first_contact)
@@ -618,8 +707,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
                goto create;
 
        /* determine if an existing link group can be reused */
-       spin_lock_bh(&smc_lgr_list.lock);
-       list_for_each_entry(lgr, &smc_lgr_list.list, list) {
+       spin_lock_bh(lgr_lock);
+       list_for_each_entry(lgr, lgr_list, list) {
                write_lock_bh(&lgr->conns_lock);
                if ((ini->is_smcd ?
                     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
@@ -639,7 +728,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
                }
                write_unlock_bh(&lgr->conns_lock);
        }
-       spin_unlock_bh(&smc_lgr_list.lock);
+       spin_unlock_bh(lgr_lock);
 
        if (role == SMC_CLNT && !ini->srv_first_contact &&
            ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -1027,16 +1116,45 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
        return 0;
 }
 
+static void smc_core_going_away(void)
+{
+       struct smc_ib_device *smcibdev;
+       struct smcd_dev *smcd;
+
+       spin_lock(&smc_ib_devices.lock);
+       list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
+               int i;
+
+               for (i = 0; i < SMC_MAX_PORTS; i++)
+                       set_bit(i, smcibdev->ports_going_away);
+       }
+       spin_unlock(&smc_ib_devices.lock);
+
+       spin_lock(&smcd_dev_list.lock);
+       list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+               smcd->going_away = 1;
+       }
+       spin_unlock(&smcd_dev_list.lock);
+}
+
 /* Called (from smc_exit) when module is removed */
 void smc_core_exit(void)
 {
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_freeing_list);
+       struct smcd_dev *smcd;
+
+       smc_core_going_away();
 
        spin_lock_bh(&smc_lgr_list.lock);
-       if (!list_empty(&smc_lgr_list.list))
-               list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
+       list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
        spin_unlock_bh(&smc_lgr_list.lock);
+
+       spin_lock(&smcd_dev_list.lock);
+       list_for_each_entry(smcd, &smcd_dev_list.list, list)
+               list_splice_init(&smcd->lgr_list, &lgr_freeing_list);
+       spin_unlock(&smcd_dev_list.lock);
+
        list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
                list_del_init(&lgr->list);
                if (!lgr->is_smcd) {
index c00ac61..e6fd1ed 100644 (file)
@@ -202,8 +202,11 @@ struct smc_link_group {
 
        u8                      id[SMC_LGR_ID_SIZE];    /* unique lgr id */
        struct delayed_work     free_work;      /* delayed freeing of an lgr */
+       struct work_struct      terminate_work; /* abnormal lgr termination */
        u8                      sync_err : 1;   /* lgr no longer fits to peer */
        u8                      terminating : 1;/* lgr is terminating */
+       u8                      freefast : 1;   /* free worker scheduled fast */
+       u8                      freeing : 1;    /* lgr is being freed */
 
        bool                    is_smcd;        /* SMC-R or SMC-D */
        union {
@@ -280,6 +283,12 @@ static inline struct smc_connection *smc_lgr_find_conn(
        return res;
 }
 
+static inline void smc_lgr_terminate_sched(struct smc_link_group *lgr)
+{
+       if (!lgr->terminating)
+               schedule_work(&lgr->terminate_work);
+}
+
 struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 struct smc_clc_msg_local;
index d14ca4a..af05dae 100644 (file)
@@ -242,8 +242,12 @@ static void smc_ib_port_event_work(struct work_struct *work)
        for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
                smc_ib_remember_port_attr(smcibdev, port_idx + 1);
                clear_bit(port_idx, &smcibdev->port_event_mask);
-               if (!smc_ib_port_active(smcibdev, port_idx + 1))
+               if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
+                       set_bit(port_idx, smcibdev->ports_going_away);
                        smc_port_terminate(smcibdev, port_idx + 1);
+               } else {
+                       clear_bit(port_idx, smcibdev->ports_going_away);
+               }
        }
 }
 
@@ -259,8 +263,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
        switch (ibevent->event) {
        case IB_EVENT_DEVICE_FATAL:
                /* terminate all ports on device */
-               for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
+               for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) {
                        set_bit(port_idx, &smcibdev->port_event_mask);
+                       set_bit(port_idx, smcibdev->ports_going_away);
+               }
                schedule_work(&smcibdev->port_event_work);
                break;
        case IB_EVENT_PORT_ERR:
@@ -269,6 +275,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
                port_idx = ibevent->element.port_num - 1;
                if (port_idx < SMC_MAX_PORTS) {
                        set_bit(port_idx, &smcibdev->port_event_mask);
+                       if (ibevent->event == IB_EVENT_PORT_ERR)
+                               set_bit(port_idx, smcibdev->ports_going_away);
+                       else if (ibevent->event == IB_EVENT_PORT_ACTIVE)
+                               clear_bit(port_idx, smcibdev->ports_going_away);
                        schedule_work(&smcibdev->port_event_work);
                }
                break;
@@ -307,6 +317,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
                port_idx = ibevent->element.qp->port - 1;
                if (port_idx < SMC_MAX_PORTS) {
                        set_bit(port_idx, &smcibdev->port_event_mask);
+                       set_bit(port_idx, smcibdev->ports_going_away);
                        schedule_work(&smcibdev->port_event_work);
                }
                break;
index da60ab9..6a0069d 100644 (file)
@@ -47,6 +47,7 @@ struct smc_ib_device {                                /* ib-device infos for smc */
        u8                      initialized : 1; /* ib dev CQ, evthdl done */
        struct work_struct      port_event_work;
        unsigned long           port_event_mask;
+       DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS);
 };
 
 struct smc_buf_desc;
index e89e918..ee73408 100644 (file)
@@ -286,7 +286,9 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
        smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
 
        spin_lock_init(&smcd->lock);
+       spin_lock_init(&smcd->lgr_lock);
        INIT_LIST_HEAD(&smcd->vlan);
+       INIT_LIST_HEAD(&smcd->lgr_list);
        smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
                                                 WQ_MEM_RECLAIM, name);
        if (!smcd->event_wq) {
@@ -313,6 +315,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
        spin_lock(&smcd_dev_list.lock);
        list_del(&smcd->list);
        spin_unlock(&smcd_dev_list.lock);
+       smcd->going_away = 1;
        flush_workqueue(smcd->event_wq);
        destroy_workqueue(smcd->event_wq);
        smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
@@ -342,6 +345,8 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
 {
        struct smc_ism_event_work *wrk;
 
+       if (smcd->going_away)
+               return;
        /* copy event to event work queue, and let it be handled there */
        wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
        if (!wrk)
index 4fd60c5..e1918ff 100644 (file)
@@ -475,7 +475,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
                        smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
                }
                smc_llc_send_message(link, llc, sizeof(*llc));
-               smc_lgr_schedule_free_work_fast(lgr);
+               smc_lgr_terminate_sched(lgr);
        }
 }
 
index 2920b00..352ee2f 100644 (file)
@@ -781,6 +781,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
                        dev_put(ndev);
                        if (netdev == ndev &&
                            smc_ib_port_active(ibdev, i) &&
+                           !test_bit(i - 1, ibdev->ports_going_away) &&
                            !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
                                                  ini->ib_gid, NULL)) {
                                ini->ib_dev = ibdev;
@@ -820,6 +821,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
                                continue;
                        if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
                            smc_ib_port_active(ibdev, i) &&
+                           !test_bit(i - 1, ibdev->ports_going_away) &&
                            !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
                                                  ini->ib_gid, NULL)) {
                                ini->ib_dev = ibdev;
@@ -846,7 +848,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
 
        spin_lock(&smcd_dev_list.lock);
        list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
-               if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
+               if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
+                   !ismdev->going_away) {
                        ini->ism_dev = ismdev;
                        break;
                }
index 97e8369..39d7b34 100644 (file)
@@ -201,6 +201,8 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
 {
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct smc_connection *conn = &smc->conn;
+       struct smc_cdc_conn_state_flags *cflags =
+                                       &conn->local_tx_ctrl.conn_state_flags;
        struct sock *sk = &smc->sk;
        int rc;
 
@@ -210,7 +212,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
        add_wait_queue(sk_sleep(sk), &wait);
        rc = sk_wait_event(sk, timeo,
                           sk->sk_err ||
+                          cflags->peer_conn_abort ||
                           sk->sk_shutdown & RCV_SHUTDOWN ||
+                          conn->killed ||
                           fcrit(conn),
                           &wait);
        remove_wait_queue(sk_sleep(sk), &wait);
@@ -314,11 +318,13 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
                if (read_done >= target || (pipe && read_done))
                        break;
 
+               if (conn->killed)
+                       break;
+
                if (smc_rx_recvmsg_data_available(smc))
                        goto copy;
 
-               if (sk->sk_shutdown & RCV_SHUTDOWN ||
-                   conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) {
+               if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        /* smc_cdc_msg_recv_action() could have run after
                         * above smc_rx_recvmsg_data_available()
                         */
index 6c8f09c..824f096 100644 (file)
@@ -86,6 +86,7 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
                if (sk->sk_err ||
                    (sk->sk_shutdown & SEND_SHUTDOWN) ||
+                   conn->killed ||
                    conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
                        rc = -EPIPE;
                        break;
@@ -155,7 +156,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
                        return -ENOTCONN;
                if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
                    (smc->sk.sk_err == ECONNABORTED) ||
-                   conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+                   conn->killed)
                        return -EPIPE;
                if (smc_cdc_rxed_any_close(conn))
                        return send_done ?: -ECONNRESET;
@@ -282,10 +283,8 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
                peer_rmbe_offset;
        rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
        rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
-       if (rc) {
-               conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+       if (rc)
                smc_lgr_terminate(lgr);
-       }
        return rc;
 }
 
@@ -495,10 +494,11 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 
                        if (smc->sk.sk_err == ECONNABORTED)
                                return sock_error(&smc->sk);
+                       if (conn->killed)
+                               return -EPIPE;
                        rc = 0;
-                       if (conn->alert_token_local) /* connection healthy */
-                               mod_delayed_work(system_wq, &conn->tx_work,
-                                                SMC_TX_WORK_DELAY);
+                       mod_delayed_work(system_wq, &conn->tx_work,
+                                        SMC_TX_WORK_DELAY);
                }
                return rc;
        }
@@ -547,6 +547,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
 {
        int rc;
 
+       if (conn->killed ||
+           conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+               return -EPIPE;  /* connection being aborted */
        if (conn->lgr->is_smcd)
                rc = smcd_tx_sndbuf_nonempty(conn);
        else
@@ -573,9 +576,7 @@ void smc_tx_work(struct work_struct *work)
        int rc;
 
        lock_sock(&smc->sk);
-       if (smc->sk.sk_err ||
-           !conn->alert_token_local ||
-           conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+       if (smc->sk.sk_err)
                goto out;
 
        rc = smc_tx_sndbuf_nonempty(conn);
@@ -608,8 +609,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
            ((to_confirm > conn->rmbe_update_limit) &&
             ((sender_free <= (conn->rmb_desc->len / 2)) ||
              conn->local_rx_ctrl.prod_flags.write_blocked))) {
+               if (conn->killed ||
+                   conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+                       return;
                if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
-                   conn->alert_token_local) { /* connection healthy */
+                   !conn->killed) {
                        schedule_delayed_work(&conn->tx_work,
                                              SMC_TX_WORK_DELAY);
                        return;
index 253aa75..50743dc 100644 (file)
@@ -101,7 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
                        clear_bit(i, link->wr_tx_mask);
                }
                /* terminate connections of this link group abnormally */
-               smc_lgr_terminate(smc_get_lgr(link));
+               smc_lgr_terminate_sched(smc_get_lgr(link));
        }
        if (pnd_snd.handler)
                pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -191,7 +191,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
                        SMC_WR_TX_WAIT_FREE_SLOT_TIME);
                if (!rc) {
                        /* timeout - terminate connections */
-                       smc_lgr_terminate(smc_get_lgr(link));
+                       smc_lgr_terminate_sched(smc_get_lgr(link));
                        return -EPIPE;
                }
                if (idx == link->wr_tx_cnt)
@@ -247,7 +247,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
        rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
        if (rc) {
                smc_wr_tx_put_slot(link, priv);
-               smc_lgr_terminate(smc_get_lgr(link));
+               smc_lgr_terminate_sched(smc_get_lgr(link));
        }
        return rc;
 }
@@ -272,7 +272,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
                                              SMC_WR_REG_MR_WAIT_TIME);
        if (!rc) {
                /* timeout - terminate connections */
-               smc_lgr_terminate(smc_get_lgr(link));
+               smc_lgr_terminate_sched(smc_get_lgr(link));
                return -EPIPE;
        }
        if (rc == -ERESTARTSYS)
@@ -373,7 +373,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
                                /* terminate connections of this link group
                                 * abnormally
                                 */
-                               smc_lgr_terminate(smc_get_lgr(link));
+                               smc_lgr_terminate_sched(smc_get_lgr(link));
                                break;
                        default:
                                smc_wr_rx_post(link); /* refill WR RX */
index 23cb379..ab648dd 100644 (file)
@@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net)
        tipc_sk_rht_destroy(net);
 }
 
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+       tipc_node_pre_cleanup_net(net);
+}
+
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+       .pre_exit = tipc_pernet_pre_exit,
+};
+
 static struct pernet_operations tipc_net_ops = {
        .init = tipc_init_net,
        .exit = tipc_exit_net,
@@ -151,6 +160,10 @@ static int __init tipc_init(void)
        if (err)
                goto out_pernet_topsrv;
 
+       err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+       if (err)
+               goto out_register_pernet_subsys;
+
        err = tipc_bearer_setup();
        if (err)
                goto out_bearer;
@@ -158,6 +171,8 @@ static int __init tipc_init(void)
        pr_info("Started in single node mode\n");
        return 0;
 out_bearer:
+       unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
        unregister_pernet_device(&tipc_topsrv_net_ops);
 out_pernet_topsrv:
        tipc_socket_stop();
@@ -177,6 +192,7 @@ out_netlink:
 static void __exit tipc_exit(void)
 {
        tipc_bearer_cleanup();
+       unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
        unregister_pernet_device(&tipc_topsrv_net_ops);
        tipc_socket_stop();
        unregister_pernet_device(&tipc_net_ops);
index 60d8295..8776d32 100644 (file)
@@ -59,6 +59,7 @@
 #include <net/netns/generic.h>
 #include <linux/rhashtable.h>
 #include <net/genetlink.h>
+#include <net/netns/hash.h>
 
 struct tipc_node;
 struct tipc_bearer;
@@ -185,6 +186,11 @@ static inline int in_range(u16 val, u16 min, u16 max)
        return !less(val, min) && !more(val, max);
 }
 
+static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand)
+{
+       return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand;
+}
+
 #ifdef CONFIG_SYSCTL
 int tipc_register_sysctl(void);
 void tipc_unregister_sysctl(void);
index c138d68..b043e8c 100644 (file)
@@ -94,6 +94,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
        msg_set_dest_domain(hdr, dest_domain);
        msg_set_bc_netid(hdr, tn->net_id);
        b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+       msg_set_peer_net_hash(hdr, tipc_net_hash_mixes(net, tn->random));
        msg_set_node_id(hdr, tipc_own_id(net));
 }
 
@@ -242,7 +243,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
        if (!tipc_in_scope(legacy, b->domain, src))
                return;
        tipc_node_check_dest(net, src, peer_id, b, caps, signature,
-                            &maddr, &respond, &dupl_addr);
+                            msg_peer_net_hash(hdr), &maddr, &respond,
+                            &dupl_addr);
        if (dupl_addr)
                disc_dupl_alert(b, src, &maddr);
        if (!respond)
index 999eab5..038861b 100644 (file)
@@ -940,16 +940,17 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
                   struct sk_buff_head *xmitq)
 {
        struct tipc_msg *hdr = buf_msg(skb_peek(list));
-       unsigned int maxwin = l->window;
-       int imp = msg_importance(hdr);
-       unsigned int mtu = l->mtu;
+       struct sk_buff_head *backlogq = &l->backlogq;
+       struct sk_buff_head *transmq = &l->transmq;
+       struct sk_buff *skb, *_skb;
+       u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
        u16 ack = l->rcv_nxt - 1;
        u16 seqno = l->snd_nxt;
-       u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
-       struct sk_buff_head *transmq = &l->transmq;
-       struct sk_buff_head *backlogq = &l->backlogq;
-       struct sk_buff *skb, *_skb, **tskb;
        int pkt_cnt = skb_queue_len(list);
+       int imp = msg_importance(hdr);
+       unsigned int maxwin = l->window;
+       unsigned int mtu = l->mtu;
+       bool new_bundle;
        int rc = 0;
 
        if (unlikely(msg_size(hdr) > mtu)) {
@@ -975,20 +976,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
        }
 
        /* Prepare each packet for sending, and add to relevant queue: */
-       while (skb_queue_len(list)) {
-               skb = skb_peek(list);
-               hdr = buf_msg(skb);
-               msg_set_seqno(hdr, seqno);
-               msg_set_ack(hdr, ack);
-               msg_set_bcast_ack(hdr, bc_ack);
-
+       while ((skb = __skb_dequeue(list))) {
                if (likely(skb_queue_len(transmq) < maxwin)) {
+                       hdr = buf_msg(skb);
+                       msg_set_seqno(hdr, seqno);
+                       msg_set_ack(hdr, ack);
+                       msg_set_bcast_ack(hdr, bc_ack);
                        _skb = skb_clone(skb, GFP_ATOMIC);
                        if (!_skb) {
+                               kfree_skb(skb);
                                __skb_queue_purge(list);
                                return -ENOBUFS;
                        }
-                       __skb_dequeue(list);
                        __skb_queue_tail(transmq, skb);
                        /* next retransmit attempt */
                        if (link_is_bc_sndlink(l))
@@ -1000,22 +999,26 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
                        seqno++;
                        continue;
                }
-               tskb = &l->backlog[imp].target_bskb;
-               if (tipc_msg_bundle(*tskb, hdr, mtu)) {
-                       kfree_skb(__skb_dequeue(list));
-                       l->stats.sent_bundled++;
-                       continue;
-               }
-               if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) {
-                       kfree_skb(__skb_dequeue(list));
-                       __skb_queue_tail(backlogq, *tskb);
-                       l->backlog[imp].len++;
-                       l->stats.sent_bundled++;
-                       l->stats.sent_bundles++;
+               if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb,
+                                       mtu - INT_H_SIZE, l->addr,
+                                       &new_bundle)) {
+                       if (skb) {
+                               /* Keep a ref. to the skb for next try */
+                               l->backlog[imp].target_bskb = skb;
+                               l->backlog[imp].len++;
+                               __skb_queue_tail(backlogq, skb);
+                       } else {
+                               if (new_bundle) {
+                                       l->stats.sent_bundles++;
+                                       l->stats.sent_bundled++;
+                               }
+                               l->stats.sent_bundled++;
+                       }
                        continue;
                }
                l->backlog[imp].target_bskb = NULL;
-               l->backlog[imp].len += skb_queue_len(list);
+               l->backlog[imp].len += (1 + skb_queue_len(list));
+               __skb_queue_tail(backlogq, skb);
                skb_queue_splice_tail_init(list, backlogq);
        }
        l->snd_nxt = seqno;
@@ -1873,7 +1876,7 @@ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
 
        tipc_link_create_dummy_tnl_msg(tnl, xmitq);
 
-       /* This failover link enpoint was never established before,
+       /* This failover link endpoint was never established before,
         * so it has not received anything from peer.
         * Otherwise, it must be a normal failover situation or the
         * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
index 922d262..acb7be5 100644 (file)
@@ -190,6 +190,59 @@ err:
        return 0;
 }
 
+/**
+ * tipc_msg_append(): Append data to tail of an existing buffer queue
+ * @_hdr: header to be used
+ * @m: the data to be appended
+ * @mss: max allowable size of buffer
+ * @dlen: size of data to be appended
+ * @txq: queue to append to
+ * Returns the number of 1k blocks appended or errno value
+ */
+int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
+                   int mss, struct sk_buff_head *txq)
+{
+       struct sk_buff *skb, *prev;
+       int accounted, total, curr;
+       int mlen, cpy, rem = dlen;
+       struct tipc_msg *hdr;
+
+       skb = skb_peek_tail(txq);
+       accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
+       total = accounted;
+
+       while (rem) {
+               if (!skb || skb->len >= mss) {
+                       prev = skb;
+                       skb = tipc_buf_acquire(mss, GFP_KERNEL);
+                       if (unlikely(!skb))
+                               return -ENOMEM;
+                       skb_orphan(skb);
+                       skb_trim(skb, MIN_H_SIZE);
+                       hdr = buf_msg(skb);
+                       skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE);
+                       msg_set_hdr_sz(hdr, MIN_H_SIZE);
+                       msg_set_size(hdr, MIN_H_SIZE);
+                       __skb_queue_tail(txq, skb);
+                       total += 1;
+                       if (prev)
+                               msg_set_ack_required(buf_msg(prev), 0);
+                       msg_set_ack_required(hdr, 1);
+               }
+               hdr = buf_msg(skb);
+               curr = msg_blocks(hdr);
+               mlen = msg_size(hdr);
+               cpy = min_t(int, rem, mss - mlen);
+               if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
+                       return -EFAULT;
+               msg_set_size(hdr, mlen + cpy);
+               skb_put(skb, cpy);
+               rem -= cpy;
+               total += msg_blocks(hdr) - curr;
+       }
+       return total - accounted;
+}
+
 /* tipc_msg_validate - validate basic format of received message
  *
  * This routine ensures a TIPC message has an acceptable header, and at least
@@ -419,48 +472,98 @@ error:
 }
 
 /**
- * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
- * @skb: the buffer to append to ("bundle")
- * @msg:  message to be appended
- * @mtu:  max allowable size for the bundle buffer
- * Consumes buffer if successful
- * Returns true if bundling could be performed, otherwise false
+ * tipc_msg_bundle - Append contents of a buffer to tail of an existing one
+ * @bskb: the bundle buffer to append to
+ * @msg: message to be appended
+ * @max: max allowable size for the bundle buffer
+ *
+ * Returns "true" if bundling has been performed, otherwise "false"
  */
-bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
+static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
+                           u32 max)
 {
-       struct tipc_msg *bmsg;
-       unsigned int bsz;
-       unsigned int msz = msg_size(msg);
-       u32 start, pad;
-       u32 max = mtu - INT_H_SIZE;
+       struct tipc_msg *bmsg = buf_msg(bskb);
+       u32 msz, bsz, offset, pad;
 
-       if (likely(msg_user(msg) == MSG_FRAGMENTER))
-               return false;
-       if (!skb)
-               return false;
-       bmsg = buf_msg(skb);
+       msz = msg_size(msg);
        bsz = msg_size(bmsg);
-       start = align(bsz);
-       pad = start - bsz;
+       offset = align(bsz);
+       pad = offset - bsz;
 
-       if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL))
+       if (unlikely(skb_tailroom(bskb) < (pad + msz)))
                return false;
-       if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
+       if (unlikely(max < (offset + msz)))
                return false;
-       if (unlikely(msg_user(bmsg) != MSG_BUNDLER))
+
+       skb_put(bskb, pad + msz);
+       skb_copy_to_linear_data_offset(bskb, offset, msg, msz);
+       msg_set_size(bmsg, offset + msz);
+       msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
+       return true;
+}
+
+/**
+ * tipc_msg_try_bundle - Try to bundle a new message to the last one
+ * @tskb: the last/target message to which the new one will be appended
+ * @skb: the new message skb pointer
+ * @mss: max message size (header inclusive)
+ * @dnode: destination node for the message
+ * @new_bundle: if this call made a new bundle or not
+ *
+ * Return: "true" if the new message skb is potential for bundling this time or
+ * later, in the case a bundling has been done this time, the skb is consumed
+ * (the skb pointer = NULL).
+ * Otherwise, "false" if the skb cannot be bundled at all.
+ */
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+                        u32 dnode, bool *new_bundle)
+{
+       struct tipc_msg *msg, *inner, *outer;
+       u32 tsz;
+
+       /* First, check if the new buffer is suitable for bundling */
+       msg = buf_msg(*skb);
+       if (msg_user(msg) == MSG_FRAGMENTER)
                return false;
-       if (unlikely(skb_tailroom(skb) < (pad + msz)))
+       if (msg_user(msg) == TUNNEL_PROTOCOL)
                return false;
-       if (unlikely(max < (start + msz)))
+       if (msg_user(msg) == BCAST_PROTOCOL)
                return false;
-       if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
-           (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
+       if (mss <= INT_H_SIZE + msg_size(msg))
                return false;
 
-       skb_put(skb, pad + msz);
-       skb_copy_to_linear_data_offset(skb, start, msg, msz);
-       msg_set_size(bmsg, start + msz);
-       msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
+       /* Ok, but the last/target buffer can be empty? */
+       if (unlikely(!tskb))
+               return true;
+
+       /* Is it a bundle already? Try to bundle the new message to it */
+       if (msg_user(buf_msg(tskb)) == MSG_BUNDLER) {
+               *new_bundle = false;
+               goto bundle;
+       }
+
+       /* Make a new bundle of the two messages if possible */
+       tsz = msg_size(buf_msg(tskb));
+       if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg)))
+               return true;
+       if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
+                                     GFP_ATOMIC)))
+               return true;
+       inner = buf_msg(tskb);
+       skb_push(tskb, INT_H_SIZE);
+       outer = buf_msg(tskb);
+       tipc_msg_init(msg_prevnode(inner), outer, MSG_BUNDLER, 0, INT_H_SIZE,
+                     dnode);
+       msg_set_importance(outer, msg_importance(inner));
+       msg_set_size(outer, INT_H_SIZE + tsz);
+       msg_set_msgcnt(outer, 1);
+       *new_bundle = true;
+
+bundle:
+       if (likely(tipc_msg_bundle(tskb, msg, mss))) {
+               consume_skb(*skb);
+               *skb = NULL;
+       }
        return true;
 }
 
@@ -509,49 +612,6 @@ none:
        return false;
 }
 
-/**
- * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
- * @list: the buffer chain, where head is the buffer to replace/append
- * @skb: buffer to be created, appended to and returned in case of success
- * @msg: message to be appended
- * @mtu: max allowable size for the bundle buffer, inclusive header
- * @dnode: destination node for message. (Not always present in header)
- * Returns true if success, otherwise false
- */
-bool tipc_msg_make_bundle(struct sk_buff **skb,  struct tipc_msg *msg,
-                         u32 mtu, u32 dnode)
-{
-       struct sk_buff *_skb;
-       struct tipc_msg *bmsg;
-       u32 msz = msg_size(msg);
-       u32 max = mtu - INT_H_SIZE;
-
-       if (msg_user(msg) == MSG_FRAGMENTER)
-               return false;
-       if (msg_user(msg) == TUNNEL_PROTOCOL)
-               return false;
-       if (msg_user(msg) == BCAST_PROTOCOL)
-               return false;
-       if (msz > (max / 2))
-               return false;
-
-       _skb = tipc_buf_acquire(max, GFP_ATOMIC);
-       if (!_skb)
-               return false;
-
-       skb_trim(_skb, INT_H_SIZE);
-       bmsg = buf_msg(_skb);
-       tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
-                     INT_H_SIZE, dnode);
-       msg_set_importance(bmsg, msg_importance(msg));
-       msg_set_seqno(bmsg, msg_seqno(msg));
-       msg_set_ack(bmsg, msg_ack(msg));
-       msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
-       tipc_msg_bundle(_skb, msg, mtu);
-       *skb = _skb;
-       return true;
-}
-
 /**
  * tipc_msg_reverse(): swap source and destination addresses and add error code
  * @own_node: originating node id for reversed message
index 0daa6f0..14697e6 100644 (file)
@@ -290,6 +290,16 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d)
        msg_set_bits(m, 0, 18, 1, d);
 }
 
+static inline int msg_ack_required(struct tipc_msg *m)
+{
+       return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_ack_required(struct tipc_msg *m, u32 d)
+{
+       msg_set_bits(m, 0, 18, 1, d);
+}
+
 static inline bool msg_is_rcast(struct tipc_msg *m)
 {
        return msg_bits(m, 0, 18, 0x1);
@@ -1026,6 +1036,20 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
        return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
 }
 
+/* Word 13
+ */
+static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n)
+{
+       msg_set_word(m, 13, n);
+}
+
+static inline u32 msg_peer_net_hash(struct tipc_msg *m)
+{
+       return msg_word(m, 13);
+}
+
+/* Word 14
+ */
 static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
 {
        return msg_word(m, 14);
@@ -1057,14 +1081,15 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
                                uint data_sz, u32 dnode, u32 onode,
                                u32 dport, u32 oport, int errcode);
 int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
-bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu);
-bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
-                         u32 mtu, u32 dnode);
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+                        u32 dnode, bool *new_bundle);
 bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
 int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
                      int pktmax, struct sk_buff_head *frags);
 int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
                   int offset, int dsz, int mtu, struct sk_buff_head *list);
+int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen,
+                   int mss, struct sk_buff_head *txq);
 bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
 bool tipc_msg_assemble(struct sk_buff_head *list);
 bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
index 836e629..5feaf3b 100644 (file)
@@ -146,7 +146,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
        struct publication *publ;
        struct sk_buff *skb = NULL;
        struct distr_item *item = NULL;
-       u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+       u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
                        ITEM_SIZE) * ITEM_SIZE;
        u32 msg_rem = msg_dsz;
 
index d6165ad..d32bbd0 100644 (file)
@@ -176,7 +176,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
        },
        {
                .cmd    = TIPC_NL_PUBL_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                .dumpit = tipc_nl_publ_dump,
        },
        {
@@ -239,7 +240,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
        },
        {
                .cmd    = TIPC_NL_MON_PEER_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                .dumpit = tipc_nl_node_dump_monitor_peer,
        },
        {
@@ -250,7 +252,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 #ifdef CONFIG_TIPC_MEDIA_UDP
        {
                .cmd    = TIPC_NL_UDP_GET_REMOTEIP,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .validate = GENL_DONT_VALIDATE_STRICT |
+                           GENL_DONT_VALIDATE_DUMP_STRICT,
                .dumpit = tipc_udp_nl_dump_remoteip,
        },
 #endif
@@ -268,18 +271,6 @@ struct genl_family tipc_genl_family __ro_after_init = {
        .n_ops          = ARRAY_SIZE(tipc_genl_v2_ops),
 };
 
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
-{
-       u32 maxattr = tipc_genl_family.maxattr;
-
-       *attr = genl_family_attrbuf(&tipc_genl_family);
-       if (!*attr)
-               return -EOPNOTSUPP;
-
-       return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr,
-                                     tipc_nl_policy, NULL);
-}
-
 int __init tipc_netlink_start(void)
 {
        int res;
index 4ba0ad4..7cf7777 100644 (file)
@@ -38,7 +38,6 @@
 #include <net/netlink.h>
 
 extern struct genl_family tipc_genl_family;
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf);
 
 struct tipc_nl_msg {
        struct sk_buff *skb;
index e135d4e..17a5297 100644 (file)
@@ -181,15 +181,18 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
                                   struct tipc_nl_compat_msg *msg,
                                   struct sk_buff *arg)
 {
+       struct genl_dumpit_info info;
        int len = 0;
        int err;
        struct sk_buff *buf;
        struct nlmsghdr *nlmsg;
        struct netlink_callback cb;
+       struct nlattr **attrbuf;
 
        memset(&cb, 0, sizeof(cb));
        cb.nlh = (struct nlmsghdr *)arg->data;
        cb.skb = arg;
+       cb.data = &info;
 
        buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!buf)
@@ -201,19 +204,35 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
                return -ENOMEM;
        }
 
+       attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1,
+                               sizeof(struct nlattr *), GFP_KERNEL);
+       if (!attrbuf) {
+               err = -ENOMEM;
+               goto err_out;
+       }
+
+       info.attrs = attrbuf;
+       err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
+                                    tipc_genl_family.maxattr,
+                                    tipc_genl_family.policy, NULL);
+       if (err)
+               goto err_out;
+
        do {
                int rem;
 
                len = (*cmd->dumpit)(buf, &cb);
 
                nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) {
-                       struct nlattr **attrs;
-
-                       err = tipc_nlmsg_parse(nlmsg, &attrs);
+                       err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN,
+                                                    attrbuf,
+                                                    tipc_genl_family.maxattr,
+                                                    tipc_genl_family.policy,
+                                                    NULL);
                        if (err)
                                goto err_out;
 
-                       err = (*cmd->format)(msg, attrs);
+                       err = (*cmd->format)(msg, attrbuf);
                        if (err)
                                goto err_out;
 
@@ -231,6 +250,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
        err = 0;
 
 err_out:
+       kfree(attrbuf);
        tipc_dump_done(&cb);
        kfree_skb(buf);
 
index c8f6177..4b60928 100644 (file)
@@ -126,6 +126,8 @@ struct tipc_node {
        struct timer_list timer;
        struct rcu_head rcu;
        unsigned long delete_at;
+       struct net *peer_net;
+       u32 peer_hash_mix;
 };
 
 /* Node FSM states and events:
@@ -184,7 +186,7 @@ static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
        return n->links[bearer_id].link;
 }
 
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected)
 {
        struct tipc_node *n;
        int bearer_id;
@@ -194,6 +196,14 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
        if (unlikely(!n))
                return mtu;
 
+       /* Allow MAX_MSG_SIZE when building connection oriented message
+        * if they are in the same core network
+        */
+       if (n->peer_net && connected) {
+               tipc_node_put(n);
+               return mtu;
+       }
+
        bearer_id = n->active_links[sel & 1];
        if (likely(bearer_id != INVALID_BEARER_ID))
                mtu = n->links[bearer_id].mtu;
@@ -360,8 +370,37 @@ static void tipc_node_write_unlock(struct tipc_node *n)
        }
 }
 
+static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes)
+{
+       int net_id = tipc_netid(n->net);
+       struct tipc_net *tn_peer;
+       struct net *tmp;
+       u32 hash_chk;
+
+       if (n->peer_net)
+               return;
+
+       for_each_net_rcu(tmp) {
+               tn_peer = tipc_net(tmp);
+               if (!tn_peer)
+                       continue;
+               /* Integrity checking whether node exists in namespace or not */
+               if (tn_peer->net_id != net_id)
+                       continue;
+               if (memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN))
+                       continue;
+               hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random);
+               if (hash_mixes ^ hash_chk)
+                       continue;
+               n->peer_net = tmp;
+               n->peer_hash_mix = hash_mixes;
+               break;
+       }
+}
+
 static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
-                                         u8 *peer_id, u16 capabilities)
+                                         u8 *peer_id, u16 capabilities,
+                                         u32 signature, u32 hash_mixes)
 {
        struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct tipc_node *n, *temp_node;
@@ -372,6 +411,8 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
        spin_lock_bh(&tn->node_list_lock);
        n = tipc_node_find(net, addr);
        if (n) {
+               if (n->peer_hash_mix ^ hash_mixes)
+                       tipc_node_assign_peer_net(n, hash_mixes);
                if (n->capabilities == capabilities)
                        goto exit;
                /* Same node may come back with new capabilities */
@@ -389,6 +430,7 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
                list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
                        tn->capabilities &= temp_node->capabilities;
                }
+
                goto exit;
        }
        n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -399,6 +441,10 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
        n->addr = addr;
        memcpy(&n->peer_id, peer_id, 16);
        n->net = net;
+       n->peer_net = NULL;
+       n->peer_hash_mix = 0;
+       /* Assign kernel local namespace if exists */
+       tipc_node_assign_peer_net(n, hash_mixes);
        n->capabilities = capabilities;
        kref_init(&n->kref);
        rwlock_init(&n->lock);
@@ -426,6 +472,10 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
                                 tipc_bc_sndlink(net),
                                 &n->bc_entry.link)) {
                pr_warn("Broadcast rcv link creation failed, no memory\n");
+               if (n->peer_net) {
+                       n->peer_net = NULL;
+                       n->peer_hash_mix = 0;
+               }
                kfree(n);
                n = NULL;
                goto exit;
@@ -979,7 +1029,7 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
 
 void tipc_node_check_dest(struct net *net, u32 addr,
                          u8 *peer_id, struct tipc_bearer *b,
-                         u16 capabilities, u32 signature,
+                         u16 capabilities, u32 signature, u32 hash_mixes,
                          struct tipc_media_addr *maddr,
                          bool *respond, bool *dupl_addr)
 {
@@ -998,7 +1048,8 @@ void tipc_node_check_dest(struct net *net, u32 addr,
        *dupl_addr = false;
        *respond = false;
 
-       n = tipc_node_create(net, addr, peer_id, capabilities);
+       n = tipc_node_create(net, addr, peer_id, capabilities, signature,
+                            hash_mixes);
        if (!n)
                return;
 
@@ -1343,6 +1394,10 @@ static void node_lost_contact(struct tipc_node *n,
        /* Notify publications from this node */
        n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
 
+       if (n->peer_net) {
+               n->peer_net = NULL;
+               n->peer_hash_mix = 0;
+       }
        /* Notify sockets connected to node */
        list_for_each_entry_safe(conn, safe, conns, list) {
                skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
@@ -1424,6 +1479,56 @@ msg_full:
        return -EMSGSIZE;
 }
 
+static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
+{
+       struct tipc_msg *hdr = buf_msg(skb_peek(list));
+       struct sk_buff_head inputq;
+
+       switch (msg_user(hdr)) {
+       case TIPC_LOW_IMPORTANCE:
+       case TIPC_MEDIUM_IMPORTANCE:
+       case TIPC_HIGH_IMPORTANCE:
+       case TIPC_CRITICAL_IMPORTANCE:
+               if (msg_connected(hdr) || msg_named(hdr)) {
+                       tipc_loopback_trace(peer_net, list);
+                       spin_lock_init(&list->lock);
+                       tipc_sk_rcv(peer_net, list);
+                       return;
+               }
+               if (msg_mcast(hdr)) {
+                       tipc_loopback_trace(peer_net, list);
+                       skb_queue_head_init(&inputq);
+                       tipc_sk_mcast_rcv(peer_net, list, &inputq);
+                       __skb_queue_purge(list);
+                       skb_queue_purge(&inputq);
+                       return;
+               }
+               return;
+       case MSG_FRAGMENTER:
+               if (tipc_msg_assemble(list)) {
+                       tipc_loopback_trace(peer_net, list);
+                       skb_queue_head_init(&inputq);
+                       tipc_sk_mcast_rcv(peer_net, list, &inputq);
+                       __skb_queue_purge(list);
+                       skb_queue_purge(&inputq);
+               }
+               return;
+       case GROUP_PROTOCOL:
+       case CONN_MANAGER:
+               tipc_loopback_trace(peer_net, list);
+               spin_lock_init(&list->lock);
+               tipc_sk_rcv(peer_net, list);
+               return;
+       case LINK_PROTOCOL:
+       case NAME_DISTRIBUTOR:
+       case TUNNEL_PROTOCOL:
+       case BCAST_PROTOCOL:
+               return;
+       default:
+               return;
+       };
+}
+
 /**
  * tipc_node_xmit() is the general link level function for message sending
  * @net: the applicable net namespace
@@ -1439,6 +1544,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
        struct tipc_link_entry *le = NULL;
        struct tipc_node *n;
        struct sk_buff_head xmitq;
+       bool node_up = false;
        int bearer_id;
        int rc;
 
@@ -1456,6 +1562,17 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
        }
 
        tipc_node_read_lock(n);
+       node_up = node_is_up(n);
+       if (node_up && n->peer_net && check_net(n->peer_net)) {
+               /* xmit inner linux container */
+               tipc_lxc_xmit(n->peer_net, list);
+               if (likely(skb_queue_empty(list))) {
+                       tipc_node_read_unlock(n);
+                       tipc_node_put(n);
+                       return 0;
+               }
+       }
+
        bearer_id = n->active_links[selector & 1];
        if (unlikely(bearer_id == INVALID_BEARER_ID)) {
                tipc_node_read_unlock(n);
@@ -2484,13 +2601,9 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
        int err;
 
        if (!prev_node) {
-               struct nlattr **attrs;
+               struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
                struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
 
-               err = tipc_nlmsg_parse(cb->nlh, &attrs);
-               if (err)
-                       return err;
-
                if (!attrs[TIPC_NLA_MON])
                        return -EINVAL;
 
@@ -2591,3 +2704,33 @@ int tipc_node_dump(struct tipc_node *n, bool more, char *buf)
 
        return i;
 }
+
+void tipc_node_pre_cleanup_net(struct net *exit_net)
+{
+       struct tipc_node *n;
+       struct tipc_net *tn;
+       struct net *tmp;
+
+       rcu_read_lock();
+       for_each_net_rcu(tmp) {
+               if (tmp == exit_net)
+                       continue;
+               tn = tipc_net(tmp);
+               if (!tn)
+                       continue;
+               spin_lock_bh(&tn->node_list_lock);
+               list_for_each_entry_rcu(n, &tn->node_list, list) {
+                       if (!n->peer_net)
+                               continue;
+                       if (n->peer_net != exit_net)
+                               continue;
+                       tipc_node_write_lock(n);
+                       n->peer_net = NULL;
+                       n->peer_hash_mix = 0;
+                       tipc_node_write_unlock_fast(n);
+                       break;
+               }
+               spin_unlock_bh(&tn->node_list_lock);
+       }
+       rcu_read_unlock();
+}
index 291d0ec..c39cd86 100644 (file)
@@ -54,7 +54,8 @@ enum {
        TIPC_LINK_PROTO_SEQNO = (1 << 6),
        TIPC_MCAST_RBCTL      = (1 << 7),
        TIPC_GAP_ACK_BLOCK    = (1 << 8),
-       TIPC_TUNNEL_ENHANCED  = (1 << 9)
+       TIPC_TUNNEL_ENHANCED  = (1 << 9),
+       TIPC_NAGLE            = (1 << 10)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT           |  \
@@ -66,7 +67,9 @@ enum {
                                TIPC_LINK_PROTO_SEQNO  |   \
                                TIPC_MCAST_RBCTL       |   \
                                TIPC_GAP_ACK_BLOCK     |   \
-                               TIPC_TUNNEL_ENHANCED)
+                               TIPC_TUNNEL_ENHANCED   |   \
+                               TIPC_NAGLE)
+
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
@@ -75,7 +78,7 @@ u32 tipc_node_get_addr(struct tipc_node *node);
 u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
 void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
                          struct tipc_bearer *bearer,
-                         u16 capabilities, u32 signature,
+                         u16 capabilities, u32 signature, u32 hash_mixes,
                          struct tipc_media_addr *maddr,
                          bool *respond, bool *dupl_addr);
 void tipc_node_delete_links(struct net *net, int bearer_id);
@@ -92,7 +95,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
 void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
 int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
 void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
 bool tipc_node_is_up(struct net *net, u32 addr);
 u16 tipc_node_get_capabilities(struct net *net, u32 addr);
 int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
@@ -107,4 +110,5 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
                                   struct netlink_callback *cb);
+void tipc_node_pre_cleanup_net(struct net *exit_net);
 #endif
index 4b92b19..5d7859a 100644 (file)
@@ -75,6 +75,7 @@ struct sockaddr_pair {
  * @conn_instance: TIPC instance used when connection was established
  * @published: non-zero if port has one or more associated names
  * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @maxnagle: maximum size of msg which can be subject to nagle
  * @portid: unique port identity in TIPC socket hash table
  * @phdr: preformatted message header used when sending messages
  * #cong_links: list of congested links
@@ -97,6 +98,7 @@ struct tipc_sock {
        u32 conn_instance;
        int published;
        u32 max_pkt;
+       u32 maxnagle;
        u32 portid;
        struct tipc_msg phdr;
        struct list_head cong_links;
@@ -116,6 +118,10 @@ struct tipc_sock {
        struct tipc_mc_method mc_method;
        struct rcu_head rcu;
        struct tipc_group *group;
+       u32 oneway;
+       u16 snd_backlog;
+       bool expect_ack;
+       bool nodelay;
        bool group_is_open;
 };
 
@@ -137,6 +143,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk);
 static void tipc_sk_remove(struct tipc_sock *tsk);
 static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk);
 
 static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
@@ -227,6 +234,26 @@ static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
        return 1;
 }
 
+/* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle
+ */
+static void tsk_set_nagle(struct tipc_sock *tsk)
+{
+       struct sock *sk = &tsk->sk;
+
+       tsk->maxnagle = 0;
+       if (sk->sk_type != SOCK_STREAM)
+               return;
+       if (tsk->nodelay)
+               return;
+       if (!(tsk->peer_caps & TIPC_NAGLE))
+               return;
+       /* Limit node local buffer size to avoid receive queue overflow */
+       if (tsk->max_pkt == MAX_MSG_SIZE)
+               tsk->maxnagle = 1500;
+       else
+               tsk->maxnagle = tsk->max_pkt;
+}
+
 /**
  * tsk_advance_rx_queue - discard first buffer in socket receive queue
  *
@@ -446,6 +473,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 
        tsk = tipc_sk(sk);
        tsk->max_pkt = MAX_PKT_DEFAULT;
+       tsk->maxnagle = 0;
        INIT_LIST_HEAD(&tsk->publications);
        INIT_LIST_HEAD(&tsk->cong_links);
        msg = &tsk->phdr;
@@ -512,8 +540,12 @@ static void __tipc_shutdown(struct socket *sock, int error)
        tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
                                            !tsk_conn_cong(tsk)));
 
-       /* Remove any pending SYN message */
-       __skb_queue_purge(&sk->sk_write_queue);
+       /* Push out unsent messages or remove if pending SYN */
+       skb = skb_peek(&sk->sk_write_queue);
+       if (skb && !msg_is_syn(buf_msg(skb)))
+               tipc_sk_push_backlog(tsk);
+       else
+               __skb_queue_purge(&sk->sk_write_queue);
 
        /* Reject all unreceived messages, except on an active connection
         * (which disconnects locally & sends a 'FIN+' to peer).
@@ -854,7 +886,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
 
        /* Build message as chain of buffers */
        __skb_queue_head_init(&pkts);
-       mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+       mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
        rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
        if (unlikely(rc != dlen))
                return rc;
@@ -1208,6 +1240,27 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
        tipc_sk_rcv(net, inputq);
 }
 
+/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
+ *                         when socket is in Nagle mode
+ */
+static void tipc_sk_push_backlog(struct tipc_sock *tsk)
+{
+       struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+       struct net *net = sock_net(&tsk->sk);
+       u32 dnode = tsk_peer_node(tsk);
+       int rc;
+
+       if (skb_queue_empty(txq) || tsk->cong_link_cnt)
+               return;
+
+       tsk->snt_unacked += tsk->snd_backlog;
+       tsk->snd_backlog = 0;
+       tsk->expect_ack = true;
+       rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
+       if (rc == -ELINKCONG)
+               tsk->cong_link_cnt = 1;
+}
+
 /**
  * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
  * @tsk: receiving socket
@@ -1221,7 +1274,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
        u32 onode = tsk_own_node(tsk);
        struct sock *sk = &tsk->sk;
        int mtyp = msg_type(hdr);
-       bool conn_cong;
+       bool was_cong;
 
        /* Ignore if connection cannot be validated: */
        if (!tsk_peer_msg(tsk, hdr)) {
@@ -1254,11 +1307,13 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
                        __skb_queue_tail(xmitq, skb);
                return;
        } else if (mtyp == CONN_ACK) {
-               conn_cong = tsk_conn_cong(tsk);
+               was_cong = tsk_conn_cong(tsk);
+               tsk->expect_ack = false;
+               tipc_sk_push_backlog(tsk);
                tsk->snt_unacked -= msg_conn_ack(hdr);
                if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
                        tsk->snd_win = msg_adv_win(hdr);
-               if (conn_cong)
+               if (was_cong && !tsk_conn_cong(tsk))
                        sk->sk_write_space(sk);
        } else if (mtyp != CONN_PROBE_REPLY) {
                pr_warn("Received unknown CONN_PROTO msg\n");
@@ -1388,7 +1443,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
                return rc;
 
        __skb_queue_head_init(&pkts);
-       mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+       mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
        rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
        if (unlikely(rc != dlen))
                return rc;
@@ -1437,15 +1492,15 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
        struct sock *sk = sock->sk;
        DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
        long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+       struct sk_buff_head *txq = &sk->sk_write_queue;
        struct tipc_sock *tsk = tipc_sk(sk);
        struct tipc_msg *hdr = &tsk->phdr;
        struct net *net = sock_net(sk);
-       struct sk_buff_head pkts;
        u32 dnode = tsk_peer_node(tsk);
+       int maxnagle = tsk->maxnagle;
+       int maxpkt = tsk->max_pkt;
        int send, sent = 0;
-       int rc = 0;
-
-       __skb_queue_head_init(&pkts);
+       int blocks, rc = 0;
 
        if (unlikely(dlen > INT_MAX))
                return -EMSGSIZE;
@@ -1467,21 +1522,35 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
                                         tipc_sk_connected(sk)));
                if (unlikely(rc))
                        break;
-
                send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
-               rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
-               if (unlikely(rc != send))
-                       break;
-
-               trace_tipc_sk_sendstream(sk, skb_peek(&pkts),
+               blocks = tsk->snd_backlog;
+               if (tsk->oneway++ >= 4 && send <= maxnagle) {
+                       rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
+                       if (unlikely(rc < 0))
+                               break;
+                       blocks += rc;
+                       if (blocks <= 64 && tsk->expect_ack) {
+                               tsk->snd_backlog = blocks;
+                               sent += send;
+                               break;
+                       }
+                       tsk->expect_ack = true;
+               } else {
+                       rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
+                       if (unlikely(rc != send))
+                               break;
+                       blocks += tsk_inc(tsk, send + MIN_H_SIZE);
+               }
+               trace_tipc_sk_sendstream(sk, skb_peek(txq),
                                         TIPC_DUMP_SK_SNDQ, " ");
-               rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+               rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
                if (unlikely(rc == -ELINKCONG)) {
                        tsk->cong_link_cnt = 1;
                        rc = 0;
                }
                if (likely(!rc)) {
-                       tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
+                       tsk->snt_unacked += blocks;
+                       tsk->snd_backlog = 0;
                        sent += send;
                }
        } while (sent < dlen && !rc);
@@ -1526,8 +1595,9 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
        sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
        tipc_set_sk_state(sk, TIPC_ESTABLISHED);
        tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
-       tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
+       tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true);
        tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+       tsk_set_nagle(tsk);
        __skb_queue_purge(&sk->sk_write_queue);
        if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
                return;
@@ -1848,6 +1918,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
        bool peek = flags & MSG_PEEK;
        int offset, required, copy, copied = 0;
        int hlen, dlen, err, rc;
+       bool ack = false;
        long timeout;
 
        /* Catch invalid receive attempts */
@@ -1892,6 +1963,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
 
                /* Copy data if msg ok, otherwise return error/partial data */
                if (likely(!err)) {
+                       ack = msg_ack_required(hdr);
                        offset = skb_cb->bytes_read;
                        copy = min_t(int, dlen - offset, buflen - copied);
                        rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
@@ -1919,7 +1991,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
 
                /* Send connection flow control advertisement when applicable */
                tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
-               if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
+               if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
                        tipc_sk_send_ack(tsk);
 
                /* Exit if all requested data or FIN/error received */
@@ -1990,6 +2062,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
                smp_wmb();
                tsk->cong_link_cnt--;
                wakeup = true;
+               tipc_sk_push_backlog(tsk);
                break;
        case GROUP_PROTOCOL:
                tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
@@ -2029,6 +2102,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
 
        if (unlikely(msg_mcast(hdr)))
                return false;
+       tsk->oneway = 0;
 
        switch (sk->sk_state) {
        case TIPC_CONNECTING:
@@ -2074,6 +2148,8 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
                        return true;
                return false;
        case TIPC_ESTABLISHED:
+               if (!skb_queue_empty(&sk->sk_write_queue))
+                       tipc_sk_push_backlog(tsk);
                /* Accept only connection-based messages sent by peer */
                if (likely(con_msg && !err && pport == oport && pnode == onode))
                        return true;
@@ -2959,6 +3035,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
        case TIPC_SRC_DROPPABLE:
        case TIPC_DEST_DROPPABLE:
        case TIPC_CONN_TIMEOUT:
+       case TIPC_NODELAY:
                if (ol < sizeof(value))
                        return -EINVAL;
                if (get_user(value, (u32 __user *)ov))
@@ -3007,6 +3084,10 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
        case TIPC_GROUP_LEAVE:
                res = tipc_sk_leave(tsk);
                break;
+       case TIPC_NODELAY:
+               tsk->nodelay = !!value;
+               tsk_set_nagle(tsk);
+               break;
        default:
                res = -EINVAL;
        }
@@ -3588,13 +3669,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
        struct tipc_sock *tsk;
 
        if (!tsk_portid) {
-               struct nlattr **attrs;
+               struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
                struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
 
-               err = tipc_nlmsg_parse(cb->nlh, &attrs);
-               if (err)
-                       return err;
-
                if (!attrs[TIPC_NLA_SOCK])
                        return -EINVAL;
 
index 287df68..43ca5fd 100644 (file)
@@ -448,15 +448,11 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
        int i;
 
        if (!bid && !skip_cnt) {
+               struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
                struct net *net = sock_net(skb->sk);
                struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
-               struct nlattr **attrs;
                char *bname;
 
-               err = tipc_nlmsg_parse(cb->nlh, &attrs);
-               if (err)
-                       return err;
-
                if (!attrs[TIPC_NLA_BEARER])
                        return -EINVAL;
 
index e4328b3..61ec785 100644 (file)
@@ -26,3 +26,13 @@ config TLS_DEVICE
        Enable kernel support for HW offload of the TLS protocol.
 
        If unsure, say N.
+
+config TLS_TOE
+       bool "Transport Layer Security TCP stack bypass"
+       depends on TLS
+       default n
+       help
+       Enable kernel support for legacy HW offload of the TLS protocol,
+       which is incompatible with the Linux networking stack semantics.
+
+       If unsure, say N.
index ef0dc74..f1ffbfe 100644 (file)
@@ -3,8 +3,11 @@
 # Makefile for the TLS subsystem.
 #
 
+CFLAGS_trace.o := -I$(src)
+
 obj-$(CONFIG_TLS) += tls.o
 
-tls-y := tls_main.o tls_sw.o
+tls-y := tls_main.o tls_sw.o tls_proc.o trace.o
 
+tls-$(CONFIG_TLS_TOE) += tls_toe.o
 tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o
index f959487..33b267b 100644 (file)
@@ -38,6 +38,8 @@
 #include <net/tcp.h>
 #include <net/tls.h>
 
+#include "trace.h"
+
 /* device_offload_lock is used to synchronize tls_dev_add
  * against NETDEV_DOWN notifications.
  */
@@ -202,6 +204,15 @@ void tls_device_free_resources_tx(struct sock *sk)
        tls_free_partial_record(sk, tls_ctx);
 }
 
+void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+       trace_tls_device_tx_resync_req(sk, got_seq, exp_seq);
+       WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags));
+}
+EXPORT_SYMBOL_GPL(tls_offload_tx_resync_request);
+
 static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
                                 u32 seq)
 {
@@ -216,6 +227,7 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
 
        rcd_sn = tls_ctx->tx.rec_seq;
 
+       trace_tls_device_tx_resync_send(sk, seq, rcd_sn);
        down_read(&device_offload_lock);
        netdev = tls_ctx->netdev;
        if (netdev)
@@ -419,7 +431,7 @@ static int tls_push_data(struct sock *sk,
            ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
                return -ENOTSUPP;
 
-       if (sk->sk_err)
+       if (unlikely(sk->sk_err))
                return -sk->sk_err;
 
        flags |= MSG_SENDPAGE_DECRYPTED;
@@ -440,9 +452,8 @@ static int tls_push_data(struct sock *sk,
        max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
                              prot->prepend_size;
        do {
-               rc = tls_do_allocation(sk, ctx, pfrag,
-                                      prot->prepend_size);
-               if (rc) {
+               rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size);
+               if (unlikely(rc)) {
                        rc = sk_stream_wait_memory(sk, &timeo);
                        if (!rc)
                                continue;
@@ -637,15 +648,19 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
 static void tls_device_resync_rx(struct tls_context *tls_ctx,
                                 struct sock *sk, u32 seq, u8 *rcd_sn)
 {
+       struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
        struct net_device *netdev;
 
        if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
                return;
+
+       trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type);
        netdev = READ_ONCE(tls_ctx->netdev);
        if (netdev)
                netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
                                                   TLS_OFFLOAD_CTX_DIR_RX);
        clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
 }
 
 void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
@@ -653,8 +668,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_offload_context_rx *rx_ctx;
        u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
+       u32 sock_data, is_req_pending;
        struct tls_prot_info *prot;
-       u32 is_req_pending;
        s64 resync_req;
        u32 req_seq;
 
@@ -683,8 +698,12 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
                /* head of next rec is already in, note that the sock_inq will
                 * include the currently parsed message when called from parser
                 */
-               if (tcp_inq(sk) > rcd_len)
+               sock_data = tcp_inq(sk);
+               if (sock_data > rcd_len) {
+                       trace_tls_device_rx_resync_nh_delay(sk, sock_data,
+                                                           rcd_len);
                        return;
+               }
 
                rx_ctx->resync_nh_do_now = 0;
                seq += rcd_len;
@@ -728,6 +747,7 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
 
        /* head of next rec is already in, parser will sync for us */
        if (tcp_inq(sk) > rxm->full_len) {
+               trace_tls_device_rx_resync_nh_schedule(sk);
                ctx->resync_nh_do_now = 1;
        } else {
                struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -826,9 +846,9 @@ free_buf:
        return err;
 }
 
-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+                        struct sk_buff *skb, struct strp_msg *rxm)
 {
-       struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
        int is_decrypted = skb->decrypted;
        int is_encrypted = !is_decrypted;
@@ -840,6 +860,10 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
                is_encrypted &= !skb_iter->decrypted;
        }
 
+       trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
+                                  tls_ctx->rx.rec_seq, rxm->full_len,
+                                  is_encrypted, is_decrypted);
+
        ctx->sw.decrypted |= is_decrypted;
 
        /* Return immediately if the record is either entirely plaintext or
@@ -1013,6 +1037,8 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
        rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX,
                                             &ctx->crypto_send.info,
                                             tcp_sk(sk)->write_seq);
+       trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_TX,
+                                    tcp_sk(sk)->write_seq, rec_seq, rc);
        if (rc)
                goto release_lock;
 
@@ -1049,6 +1075,7 @@ free_marker_record:
 
 int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
 {
+       struct tls12_crypto_info_aes_gcm_128 *info;
        struct tls_offload_context_rx *context;
        struct net_device *netdev;
        int rc = 0;
@@ -1096,6 +1123,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
        rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
                                             &ctx->crypto_recv.info,
                                             tcp_sk(sk)->copied_seq);
+       info = (void *)&ctx->crypto_recv.info;
+       trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_RX,
+                                    tcp_sk(sk)->copied_seq, info->rec_seq, rc);
        if (rc)
                goto free_sw_resources;
 
index ac88877..f144b96 100644 (file)
@@ -41,7 +41,9 @@
 #include <linux/inetdevice.h>
 #include <linux/inet_diag.h>
 
+#include <net/snmp.h>
 #include <net/tls.h>
+#include <net/tls_toe.h>
 
 MODULE_AUTHOR("Mellanox Technologies");
 MODULE_DESCRIPTION("Transport Layer Security Support");
@@ -58,14 +60,12 @@ static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
 static struct proto *saved_tcpv4_prot;
 static DEFINE_MUTEX(tcpv4_prot_mutex);
-static LIST_HEAD(device_list);
-static DEFINE_SPINLOCK(device_spinlock);
 static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
 static struct proto_ops tls_sw_proto_ops;
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
                         struct proto *base);
 
-static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
        int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 
@@ -286,14 +286,19 @@ static void tls_sk_proto_cleanup(struct sock *sk,
                kfree(ctx->tx.rec_seq);
                kfree(ctx->tx.iv);
                tls_sw_release_resources_tx(sk);
+               TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
        } else if (ctx->tx_conf == TLS_HW) {
                tls_device_free_resources_tx(sk);
+               TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
        }
 
-       if (ctx->rx_conf == TLS_SW)
+       if (ctx->rx_conf == TLS_SW) {
                tls_sw_release_resources_rx(sk);
-       else if (ctx->rx_conf == TLS_HW)
+               TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
+       } else if (ctx->rx_conf == TLS_HW) {
                tls_device_offload_cleanup_rx(sk);
+               TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE);
+       }
 }
 
 static void tls_sk_proto_close(struct sock *sk, long timeout)
@@ -534,19 +539,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
        if (tx) {
                rc = tls_set_device_offload(sk, ctx);
                conf = TLS_HW;
-               if (rc) {
+               if (!rc) {
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE);
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
+               } else {
                        rc = tls_set_sw_offload(sk, ctx, 1);
                        if (rc)
                                goto err_crypto_info;
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW);
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
                        conf = TLS_SW;
                }
        } else {
                rc = tls_set_device_offload_rx(sk, ctx);
                conf = TLS_HW;
-               if (rc) {
+               if (!rc) {
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE);
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE);
+               } else {
                        rc = tls_set_sw_offload(sk, ctx, 0);
                        if (rc)
                                goto err_crypto_info;
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW);
+                       TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
                        conf = TLS_SW;
                }
                tls_sw_strparser_arm(sk, ctx);
@@ -603,7 +618,7 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
        return do_tls_setsockopt(sk, optname, optval, optlen);
 }
 
-static struct tls_context *create_ctx(struct sock *sk)
+struct tls_context *tls_ctx_create(struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tls_context *ctx;
@@ -643,90 +658,6 @@ static void tls_build_proto(struct sock *sk)
        }
 }
 
-static void tls_hw_sk_destruct(struct sock *sk)
-{
-       struct tls_context *ctx = tls_get_ctx(sk);
-       struct inet_connection_sock *icsk = inet_csk(sk);
-
-       ctx->sk_destruct(sk);
-       /* Free ctx */
-       rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
-       tls_ctx_free(sk, ctx);
-}
-
-static int tls_hw_prot(struct sock *sk)
-{
-       struct tls_context *ctx;
-       struct tls_device *dev;
-       int rc = 0;
-
-       spin_lock_bh(&device_spinlock);
-       list_for_each_entry(dev, &device_list, dev_list) {
-               if (dev->feature && dev->feature(dev)) {
-                       ctx = create_ctx(sk);
-                       if (!ctx)
-                               goto out;
-
-                       spin_unlock_bh(&device_spinlock);
-                       tls_build_proto(sk);
-                       ctx->sk_destruct = sk->sk_destruct;
-                       sk->sk_destruct = tls_hw_sk_destruct;
-                       ctx->rx_conf = TLS_HW_RECORD;
-                       ctx->tx_conf = TLS_HW_RECORD;
-                       update_sk_prot(sk, ctx);
-                       spin_lock_bh(&device_spinlock);
-                       rc = 1;
-                       break;
-               }
-       }
-out:
-       spin_unlock_bh(&device_spinlock);
-       return rc;
-}
-
-static void tls_hw_unhash(struct sock *sk)
-{
-       struct tls_context *ctx = tls_get_ctx(sk);
-       struct tls_device *dev;
-
-       spin_lock_bh(&device_spinlock);
-       list_for_each_entry(dev, &device_list, dev_list) {
-               if (dev->unhash) {
-                       kref_get(&dev->kref);
-                       spin_unlock_bh(&device_spinlock);
-                       dev->unhash(dev, sk);
-                       kref_put(&dev->kref, dev->release);
-                       spin_lock_bh(&device_spinlock);
-               }
-       }
-       spin_unlock_bh(&device_spinlock);
-       ctx->sk_proto->unhash(sk);
-}
-
-static int tls_hw_hash(struct sock *sk)
-{
-       struct tls_context *ctx = tls_get_ctx(sk);
-       struct tls_device *dev;
-       int err;
-
-       err = ctx->sk_proto->hash(sk);
-       spin_lock_bh(&device_spinlock);
-       list_for_each_entry(dev, &device_list, dev_list) {
-               if (dev->hash) {
-                       kref_get(&dev->kref);
-                       spin_unlock_bh(&device_spinlock);
-                       err |= dev->hash(dev, sk);
-                       kref_put(&dev->kref, dev->release);
-                       spin_lock_bh(&device_spinlock);
-               }
-       }
-       spin_unlock_bh(&device_spinlock);
-
-       if (err)
-               tls_hw_unhash(sk);
-       return err;
-}
-
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
                         struct proto *base)
 {
@@ -764,10 +695,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 
        prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
 #endif
-
+#ifdef CONFIG_TLS_TOE
        prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
-       prot[TLS_HW_RECORD][TLS_HW_RECORD].hash         = tls_hw_hash;
-       prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash       = tls_hw_unhash;
+       prot[TLS_HW_RECORD][TLS_HW_RECORD].hash         = tls_toe_hash;
+       prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash       = tls_toe_unhash;
+#endif
 }
 
 static int tls_init(struct sock *sk)
@@ -775,8 +707,12 @@ static int tls_init(struct sock *sk)
        struct tls_context *ctx;
        int rc = 0;
 
-       if (tls_hw_prot(sk))
+       tls_build_proto(sk);
+
+#ifdef CONFIG_TLS_TOE
+       if (tls_toe_bypass(sk))
                return 0;
+#endif
 
        /* The TLS ulp is currently supported only for TCP sockets
         * in ESTABLISHED state.
@@ -787,11 +723,9 @@ static int tls_init(struct sock *sk)
        if (sk->sk_state != TCP_ESTABLISHED)
                return -ENOTSUPP;
 
-       tls_build_proto(sk);
-
        /* allocate tls context */
        write_lock_bh(&sk->sk_callback_lock);
-       ctx = create_ctx(sk);
+       ctx = tls_ctx_create(sk);
        if (!ctx) {
                rc = -ENOMEM;
                goto out;
@@ -877,21 +811,34 @@ static size_t tls_get_info_size(const struct sock *sk)
        return size;
 }
 
-void tls_register_device(struct tls_device *device)
+static int __net_init tls_init_net(struct net *net)
 {
-       spin_lock_bh(&device_spinlock);
-       list_add_tail(&device->dev_list, &device_list);
-       spin_unlock_bh(&device_spinlock);
+       int err;
+
+       net->mib.tls_statistics = alloc_percpu(struct linux_tls_mib);
+       if (!net->mib.tls_statistics)
+               return -ENOMEM;
+
+       err = tls_proc_init(net);
+       if (err)
+               goto err_free_stats;
+
+       return 0;
+err_free_stats:
+       free_percpu(net->mib.tls_statistics);
+       return err;
 }
-EXPORT_SYMBOL(tls_register_device);
 
-void tls_unregister_device(struct tls_device *device)
+static void __net_exit tls_exit_net(struct net *net)
 {
-       spin_lock_bh(&device_spinlock);
-       list_del(&device->dev_list);
-       spin_unlock_bh(&device_spinlock);
+       tls_proc_fini(net);
+       free_percpu(net->mib.tls_statistics);
 }
-EXPORT_SYMBOL(tls_unregister_device);
+
+static struct pernet_operations tls_proc_ops = {
+       .init = tls_init_net,
+       .exit = tls_exit_net,
+};
 
 static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
        .name                   = "tls",
@@ -904,6 +851,12 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 
 static int __init tls_register(void)
 {
+       int err;
+
+       err = register_pernet_subsys(&tls_proc_ops);
+       if (err)
+               return err;
+
        tls_sw_proto_ops = inet_stream_ops;
        tls_sw_proto_ops.splice_read = tls_sw_splice_read;
 
@@ -917,6 +870,7 @@ static void __exit tls_unregister(void)
 {
        tcp_unregister_ulp(&tcp_tls_ulp_ops);
        tls_device_cleanup();
+       unregister_pernet_subsys(&tls_proc_ops);
 }
 
 module_init(tls_register);
diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c
new file mode 100644 (file)
index 0000000..83d9c80
--- /dev/null
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/snmp.h>
+#include <net/tls.h>
+
+static const struct snmp_mib tls_mib_list[] = {
+       SNMP_MIB_ITEM("TlsCurrTxSw", LINUX_MIB_TLSCURRTXSW),
+       SNMP_MIB_ITEM("TlsCurrRxSw", LINUX_MIB_TLSCURRRXSW),
+       SNMP_MIB_ITEM("TlsCurrTxDevice", LINUX_MIB_TLSCURRTXDEVICE),
+       SNMP_MIB_ITEM("TlsCurrRxDevice", LINUX_MIB_TLSCURRRXDEVICE),
+       SNMP_MIB_ITEM("TlsTxSw", LINUX_MIB_TLSTXSW),
+       SNMP_MIB_ITEM("TlsRxSw", LINUX_MIB_TLSRXSW),
+       SNMP_MIB_ITEM("TlsTxDevice", LINUX_MIB_TLSTXDEVICE),
+       SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE),
+       SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR),
+       SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC),
+       SNMP_MIB_SENTINEL
+};
+
+static int tls_statistics_seq_show(struct seq_file *seq, void *v)
+{
+       unsigned long buf[LINUX_MIB_TLSMAX] = {};
+       struct net *net = seq->private;
+       int i;
+
+       snmp_get_cpu_field_batch(buf, tls_mib_list, net->mib.tls_statistics);
+       for (i = 0; tls_mib_list[i].name; i++)
+               seq_printf(seq, "%-32s\t%lu\n", tls_mib_list[i].name, buf[i]);
+
+       return 0;
+}
+
+int __net_init tls_proc_init(struct net *net)
+{
+       if (!proc_create_net_single("tls_stat", 0444, net->proc_net,
+                                   tls_statistics_seq_show, NULL))
+               return -ENOMEM;
+       return 0;
+}
+
+void __net_exit tls_proc_fini(struct net *net)
+{
+       remove_proc_entry("tls_stat", net->proc_net);
+}
index c2b5e0d..de7561d 100644 (file)
@@ -168,6 +168,9 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 
        /* Propagate if there was an err */
        if (err) {
+               if (err == -EBADMSG)
+                       TLS_INC_STATS(sock_net(skb->sk),
+                                     LINUX_MIB_TLSDECRYPTERROR);
                ctx->async_wait.err = err;
                tls_err_abort(skb->sk, err);
        } else {
@@ -253,6 +256,8 @@ static int tls_do_decryption(struct sock *sk,
                        return ret;
 
                ret = crypto_wait_req(ret, &ctx->async_wait);
+       } else if (ret == -EBADMSG) {
+               TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
        }
 
        if (async)
@@ -1490,7 +1495,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 
        if (!ctx->decrypted) {
                if (tls_ctx->rx_conf == TLS_HW) {
-                       err = tls_device_decrypted(sk, skb);
+                       err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
                        if (err < 0)
                                return err;
                }
@@ -1518,7 +1523,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
                rxm->offset += prot->prepend_size;
                rxm->full_len -= prot->overhead_size;
                tls_advance_record_sn(sk, prot, &tls_ctx->rx);
-               ctx->decrypted = true;
+               ctx->decrypted = 1;
                ctx->saved_data_ready(sk);
        } else {
                *zc = false;
@@ -1928,7 +1933,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
                        tls_err_abort(sk, EBADMSG);
                        goto splice_read_end;
                }
-               ctx->decrypted = true;
+               ctx->decrypted = 1;
        }
        rxm = strp_msg(skb);
 
@@ -2029,7 +2034,7 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
        struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
        struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
-       ctx->decrypted = false;
+       ctx->decrypted = 0;
 
        ctx->recv_pkt = skb;
        strp_pause(strp);
@@ -2386,10 +2391,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
                tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
 
                if (crypto_info->version == TLS_1_3_VERSION)
-                       sw_ctx_rx->async_capable = false;
+                       sw_ctx_rx->async_capable = 0;
                else
                        sw_ctx_rx->async_capable =
-                               tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+                               !!(tfm->__crt_alg->cra_flags &
+                                  CRYPTO_ALG_ASYNC);
 
                /* Set up strparser */
                memset(&cb, 0, sizeof(cb));
diff --git a/net/tls/tls_toe.c b/net/tls/tls_toe.c
new file mode 100644 (file)
index 0000000..7e1330f
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <net/inet_connection_sock.h>
+#include <net/tls.h>
+#include <net/tls_toe.h>
+
+static LIST_HEAD(device_list);
+static DEFINE_SPINLOCK(device_spinlock);
+
+static void tls_toe_sk_destruct(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tls_context *ctx = tls_get_ctx(sk);
+
+       ctx->sk_destruct(sk);
+       /* Free ctx */
+       rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+       tls_ctx_free(sk, ctx);
+}
+
+int tls_toe_bypass(struct sock *sk)
+{
+       struct tls_toe_device *dev;
+       struct tls_context *ctx;
+       int rc = 0;
+
+       spin_lock_bh(&device_spinlock);
+       list_for_each_entry(dev, &device_list, dev_list) {
+               if (dev->feature && dev->feature(dev)) {
+                       ctx = tls_ctx_create(sk);
+                       if (!ctx)
+                               goto out;
+
+                       ctx->sk_destruct = sk->sk_destruct;
+                       sk->sk_destruct = tls_toe_sk_destruct;
+                       ctx->rx_conf = TLS_HW_RECORD;
+                       ctx->tx_conf = TLS_HW_RECORD;
+                       update_sk_prot(sk, ctx);
+                       rc = 1;
+                       break;
+               }
+       }
+out:
+       spin_unlock_bh(&device_spinlock);
+       return rc;
+}
+
+void tls_toe_unhash(struct sock *sk)
+{
+       struct tls_context *ctx = tls_get_ctx(sk);
+       struct tls_toe_device *dev;
+
+       spin_lock_bh(&device_spinlock);
+       list_for_each_entry(dev, &device_list, dev_list) {
+               if (dev->unhash) {
+                       kref_get(&dev->kref);
+                       spin_unlock_bh(&device_spinlock);
+                       dev->unhash(dev, sk);
+                       kref_put(&dev->kref, dev->release);
+                       spin_lock_bh(&device_spinlock);
+               }
+       }
+       spin_unlock_bh(&device_spinlock);
+       ctx->sk_proto->unhash(sk);
+}
+
+int tls_toe_hash(struct sock *sk)
+{
+       struct tls_context *ctx = tls_get_ctx(sk);
+       struct tls_toe_device *dev;
+       int err;
+
+       err = ctx->sk_proto->hash(sk);
+       spin_lock_bh(&device_spinlock);
+       list_for_each_entry(dev, &device_list, dev_list) {
+               if (dev->hash) {
+                       kref_get(&dev->kref);
+                       spin_unlock_bh(&device_spinlock);
+                       err |= dev->hash(dev, sk);
+                       kref_put(&dev->kref, dev->release);
+                       spin_lock_bh(&device_spinlock);
+               }
+       }
+       spin_unlock_bh(&device_spinlock);
+
+       if (err)
+               tls_toe_unhash(sk);
+       return err;
+}
+
+void tls_toe_register_device(struct tls_toe_device *device)
+{
+       spin_lock_bh(&device_spinlock);
+       list_add_tail(&device->dev_list, &device_list);
+       spin_unlock_bh(&device_spinlock);
+}
+EXPORT_SYMBOL(tls_toe_register_device);
+
+void tls_toe_unregister_device(struct tls_toe_device *device)
+{
+       spin_lock_bh(&device_spinlock);
+       list_del(&device->dev_list);
+       spin_unlock_bh(&device_spinlock);
+}
+EXPORT_SYMBOL(tls_toe_unregister_device);
diff --git a/net/tls/trace.c b/net/tls/trace.c
new file mode 100644 (file)
index 0000000..e374913
--- /dev/null
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/module.h>
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#endif
diff --git a/net/tls/trace.h b/net/tls/trace.h
new file mode 100644 (file)
index 0000000..9ba5f60
--- /dev/null
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tls
+
+#if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _TLS_TRACE_H_
+
+#include <asm/unaligned.h>
+#include <linux/tracepoint.h>
+
+struct sock;
+
+TRACE_EVENT(tls_device_offload_set,
+
+       TP_PROTO(struct sock *sk, int dir, u32 tcp_seq, u8 *rec_no, int ret),
+
+       TP_ARGS(sk, dir, tcp_seq, rec_no, ret),
+
+       TP_STRUCT__entry(
+               __field(        struct sock *,  sk              )
+               __field(        u64,            rec_no          )
+               __field(        int,            dir             )
+               __field(        u32,            tcp_seq         )
+               __field(        int,            ret             )
+       ),
+
+       TP_fast_assign(
+               __entry->sk = sk;
+               __entry->rec_no = get_unaligned_be64(rec_no);
+               __entry->dir = dir;
+               __entry->tcp_seq = tcp_seq;
+               __entry->ret = ret;
+       ),
+
+       TP_printk(
+               "sk=%p direction=%d tcp_seq=%u rec_no=%llu ret=%d",
+               __entry->sk, __entry->dir, __entry->tcp_seq, __entry->rec_no,
+               __entry->ret
+       )
+);
+
+TRACE_EVENT(tls_device_decrypted,
+
+       TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, u32 rec_len,
+                bool encrypted, bool decrypted),
+
+       TP_ARGS(sk, tcp_seq, rec_no, rec_len, encrypted, decrypted),
+
+       TP_STRUCT__entry(
+               __field(        struct sock *,  sk              )
+               __field(        u64,            rec_no          )
+               __field(        u32,            tcp_seq         )
+               __field(        u32,            rec_len         )
+               __field(        bool,           encrypted       )
+               __field(        bool,           decrypted       )
+       ),
+
+       TP_fast_assign(
+               __entry->sk = sk;
+               __entry->rec_no = get_unaligned_be64(rec_no);
+               __entry->tcp_seq = tcp_seq;
+               __entry->rec_len = rec_len;
+               __entry->encrypted = encrypted;
+               __entry->decrypted = decrypted;
+       ),
+
+       TP_printk(
+               "sk=%p tcp_seq=%u rec_no=%llu len=%u encrypted=%d decrypted=%d",
+               __entry->sk, __entry->tcp_seq,
+               __entry->rec_no, __entry->rec_len,
+               __entry->encrypted, __entry->decrypted
+       )
+);
+
+TRACE_EVENT(tls_device_rx_resync_send,
+
+       TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, int sync_type),
+
+       TP_ARGS(sk, tcp_seq, rec_no, sync_type),
+
+       TP_STRUCT__entry(
+               __field(        struct sock *,  sk              )
+               __field(        u64,            rec_no          )
+               __field(        u32,            tcp_seq         )
+               __field(        int,            sync_type       )
+       ),
+
+       TP_fast_assign(
+               __entry->sk = sk;
+               __entry->rec_no = get_unaligned_be64(rec_no);
+               __entry->tcp_seq = tcp_seq;
+               __entry->sync_type = sync_type;
+       ),
+
+       TP_printk(
+               "sk=%p tcp_seq=%u rec_no=%llu sync_type=%d",
+               __entry->sk, __entry->tcp_seq, __entry->rec_no,
+               __entry->sync_type
+       )
+);
+
+TRACE_EVENT(tls_device_rx_resync_nh_schedule,
+
+       TP_PROTO(struct sock *sk),
+
+       TP_ARGS(sk),
+
+       TP_STRUCT__entry(
+               __field(        struct sock *,  sk              )
+       ),
+
+       TP_fast_assign(
+               __entry->sk = sk;
+       ),
+
+       TP_printk(
+               "sk=%p", __entry->sk
+       )
+);
+
+TRACE_EVENT(tls_device_rx_resync_nh_delay,
+
+       TP_PROTO(struct sock *sk, u32 sock_data, u32 rec_len),
+
+       TP_ARGS(sk, sock_data, rec_len),
+
+       TP_STRUCT__entry(
+               __field(        struct sock *,  sk              )
+               __field(        u32,            sock_data       )
+               __field(        u32,            rec_len         )
+       ),
+
+       TP_fast_assign(
+               __entry->sk = sk;
+               __entry->sock_data = sock_data;
+               __entry->rec_len = rec_len;
+       ),
+
+       TP_printk(
+               "sk=%p sock_data=%u rec_len=%u",
+               __entry->sk, __entry->sock_data, __entry->rec_len
+       )
+);
+
+TRACE_EVENT(tls_device_tx_resync_req,
+
+       TP_PROTO(struct sock *sk, u32 tcp_seq, u32 exp_tcp_seq),
+
+       TP_ARGS(sk, tcp_seq, exp_tcp_seq),
+
+       TP_STRUCT__entry(
+               __field(        struct sock *,  sk              )
+               __field(        u32,            tcp_seq         )
+               __field(        u32,            exp_tcp_seq     )
+       ),
+
+       TP_fast_assign(
+               __entry->sk = sk;
+               __entry->tcp_seq = tcp_seq;
+               __entry->exp_tcp_seq = exp_tcp_seq;
+       ),
+
+       TP_printk(
+               "sk=%p tcp_seq=%u exp_tcp_seq=%u",
+               __entry->sk, __entry->tcp_seq, __entry->exp_tcp_seq
+       )
+);
+
+TRACE_EVENT(tls_device_tx_resync_send,
+
+       TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no),
+
+       TP_ARGS(sk, tcp_seq, rec_no),
+
+       TP_STRUCT__entry(
+               __field(        struct sock *,  sk              )
+               __field(        u64,            rec_no          )
+               __field(        u32,            tcp_seq         )
+       ),
+
+       TP_fast_assign(
+               __entry->sk = sk;
+               __entry->rec_no = get_unaligned_be64(rec_no);
+               __entry->tcp_seq = tcp_seq;
+       ),
+
+       TP_printk(
+               "sk=%p tcp_seq=%u rec_no=%llu",
+               __entry->sk, __entry->tcp_seq, __entry->rec_no
+       )
+);
+
+#endif /* _TLS_TRACE_H_ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
index 0d8da80..193cba2 100644 (file)
@@ -284,11 +284,9 @@ static struct sock *__unix_find_socket_byname(struct net *net,
 
                if (u->addr->len == len &&
                    !memcmp(u->addr->name, sunname, len))
-                       goto found;
+                       return s;
        }
-       s = NULL;
-found:
-       return s;
+       return NULL;
 }
 
 static inline struct sock *unix_find_socket_byname(struct net *net,
index 582a3e4..c0856e7 100644 (file)
@@ -641,7 +641,6 @@ EXPORT_SYMBOL_GPL(__vsock_create);
 static void __vsock_release(struct sock *sk, int level)
 {
        if (sk) {
-               struct sk_buff *skb;
                struct sock *pending;
                struct vsock_sock *vsk;
 
@@ -662,8 +661,7 @@ static void __vsock_release(struct sock *sk, int level)
                sock_orphan(sk);
                sk->sk_shutdown = SHUTDOWN_MASK;
 
-               while ((skb = skb_dequeue(&sk->sk_receive_queue)))
-                       kfree_skb(skb);
+               skb_queue_purge(&sk->sk_receive_queue);
 
                /* Clean up any sockets that never were accepted. */
                while ((pending = vsock_dequeue_accept(sk)) != NULL) {
index c443db7..bef8772 100644 (file)
 #include <linux/hyperv.h>
 #include <net/sock.h>
 #include <net/af_vsock.h>
+#include <asm/hyperv-tlfs.h>
 
 /* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
- * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
- * hosts don't have this limitation; but, keep the defaults the same for compat.
+ * stricter requirements on the hv_sock ring buffer size of six 4K pages.
+ * hyperv-tlfs defines HV_HYP_PAGE_SIZE as 4K. Newer hosts don't have this
+ * limitation; but, keep the defaults the same for compat.
  */
-#define PAGE_SIZE_4K           4096
-#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
-#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
-#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
+#define RINGBUFFER_HVS_RCV_SIZE (HV_HYP_PAGE_SIZE * 6)
+#define RINGBUFFER_HVS_SND_SIZE (HV_HYP_PAGE_SIZE * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (HV_HYP_PAGE_SIZE * 64)
 
 /* The MTU is 16KB per the host side's design */
 #define HVS_MTU_SIZE           (1024 * 16)
@@ -54,7 +55,8 @@ struct hvs_recv_buf {
  * ringbuffer APIs that allow us to directly copy data from userspace buffer
  * to VMBus ringbuffer.
  */
-#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
+#define HVS_SEND_BUF_SIZE \
+               (HV_HYP_PAGE_SIZE - sizeof(struct vmpipe_proto_header))
 
 struct hvs_send_buf {
        /* The header before the payload data */
@@ -393,10 +395,10 @@ static void hvs_open_connection(struct vmbus_channel *chan)
        } else {
                sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
                sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
-               sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+               sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE);
                rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
                rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
-               rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+               rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
        }
 
        ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
@@ -670,7 +672,7 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
        ssize_t ret = 0;
        ssize_t bytes_written = 0;
 
-       BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
+       BUILD_BUG_ON(sizeof(*send_buf) != HV_HYP_PAGE_SIZE);
 
        send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
        if (!send_buf)
index 481f7f8..d02c9b4 100644 (file)
@@ -267,6 +267,55 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
        return virtio_transport_send_pkt_info(vsk, &info);
 }
 
+static ssize_t
+virtio_transport_stream_do_peek(struct vsock_sock *vsk,
+                               struct msghdr *msg,
+                               size_t len)
+{
+       struct virtio_vsock_sock *vvs = vsk->trans;
+       struct virtio_vsock_pkt *pkt;
+       size_t bytes, total = 0, off;
+       int err = -EFAULT;
+
+       spin_lock_bh(&vvs->rx_lock);
+
+       list_for_each_entry(pkt, &vvs->rx_queue, list) {
+               off = pkt->off;
+
+               if (total == len)
+                       break;
+
+               while (total < len && off < pkt->len) {
+                       bytes = len - total;
+                       if (bytes > pkt->len - off)
+                               bytes = pkt->len - off;
+
+                       /* sk_lock is held by caller so no one else can dequeue.
+                        * Unlock rx_lock since memcpy_to_msg() may sleep.
+                        */
+                       spin_unlock_bh(&vvs->rx_lock);
+
+                       err = memcpy_to_msg(msg, pkt->buf + off, bytes);
+                       if (err)
+                               goto out;
+
+                       spin_lock_bh(&vvs->rx_lock);
+
+                       total += bytes;
+                       off += bytes;
+               }
+       }
+
+       spin_unlock_bh(&vvs->rx_lock);
+
+       return total;
+
+out:
+       if (total)
+               err = total;
+       return err;
+}
+
 static ssize_t
 virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
@@ -339,9 +388,9 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk,
                                size_t len, int flags)
 {
        if (flags & MSG_PEEK)
-               return -EOPNOTSUPP;
-
-       return virtio_transport_stream_do_dequeue(vsk, msg, len);
+               return virtio_transport_stream_do_peek(vsk, msg, len);
+       else
+               return virtio_transport_stream_do_dequeue(vsk, msg, len);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
 
index 7b72286..7186cb6 100644 (file)
@@ -8265,10 +8265,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
        /* leave request id zero for legacy request
         * or if driver does not support multi-scheduled scan
         */
-       if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) {
-               while (!sched_scan_req->reqid)
-                       sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
-       }
+       if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1)
+               sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
 
        err = rdev_sched_scan_start(rdev, dev, sched_scan_req);
        if (err)
index dc8f689..f9e8303 100644 (file)
@@ -114,7 +114,7 @@ void regulatory_hint_country_ie(struct wiphy *wiphy,
                         u8 country_ie_len);
 
 /**
- * regulatory_hint_disconnect - informs all devices have been disconneted
+ * regulatory_hint_disconnect - informs all devices have been disconnected
  *
  * Regulotory rules can be enhanced further upon scanning and upon
  * connection to an AP. These rules become stale if we disconnect
index 9044073..6040bc2 100644 (file)
@@ -196,7 +196,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
        return false;
 }
 
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
        u32 len;
 
@@ -212,7 +212,7 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
                __xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
 }
 
-void xsk_flush(struct xdp_sock *xs)
+static void xsk_flush(struct xdp_sock *xs)
 {
        xskq_produce_flush_desc(xs->rx);
        xs->sk.sk_data_ready(&xs->sk);
@@ -264,6 +264,35 @@ out_unlock:
        return err;
 }
 
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+                      struct xdp_sock *xs)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+       int err;
+
+       err = xsk_rcv(xs, xdp);
+       if (err)
+               return err;
+
+       if (!xs->flush_node.prev)
+               list_add(&xs->flush_node, flush_list);
+
+       return 0;
+}
+
+void __xsk_map_flush(struct bpf_map *map)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+       struct xdp_sock *xs, *tmp;
+
+       list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+               xsk_flush(xs);
+               __list_del_clearprev(&xs->flush_node);
+       }
+}
+
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
        xskq_produce_flush_addr_n(umem->cq, nb_entries);
index 1d9be26..4df11dd 100644 (file)
@@ -4,55 +4,53 @@ BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src))
 TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools
 
 # List of programs to build
-hostprogs-y := test_lru_dist
-hostprogs-y += sock_example
-hostprogs-y += fds_example
-hostprogs-y += sockex1
-hostprogs-y += sockex2
-hostprogs-y += sockex3
-hostprogs-y += tracex1
-hostprogs-y += tracex2
-hostprogs-y += tracex3
-hostprogs-y += tracex4
-hostprogs-y += tracex5
-hostprogs-y += tracex6
-hostprogs-y += tracex7
-hostprogs-y += test_probe_write_user
-hostprogs-y += trace_output
-hostprogs-y += lathist
-hostprogs-y += offwaketime
-hostprogs-y += spintest
-hostprogs-y += map_perf_test
-hostprogs-y += test_overhead
-hostprogs-y += test_cgrp2_array_pin
-hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_sock
-hostprogs-y += test_cgrp2_sock2
-hostprogs-y += xdp1
-hostprogs-y += xdp2
-hostprogs-y += xdp_router_ipv4
-hostprogs-y += test_current_task_under_cgroup
-hostprogs-y += trace_event
-hostprogs-y += sampleip
-hostprogs-y += tc_l2_redirect
-hostprogs-y += lwt_len_hist
-hostprogs-y += xdp_tx_iptunnel
-hostprogs-y += test_map_in_map
-hostprogs-y += per_socket_stats_example
-hostprogs-y += xdp_redirect
-hostprogs-y += xdp_redirect_map
-hostprogs-y += xdp_redirect_cpu
-hostprogs-y += xdp_monitor
-hostprogs-y += xdp_rxq_info
-hostprogs-y += syscall_tp
-hostprogs-y += cpustat
-hostprogs-y += xdp_adjust_tail
-hostprogs-y += xdpsock
-hostprogs-y += xdp_fwd
-hostprogs-y += task_fd_query
-hostprogs-y += xdp_sample_pkts
-hostprogs-y += ibumad
-hostprogs-y += hbm
+tprogs-y := test_lru_dist
+tprogs-y += sock_example
+tprogs-y += fds_example
+tprogs-y += sockex1
+tprogs-y += sockex2
+tprogs-y += sockex3
+tprogs-y += tracex1
+tprogs-y += tracex2
+tprogs-y += tracex3
+tprogs-y += tracex4
+tprogs-y += tracex5
+tprogs-y += tracex6
+tprogs-y += tracex7
+tprogs-y += test_probe_write_user
+tprogs-y += trace_output
+tprogs-y += lathist
+tprogs-y += offwaketime
+tprogs-y += spintest
+tprogs-y += map_perf_test
+tprogs-y += test_overhead
+tprogs-y += test_cgrp2_array_pin
+tprogs-y += test_cgrp2_attach
+tprogs-y += test_cgrp2_sock
+tprogs-y += test_cgrp2_sock2
+tprogs-y += xdp1
+tprogs-y += xdp2
+tprogs-y += xdp_router_ipv4
+tprogs-y += test_current_task_under_cgroup
+tprogs-y += trace_event
+tprogs-y += sampleip
+tprogs-y += tc_l2_redirect
+tprogs-y += lwt_len_hist
+tprogs-y += xdp_tx_iptunnel
+tprogs-y += test_map_in_map
+tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect_cpu
+tprogs-y += xdp_monitor
+tprogs-y += xdp_rxq_info
+tprogs-y += syscall_tp
+tprogs-y += cpustat
+tprogs-y += xdp_adjust_tail
+tprogs-y += xdpsock
+tprogs-y += xdp_fwd
+tprogs-y += task_fd_query
+tprogs-y += xdp_sample_pkts
+tprogs-y += ibumad
+tprogs-y += hbm
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -111,7 +109,7 @@ ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS)
 hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
 
 # Tell kbuild to always build the programs
-always := $(hostprogs-y)
+always := $(tprogs-y)
 always += sockex1_kern.o
 always += sockex2_kern.o
 always += sockex3_kern.o
@@ -145,7 +143,6 @@ always += sampleip_kern.o
 always += lwt_len_hist_kern.o
 always += xdp_tx_iptunnel_kern.o
 always += test_map_in_map_kern.o
-always += cookie_uid_helper_example.o
 always += tcp_synrto_kern.o
 always += tcp_rwnd_kern.o
 always += tcp_bufs_kern.o
@@ -171,20 +168,38 @@ always += ibumad_kern.o
 always += hbm_out_kern.o
 always += hbm_edt_kern.o
 
-KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
+ifeq ($(ARCH), arm)
+# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
+# headers when arm instruction set identification is requested.
+ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS))
+BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
+TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
+endif
+
+TPROGS_CFLAGS += -Wall -O2
+TPROGS_CFLAGS += -Wmissing-prototypes
+TPROGS_CFLAGS += -Wstrict-prototypes
+
+TPROGS_CFLAGS += -I$(objtree)/usr/include
+TPROGS_CFLAGS += -I$(srctree)/tools/lib/bpf/
+TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
+TPROGS_CFLAGS += -I$(srctree)/tools/lib/
+TPROGS_CFLAGS += -I$(srctree)/tools/include
+TPROGS_CFLAGS += -I$(srctree)/tools/perf
 
-HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+ifdef SYSROOT
+TPROGS_CFLAGS += --sysroot=$(SYSROOT)
+TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
+endif
+
+TPROGCFLAGS_bpf_load.o += -Wno-unused-variable
 
-KBUILD_HOSTLDLIBS              += $(LIBBPF) -lelf
-HOSTLDLIBS_tracex4             += -lrt
-HOSTLDLIBS_trace_output        += -lrt
-HOSTLDLIBS_map_perf_test       += -lrt
-HOSTLDLIBS_test_overhead       += -lrt
-HOSTLDLIBS_xdpsock             += -pthread
+TPROGS_LDLIBS                  += $(LIBBPF) -lelf
+TPROGLDLIBS_tracex4            += -lrt
+TPROGLDLIBS_trace_output       += -lrt
+TPROGLDLIBS_map_perf_test      += -lrt
+TPROGLDLIBS_test_overhead      += -lrt
+TPROGLDLIBS_xdpsock            += -pthread
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
@@ -195,15 +210,14 @@ BTF_PAHOLE ?= pahole
 
 # Detect that we're cross compiling and use the cross compiler
 ifdef CROSS_COMPILE
-HOSTCC = $(CROSS_COMPILE)gcc
-CLANG_ARCH_ARGS = -target $(ARCH)
+CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%))
 endif
 
 # Don't evaluate probes and warnings if we need to run make recursively
 ifneq ($(src),)
-HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
-       $(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \
-       echo okay)
+HDR_PROBE := $(shell printf "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
+       $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \
+       -o /dev/null 2>/dev/null && echo okay)
 
 ifeq ($(HDR_PROBE),)
 $(warning WARNING: Detected possible issues with include path.)
@@ -219,10 +233,10 @@ BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
                          /bin/rm -f ./llvm_btf_verify.o)
 
 ifneq ($(BTF_LLVM_PROBE),)
-       EXTRA_CFLAGS += -g
+       BPF_EXTRA_CFLAGS += -g
 else
 ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
-       EXTRA_CFLAGS += -g
+       BPF_EXTRA_CFLAGS += -g
        LLC_FLAGS += -mattr=dwarfris
        DWARF2BTF = y
 endif
@@ -239,7 +253,8 @@ clean:
 
 $(LIBBPF): FORCE
 # Fix up variables inherited from Kbuild that tools/ build system won't like
-       $(MAKE) -C $(dir $@) RM='rm -rf' LDFLAGS= srctree=$(BPF_SAMPLES_PATH)/../../ O=
+       $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
+               LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
 
 $(obj)/syscall_nrs.h:  $(obj)/syscall_nrs.s FORCE
        $(call filechk,offsets,__SYSCALL_NRS_H__)
@@ -276,13 +291,16 @@ $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
 $(obj)/hbm.o: $(src)/hbm.h
 $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
 
+-include $(BPF_SAMPLES_PATH)/Makefile.target
+
 # asm/sysreg.h - inline assembly used by it is incompatible with llvm.
 # But, there is no easy way to fix it, so just exclude it since it is
 # useless for BPF samples.
 $(obj)/%.o: $(src)/%.c
        @echo "  CLANG-bpf " $@
-       $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-               -I$(srctree)/tools/testing/selftests/bpf/ \
+       $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
+               -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
+               -I$(srctree)/tools/lib/bpf/ \
                -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
                -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
                -Wno-gnu-variable-sized-type-not-at-end \
diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target
new file mode 100644 (file)
index 0000000..7621f55
--- /dev/null
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+# ==========================================================================
+# Building binaries on the host system
+# Binaries are not used during the compilation of the kernel, and are intended
+# to be built for a target board; the target board can be the host, of course.
+# Added to build binaries that do not run on the host system.
+#
+# Sample syntax
+# tprogs-y := xsk_example
+# Will compile xsk_example.c and create an executable named xsk_example
+#
+# tprogs-y    := xdpsock
+# xdpsock-objs := xdpsock_1.o xdpsock_2.o
+# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable
+# xdpsock, based on xdpsock_1.o and xdpsock_2.o
+#
+# Derived from scripts/Makefile.host
+#
+__tprogs := $(sort $(tprogs-y))
+
+# C code
+# Executables compiled from a single .c file
+tprog-csingle  := $(foreach m,$(__tprogs), \
+                       $(if $($(m)-objs),,$(m)))
+
+# C executables linked based on several .o files
+tprog-cmulti   := $(foreach m,$(__tprogs),\
+                       $(if $($(m)-objs),$(m)))
+
+# Object (.o) files compiled from .c files
+tprog-cobjs    := $(sort $(foreach m,$(__tprogs),$($(m)-objs)))
+
+tprog-csingle  := $(addprefix $(obj)/,$(tprog-csingle))
+tprog-cmulti   := $(addprefix $(obj)/,$(tprog-cmulti))
+tprog-cobjs    := $(addprefix $(obj)/,$(tprog-cobjs))
+
+#####
+# Handle options to gcc. Support building with separate output directory
+
+_tprogc_flags   = $(TPROGS_CFLAGS) \
+                 $(TPROGCFLAGS_$(basetarget).o)
+
+# $(objtree)/$(obj) for including generated headers from checkin source files
+ifeq ($(KBUILD_EXTMOD),)
+ifdef building_out_of_srctree
+_tprogc_flags   += -I $(objtree)/$(obj)
+endif
+endif
+
+tprogc_flags    = -Wp,-MD,$(depfile) $(_tprogc_flags)
+
+# Create executable from a single .c file
+# tprog-csingle -> Executable
+quiet_cmd_tprog-csingle        = CC  $@
+      cmd_tprog-csingle        = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \
+               $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE
+       $(call if_changed_dep,tprog-csingle)
+
+# Link an executable based on list of .o files, all plain c
+# tprog-cmulti -> executable
+quiet_cmd_tprog-cmulti = LD  $@
+      cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \
+                         $(addprefix $(obj)/,$($(@F)-objs)) \
+                         $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-cmulti): $(tprog-cobjs) FORCE
+       $(call if_changed,tprog-cmulti)
+$(call multi_depend, $(tprog-cmulti), , -objs)
+
+# Create .o file from a single .c file
+# tprog-cobjs -> .o
+quiet_cmd_tprog-cobjs  = CC  $@
+      cmd_tprog-cobjs  = $(CC) $(tprogc_flags) -c -o $@ $<
+$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
+       $(call if_changed_dep,tprog-cobjs)
index 5f27e4f..cc1f00a 100644 (file)
@@ -14,6 +14,20 @@ Compiling requires having installed:
 Note that LLVM's tool 'llc' must support target 'bpf', list version
 and supported targets with command: ``llc --version``
 
+Clean and configuration
+-----------------------
+
+It may be necessary to clean tools, samples or the kernel before trying a new
+arch or after some changes (on demand)::
+
+ make -C tools clean
+ make -C samples/bpf clean
+ make clean
+
+Configure kernel, defconfig for instance::
+
+ make defconfig
+
 Kernel headers
 --------------
 
@@ -68,9 +82,26 @@ It is also possible to point make to the newly compiled 'llc' or
 Cross compiling samples
 -----------------------
 In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH
-environment variables before calling make. This will direct make to build
-samples for the cross target.
+environment variables before calling make. But do this before the clean,
+configuration and header install steps described above. This will direct make to
+build samples for the cross target::
+
+ export ARCH=arm64
+ export CROSS_COMPILE="aarch64-linux-gnu-"
+
+Headers can be also installed on RFS of target board if need to keep them in
+sync (not necessarily and it creates a local "usr/include" directory also)::
+
+ make INSTALL_HDR_PATH=~/some_sysroot/usr headers_install
+
+Pointing LLC and CLANG is not necessary if they are installed on the HOST and
+have the appropriate arm64 arch among their targets (usually several arches).
+Build samples::
+
+ make samples/bpf/
+
+Or build samples with SYSROOT if some header or library is absent in toolchain,
+say libelf, providing address to file system containing headers and libs,
+can be RFS of target board::
 
-export ARCH=arm64
-export CROSS_COMPILE="aarch64-linux-gnu-"
-make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+ make samples/bpf/ SYSROOT=~/some_sysroot
index aa207a2..4edaf47 100644 (file)
 #define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
 #define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
 
-struct bpf_map_def SEC("maps") queue_state = {
-       .type = BPF_MAP_TYPE_CGROUP_STORAGE,
-       .key_size = sizeof(struct bpf_cgroup_storage_key),
-       .value_size = sizeof(struct hbm_vqueue),
-};
-BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key,
-                    struct hbm_vqueue);
-
-struct bpf_map_def SEC("maps") queue_stats = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(u32),
-       .value_size = sizeof(struct hbm_queue_stats),
-       .max_entries = 1,
-};
-BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
+struct {
+       __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+       __type(key, struct bpf_cgroup_storage_key);
+       __type(value, struct hbm_vqueue);
+} queue_state SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, u32);
+       __type(value, struct hbm_queue_stats);
+} queue_stats SEC(".maps");
 
 struct hbm_pkt_info {
        int     cwnd;
index 2b2ffb9..281bcda 100644 (file)
@@ -9,25 +9,27 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
+#include "bpf_tracing.h"
 
 #define MAX_ENTRIES 1000
 #define MAX_NR_CPUS 1024
 
-struct bpf_map_def SEC("maps") hash_map = {
+struct bpf_map_def_legacy SEC("maps") hash_map = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
        .max_entries = MAX_ENTRIES,
 };
 
-struct bpf_map_def SEC("maps") lru_hash_map = {
+struct bpf_map_def_legacy SEC("maps") lru_hash_map = {
        .type = BPF_MAP_TYPE_LRU_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
        .max_entries = 10000,
 };
 
-struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
+struct bpf_map_def_legacy SEC("maps") nocommon_lru_hash_map = {
        .type = BPF_MAP_TYPE_LRU_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
@@ -35,7 +37,7 @@ struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
        .map_flags = BPF_F_NO_COMMON_LRU,
 };
 
-struct bpf_map_def SEC("maps") inner_lru_hash_map = {
+struct bpf_map_def_legacy SEC("maps") inner_lru_hash_map = {
        .type = BPF_MAP_TYPE_LRU_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
@@ -44,20 +46,20 @@ struct bpf_map_def SEC("maps") inner_lru_hash_map = {
        .numa_node = 0,
 };
 
-struct bpf_map_def SEC("maps") array_of_lru_hashs = {
+struct bpf_map_def_legacy SEC("maps") array_of_lru_hashs = {
        .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
        .key_size = sizeof(u32),
        .max_entries = MAX_NR_CPUS,
 };
 
-struct bpf_map_def SEC("maps") percpu_hash_map = {
+struct bpf_map_def_legacy SEC("maps") percpu_hash_map = {
        .type = BPF_MAP_TYPE_PERCPU_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
        .max_entries = MAX_ENTRIES,
 };
 
-struct bpf_map_def SEC("maps") hash_map_alloc = {
+struct bpf_map_def_legacy SEC("maps") hash_map_alloc = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
@@ -65,7 +67,7 @@ struct bpf_map_def SEC("maps") hash_map_alloc = {
        .map_flags = BPF_F_NO_PREALLOC,
 };
 
-struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
+struct bpf_map_def_legacy SEC("maps") percpu_hash_map_alloc = {
        .type = BPF_MAP_TYPE_PERCPU_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
@@ -73,7 +75,7 @@ struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
        .map_flags = BPF_F_NO_PREALLOC,
 };
 
-struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
+struct bpf_map_def_legacy SEC("maps") lpm_trie_map_alloc = {
        .type = BPF_MAP_TYPE_LPM_TRIE,
        .key_size = 8,
        .value_size = sizeof(long),
@@ -81,14 +83,14 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
        .map_flags = BPF_F_NO_PREALLOC,
 };
 
-struct bpf_map_def SEC("maps") array_map = {
+struct bpf_map_def_legacy SEC("maps") array_map = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
        .max_entries = MAX_ENTRIES,
 };
 
-struct bpf_map_def SEC("maps") lru_hash_lookup_map = {
+struct bpf_map_def_legacy SEC("maps") lru_hash_lookup_map = {
        .type = BPF_MAP_TYPE_LRU_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(long),
@@ -179,8 +181,8 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
        if (addrlen != sizeof(*in6))
                return 0;
 
-       ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
-                            &in6->sin6_addr);
+       ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
+                                 &in6->sin6_addr);
        if (ret)
                goto done;
 
index e7d9a0a..9cb5207 100644 (file)
@@ -6,6 +6,7 @@
  */
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 #include <uapi/linux/ptrace.h>
 #include <uapi/linux/perf_event.h>
 #include <linux/version.h>
index 6db6b21..ef58923 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/udp.h>
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
 
 #define DEFAULT_PKTGEN_UDP_PORT        9
 #define IP_MF                  0x2000
index ceabf31..4a19089 100644 (file)
@@ -9,6 +9,7 @@
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/bpf_perf_event.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 #define MAX_IPS                8192
 
index ed18e9a..f96943f 100644 (file)
@@ -3,6 +3,7 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_ARRAY,
index f2f9dbc..5566fa7 100644 (file)
@@ -1,5 +1,6 @@
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
 #include <uapi/linux/if_ether.h>
index c527b57..151dd84 100644 (file)
@@ -6,6 +6,7 @@
  */
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
 #include <uapi/linux/if_ether.h>
index ce0167d..6e9478a 100644 (file)
@@ -10,6 +10,7 @@
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/perf_event.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_HASH,
index 274c884..ff43341 100644 (file)
@@ -8,6 +8,7 @@
 #include <uapi/linux/filter.h>
 #include <uapi/linux/pkt_cls.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
 
 /* compiler workaround */
 #define _htonl __builtin_bswap32
index 42c44d0..32ee752 100644 (file)
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/in6.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
+#include "bpf_tracing.h"
 
 #define MAX_NR_PORTS 65536
 
 /* map #0 */
-struct bpf_map_def SEC("maps") port_a = {
+struct bpf_map_def_legacy SEC("maps") port_a = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(int),
@@ -23,7 +25,7 @@ struct bpf_map_def SEC("maps") port_a = {
 };
 
 /* map #1 */
-struct bpf_map_def SEC("maps") port_h = {
+struct bpf_map_def_legacy SEC("maps") port_h = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(int),
@@ -31,7 +33,7 @@ struct bpf_map_def SEC("maps") port_h = {
 };
 
 /* map #2 */
-struct bpf_map_def SEC("maps") reg_result_h = {
+struct bpf_map_def_legacy SEC("maps") reg_result_h = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(int),
@@ -39,7 +41,7 @@ struct bpf_map_def SEC("maps") reg_result_h = {
 };
 
 /* map #3 */
-struct bpf_map_def SEC("maps") inline_result_h = {
+struct bpf_map_def_legacy SEC("maps") inline_result_h = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(u32),
        .value_size = sizeof(int),
@@ -47,7 +49,7 @@ struct bpf_map_def SEC("maps") inline_result_h = {
 };
 
 /* map #4 */ /* Test case #0 */
-struct bpf_map_def SEC("maps") a_of_port_a = {
+struct bpf_map_def_legacy SEC("maps") a_of_port_a = {
        .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
        .key_size = sizeof(u32),
        .inner_map_idx = 0, /* map_fd[0] is port_a */
@@ -55,7 +57,7 @@ struct bpf_map_def SEC("maps") a_of_port_a = {
 };
 
 /* map #5 */ /* Test case #1 */
-struct bpf_map_def SEC("maps") h_of_port_a = {
+struct bpf_map_def_legacy SEC("maps") h_of_port_a = {
        .type = BPF_MAP_TYPE_HASH_OF_MAPS,
        .key_size = sizeof(u32),
        .inner_map_idx = 0, /* map_fd[0] is port_a */
@@ -63,7 +65,7 @@ struct bpf_map_def SEC("maps") h_of_port_a = {
 };
 
 /* map #6 */ /* Test case #2 */
-struct bpf_map_def SEC("maps") h_of_port_h = {
+struct bpf_map_def_legacy SEC("maps") h_of_port_h = {
        .type = BPF_MAP_TYPE_HASH_OF_MAPS,
        .key_size = sizeof(u32),
        .inner_map_idx = 1, /* map_fd[1] is port_h */
@@ -116,7 +118,7 @@ int trace_sys_connect(struct pt_regs *ctx)
        if (addrlen != sizeof(*in6))
                return 0;
 
-       ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+       ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
        if (ret) {
                inline_ret = ret;
                goto done;
@@ -127,7 +129,7 @@ int trace_sys_connect(struct pt_regs *ctx)
 
        test_case = dst6[7];
 
-       ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+       ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
        if (ret) {
                inline_ret = ret;
                goto done;
index 468a66a..8d2518e 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
 
index 3a677c8..b7c48f3 100644 (file)
@@ -9,6 +9,7 @@
 #include <uapi/linux/bpf.h>
 #include <linux/version.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 struct bpf_map_def SEC("maps") dnat_map = {
        .type = BPF_MAP_TYPE_HASH,
@@ -36,7 +37,7 @@ int bpf_prog1(struct pt_regs *ctx)
        if (sockaddr_len > sizeof(orig_addr))
                return 0;
 
-       if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
+       if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
                return 0;
 
        mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
index 7068fbd..8dc18d2 100644 (file)
@@ -10,6 +10,7 @@
 #include <uapi/linux/bpf_perf_event.h>
 #include <uapi/linux/perf_event.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 struct key_t {
        char comm[TASK_COMM_LEN];
index 107da14..1a15f66 100644 (file)
@@ -9,6 +9,7 @@
 #include <uapi/linux/bpf.h>
 #include <linux/version.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
 
index 5e11c20..d70b3ea 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_HASH,
index ea1d4c1..9af546b 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_HASH,
index 6dd8e38..2a02cbe 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 struct pair {
        u64 val;
index 35cb0ee..b3557b2 100644 (file)
@@ -11,6 +11,7 @@
 #include <uapi/linux/unistd.h>
 #include "syscall_nrs.h"
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
 
index 411fdb2..c616508 100644 (file)
@@ -25,6 +25,9 @@
 #define ICMP_TOOBIG_SIZE 98
 #define ICMP_TOOBIG_PAYLOAD_SIZE 92
 
+/* volatile to prevent compiler optimizations */
+static volatile __u32 max_pcktsz = MAX_PCKT_SIZE;
+
 struct bpf_map_def SEC("maps") icmpcnt = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(__u32),
@@ -92,7 +95,7 @@ static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
        orig_iph = data + off;
        icmp_hdr->type = ICMP_DEST_UNREACH;
        icmp_hdr->code = ICMP_FRAG_NEEDED;
-       icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
+       icmp_hdr->un.frag.mtu = htons(max_pcktsz - sizeof(struct ethhdr));
        icmp_hdr->checksum = 0;
        ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
        icmp_hdr->checksum = csum;
@@ -121,7 +124,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
        int pckt_size = data_end - data;
        int offset;
 
-       if (pckt_size > MAX_PCKT_SIZE) {
+       if (pckt_size > max(max_pcktsz, ICMP_TOOBIG_SIZE)) {
                offset = pckt_size - ICMP_TOOBIG_SIZE;
                if (bpf_xdp_adjust_tail(xdp, 0 - offset))
                        return XDP_PASS;
index a3596b6..d86e9ad 100644 (file)
@@ -23,6 +23,7 @@
 #include "libbpf.h"
 
 #define STATS_INTERVAL_S 2U
+#define MAX_PCKT_SIZE 600
 
 static int ifindex = -1;
 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -72,6 +73,7 @@ static void usage(const char *cmd)
        printf("Usage: %s [...]\n", cmd);
        printf("    -i <ifname|ifindex> Interface\n");
        printf("    -T <stop-after-X-seconds> Default: 0 (forever)\n");
+       printf("    -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE);
        printf("    -S use skb-mode\n");
        printf("    -N enforce native mode\n");
        printf("    -F force loading prog\n");
@@ -85,13 +87,14 @@ int main(int argc, char **argv)
                .prog_type      = BPF_PROG_TYPE_XDP,
        };
        unsigned char opt_flags[256] = {};
-       const char *optstr = "i:T:SNFh";
+       const char *optstr = "i:T:P:SNFh";
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
        unsigned int kill_after_s = 0;
        int i, prog_fd, map_fd, opt;
        struct bpf_object *obj;
-       struct bpf_map *map;
+       __u32 max_pckt_size = 0;
+       __u32 key = 0;
        char filename[256];
        int err;
 
@@ -110,6 +113,9 @@ int main(int argc, char **argv)
                case 'T':
                        kill_after_s = atoi(optarg);
                        break;
+               case 'P':
+                       max_pckt_size = atoi(optarg);
+                       break;
                case 'S':
                        xdp_flags |= XDP_FLAGS_SKB_MODE;
                        break;
@@ -150,15 +156,20 @@ int main(int argc, char **argv)
        if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
                return 1;
 
-       map = bpf_map__next(NULL, obj);
-       if (!map) {
-               printf("finding a map in obj file failed\n");
-               return 1;
+       /* static global var 'max_pcktsz' is accessible from .data section */
+       if (max_pckt_size) {
+               map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data");
+               if (map_fd < 0) {
+                       printf("finding a max_pcktsz map in obj file failed\n");
+                       return 1;
+               }
+               bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY);
        }
-       map_fd = bpf_map__fd(map);
 
-       if (!prog_fd) {
-               printf("load_bpf_file: %s\n", strerror(errno));
+       /* fetch icmpcnt map */
+       map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt");
+       if (map_fd < 0) {
+               printf("finding a icmpcnt map in obj file failed\n");
                return 1;
        }
 
index df011ac..405c4e0 100644 (file)
@@ -378,7 +378,7 @@ static void usage(const char *prog)
                "  -q, --queue=n        Use queue n (default 0)\n"
                "  -p, --poll           Use poll syscall\n"
                "  -S, --xdp-skb=n      Use XDP skb-mod\n"
-               "  -N, --xdp-native=n   Enfore XDP native mode\n"
+               "  -N, --xdp-native=n   Enforce XDP native mode\n"
                "  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
                "  -z, --zero-copy      Force zero-copy mode.\n"
                "  -c, --copy           Force copy mode.\n"
index fd39215..3f6483e 100644 (file)
@@ -18,7 +18,7 @@ across the sample scripts.  Usage example is printed on errors::
  Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX
   -i : ($DEV)       output interface/device (required)
   -s : ($PKT_SIZE)  packet size
-  -d : ($DEST_IP)   destination IP
+  -d : ($DEST_IP)   destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed
   -m : ($DST_MAC)   destination MAC-addr
   -p : ($DST_PORT)  destination PORT range (e.g. 433-444) is also allowed
   -t : ($THREADS)   threads to start
index 4af4046..dae06d5 100644 (file)
@@ -5,6 +5,8 @@
 # Author: Jesper Dangaaard Brouer
 # License: GPL
 
+set -o errexit
+
 ## -- General shell logging cmds --
 function err() {
     local exitcode=$1
@@ -58,6 +60,7 @@ function pg_set() {
 function proc_cmd() {
     local result
     local proc_file=$1
+    local status=0
     # after shift, the remaining args are contained in $@
     shift
     local proc_ctrl=${PROC_DIR}/$proc_file
@@ -73,13 +76,13 @@ function proc_cmd() {
        echo "cmd: $@ > $proc_ctrl"
     fi
     # Quoting of "$@" is important for space expansion
-    echo "$@" > "$proc_ctrl"
-    local status=$?
+    echo "$@" > "$proc_ctrl" || status=$?
 
-    result=$(grep "Result: OK:" $proc_ctrl)
-    # Due to pgctrl, cannot use exit code $? from grep
-    if [[ "$result" == "" ]]; then
-       grep "Result:" $proc_ctrl >&2
+    if [[ "$proc_file" != "pgctrl" ]]; then
+        result=$(grep "Result: OK:" $proc_ctrl) || true
+        if [[ "$result" == "" ]]; then
+            grep "Result:" $proc_ctrl >&2
+        fi
     fi
     if (( $status != 0 )); then
        err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\""
@@ -105,6 +108,8 @@ function pgset() {
     fi
 }
 
+[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT
+
 ## -- General shell tricks --
 
 function root_check_run_with_sudo() {
@@ -163,6 +168,137 @@ function get_node_cpus()
        echo $node_cpu_list
 }
 
+# Check $1 is in between $2, $3 ($2 <= $1 <= $3)
+function in_between() { [[ ($1 -ge $2) && ($1 -le $3) ]] ; }
+
+# Extend shrunken IPv6 address.
+# fe80::42:bcff:fe84:e10a => fe80:0:0:0:42:bcff:fe84:e10a
+function extend_addr6()
+{
+    local addr=$1
+    local sep=: sep2=::
+    local sep_cnt=$(tr -cd $sep <<< $1 | wc -c)
+    local shrink
+
+    # separator count should be (2 <= $sep_cnt <= 7)
+    if ! (in_between $sep_cnt 2 7); then
+        err 5 "Invalid IP6 address: $1"
+    fi
+
+    # if shrink '::' occurs multiple, it's malformed.
+    shrink=( $(egrep -o "$sep{2,}" <<< $addr) )
+    if [[ ${#shrink[@]} -ne 0 ]]; then
+        if [[ ${#shrink[@]} -gt 1 || ( ${shrink[0]} != $sep2 ) ]]; then
+            err 5 "Invalid IP6 address: $1"
+        fi
+    fi
+
+    # add 0 at begin & end, and extend addr by adding :0
+    [[ ${addr:0:1} == $sep ]] && addr=0${addr}
+    [[ ${addr: -1} == $sep ]] && addr=${addr}0
+    echo "${addr/$sep2/$(printf ':0%.s' $(seq $[8-sep_cnt])):}"
+}
+
+# Given a single IP(v4/v6) address, whether it is valid.
+function validate_addr()
+{
+    # check function is called with (funcname)6
+    [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6
+    local bitlen=$[ IP6 ? 128 : 32 ]
+    local len=$[ IP6 ? 8 : 4 ]
+    local max=$[ 2**(len*2)-1 ]
+    local net prefix
+    local addr sep
+
+    IFS='/' read net prefix <<< $1
+    [[ $IP6 ]] && net=$(extend_addr6 $net)
+
+    # if prefix exists, check (0 <= $prefix <= $bitlen)
+    if [[ -n $prefix ]]; then
+        if ! (in_between $prefix 0 $bitlen); then
+            err 5 "Invalid prefix: /$prefix"
+        fi
+    fi
+
+    # set separator for each IP(v4/v6)
+    [[ $IP6 ]] && sep=: || sep=.
+    IFS=$sep read -a addr <<< $net
+
+    # array length
+    if [[ ${#addr[@]} != $len ]]; then
+        err 5 "Invalid IP$IP6 address: $1"
+    fi
+
+    # check each digit (0 <= $digit <= $max)
+    for digit in "${addr[@]}"; do
+        [[ $IP6 ]] && digit=$[ 16#$digit ]
+        if ! (in_between $digit 0 $max); then
+            err 5 "Invalid IP$IP6 address: $1"
+        fi
+    done
+
+    return 0
+}
+
+function validate_addr6() { validate_addr $@ ; }
+
+# Given a single IP(v4/v6) or CIDR, return minimum and maximum IP addr.
+function parse_addr()
+{
+    # check function is called with (funcname)6
+    [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6
+    local net prefix
+    local min_ip max_ip
+
+    IFS='/' read net prefix <<< $1
+    [[ $IP6 ]] && net=$(extend_addr6 $net)
+
+    if [[ -z $prefix ]]; then
+        min_ip=$net
+        max_ip=$net
+    else
+        # defining array for converting Decimal 2 Binary
+        # 00000000 00000001 00000010 00000011 00000100 ...
+        local d2b='{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}'
+        [[ $IP6 ]] && d2b+=$d2b
+        eval local D2B=($d2b)
+
+        local bitlen=$[ IP6 ? 128 : 32 ]
+        local remain=$[ bitlen-prefix ]
+        local octet=$[ IP6 ? 16 : 8 ]
+        local min_mask max_mask
+        local min max
+        local ip_bit
+        local ip sep
+
+        # set separator for each IP(v4/v6)
+        [[ $IP6 ]] && sep=: || sep=.
+        IFS=$sep read -ra ip <<< $net
+
+        min_mask="$(printf '1%.s' $(seq $prefix))$(printf '0%.s' $(seq $remain))"
+        max_mask="$(printf '0%.s' $(seq $prefix))$(printf '1%.s' $(seq $remain))"
+
+        # calculate min/max ip with &,| operator
+        for i in "${!ip[@]}"; do
+            digit=$[ IP6 ? 16#${ip[$i]} : ${ip[$i]} ]
+            ip_bit=${D2B[$digit]}
+
+            idx=$[ octet*i ]
+            min[$i]=$[ 2#$ip_bit & 2#${min_mask:$idx:$octet} ]
+            max[$i]=$[ 2#$ip_bit | 2#${max_mask:$idx:$octet} ]
+            [[ $IP6 ]] && { min[$i]=$(printf '%X' ${min[$i]});
+                            max[$i]=$(printf '%X' ${max[$i]}); }
+        done
+
+        min_ip=$(IFS=$sep; echo "${min[*]}")
+        max_ip=$(IFS=$sep; echo "${max[*]}")
+    fi
+
+    echo $min_ip $max_ip
+}
+
+function parse_addr6() { parse_addr $@ ; }
+
 # Given a single or range of port(s), return minimum and maximum port number.
 function parse_ports()
 {
@@ -185,9 +321,9 @@ function validate_ports()
     local min_port=$1
     local max_port=$2
 
-    # 0 < port < 65536
-    if [[ $min_port -gt 0 && $min_port -lt 65536 ]]; then
-       if [[ $max_port -gt 0 && $max_port -lt 65536 ]]; then
+    # 1 <= port <= 65535
+    if (in_between $min_port 1 65535); then
+       if (in_between $max_port 1 65535); then
            if [[ $min_port -le $max_port ]]; then
                return 0
            fi
index a06b00a..ff0ed47 100644 (file)
@@ -8,7 +8,7 @@ function usage() {
     echo "Usage: $0 [-vx] -i ethX"
     echo "  -i : (\$DEV)       output interface/device (required)"
     echo "  -s : (\$PKT_SIZE)  packet size"
-    echo "  -d : (\$DEST_IP)   destination IP"
+    echo "  -d : (\$DEST_IP)   destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed"
     echo "  -m : (\$DST_MAC)   destination MAC-addr"
     echo "  -p : (\$DST_PORT)  destination PORT range (e.g. 433-444) is also allowed"
     echo "  -t : (\$THREADS)   threads to start"
index e14b1a9..1b62041 100755 (executable)
@@ -41,9 +41,13 @@ fi
 [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$BURST" ] && BURST=1024
 [ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # Base Config
@@ -71,13 +75,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst$IP6 $DEST_IP"
+    pg_set $dev "dst${IP6}_min $DST_MIN"
+    pg_set $dev "dst${IP6}_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
        pg_set $dev "flag UDPDST_RND"
-       pg_set $dev "udp_dst_min $DST_MIN"
-       pg_set $dev "udp_dst_max $DST_MAX"
+       pg_set $dev "udp_dst_min $UDP_DST_MIN"
+       pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
     # Inject packet into RX path of stack
index 82c3e50..e607cb3 100755 (executable)
@@ -24,9 +24,13 @@ if [[ -n "$BURST" ]]; then
     err 1 "Bursting not supported for this mode"
 fi
 [ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # Base Config
@@ -54,13 +58,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst$IP6 $DEST_IP"
+    pg_set $dev "dst${IP6}_min $DST_MIN"
+    pg_set $dev "dst${IP6}_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
        pg_set $dev "flag UDPDST_RND"
-       pg_set $dev "udp_dst_min $DST_MIN"
-       pg_set $dev "udp_dst_max $DST_MAX"
+       pg_set $dev "udp_dst_min $UDP_DST_MIN"
+       pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
     # Inject packet into TX qdisc egress path of stack
index d1702fd..a4e250b 100755 (executable)
@@ -22,17 +22,21 @@ fi
 # Example enforce param "-m" for dst_mac
 [ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac"
 [ -z "$COUNT" ]   && COUNT="100000" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # Base Config
 DELAY="0"        # Zero means max speed
 
 # Flow variation random source port between min and max
-UDP_MIN=9
-UDP_MAX=109
+UDP_SRC_MIN=9
+UDP_SRC_MAX=109
 
 # General cleanup everything since last run
 # (especially important if other threads were configured by other scripts)
@@ -61,19 +65,20 @@ pg_set $DEV "flag NO_TIMESTAMP"
 
 # Destination
 pg_set $DEV "dst_mac $DST_MAC"
-pg_set $DEV "dst$IP6 $DEST_IP"
+pg_set $DEV "dst${IP6}_min $DST_MIN"
+pg_set $DEV "dst${IP6}_max $DST_MAX"
 
 if [ -n "$DST_PORT" ]; then
     # Single destination port or random port range
     pg_set $DEV "flag UDPDST_RND"
-    pg_set $DEV "udp_dst_min $DST_MIN"
-    pg_set $DEV "udp_dst_max $DST_MAX"
+    pg_set $DEV "udp_dst_min $UDP_DST_MIN"
+    pg_set $DEV "udp_dst_max $UDP_DST_MAX"
 fi
 
 # Setup random UDP port src range
 pg_set $DEV "flag UDPSRC_RND"
-pg_set $DEV "udp_src_min $UDP_MIN"
-pg_set $DEV "udp_src_max $UDP_MAX"
+pg_set $DEV "udp_src_min $UDP_SRC_MIN"
+pg_set $DEV "udp_src_max $UDP_SRC_MAX"
 
 # start_run
 echo "Running... ctrl^C to stop" >&2
index 7f7a9a2..cb2495f 100755 (executable)
@@ -21,17 +21,21 @@ DELAY="0"        # Zero means max speed
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 
 # Flow variation random source port between min and max
-UDP_MIN=9
-UDP_MAX=109
+UDP_SRC_MIN=9
+UDP_SRC_MAX=109
 
 # (example of setting default params in your script)
 if [ -z "$DEST_IP" ]; then
     [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
 fi
 [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+if [ -n "$DEST_IP" ]; then
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # General cleanup everything since last run
@@ -62,19 +66,20 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst$IP6 $DEST_IP"
+    pg_set $dev "dst${IP6}_min $DST_MIN"
+    pg_set $dev "dst${IP6}_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
        pg_set $dev "flag UDPDST_RND"
-       pg_set $dev "udp_dst_min $DST_MIN"
-       pg_set $dev "udp_dst_max $DST_MAX"
+       pg_set $dev "udp_dst_min $UDP_DST_MIN"
+       pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
     # Setup random UDP port src range
     pg_set $dev "flag UDPSRC_RND"
-    pg_set $dev "udp_src_min $UDP_MIN"
-    pg_set $dev "udp_src_max $UDP_MAX"
+    pg_set $dev "udp_src_min $UDP_SRC_MIN"
+    pg_set $dev "udp_src_max $UDP_SRC_MAX"
 done
 
 # start_run
index b520637..fff5076 100755 (executable)
@@ -33,9 +33,13 @@ fi
 [ -z "$BURST" ]     && BURST=32
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # No need for clones when bursting
 [ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # Base Config
@@ -62,13 +66,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst$IP6 $DEST_IP"
+    pg_set $dev "dst${IP6}_min $DST_MIN"
+    pg_set $dev "dst${IP6}_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
        pg_set $dev "flag UDPDST_RND"
-       pg_set $dev "udp_dst_min $DST_MIN"
-       pg_set $dev "udp_dst_max $DST_MAX"
+       pg_set $dev "udp_dst_min $UDP_DST_MIN"
+       pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
     # Setup burst, for easy testing -b 0 disable bursting
index 5b6e9d9..2cd6b70 100755 (executable)
@@ -17,9 +17,13 @@ source ${basedir}/parameters.sh
 [ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 [ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+    validate_addr $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # NOTICE:  Script specific settings
@@ -37,6 +41,9 @@ if [[ -n "$BURST" ]]; then
     err 1 "Bursting not supported for this mode"
 fi
 
+# 198.18.0.0 / 198.19.255.255
+read -r SRC_MIN SRC_MAX <<< $(parse_addr 198.18.0.0/15)
+
 # General cleanup everything since last run
 pg_ctrl "reset"
 
@@ -58,19 +65,20 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Single destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst $DEST_IP"
+    pg_set $dev "dst_min $DST_MIN"
+    pg_set $dev "dst_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
        pg_set $dev "flag UDPDST_RND"
-       pg_set $dev "udp_dst_min $DST_MIN"
-       pg_set $dev "udp_dst_max $DST_MAX"
+       pg_set $dev "udp_dst_min $UDP_DST_MIN"
+       pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
     # Randomize source IP-addresses
     pg_set $dev "flag IPSRC_RND"
-    pg_set $dev "src_min 198.18.0.0"
-    pg_set $dev "src_max 198.19.255.255"
+    pg_set $dev "src_min $SRC_MIN"
+    pg_set $dev "src_max $SRC_MAX"
 
     # Limit number of flows (max 65535)
     pg_set $dev "flows $FLOWS"
index 0c06e63..4cb6252 100755 (executable)
@@ -22,9 +22,13 @@ source ${basedir}/parameters.sh
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 [ -z "$BURST" ]     && BURST=32
 [ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+    validate_addr $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # Base Config
@@ -51,13 +55,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Single destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst $DEST_IP"
+    pg_set $dev "dst_min $DST_MIN"
+    pg_set $dev "dst_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
        pg_set $dev "flag UDPDST_RND"
-       pg_set $dev "udp_dst_min $DST_MIN"
-       pg_set $dev "udp_dst_max $DST_MAX"
+       pg_set $dev "udp_dst_min $UDP_DST_MIN"
+       pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
     # Setup source IP-addresses based on thread number
index 97f0266..7281060 100755 (executable)
@@ -20,8 +20,8 @@ DELAY="0"        # Zero means max speed
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 
 # Flow variation random source port between min and max
-UDP_MIN=9
-UDP_MAX=109
+UDP_SRC_MIN=9
+UDP_SRC_MAX=109
 
 node=`get_iface_node $DEV`
 irq_array=(`get_iface_irqs $DEV`)
@@ -35,9 +35,13 @@ if [ -z "$DEST_IP" ]; then
     [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
 fi
 [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+if [ -n "$DEST_IP" ]; then
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
 if [ -n "$DST_PORT" ]; then
-    read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
-    validate_ports $DST_MIN $DST_MAX
+    read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+    validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
 # General cleanup everything since last run
@@ -79,19 +83,20 @@ for ((i = 0; i < $THREADS; i++)); do
 
     # Destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst$IP6 $DEST_IP"
+    pg_set $dev "dst${IP6}_min $DST_MIN"
+    pg_set $dev "dst${IP6}_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
        pg_set $dev "flag UDPDST_RND"
-       pg_set $dev "udp_dst_min $DST_MIN"
-       pg_set $dev "udp_dst_max $DST_MAX"
+       pg_set $dev "udp_dst_min $UDP_DST_MIN"
+       pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
     # Setup random UDP port src range
     pg_set $dev "flag UDPSRC_RND"
-    pg_set $dev "udp_src_min $UDP_MIN"
-    pg_set $dev "udp_src_max $UDP_MAX"
+    pg_set $dev "udp_src_min $UDP_SRC_MIN"
+    pg_set $dev "udp_src_max $UDP_SRC_MAX"
 done
 
 # start_run
index 894cc58..7548569 100755 (executable)
@@ -391,6 +391,154 @@ SEE ALSO
 
         print('')
 
+class PrinterHelpers(Printer):
+    """
+    A printer for dumping collected information about helpers as C header to
+    be included from BPF program.
+    @helpers: array of Helper objects to print to standard output
+    """
+
+    type_fwds = [
+            'struct bpf_fib_lookup',
+            'struct bpf_perf_event_data',
+            'struct bpf_perf_event_value',
+            'struct bpf_sock',
+            'struct bpf_sock_addr',
+            'struct bpf_sock_ops',
+            'struct bpf_sock_tuple',
+            'struct bpf_spin_lock',
+            'struct bpf_sysctl',
+            'struct bpf_tcp_sock',
+            'struct bpf_tunnel_key',
+            'struct bpf_xfrm_state',
+            'struct pt_regs',
+            'struct sk_reuseport_md',
+            'struct sockaddr',
+            'struct tcphdr',
+
+            'struct __sk_buff',
+            'struct sk_msg_md',
+            'struct xdp_md',
+    ]
+    known_types = {
+            '...',
+            'void',
+            'const void',
+            'char',
+            'const char',
+            'int',
+            'long',
+            'unsigned long',
+
+            '__be16',
+            '__be32',
+            '__wsum',
+
+            'struct bpf_fib_lookup',
+            'struct bpf_perf_event_data',
+            'struct bpf_perf_event_value',
+            'struct bpf_sock',
+            'struct bpf_sock_addr',
+            'struct bpf_sock_ops',
+            'struct bpf_sock_tuple',
+            'struct bpf_spin_lock',
+            'struct bpf_sysctl',
+            'struct bpf_tcp_sock',
+            'struct bpf_tunnel_key',
+            'struct bpf_xfrm_state',
+            'struct pt_regs',
+            'struct sk_reuseport_md',
+            'struct sockaddr',
+            'struct tcphdr',
+    }
+    mapped_types = {
+            'u8': '__u8',
+            'u16': '__u16',
+            'u32': '__u32',
+            'u64': '__u64',
+            's8': '__s8',
+            's16': '__s16',
+            's32': '__s32',
+            's64': '__s64',
+            'size_t': 'unsigned long',
+            'struct bpf_map': 'void',
+            'struct sk_buff': 'struct __sk_buff',
+            'const struct sk_buff': 'const struct __sk_buff',
+            'struct sk_msg_buff': 'struct sk_msg_md',
+            'struct xdp_buff': 'struct xdp_md',
+    }
+
+    def print_header(self):
+        header = '''\
+/* This is auto-generated file. See bpf_helpers_doc.py for details. */
+
+/* Forward declarations of BPF structs */'''
+
+        print(header)
+        for fwd in self.type_fwds:
+            print('%s;' % fwd)
+        print('')
+
+    def print_footer(self):
+        footer = ''
+        print(footer)
+
+    def map_type(self, t):
+        if t in self.known_types:
+            return t
+        if t in self.mapped_types:
+            return self.mapped_types[t]
+        print("Unrecognized type '%s', please add it to known types!" % t,
+              file=sys.stderr)
+        sys.exit(1)
+
+    seen_helpers = set()
+
+    def print_one(self, helper):
+        proto = helper.proto_break_down()
+
+        if proto['name'] in self.seen_helpers:
+            return
+        self.seen_helpers.add(proto['name'])
+
+        print('/*')
+        print(" * %s" % proto['name'])
+        print(" *")
+        if (helper.desc):
+            # Do not strip all newline characters: formatted code at the end of
+            # a section must be followed by a blank line.
+            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
+                print(' *{}{}'.format(' \t' if line else '', line))
+
+        if (helper.ret):
+            print(' *')
+            print(' * Returns')
+            for line in helper.ret.rstrip().split('\n'):
+                print(' *{}{}'.format(' \t' if line else '', line))
+
+        print(' */')
+        print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']),
+                                      proto['ret_star'], proto['name']), end='')
+        comma = ''
+        for i, a in enumerate(proto['args']):
+            t = a['type']
+            n = a['name']
+            if proto['name'] == 'bpf_get_socket_cookie' and i == 0:
+                    t = 'void'
+                    n = 'ctx'
+            one_arg = '{}{}'.format(comma, self.map_type(t))
+            if n:
+                if a['star']:
+                    one_arg += ' {}'.format(a['star'])
+                else:
+                    one_arg += ' '
+                one_arg += '{}'.format(n)
+            comma = ', '
+            print(one_arg, end='')
+
+        print(') = (void *) %d;' % len(self.seen_helpers))
+        print('')
+
 ###############################################################################
 
 # If script is launched from scripts/ from kernel tree and can access
@@ -405,6 +553,8 @@ Parse eBPF header file and generate documentation for eBPF helper functions.
 The RST-formatted output produced can be turned into a manual page with the
 rst2man utility.
 """)
+argParser.add_argument('--header', action='store_true',
+                       help='generate C header file')
 if (os.path.isfile(bpfh)):
     argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
                            default=bpfh)
@@ -417,5 +567,8 @@ headerParser = HeaderParser(args.filename)
 headerParser.run()
 
 # Print formatted output to standard output.
-printer = PrinterRST(headerParser.helpers)
+if args.header:
+    printer = PrinterHelpers(headerParser.helpers)
+else:
+    printer = PrinterRST(headerParser.helpers)
 printer.print_all()
index 58345ba..c97fdae 100644 (file)
@@ -83,6 +83,8 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
        { RTM_NEWNEXTHOP,       NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
        { RTM_DELNEXTHOP,       NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
        { RTM_GETNEXTHOP,       NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+       { RTM_NEWLINKPROP,      NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+       { RTM_DELLINKPROP,      NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -166,7 +168,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
                 * structures at the top of this file with the new mappings
                 * before updating the BUILD_BUG_ON() macro!
                 */
-               BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOP + 3));
+               BUILD_BUG_ON(RTM_MAX != (RTM_NEWLINKPROP + 3));
                err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
                                 sizeof(nlmsg_route_perms));
                break;
index 9a9376d..a7b8bf2 100644 (file)
@@ -12,6 +12,9 @@
 #include <libbpf.h>
 #include <linux/btf.h>
 #include <linux/hashtable.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
 
 #include "btf.h"
 #include "json_writer.h"
@@ -388,6 +391,54 @@ done:
        return err;
 }
 
+static struct btf *btf__parse_raw(const char *file)
+{
+       struct btf *btf;
+       struct stat st;
+       __u8 *buf;
+       FILE *f;
+
+       if (stat(file, &st))
+               return NULL;
+
+       f = fopen(file, "rb");
+       if (!f)
+               return NULL;
+
+       buf = malloc(st.st_size);
+       if (!buf) {
+               btf = ERR_PTR(-ENOMEM);
+               goto exit_close;
+       }
+
+       if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
+               btf = ERR_PTR(-EINVAL);
+               goto exit_free;
+       }
+
+       btf = btf__new(buf, st.st_size);
+
+exit_free:
+       free(buf);
+exit_close:
+       fclose(f);
+       return btf;
+}
+
+static bool is_btf_raw(const char *file)
+{
+       __u16 magic = 0;
+       int fd;
+
+       fd = open(file, O_RDONLY);
+       if (fd < 0)
+               return false;
+
+       read(fd, &magic, sizeof(magic));
+       close(fd);
+       return magic == BTF_MAGIC;
+}
+
 static int do_dump(int argc, char **argv)
 {
        struct btf *btf = NULL;
@@ -465,7 +516,11 @@ static int do_dump(int argc, char **argv)
                }
                NEXT_ARG();
        } else if (is_prefix(src, "file")) {
-               btf = btf__parse_elf(*argv, NULL);
+               if (is_btf_raw(*argv))
+                       btf = btf__parse_raw(*argv);
+               else
+                       btf = btf__parse_elf(*argv, NULL);
+
                if (IS_ERR(btf)) {
                        err = PTR_ERR(btf);
                        btf = NULL;
index 93d0086..4764581 100644 (file)
@@ -27,7 +27,7 @@ bool json_output;
 bool show_pinned;
 bool block_mount;
 bool verifier_logs;
-int bpf_flags;
+bool relaxed_maps;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 
@@ -396,7 +396,7 @@ int main(int argc, char **argv)
                        show_pinned = true;
                        break;
                case 'm':
-                       bpf_flags = MAPS_RELAX_COMPAT;
+                       relaxed_maps = true;
                        break;
                case 'n':
                        block_mount = true;
index af9ad56..2899095 100644 (file)
@@ -94,7 +94,7 @@ extern bool json_output;
 extern bool show_pinned;
 extern bool block_mount;
 extern bool verifier_logs;
-extern int bpf_flags;
+extern bool relaxed_maps;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 
index 43fdbbf..4535c86 100644 (file)
@@ -1091,10 +1091,11 @@ free_data_in:
 
 static int load_with_options(int argc, char **argv, bool first_prog_only)
 {
+       enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC;
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
+               .relaxed_maps = relaxed_maps,
+       );
        struct bpf_object_load_attr load_attr = { 0 };
-       struct bpf_object_open_attr open_attr = {
-               .prog_type = BPF_PROG_TYPE_UNSPEC,
-       };
        enum bpf_attach_type expected_attach_type;
        struct map_replace *map_replace = NULL;
        struct bpf_program *prog = NULL, *pos;
@@ -1105,11 +1106,13 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
        const char *pinfile;
        unsigned int i, j;
        __u32 ifindex = 0;
+       const char *file;
        int idx, err;
 
+
        if (!REQ_ARGS(2))
                return -1;
-       open_attr.file = GET_ARG();
+       file = GET_ARG();
        pinfile = GET_ARG();
 
        while (argc) {
@@ -1118,7 +1121,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
                        NEXT_ARG();
 
-                       if (open_attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
+                       if (common_prog_type != BPF_PROG_TYPE_UNSPEC) {
                                p_err("program type already specified");
                                goto err_free_reuse_maps;
                        }
@@ -1135,8 +1138,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
                        strcat(type, *argv);
                        strcat(type, "/");
 
-                       err = libbpf_prog_type_by_name(type,
-                                                      &open_attr.prog_type,
+                       err = libbpf_prog_type_by_name(type, &common_prog_type,
                                                       &expected_attach_type);
                        free(type);
                        if (err < 0)
@@ -1224,16 +1226,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
        set_max_rlimit();
 
-       obj = __bpf_object__open_xattr(&open_attr, bpf_flags);
+       obj = bpf_object__open_file(file, &open_opts);
        if (IS_ERR_OR_NULL(obj)) {
                p_err("failed to open object file");
                goto err_free_reuse_maps;
        }
 
        bpf_object__for_each_program(pos, obj) {
-               enum bpf_prog_type prog_type = open_attr.prog_type;
+               enum bpf_prog_type prog_type = common_prog_type;
 
-               if (open_attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
+               if (prog_type == BPF_PROG_TYPE_UNSPEC) {
                        const char *sec_name = bpf_program__title(pos, false);
 
                        err = libbpf_prog_type_by_name(sec_name, &prog_type,
index 77c6be9..df6809a 100644 (file)
@@ -173,6 +173,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_CGROUP_SYSCTL,
        BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
        BPF_PROG_TYPE_CGROUP_SOCKOPT,
+       BPF_PROG_TYPE_TRACING,
 };
 
 enum bpf_attach_type {
@@ -199,6 +200,7 @@ enum bpf_attach_type {
        BPF_CGROUP_UDP6_RECVMSG,
        BPF_CGROUP_GETSOCKOPT,
        BPF_CGROUP_SETSOCKOPT,
+       BPF_TRACE_RAW_TP,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -420,6 +422,7 @@ union bpf_attr {
                __u32           line_info_rec_size;     /* userspace bpf_line_info size */
                __aligned_u64   line_info;      /* line info */
                __u32           line_info_cnt;  /* number of bpf_line_info records */
+               __u32           attach_btf_id;  /* in-kernel BTF type id to attach to */
        };
 
        struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -560,10 +563,13 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
  *     Description
  *             For tracing programs, safely attempt to read *size* bytes from
- *             address *src* and store the data in *dst*.
+ *             kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ *             Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ *             instead.
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
@@ -794,7 +800,7 @@ union bpf_attr {
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
  *
- * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * int bpf_get_current_comm(void *buf, u32 size_of_buf)
  *     Description
  *             Copy the **comm** attribute of the current task into *buf* of
  *             *size_of_buf*. The **comm** attribute contains the name of
@@ -1023,7 +1029,7 @@ union bpf_attr {
  *             The realm of the route for the packet associated to *skb*, or 0
  *             if none was found.
  *
- * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  *     Description
  *             Write raw *data* blob into a special BPF perf event held by
  *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1068,7 +1074,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
  *     Description
  *             This helper was provided as an easy way to load data from a
  *             packet. It can be used to load *len* bytes from *offset* from
@@ -1085,7 +1091,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
+ * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
  *     Description
  *             Walk a user or a kernel stack and return its id. To achieve
  *             this, the helper needs *ctx*, which is a pointer to the context
@@ -1154,7 +1160,7 @@ union bpf_attr {
  *             The checksum result, or a negative error code in case of
  *             failure.
  *
- * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  *     Description
  *             Retrieve tunnel options metadata for the packet associated to
  *             *skb*, and store the raw tunnel option data to the buffer *opt*
@@ -1172,7 +1178,7 @@ union bpf_attr {
  *     Return
  *             The size of the option data retrieved.
  *
- * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  *     Description
  *             Set tunnel options metadata for the packet associated to *skb*
  *             to the option data contained in the raw buffer *opt* of *size*.
@@ -1425,45 +1431,14 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  *     Description
- *             Copy a NUL terminated string from an unsafe address
- *             *unsafe_ptr* to *dst*. The *size* should include the
- *             terminating NUL byte. In case the string length is smaller than
- *             *size*, the target is not padded with further NUL bytes. If the
- *             string length is larger than *size*, just *size*-1 bytes are
- *             copied and the last byte is set to NUL.
- *
- *             On success, the length of the copied string is returned. This
- *             makes this helper useful in tracing programs for reading
- *             strings, and more importantly to get its length at runtime. See
- *             the following snippet:
- *
- *             ::
- *
- *                     SEC("kprobe/sys_open")
- *                     void bpf_sys_open(struct pt_regs *ctx)
- *                     {
- *                             char buf[PATHLEN]; // PATHLEN is defined to 256
- *                             int res = bpf_probe_read_str(buf, sizeof(buf),
- *                                                          ctx->di);
- *
- *                             // Consume buf, for example push it to
- *                             // userspace via bpf_perf_event_output(); we
- *                             // can use res (the string length) as event
- *                             // size, after checking its boundaries.
- *                     }
- *
- *             In comparison, using **bpf_probe_read()** helper here instead
- *             to read the string would require to estimate the length at
- *             compile time, and would often result in copying more memory
- *             than necessary.
+ *             Copy a NUL terminated string from an unsafe kernel address
+ *             *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ *             more details.
  *
- *             Another useful use case is when parsing individual process
- *             arguments or individual environment variables navigating
- *             *current*\ **->mm->arg_start** and *current*\
- *             **->mm->env_start**: using this helper and the return value,
- *             one can quickly iterate at the right offset of the memory area.
+ *             Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ *             instead.
  *     Return
  *             On success, the strictly positive length of the string,
  *             including the trailing NUL character. On error, a negative
@@ -1511,7 +1486,7 @@ union bpf_attr {
  *     Return
  *             0
  *
- * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
  *     Description
  *             Emulate a call to **setsockopt()** on the socket associated to
  *             *bpf_socket*, which must be a full socket. The *level* at
@@ -1595,7 +1570,7 @@ union bpf_attr {
  *     Return
  *             **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
  *
- * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
  *     Description
  *             Redirect the packet to the socket referenced by *map* (of type
  *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
@@ -1715,7 +1690,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
  *     Description
  *             Emulate a call to **getsockopt()** on the socket associated to
  *             *bpf_socket*, which must be a full socket. The *level* at
@@ -1947,7 +1922,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
  *     Description
  *             Return a user or a kernel stack in bpf program provided buffer.
  *             To achieve this, the helper needs *ctx*, which is a pointer
@@ -1980,7 +1955,7 @@ union bpf_attr {
  *             A non-negative value equal to or less than *size* on success,
  *             or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  *     Description
  *             This helper is similar to **bpf_skb_load_bytes**\ () in that
  *             it provides an easy way to load *len* bytes from *offset*
@@ -2033,7 +2008,7 @@ union bpf_attr {
  *             * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *               packet is not forwarded or needs assist from full stack
  *
- * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *     Description
  *             Add an entry to, or update a sockhash *map* referencing sockets.
  *             The *skops* is used as a new value for the entry associated to
@@ -2392,7 +2367,7 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *     Description
  *             For socket policies, insert *len* bytes into *msg* at offset
  *             *start*.
@@ -2408,9 +2383,9 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *     Description
- *             Will remove *pop* bytes from a *msg* starting at byte *start*.
+ *             Will remove *len* bytes from a *msg* starting at byte *start*.
  *             This may result in **ENOMEM** errors under certain situations if
  *             an allocation and copy are required due to a full ring buffer.
  *             However, the helper will try to avoid doing the allocation
@@ -2505,7 +2480,7 @@ union bpf_attr {
  *             A **struct bpf_tcp_sock** pointer on success, or **NULL** in
  *             case of failure.
  *
- * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
+ * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
  *     Description
  *             Set ECN (Explicit Congestion Notification) field of IP header
  *             to **CE** (Congestion Encountered) if current value is **ECT**
@@ -2750,6 +2725,96 @@ union bpf_attr {
  *             **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
  *
  *             **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *             The value to write, of *size*, is passed through eBPF stack and
+ *             pointed by *data*.
+ *
+ *             *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ *             This helper is similar to **bpf_perf_event_output**\ () but
+ *             restricted to raw_tracepoint bpf programs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Safely attempt to read *size* bytes from user space address
+ *             *unsafe_ptr* and store the data in *dst*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Safely attempt to read *size* bytes from kernel space address
+ *             *unsafe_ptr* and store the data in *dst*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Copy a NUL terminated string from an unsafe user address
+ *             *unsafe_ptr* to *dst*. The *size* should include the
+ *             terminating NUL byte. In case the string length is smaller than
+ *             *size*, the target is not padded with further NUL bytes. If the
+ *             string length is larger than *size*, just *size*-1 bytes are
+ *             copied and the last byte is set to NUL.
+ *
+ *             On success, the length of the copied string is returned. This
+ *             makes this helper useful in tracing programs for reading
+ *             strings, and more importantly to get its length at runtime. See
+ *             the following snippet:
+ *
+ *             ::
+ *
+ *                     SEC("kprobe/sys_open")
+ *                     void bpf_sys_open(struct pt_regs *ctx)
+ *                     {
+ *                             char buf[PATHLEN]; // PATHLEN is defined to 256
+ *                             int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ *                                                               ctx->di);
+ *
+ *                             // Consume buf, for example push it to
+ *                             // userspace via bpf_perf_event_output(); we
+ *                             // can use res (the string length) as event
+ *                             // size, after checking its boundaries.
+ *                     }
+ *
+ *             In comparison, using **bpf_probe_read_user()** helper here
+ *             instead to read the string would require to estimate the length
+ *             at compile time, and would often result in copying more memory
+ *             than necessary.
+ *
+ *             Another useful use case is when parsing individual process
+ *             arguments or individual environment variables navigating
+ *             *current*\ **->mm->arg_start** and *current*\
+ *             **->mm->env_start**: using this helper and the return value,
+ *             one can quickly iterate at the right offset of the memory area.
+ *     Return
+ *             On success, the strictly positive length of the string,
+ *             including the trailing NUL character. On error, a negative
+ *             value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ *     Description
+ *             Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ *             to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ *     Return
+ *             On success, the strictly positive length of the string, including
+ *             the trailing NUL character. On error, a negative value.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2862,7 +2927,12 @@ union bpf_attr {
        FN(sk_storage_get),             \
        FN(sk_storage_delete),          \
        FN(send_signal),                \
-       FN(tcp_gen_syncookie),
+       FN(tcp_gen_syncookie),          \
+       FN(skb_output),                 \
+       FN(probe_read_user),            \
+       FN(probe_read_kernel),          \
+       FN(probe_read_user_str),        \
+       FN(probe_read_kernel_str),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
index 4a8c02c..8aec876 100644 (file)
@@ -167,6 +167,8 @@ enum {
        IFLA_NEW_IFINDEX,
        IFLA_MIN_MTU,
        IFLA_MAX_MTU,
+       IFLA_PROP_LIST,
+       IFLA_ALT_IFNAME, /* Alternative ifname */
        __IFLA_MAX
 };
 
index d9e9dec..35bf013 100644 (file)
@@ -3,3 +3,7 @@ libbpf.pc
 FEATURE-DUMP.libbpf
 test_libbpf
 libbpf.so.*
+TAGS
+tags
+cscope.*
+/bpf_helper_defs.h
index 56ce629..99425d0 100644 (file)
@@ -56,7 +56,7 @@ ifndef VERBOSE
 endif
 
 FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx
+FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
 FEATURE_DISPLAY = libelf bpf
 
 INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
@@ -143,6 +143,8 @@ LIB_TARGET  := $(addprefix $(OUTPUT),$(LIB_TARGET))
 LIB_FILE       := $(addprefix $(OUTPUT),$(LIB_FILE))
 PC_FILE                := $(addprefix $(OUTPUT),$(PC_FILE))
 
+TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags)
+
 GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
                           cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
                           awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \
@@ -150,22 +152,14 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
                              grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
-CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
-
-CXX_TEST_TARGET = $(OUTPUT)test_libbpf
-
-ifeq ($(feature-cxx), 1)
-       CMD_TARGETS += $(CXX_TEST_TARGET)
-endif
-
-TARGETS = $(CMD_TARGETS)
+CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) $(OUTPUT)test_libbpf
 
 all: fixdep
        $(Q)$(MAKE) all_cmd
 
 all_cmd: $(CMD_TARGETS) check
 
-$(BPF_IN_SHARED): force elfdep bpfdep
+$(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h
        @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
        (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
        echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@@ -183,22 +177,27 @@ $(BPF_IN_SHARED): force elfdep bpfdep
        echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
        $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
 
-$(BPF_IN_STATIC): force elfdep bpfdep
+$(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h
        $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
 
+bpf_helper_defs.h: $(srctree)/include/uapi/linux/bpf.h
+       $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header              \
+               --file $(srctree)/include/uapi/linux/bpf.h > bpf_helper_defs.h
+
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
 
 $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
-       $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
-                                   -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
+       $(QUIET_LINK)$(CC) $(LDFLAGS) \
+               --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
+               -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
        @ln -sf $(@F) $(OUTPUT)libbpf.so
        @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
 
 $(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
        $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
-$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
-       $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@
+$(OUTPUT)test_libbpf: test_libbpf.c $(OUTPUT)libbpf.a
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(INCLUDES) $^ -lelf -o $@
 
 $(OUTPUT)libbpf.pc:
        $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
@@ -247,13 +246,18 @@ install_lib: all_cmd
                $(call do_install_mkdir,$(libdir_SQ)); \
                cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
-install_headers:
+install_headers: bpf_helper_defs.h
        $(call QUIET_INSTALL, headers) \
                $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
                $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
                $(call do_install,btf.h,$(prefix)/include/bpf,644); \
                $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
-               $(call do_install,xsk.h,$(prefix)/include/bpf,644);
+               $(call do_install,xsk.h,$(prefix)/include/bpf,644); \
+               $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
+               $(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \
+               $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
+               $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
+               $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
 
 install_pkgconfig: $(PC_FILE)
        $(call QUIET_INSTALL, $(PC_FILE)) \
@@ -268,14 +272,15 @@ config-clean:
        $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
 clean:
-       $(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \
+       $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
                *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
-               *.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR)
+               *.pc LIBBPF-CFLAGS bpf_helper_defs.h \
+               $(SHARED_OBJDIR) $(STATIC_OBJDIR)
        $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
 
 
 
-PHONY += force elfdep bpfdep
+PHONY += force elfdep bpfdep cscope tags
 force:
 
 elfdep:
@@ -284,6 +289,17 @@ elfdep:
 bpfdep:
        @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
 
+cscope:
+       ls *.c *.h > cscope.files
+       cscope -b -q -I $(srctree)/include -f cscope.out
+
+tags:
+       rm -f TAGS tags
+       ls *.c *.h | xargs $(TAGS_PROG) -a
+
 # Declare the contents of the .PHONY variable as phony.  We keep that
 # information in a variable so we can use it in if_changed and friends.
 .PHONY: $(PHONY)
+
+# Delete partially updated (corrupted) files on error
+.DELETE_ON_ERROR:
index cbb9335..ca0d635 100644 (file)
@@ -228,6 +228,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = load_attr->prog_type;
        attr.expected_attach_type = load_attr->expected_attach_type;
+       if (attr.prog_type == BPF_PROG_TYPE_TRACING)
+               attr.attach_btf_id = load_attr->attach_btf_id;
+       else
+               attr.prog_ifindex = load_attr->prog_ifindex;
        attr.insn_cnt = (__u32)load_attr->insns_cnt;
        attr.insns = ptr_to_u64(load_attr->insns);
        attr.license = ptr_to_u64(load_attr->license);
@@ -242,7 +246,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
        }
 
        attr.kern_version = load_attr->kern_version;
-       attr.prog_ifindex = load_attr->prog_ifindex;
        attr.prog_btf_fd = load_attr->prog_btf_fd;
        attr.func_info_rec_size = load_attr->func_info_rec_size;
        attr.func_info_cnt = load_attr->func_info_cnt;
index 0db0133..1c53bc5 100644 (file)
@@ -78,7 +78,10 @@ struct bpf_load_program_attr {
        size_t insns_cnt;
        const char *license;
        __u32 kern_version;
-       __u32 prog_ifindex;
+       union {
+               __u32 prog_ifindex;
+               __u32 attach_btf_id;
+       };
        __u32 prog_btf_fd;
        __u32 func_info_rec_size;
        const void *func_info;
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
new file mode 100644 (file)
index 0000000..a273df3
--- /dev/null
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_CORE_READ_H__
+#define __BPF_CORE_READ_H__
+
+/*
+ * enum bpf_field_info_kind is passed as the second argument to the
+ * __builtin_preserve_field_info() built-in to get a specific aspect of
+ * the field passed as the first argument. __builtin_preserve_field_info(field,
+ * info_kind) returns __u32 integer and produces BTF field relocation, which
+ * is understood and processed by libbpf during BPF object loading. See
+ * selftests/bpf for examples.
+ */
+enum bpf_field_info_kind {
+       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
+       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
+};
+
+/*
+ * Convenience macro to check that a field actually exists in the target kernel's BTF.
+ * Returns:
+ *    1, if matching field is present in target kernel;
+ *    0, if no matching field found.
+ */
+#define bpf_core_field_exists(field)                                       \
+       __builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+
+/*
+ * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
+ * relocation for source address using __builtin_preserve_access_index()
+ * built-in, provided by Clang.
+ *
+ * __builtin_preserve_access_index() takes as an argument an expression of
+ * taking an address of a field within struct/union. It makes compiler emit
+ * a relocation, which records BTF type ID describing root struct/union and an
+ * accessor string which describes exact embedded field that was used to take
+ * an address. See detailed description of this relocation format and
+ * semantics in comments to struct bpf_field_reloc in libbpf_internal.h.
+ *
+ * This relocation allows libbpf to adjust BPF instruction to use correct
+ * actual field offset, based on target kernel BTF type that matches original
+ * (local) BTF, used to record relocation.
+ */
+#define bpf_core_read(dst, sz, src)                                        \
+       bpf_probe_read(dst, sz,                                             \
+                      (const void *)__builtin_preserve_access_index(src))
+
+/*
+ * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
+ * additionally emitting BPF CO-RE field relocation for specified source
+ * argument.
+ */
+#define bpf_core_read_str(dst, sz, src)                                            \
+       bpf_probe_read_str(dst, sz,                                         \
+                          (const void *)__builtin_preserve_access_index(src))
+
+#define ___concat(a, b) a ## b
+#define ___apply(fn, n) ___concat(fn, n)
+#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N
+
+/*
+ * return number of provided arguments; used for switch-based variadic macro
+ * definitions (see ___last, ___arrow, etc below)
+ */
+#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+/*
+ * return 0 if no arguments are passed, N - otherwise; used for
+ * recursively-defined macros to specify termination (0) case, and generic
+ * (N) case (e.g., ___read_ptrs, ___core_read)
+ */
+#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___last1(x) x
+#define ___last2(a, x) x
+#define ___last3(a, b, x) x
+#define ___last4(a, b, c, x) x
+#define ___last5(a, b, c, d, x) x
+#define ___last6(a, b, c, d, e, x) x
+#define ___last7(a, b, c, d, e, f, x) x
+#define ___last8(a, b, c, d, e, f, g, x) x
+#define ___last9(a, b, c, d, e, f, g, h, x) x
+#define ___last10(a, b, c, d, e, f, g, h, i, x) x
+#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___nolast2(a, _) a
+#define ___nolast3(a, b, _) a, b
+#define ___nolast4(a, b, c, _) a, b, c
+#define ___nolast5(a, b, c, d, _) a, b, c, d
+#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e
+#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f
+#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g
+#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h
+#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i
+#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___arrow1(a) a
+#define ___arrow2(a, b) a->b
+#define ___arrow3(a, b, c) a->b->c
+#define ___arrow4(a, b, c, d) a->b->c->d
+#define ___arrow5(a, b, c, d, e) a->b->c->d->e
+#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f
+#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g
+#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h
+#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i
+#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
+#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___type(...) typeof(___arrow(__VA_ARGS__))
+
+#define ___read(read_fn, dst, src_type, src, accessor)                     \
+       read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
+
+/* "recursively" read a sequence of inner pointers using local __t var */
+#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a);
+#define ___rd_last(...)                                                            \
+       ___read(bpf_core_read, &__t,                                        \
+               ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__));
+#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__)
+#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___read_ptrs(src, ...)                                             \
+       ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__)
+
+#define ___core_read0(fn, dst, src, a)                                     \
+       ___read(fn, dst, ___type(src), src, a);
+#define ___core_readN(fn, dst, src, ...)                                   \
+       ___read_ptrs(src, ___nolast(__VA_ARGS__))                           \
+       ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t,         \
+               ___last(__VA_ARGS__));
+#define ___core_read(fn, dst, src, a, ...)                                 \
+       ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst,              \
+                                                     src, a, ##__VA_ARGS__)
+
+/*
+ * BPF_CORE_READ_INTO() is a more performance-conscious variant of
+ * BPF_CORE_READ(), in which final field is read into user-provided storage.
+ * See BPF_CORE_READ() below for more details on general usage.
+ */
+#define BPF_CORE_READ_INTO(dst, src, a, ...)                               \
+       ({                                                                  \
+               ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__)     \
+       })
+
+/*
+ * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as
+ * BPF_CORE_READ() for intermediate pointers, but then executes (and returns
+ * corresponding error code) bpf_core_read_str() for final string read.
+ */
+#define BPF_CORE_READ_STR_INTO(dst, src, a, ...)                           \
+       ({                                                                  \
+               ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+       })
+
+/*
+ * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially
+ * when there are few pointer chasing steps.
+ * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like:
+ *     int x = s->a.b.c->d.e->f->g;
+ * can be succinctly achieved using BPF_CORE_READ as:
+ *     int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
+ *
+ * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
+ * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * 1. const void *__t = s->a.b.c;
+ * 2. __t = __t->d.e;
+ * 3. __t = __t->f;
+ * 4. return __t->g;
+ *
+ * Equivalence is logical, because there is a heavy type casting/preservation
+ * involved, as well as all the reads are happening through bpf_probe_read()
+ * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ *
+ * N.B. Only up to 9 "field accessors" are supported, which should be more
+ * than enough for any practical purpose.
+ */
+#define BPF_CORE_READ(src, a, ...)                                         \
+       ({                                                                  \
+               ___type(src, a, ##__VA_ARGS__) __r;                         \
+               BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__);            \
+               __r;                                                        \
+       })
+
+#endif
+
diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h
new file mode 100644 (file)
index 0000000..fbe2800
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <linux/stddef.h>
+#include <linux/swab.h>
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, unless the user overrides it explicitly
+ * (bpfel/bpfeb). The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)                        __builtin_bswap16(x)
+# define __bpf_htons(x)                        __builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)       ___constant_swab16(x)
+# define __bpf_constant_htons(x)       ___constant_swab16(x)
+# define __bpf_ntohl(x)                        __builtin_bswap32(x)
+# define __bpf_htonl(x)                        __builtin_bswap32(x)
+# define __bpf_constant_ntohl(x)       ___constant_swab32(x)
+# define __bpf_constant_htonl(x)       ___constant_swab32(x)
+# define __bpf_be64_to_cpu(x)          __builtin_bswap64(x)
+# define __bpf_cpu_to_be64(x)          __builtin_bswap64(x)
+# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)                        (x)
+# define __bpf_htons(x)                        (x)
+# define __bpf_constant_ntohs(x)       (x)
+# define __bpf_constant_htons(x)       (x)
+# define __bpf_ntohl(x)                        (x)
+# define __bpf_htonl(x)                        (x)
+# define __bpf_constant_ntohl(x)       (x)
+# define __bpf_constant_htonl(x)       (x)
+# define __bpf_be64_to_cpu(x)          (x)
+# define __bpf_cpu_to_be64(x)          (x)
+# define __bpf_constant_be64_to_cpu(x)  (x)
+# define __bpf_constant_cpu_to_be64(x)  (x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x)                           \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x)                           \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x)                           \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x)                           \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+#define bpf_cpu_to_be64(x)                     \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
+#define bpf_be64_to_cpu(x)                     \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
+
+#endif /* __BPF_ENDIAN__ */
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
new file mode 100644 (file)
index 0000000..0c7d282
--- /dev/null
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_HELPERS__
+#define __BPF_HELPERS__
+
+#include "bpf_helper_defs.h"
+
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
+
+/* Helper macro to print out debug messages */
+#define bpf_printk(fmt, ...)                           \
+({                                                     \
+       char ____fmt[] = fmt;                           \
+       bpf_trace_printk(____fmt, sizeof(____fmt),      \
+                        ##__VA_ARGS__);                \
+})
+
+/*
+ * Helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#ifndef __always_inline
+#define __always_inline __attribute__((always_inline))
+#endif
+
+/*
+ * Helper structure used by eBPF C program
+ * to describe BPF map attributes to libbpf loader
+ */
+struct bpf_map_def {
+       unsigned int type;
+       unsigned int key_size;
+       unsigned int value_size;
+       unsigned int max_entries;
+       unsigned int map_flags;
+};
+
+enum libbpf_pin_type {
+       LIBBPF_PIN_NONE,
+       /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+       LIBBPF_PIN_BY_NAME,
+};
+
+#endif
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
new file mode 100644 (file)
index 0000000..b0dafe8
--- /dev/null
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_H__
+#define __BPF_TRACING_H__
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+       #define bpf_target_x86
+       #define bpf_target_defined
+#elif defined(__TARGET_ARCH_s390)
+       #define bpf_target_s390
+       #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm)
+       #define bpf_target_arm
+       #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+       #define bpf_target_arm64
+       #define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+       #define bpf_target_mips
+       #define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+       #define bpf_target_powerpc
+       #define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+       #define bpf_target_sparc
+       #define bpf_target_defined
+#else
+       #undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+       #define bpf_target_x86
+#elif defined(__s390__)
+       #define bpf_target_s390
+#elif defined(__arm__)
+       #define bpf_target_arm
+#elif defined(__aarch64__)
+       #define bpf_target_arm64
+#elif defined(__mips__)
+       #define bpf_target_mips
+#elif defined(__powerpc__)
+       #define bpf_target_powerpc
+#elif defined(__sparc__)
+       #define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
+
+#ifdef __KERNEL__
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+#else
+#ifdef __i386__
+/* i386 kernel is built with -mregparm=3 */
+#define PT_REGS_PARM1(x) ((x)->eax)
+#define PT_REGS_PARM2(x) ((x)->edx)
+#define PT_REGS_PARM3(x) ((x)->ecx)
+#define PT_REGS_PARM4(x) 0
+#define PT_REGS_PARM5(x) 0
+#define PT_REGS_RET(x) ((x)->esp)
+#define PT_REGS_FP(x) ((x)->ebp)
+#define PT_REGS_RC(x) ((x)->eax)
+#define PT_REGS_SP(x) ((x)->esp)
+#define PT_REGS_IP(x) ((x)->eip)
+#else
+#define PT_REGS_PARM1(x) ((x)->rdi)
+#define PT_REGS_PARM2(x) ((x)->rsi)
+#define PT_REGS_PARM3(x) ((x)->rdx)
+#define PT_REGS_PARM4(x) ((x)->rcx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->rsp)
+#define PT_REGS_FP(x) ((x)->rbp)
+#define PT_REGS_RC(x) ((x)->rax)
+#define PT_REGS_SP(x) ((x)->rsp)
+#define PT_REGS_IP(x) ((x)->rip)
+#endif
+#endif
+
+#elif defined(bpf_target_s390)
+
+/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
+struct pt_regs;
+#define PT_REGS_S390 const volatile user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
+#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
+#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
+#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
+#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
+#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
+#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
+
+#elif defined(bpf_target_arm)
+
+#define PT_REGS_PARM1(x) ((x)->uregs[0])
+#define PT_REGS_PARM2(x) ((x)->uregs[1])
+#define PT_REGS_PARM3(x) ((x)->uregs[2])
+#define PT_REGS_PARM4(x) ((x)->uregs[3])
+#define PT_REGS_PARM5(x) ((x)->uregs[4])
+#define PT_REGS_RET(x) ((x)->uregs[14])
+#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->uregs[0])
+#define PT_REGS_SP(x) ((x)->uregs[13])
+#define PT_REGS_IP(x) ((x)->uregs[12])
+
+#elif defined(bpf_target_arm64)
+
+/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
+struct pt_regs;
+#define PT_REGS_ARM64 const volatile struct user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
+#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
+#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
+#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
+#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
+#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
+#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#elif defined(bpf_target_sparc)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
+#endif
+
+#if defined(bpf_target_powerpc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)                ({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP              BPF_KPROBE_READ_RET_IP
+#elif defined(bpf_target_sparc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)                ({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP              BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)                                            \
+       ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx)                                 \
+       ({ bpf_probe_read(&(ip), sizeof(ip),                                \
+                         (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
index 1aa189a..d72e9a7 100644 (file)
@@ -390,14 +390,14 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
        GElf_Ehdr ehdr;
 
        if (elf_version(EV_CURRENT) == EV_NONE) {
-               pr_warning("failed to init libelf for %s\n", path);
+               pr_warn("failed to init libelf for %s\n", path);
                return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
        }
 
        fd = open(path, O_RDONLY);
        if (fd < 0) {
                err = -errno;
-               pr_warning("failed to open %s: %s\n", path, strerror(errno));
+               pr_warn("failed to open %s: %s\n", path, strerror(errno));
                return ERR_PTR(err);
        }
 
@@ -405,19 +405,19 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 
        elf = elf_begin(fd, ELF_C_READ, NULL);
        if (!elf) {
-               pr_warning("failed to open %s as ELF file\n", path);
+               pr_warn("failed to open %s as ELF file\n", path);
                goto done;
        }
        if (!gelf_getehdr(elf, &ehdr)) {
-               pr_warning("failed to get EHDR from %s\n", path);
+               pr_warn("failed to get EHDR from %s\n", path);
                goto done;
        }
        if (!btf_check_endianness(&ehdr)) {
-               pr_warning("non-native ELF endianness is not supported\n");
+               pr_warn("non-native ELF endianness is not supported\n");
                goto done;
        }
        if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
-               pr_warning("failed to get e_shstrndx from %s\n", path);
+               pr_warn("failed to get e_shstrndx from %s\n", path);
                goto done;
        }
 
@@ -427,29 +427,29 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 
                idx++;
                if (gelf_getshdr(scn, &sh) != &sh) {
-                       pr_warning("failed to get section(%d) header from %s\n",
-                                  idx, path);
+                       pr_warn("failed to get section(%d) header from %s\n",
+                               idx, path);
                        goto done;
                }
                name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
                if (!name) {
-                       pr_warning("failed to get section(%d) name from %s\n",
-                                  idx, path);
+                       pr_warn("failed to get section(%d) name from %s\n",
+                               idx, path);
                        goto done;
                }
                if (strcmp(name, BTF_ELF_SEC) == 0) {
                        btf_data = elf_getdata(scn, 0);
                        if (!btf_data) {
-                               pr_warning("failed to get section(%d, %s) data from %s\n",
-                                          idx, name, path);
+                               pr_warn("failed to get section(%d, %s) data from %s\n",
+                                       idx, name, path);
                                goto done;
                        }
                        continue;
                } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
                        btf_ext_data = elf_getdata(scn, 0);
                        if (!btf_ext_data) {
-                               pr_warning("failed to get section(%d, %s) data from %s\n",
-                                          idx, name, path);
+                               pr_warn("failed to get section(%d, %s) data from %s\n",
+                                       idx, name, path);
                                goto done;
                        }
                        continue;
@@ -600,9 +600,9 @@ int btf__load(struct btf *btf)
                               log_buf, log_buf_size, false);
        if (btf->fd < 0) {
                err = -errno;
-               pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+               pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
                if (*log_buf)
-                       pr_warning("%s\n", log_buf);
+                       pr_warn("%s\n", log_buf);
                goto done;
        }
 
@@ -707,8 +707,8 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 
        if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
            max_name) {
-               pr_warning("map:%s length of '____btf_map_%s' is too long\n",
-                          map_name, map_name);
+               pr_warn("map:%s length of '____btf_map_%s' is too long\n",
+                       map_name, map_name);
                return -EINVAL;
        }
 
@@ -721,14 +721,14 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 
        container_type = btf__type_by_id(btf, container_id);
        if (!container_type) {
-               pr_warning("map:%s cannot find BTF type for container_id:%u\n",
-                          map_name, container_id);
+               pr_warn("map:%s cannot find BTF type for container_id:%u\n",
+                       map_name, container_id);
                return -EINVAL;
        }
 
        if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
-               pr_warning("map:%s container_name:%s is an invalid container struct\n",
-                          map_name, container_name);
+               pr_warn("map:%s container_name:%s is an invalid container struct\n",
+                       map_name, container_name);
                return -EINVAL;
        }
 
@@ -737,25 +737,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 
        key_size = btf__resolve_size(btf, key->type);
        if (key_size < 0) {
-               pr_warning("map:%s invalid BTF key_type_size\n", map_name);
+               pr_warn("map:%s invalid BTF key_type_size\n", map_name);
                return key_size;
        }
 
        if (expected_key_size != key_size) {
-               pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
-                          map_name, (__u32)key_size, expected_key_size);
+               pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+                       map_name, (__u32)key_size, expected_key_size);
                return -EINVAL;
        }
 
        value_size = btf__resolve_size(btf, value->type);
        if (value_size < 0) {
-               pr_warning("map:%s invalid BTF value_type_size\n", map_name);
+               pr_warn("map:%s invalid BTF value_type_size\n", map_name);
                return value_size;
        }
 
        if (expected_value_size != value_size) {
-               pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
-                          map_name, (__u32)value_size, expected_value_size);
+               pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+                       map_name, (__u32)value_size, expected_value_size);
                return -EINVAL;
        }
 
@@ -888,14 +888,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
        return btf_ext_setup_info(btf_ext, &param);
 }
 
-static int btf_ext_setup_offset_reloc(struct btf_ext *btf_ext)
+static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
 {
        struct btf_ext_sec_setup_param param = {
-               .off = btf_ext->hdr->offset_reloc_off,
-               .len = btf_ext->hdr->offset_reloc_len,
-               .min_rec_size = sizeof(struct bpf_offset_reloc),
-               .ext_info = &btf_ext->offset_reloc_info,
-               .desc = "offset_reloc",
+               .off = btf_ext->hdr->field_reloc_off,
+               .len = btf_ext->hdr->field_reloc_len,
+               .min_rec_size = sizeof(struct bpf_field_reloc),
+               .ext_info = &btf_ext->field_reloc_info,
+               .desc = "field_reloc",
        };
 
        return btf_ext_setup_info(btf_ext, &param);
@@ -975,9 +975,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
                goto done;
 
        if (btf_ext->hdr->hdr_len <
-           offsetofend(struct btf_ext_header, offset_reloc_len))
+           offsetofend(struct btf_ext_header, field_reloc_len))
                goto done;
-       err = btf_ext_setup_offset_reloc(btf_ext);
+       err = btf_ext_setup_field_reloc(btf_ext);
        if (err)
                goto done;
 
index 9cb44b4..b189941 100644 (file)
@@ -60,8 +60,8 @@ struct btf_ext_header {
        __u32   line_info_len;
 
        /* optional part of .BTF.ext header */
-       __u32   offset_reloc_off;
-       __u32   offset_reloc_len;
+       __u32   field_reloc_off;
+       __u32   field_reloc_len;
 };
 
 LIBBPF_API void btf__free(struct btf *btf);
index ede55fe..cb126d8 100644 (file)
@@ -428,7 +428,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
                /* type loop, but resolvable through fwd declaration */
                if (btf_is_composite(t) && through_ptr && t->name_off != 0)
                        return 0;
-               pr_warning("unsatisfiable type cycle, id:[%u]\n", id);
+               pr_warn("unsatisfiable type cycle, id:[%u]\n", id);
                return -ELOOP;
        }
 
@@ -636,8 +636,8 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
                        if (id == cont_id)
                                return;
                        if (t->name_off == 0) {
-                               pr_warning("anonymous struct/union loop, id:[%u]\n",
-                                          id);
+                               pr_warn("anonymous struct/union loop, id:[%u]\n",
+                                       id);
                                return;
                        }
                        btf_dump_emit_struct_fwd(d, id, t);
@@ -782,7 +782,7 @@ static int btf_align_of(const struct btf *btf, __u32 id)
                return align;
        }
        default:
-               pr_warning("unsupported BTF_KIND:%u\n", btf_kind(t));
+               pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
                return 1;
        }
 }
@@ -876,7 +876,6 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
        __u16 vlen = btf_vlen(t);
 
        packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
-       align = packed ? 1 : btf_align_of(d->btf, id);
 
        btf_dump_printf(d, "%s%s%s {",
                        is_struct ? "struct" : "union",
@@ -906,6 +905,13 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
                btf_dump_printf(d, ";");
        }
 
+       /* pad at the end, if necessary */
+       if (is_struct) {
+               align = packed ? 1 : btf_align_of(d->btf, id);
+               btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
+                                         lvl + 1);
+       }
+
        if (vlen)
                btf_dump_printf(d, "\n");
        btf_dump_printf(d, "%s}", pfx(lvl));
@@ -969,6 +975,17 @@ static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
 {
        const char *name = btf_dump_ident_name(d, id);
 
+       /*
+        * Old GCC versions are emitting invalid typedef for __gnuc_va_list
+        * pointing to VOID. This generates warnings from btf_dump() and
+        * results in uncompilable header file, so we are fixing it up here
+        * with valid typedef into __builtin_va_list.
+        */
+       if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) {
+               btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list");
+               return;
+       }
+
        btf_dump_printf(d, "typedef ");
        btf_dump_emit_type_decl(d, t->type, name, lvl);
 }
@@ -1050,7 +1067,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
                         * chain, restore stack, emit warning, and try to
                         * proceed nevertheless
                         */
-                       pr_warning("not enough memory for decl stack:%d", err);
+                       pr_warn("not enough memory for decl stack:%d", err);
                        d->decl_stack_cnt = stack_start;
                        return;
                }
@@ -1079,8 +1096,8 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
                case BTF_KIND_TYPEDEF:
                        goto done;
                default:
-                       pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
-                                  btf_kind(t), id);
+                       pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+                               btf_kind(t), id);
                        goto done;
                }
        }
@@ -1306,8 +1323,8 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                        return;
                }
                default:
-                       pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
-                                  kind, id);
+                       pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+                               kind, id);
                        return;
                }
 
index e027652..7aa2a2a 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
+#include <linux/version.h>
 #include <sys/epoll.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -187,6 +188,7 @@ struct bpf_program {
        bpf_program_clear_priv_t clear_priv;
 
        enum bpf_attach_type expected_attach_type;
+       __u32 attach_btf_id;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;
@@ -225,6 +227,8 @@ struct bpf_map {
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
+       char *pin_path;
+       bool pinned;
 };
 
 struct bpf_secdata {
@@ -248,6 +252,7 @@ struct bpf_object {
 
        bool loaded;
        bool has_pseudo_calls;
+       bool relaxed_core_relocs;
 
        /*
         * Information when doing elf related work. Only valid if fd
@@ -255,7 +260,7 @@ struct bpf_object {
         */
        struct {
                int fd;
-               void *obj_buf;
+               const void *obj_buf;
                size_t obj_buf_sz;
                Elf *elf;
                GElf_Ehdr ehdr;
@@ -310,8 +315,8 @@ void bpf_program__unload(struct bpf_program *prog)
                for (i = 0; i < prog->instances.nr; i++)
                        zclose(prog->instances.fds[i]);
        } else if (prog->instances.nr != -1) {
-               pr_warning("Internal error: instances.nr is %d\n",
-                          prog->instances.nr);
+               pr_warn("Internal error: instances.nr is %d\n",
+                       prog->instances.nr);
        }
 
        prog->instances.nr = -1;
@@ -362,8 +367,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
        const size_t bpf_insn_sz = sizeof(struct bpf_insn);
 
        if (size == 0 || size % bpf_insn_sz) {
-               pr_warning("corrupted section '%s', size: %zu\n",
-                          section_name, size);
+               pr_warn("corrupted section '%s', size: %zu\n",
+                       section_name, size);
                return -EINVAL;
        }
 
@@ -371,22 +376,22 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 
        prog->section_name = strdup(section_name);
        if (!prog->section_name) {
-               pr_warning("failed to alloc name for prog under section(%d) %s\n",
-                          idx, section_name);
+               pr_warn("failed to alloc name for prog under section(%d) %s\n",
+                       idx, section_name);
                goto errout;
        }
 
        prog->pin_name = __bpf_program__pin_name(prog);
        if (!prog->pin_name) {
-               pr_warning("failed to alloc pin name for prog under section(%d) %s\n",
-                          idx, section_name);
+               pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
+                       idx, section_name);
                goto errout;
        }
 
        prog->insns = malloc(size);
        if (!prog->insns) {
-               pr_warning("failed to alloc insns for prog under section %s\n",
-                          section_name);
+               pr_warn("failed to alloc insns for prog under section %s\n",
+                       section_name);
                goto errout;
        }
        prog->insns_cnt = size / bpf_insn_sz;
@@ -424,8 +429,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
                 * is still valid, so don't need special treat for
                 * bpf_close_object().
                 */
-               pr_warning("failed to alloc a new program under section '%s'\n",
-                          section_name);
+               pr_warn("failed to alloc a new program under section '%s'\n",
+                       section_name);
                bpf_program__exit(&prog);
                return -ENOMEM;
        }
@@ -465,8 +470,8 @@ bpf_object__init_prog_names(struct bpf_object *obj)
                                          obj->efile.strtabidx,
                                          sym.st_name);
                        if (!name) {
-                               pr_warning("failed to get sym name string for prog %s\n",
-                                          prog->section_name);
+                               pr_warn("failed to get sym name string for prog %s\n",
+                                       prog->section_name);
                                return -LIBBPF_ERRNO__LIBELF;
                        }
                }
@@ -475,15 +480,15 @@ bpf_object__init_prog_names(struct bpf_object *obj)
                        name = ".text";
 
                if (!name) {
-                       pr_warning("failed to find sym for prog %s\n",
-                                  prog->section_name);
+                       pr_warn("failed to find sym for prog %s\n",
+                               prog->section_name);
                        return -EINVAL;
                }
 
                prog->name = strdup(name);
                if (!prog->name) {
-                       pr_warning("failed to allocate memory for prog sym %s\n",
-                                  name);
+                       pr_warn("failed to allocate memory for prog sym %s\n",
+                               name);
                        return -ENOMEM;
                }
        }
@@ -491,25 +496,43 @@ bpf_object__init_prog_names(struct bpf_object *obj)
        return 0;
 }
 
+static __u32 get_kernel_version(void)
+{
+       __u32 major, minor, patch;
+       struct utsname info;
+
+       uname(&info);
+       if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+               return 0;
+       return KERNEL_VERSION(major, minor, patch);
+}
+
 static struct bpf_object *bpf_object__new(const char *path,
-                                         void *obj_buf,
-                                         size_t obj_buf_sz)
+                                         const void *obj_buf,
+                                         size_t obj_buf_sz,
+                                         const char *obj_name)
 {
        struct bpf_object *obj;
        char *end;
 
        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
-               pr_warning("alloc memory failed for %s\n", path);
+               pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }
 
        strcpy(obj->path, path);
-       /* Using basename() GNU version which doesn't modify arg. */
-       strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1);
-       end = strchr(obj->name, '.');
-       if (end)
-               *end = 0;
+       if (obj_name) {
+               strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
+               obj->name[sizeof(obj->name) - 1] = 0;
+       } else {
+               /* Using basename() GNU version which doesn't modify arg. */
+               strncpy(obj->name, basename((void *)path),
+                       sizeof(obj->name) - 1);
+               end = strchr(obj->name, '.');
+               if (end)
+                       *end = 0;
+       }
 
        obj->efile.fd = -1;
        /*
@@ -526,6 +549,7 @@ static struct bpf_object *bpf_object__new(const char *path,
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
 
+       obj->kern_version = get_kernel_version();
        obj->loaded = false;
 
        INIT_LIST_HEAD(&obj->list);
@@ -560,7 +584,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
        GElf_Ehdr *ep;
 
        if (obj_elf_valid(obj)) {
-               pr_warning("elf init: internal error\n");
+               pr_warn("elf init: internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }
 
@@ -569,7 +593,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
                 * obj_buf should have been validated by
                 * bpf_object__open_buffer().
                 */
-               obj->efile.elf = elf_memory(obj->efile.obj_buf,
+               obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
                                            obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY);
@@ -578,7 +602,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 
                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-                       pr_warning("failed to open %s: %s\n", obj->path, cp);
+                       pr_warn("failed to open %s: %s\n", obj->path, cp);
                        return err;
                }
 
@@ -587,13 +611,13 @@ static int bpf_object__elf_init(struct bpf_object *obj)
        }
 
        if (!obj->efile.elf) {
-               pr_warning("failed to open %s as ELF file\n", obj->path);
+               pr_warn("failed to open %s as ELF file\n", obj->path);
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }
 
        if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
-               pr_warning("failed to get EHDR from %s\n", obj->path);
+               pr_warn("failed to get EHDR from %s\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
@@ -602,7 +626,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
        /* Old LLVM set e_machine to EM_NONE */
        if (ep->e_type != ET_REL ||
            (ep->e_machine && ep->e_machine != EM_BPF)) {
-               pr_warning("%s is not an eBPF object file\n", obj->path);
+               pr_warn("%s is not an eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
@@ -624,7 +648,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
 #else
 # error "Unrecognized __BYTE_ORDER__"
 #endif
-       pr_warning("endianness mismatch.\n");
+       pr_warn("endianness mismatch.\n");
        return -LIBBPF_ERRNO__ENDIAN;
 }
 
@@ -642,7 +666,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
        __u32 kver;
 
        if (size != sizeof(kver)) {
-               pr_warning("invalid kver section in %s\n", obj->path);
+               pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
@@ -684,15 +708,15 @@ static int bpf_object_search_section_size(const struct bpf_object *obj,
 
                idx++;
                if (gelf_getshdr(scn, &sh) != &sh) {
-                       pr_warning("failed to get section(%d) header from %s\n",
-                                  idx, obj->path);
+                       pr_warn("failed to get section(%d) header from %s\n",
+                               idx, obj->path);
                        return -EIO;
                }
 
                sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
                if (!sec_name) {
-                       pr_warning("failed to get section(%d) name from %s\n",
-                                  idx, obj->path);
+                       pr_warn("failed to get section(%d) name from %s\n",
+                               idx, obj->path);
                        return -EIO;
                }
 
@@ -701,8 +725,8 @@ static int bpf_object_search_section_size(const struct bpf_object *obj,
 
                data = elf_getdata(scn, 0);
                if (!data) {
-                       pr_warning("failed to get section(%d) data from %s(%s)\n",
-                                  idx, name, obj->path);
+                       pr_warn("failed to get section(%d) data from %s(%s)\n",
+                               idx, name, obj->path);
                        return -EIO;
                }
 
@@ -762,8 +786,8 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
                                   sym.st_name);
                if (!sname) {
-                       pr_warning("failed to get sym name string for var %s\n",
-                                  name);
+                       pr_warn("failed to get sym name string for var %s\n",
+                               name);
                        return -EIO;
                }
                if (strcmp(name, sname) == 0) {
@@ -787,7 +811,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
        new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
        new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
        if (!new_maps) {
-               pr_warning("alloc maps for object failed\n");
+               pr_warn("alloc maps for object failed\n");
                return ERR_PTR(-ENOMEM);
        }
 
@@ -828,7 +852,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
                 libbpf_type_to_btf_name[type]);
        map->name = strdup(map_name);
        if (!map->name) {
-               pr_warning("failed to alloc map name\n");
+               pr_warn("failed to alloc map name\n");
                return -ENOMEM;
        }
        pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n",
@@ -844,7 +868,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
                *data_buff = malloc(data->d_size);
                if (!*data_buff) {
                        zfree(&map->name);
-                       pr_warning("failed to alloc map content buffer\n");
+                       pr_warn("failed to alloc map content buffer\n");
                        return -ENOMEM;
                }
                memcpy(*data_buff, data->d_buf, data->d_size);
@@ -906,8 +930,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
        if (scn)
                data = elf_getdata(scn, NULL);
        if (!scn || !data) {
-               pr_warning("failed to get Elf_Data from map section %d\n",
-                          obj->efile.maps_shndx);
+               pr_warn("failed to get Elf_Data from map section %d\n",
+                       obj->efile.maps_shndx);
                return -EINVAL;
        }
 
@@ -934,9 +958,9 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 
        map_def_sz = data->d_size / nr_maps;
        if (!data->d_size || (data->d_size % nr_maps) != 0) {
-               pr_warning("unable to determine map definition size "
-                          "section %s, %d maps in %zd bytes\n",
-                          obj->path, nr_maps, data->d_size);
+               pr_warn("unable to determine map definition size "
+                       "section %s, %d maps in %zd bytes\n",
+                       obj->path, nr_maps, data->d_size);
                return -EINVAL;
        }
 
@@ -959,8 +983,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
                map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
                                      sym.st_name);
                if (!map_name) {
-                       pr_warning("failed to get map #%d name sym string for obj %s\n",
-                                  i, obj->path);
+                       pr_warn("failed to get map #%d name sym string for obj %s\n",
+                               i, obj->path);
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
@@ -970,14 +994,14 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
                pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
                         map_name, map->sec_idx, map->sec_offset);
                if (sym.st_value + map_def_sz > data->d_size) {
-                       pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
-                                  obj->path, map_name);
+                       pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
+                               obj->path, map_name);
                        return -EINVAL;
                }
 
                map->name = strdup(map_name);
                if (!map->name) {
-                       pr_warning("failed to alloc map name\n");
+                       pr_warn("failed to alloc map name\n");
                        return -ENOMEM;
                }
                pr_debug("map %d is \"%s\"\n", i, map->name);
@@ -1001,10 +1025,10 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
                        for (b = ((char *)def) + sizeof(struct bpf_map_def);
                             b < ((char *)def) + map_def_sz; b++) {
                                if (*b != 0) {
-                                       pr_warning("maps section in %s: \"%s\" "
-                                                  "has unrecognized, non-zero "
-                                                  "options\n",
-                                                  obj->path, map_name);
+                                       pr_warn("maps section in %s: \"%s\" "
+                                               "has unrecognized, non-zero "
+                                               "options\n",
+                                               obj->path, map_name);
                                        if (strict)
                                                return -EINVAL;
                                }
@@ -1048,20 +1072,20 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
        const struct btf_type *arr_t;
 
        if (!btf_is_ptr(t)) {
-               pr_warning("map '%s': attr '%s': expected PTR, got %u.\n",
-                          map_name, name, btf_kind(t));
+               pr_warn("map '%s': attr '%s': expected PTR, got %u.\n",
+                       map_name, name, btf_kind(t));
                return false;
        }
 
        arr_t = btf__type_by_id(btf, t->type);
        if (!arr_t) {
-               pr_warning("map '%s': attr '%s': type [%u] not found.\n",
-                          map_name, name, t->type);
+               pr_warn("map '%s': attr '%s': type [%u] not found.\n",
+                       map_name, name, t->type);
                return false;
        }
        if (!btf_is_array(arr_t)) {
-               pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n",
-                          map_name, name, btf_kind(arr_t));
+               pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n",
+                       map_name, name, btf_kind(arr_t));
                return false;
        }
        arr_info = btf_array(arr_t);
@@ -1069,10 +1093,32 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
        return true;
 }
 
+static int build_map_pin_path(struct bpf_map *map, const char *path)
+{
+       char buf[PATH_MAX];
+       int err, len;
+
+       if (!path)
+               path = "/sys/fs/bpf";
+
+       len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
+       if (len < 0)
+               return -EINVAL;
+       else if (len >= PATH_MAX)
+               return -ENAMETOOLONG;
+
+       err = bpf_map__set_pin_path(map, buf);
+       if (err)
+               return err;
+
+       return 0;
+}
+
 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
                                         const struct btf_type *sec,
                                         int var_idx, int sec_idx,
-                                        const Elf_Data *data, bool strict)
+                                        const Elf_Data *data, bool strict,
+                                        const char *pin_root_path)
 {
        const struct btf_type *var, *def, *t;
        const struct btf_var_secinfo *vi;
@@ -1089,33 +1135,33 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
        vlen = btf_vlen(var);
 
        if (map_name == NULL || map_name[0] == '\0') {
-               pr_warning("map #%d: empty name.\n", var_idx);
+               pr_warn("map #%d: empty name.\n", var_idx);
                return -EINVAL;
        }
        if ((__u64)vi->offset + vi->size > data->d_size) {
-               pr_warning("map '%s' BTF data is corrupted.\n", map_name);
+               pr_warn("map '%s' BTF data is corrupted.\n", map_name);
                return -EINVAL;
        }
        if (!btf_is_var(var)) {
-               pr_warning("map '%s': unexpected var kind %u.\n",
-                          map_name, btf_kind(var));
+               pr_warn("map '%s': unexpected var kind %u.\n",
+                       map_name, btf_kind(var));
                return -EINVAL;
        }
        if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
            var_extra->linkage != BTF_VAR_STATIC) {
-               pr_warning("map '%s': unsupported var linkage %u.\n",
-                          map_name, var_extra->linkage);
+               pr_warn("map '%s': unsupported var linkage %u.\n",
+                       map_name, var_extra->linkage);
                return -EOPNOTSUPP;
        }
 
        def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
        if (!btf_is_struct(def)) {
-               pr_warning("map '%s': unexpected def kind %u.\n",
-                          map_name, btf_kind(var));
+               pr_warn("map '%s': unexpected def kind %u.\n",
+                       map_name, btf_kind(var));
                return -EINVAL;
        }
        if (def->size > vi->size) {
-               pr_warning("map '%s': invalid def size.\n", map_name);
+               pr_warn("map '%s': invalid def size.\n", map_name);
                return -EINVAL;
        }
 
@@ -1124,7 +1170,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
                return PTR_ERR(map);
        map->name = strdup(map_name);
        if (!map->name) {
-               pr_warning("map '%s': failed to alloc map name.\n", map_name);
+               pr_warn("map '%s': failed to alloc map name.\n", map_name);
                return -ENOMEM;
        }
        map->libbpf_type = LIBBPF_MAP_UNSPEC;
@@ -1140,8 +1186,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
                const char *name = btf__name_by_offset(obj->btf, m->name_off);
 
                if (!name) {
-                       pr_warning("map '%s': invalid field #%d.\n",
-                                  map_name, i);
+                       pr_warn("map '%s': invalid field #%d.\n", map_name, i);
                        return -EINVAL;
                }
                if (strcmp(name, "type") == 0) {
@@ -1171,8 +1216,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
                        pr_debug("map '%s': found key_size = %u.\n",
                                 map_name, sz);
                        if (map->def.key_size && map->def.key_size != sz) {
-                               pr_warning("map '%s': conflicting key size %u != %u.\n",
-                                          map_name, map->def.key_size, sz);
+                               pr_warn("map '%s': conflicting key size %u != %u.\n",
+                                       map_name, map->def.key_size, sz);
                                return -EINVAL;
                        }
                        map->def.key_size = sz;
@@ -1181,26 +1226,26 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 
                        t = btf__type_by_id(obj->btf, m->type);
                        if (!t) {
-                               pr_warning("map '%s': key type [%d] not found.\n",
-                                          map_name, m->type);
+                               pr_warn("map '%s': key type [%d] not found.\n",
+                                       map_name, m->type);
                                return -EINVAL;
                        }
                        if (!btf_is_ptr(t)) {
-                               pr_warning("map '%s': key spec is not PTR: %u.\n",
-                                          map_name, btf_kind(t));
+                               pr_warn("map '%s': key spec is not PTR: %u.\n",
+                                       map_name, btf_kind(t));
                                return -EINVAL;
                        }
                        sz = btf__resolve_size(obj->btf, t->type);
                        if (sz < 0) {
-                               pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n",
-                                          map_name, t->type, sz);
+                               pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n",
+                                       map_name, t->type, sz);
                                return sz;
                        }
                        pr_debug("map '%s': found key [%u], sz = %lld.\n",
                                 map_name, t->type, sz);
                        if (map->def.key_size && map->def.key_size != sz) {
-                               pr_warning("map '%s': conflicting key size %u != %lld.\n",
-                                          map_name, map->def.key_size, sz);
+                               pr_warn("map '%s': conflicting key size %u != %lld.\n",
+                                       map_name, map->def.key_size, sz);
                                return -EINVAL;
                        }
                        map->def.key_size = sz;
@@ -1214,8 +1259,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
                        pr_debug("map '%s': found value_size = %u.\n",
                                 map_name, sz);
                        if (map->def.value_size && map->def.value_size != sz) {
-                               pr_warning("map '%s': conflicting value size %u != %u.\n",
-                                          map_name, map->def.value_size, sz);
+                               pr_warn("map '%s': conflicting value size %u != %u.\n",
+                                       map_name, map->def.value_size, sz);
                                return -EINVAL;
                        }
                        map->def.value_size = sz;
@@ -1224,34 +1269,58 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 
                        t = btf__type_by_id(obj->btf, m->type);
                        if (!t) {
-                               pr_warning("map '%s': value type [%d] not found.\n",
-                                          map_name, m->type);
+                               pr_warn("map '%s': value type [%d] not found.\n",
+                                       map_name, m->type);
                                return -EINVAL;
                        }
                        if (!btf_is_ptr(t)) {
-                               pr_warning("map '%s': value spec is not PTR: %u.\n",
-                                          map_name, btf_kind(t));
+                               pr_warn("map '%s': value spec is not PTR: %u.\n",
+                                       map_name, btf_kind(t));
                                return -EINVAL;
                        }
                        sz = btf__resolve_size(obj->btf, t->type);
                        if (sz < 0) {
-                               pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n",
-                                          map_name, t->type, sz);
+                               pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n",
+                                       map_name, t->type, sz);
                                return sz;
                        }
                        pr_debug("map '%s': found value [%u], sz = %lld.\n",
                                 map_name, t->type, sz);
                        if (map->def.value_size && map->def.value_size != sz) {
-                               pr_warning("map '%s': conflicting value size %u != %lld.\n",
-                                          map_name, map->def.value_size, sz);
+                               pr_warn("map '%s': conflicting value size %u != %lld.\n",
+                                       map_name, map->def.value_size, sz);
                                return -EINVAL;
                        }
                        map->def.value_size = sz;
                        map->btf_value_type_id = t->type;
+               } else if (strcmp(name, "pinning") == 0) {
+                       __u32 val;
+                       int err;
+
+                       if (!get_map_field_int(map_name, obj->btf, def, m,
+                                              &val))
+                               return -EINVAL;
+                       pr_debug("map '%s': found pinning = %u.\n",
+                                map_name, val);
+
+                       if (val != LIBBPF_PIN_NONE &&
+                           val != LIBBPF_PIN_BY_NAME) {
+                               pr_warn("map '%s': invalid pinning value %u.\n",
+                                       map_name, val);
+                               return -EINVAL;
+                       }
+                       if (val == LIBBPF_PIN_BY_NAME) {
+                               err = build_map_pin_path(map, pin_root_path);
+                               if (err) {
+                                       pr_warn("map '%s': couldn't build pin path.\n",
+                                               map_name);
+                                       return err;
+                               }
+                       }
                } else {
                        if (strict) {
-                               pr_warning("map '%s': unknown field '%s'.\n",
-                                          map_name, name);
+                               pr_warn("map '%s': unknown field '%s'.\n",
+                                       map_name, name);
                                return -ENOTSUP;
                        }
                        pr_debug("map '%s': ignoring unknown field '%s'.\n",
@@ -1260,14 +1329,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
        }
 
        if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
-               pr_warning("map '%s': map type isn't specified.\n", map_name);
+               pr_warn("map '%s': map type isn't specified.\n", map_name);
                return -EINVAL;
        }
 
        return 0;
 }
 
-static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
+                                         const char *pin_root_path)
 {
        const struct btf_type *sec = NULL;
        int nr_types, i, vlen, err;
@@ -1283,8 +1353,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
        if (scn)
                data = elf_getdata(scn, NULL);
        if (!scn || !data) {
-               pr_warning("failed to get Elf_Data from map section %d (%s)\n",
-                          obj->efile.maps_shndx, MAPS_ELF_SEC);
+               pr_warn("failed to get Elf_Data from map section %d (%s)\n",
+                       obj->efile.maps_shndx, MAPS_ELF_SEC);
                return -EINVAL;
        }
 
@@ -1301,7 +1371,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
        }
 
        if (!sec) {
-               pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
+               pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
                return -ENOENT;
        }
 
@@ -1309,7 +1379,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
        for (i = 0; i < vlen; i++) {
                err = bpf_object__init_user_btf_map(obj, sec, i,
                                                    obj->efile.btf_maps_shndx,
-                                                   data, strict);
+                                                   data, strict, pin_root_path);
                if (err)
                        return err;
        }
@@ -1317,16 +1387,17 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
        return 0;
 }
 
-static int bpf_object__init_maps(struct bpf_object *obj, int flags)
+static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
+                                const char *pin_root_path)
 {
-       bool strict = !(flags & MAPS_RELAX_COMPAT);
+       bool strict = !relaxed_maps;
        int err;
 
        err = bpf_object__init_user_maps(obj, strict);
        if (err)
                return err;
 
-       err = bpf_object__init_user_btf_maps(obj, strict);
+       err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
        if (err)
                return err;
 
@@ -1445,14 +1516,13 @@ static int bpf_object__init_btf(struct bpf_object *obj,
        if (btf_data) {
                obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
                if (IS_ERR(obj->btf)) {
-                       pr_warning("Error loading ELF section %s: %d.\n",
-                                  BTF_ELF_SEC, err);
+                       pr_warn("Error loading ELF section %s: %d.\n",
+                               BTF_ELF_SEC, err);
                        goto out;
                }
                err = btf__finalize_data(obj, obj->btf);
                if (err) {
-                       pr_warning("Error finalizing %s: %d.\n",
-                                  BTF_ELF_SEC, err);
+                       pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
                        goto out;
                }
        }
@@ -1465,8 +1535,8 @@ static int bpf_object__init_btf(struct bpf_object *obj,
                obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
                                            btf_ext_data->d_size);
                if (IS_ERR(obj->btf_ext)) {
-                       pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
-                                  BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+                       pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
+                               BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
                        obj->btf_ext = NULL;
                        goto out;
                }
@@ -1482,7 +1552,7 @@ out:
                obj->btf = NULL;
        }
        if (btf_required && !obj->btf) {
-               pr_warning("BTF is required, but is missing or corrupted.\n");
+               pr_warn("BTF is required, but is missing or corrupted.\n");
                return err == 0 ? -ENOENT : err;
        }
        return 0;
@@ -1500,8 +1570,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 
        err = btf__load(obj->btf);
        if (err) {
-               pr_warning("Error loading %s into kernel: %d.\n",
-                          BTF_ELF_SEC, err);
+               pr_warn("Error loading %s into kernel: %d.\n",
+                       BTF_ELF_SEC, err);
                btf__free(obj->btf);
                obj->btf = NULL;
                /* btf_ext can't exist without btf, so free it as well */
@@ -1516,7 +1586,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
        return 0;
 }
 
-static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
+static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
+                                  const char *pin_root_path)
 {
        Elf *elf = obj->efile.elf;
        GElf_Ehdr *ep = &obj->efile.ehdr;
@@ -1527,7 +1598,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 
        /* Elf is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
-               pr_warning("failed to get e_shstrndx from %s\n", obj->path);
+               pr_warn("failed to get e_shstrndx from %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
 
@@ -1538,22 +1609,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 
                idx++;
                if (gelf_getshdr(scn, &sh) != &sh) {
-                       pr_warning("failed to get section(%d) header from %s\n",
-                                  idx, obj->path);
+                       pr_warn("failed to get section(%d) header from %s\n",
+                               idx, obj->path);
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
                name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
                if (!name) {
-                       pr_warning("failed to get section(%d) name from %s\n",
-                                  idx, obj->path);
+                       pr_warn("failed to get section(%d) name from %s\n",
+                               idx, obj->path);
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
                data = elf_getdata(scn, 0);
                if (!data) {
-                       pr_warning("failed to get section(%d) data from %s(%s)\n",
-                                  idx, name, obj->path);
+                       pr_warn("failed to get section(%d) data from %s(%s)\n",
+                               idx, name, obj->path);
                        return -LIBBPF_ERRNO__FORMAT;
                }
                pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
@@ -1583,8 +1654,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
                        btf_ext_data = data;
                } else if (sh.sh_type == SHT_SYMTAB) {
                        if (obj->efile.symbols) {
-                               pr_warning("bpf: multiple SYMTAB in %s\n",
-                                          obj->path);
+                               pr_warn("bpf: multiple SYMTAB in %s\n",
+                                       obj->path);
                                return -LIBBPF_ERRNO__FORMAT;
                        }
                        obj->efile.symbols = data;
@@ -1600,8 +1671,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
                                        char *cp = libbpf_strerror_r(-err, errmsg,
                                                                     sizeof(errmsg));
 
-                                       pr_warning("failed to alloc program %s (%s): %s",
-                                                  name, obj->path, cp);
+                                       pr_warn("failed to alloc program %s (%s): %s",
+                                               name, obj->path, cp);
                                        return err;
                                }
                        } else if (strcmp(name, ".data") == 0) {
@@ -1628,7 +1699,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
                        reloc = reallocarray(reloc, nr_reloc + 1,
                                             sizeof(*obj->efile.reloc));
                        if (!reloc) {
-                               pr_warning("realloc failed\n");
+                               pr_warn("realloc failed\n");
                                return -ENOMEM;
                        }
 
@@ -1645,13 +1716,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
                }
        }
 
-       if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
-               pr_warning("Corrupted ELF file: index of strtab invalid\n");
+       if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
+               pr_warn("Corrupted ELF file: index of strtab invalid\n");
                return -LIBBPF_ERRNO__FORMAT;
        }
        err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
        if (!err)
-               err = bpf_object__init_maps(obj, flags);
+               err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
        if (!err)
                err = bpf_object__sanitize_and_load_btf(obj);
        if (!err)
@@ -1736,7 +1807,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 
        prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
        if (!prog->reloc_desc) {
-               pr_warning("failed to alloc memory in relocation\n");
+               pr_warn("failed to alloc memory in relocation\n");
                return -ENOMEM;
        }
        prog->nr_reloc = nrels;
@@ -1752,13 +1823,13 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
                GElf_Rel rel;
 
                if (!gelf_getrel(data, i, &rel)) {
-                       pr_warning("relocation: failed to get %d reloc\n", i);
+                       pr_warn("relocation: failed to get %d reloc\n", i);
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
                if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
-                       pr_warning("relocation: symbol %"PRIx64" not found\n",
-                                  GELF_R_SYM(rel.r_info));
+                       pr_warn("relocation: symbol %"PRIx64" not found\n",
+                               GELF_R_SYM(rel.r_info));
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
@@ -1775,20 +1846,20 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
                         insn_idx, shdr_idx);
 
                if (shdr_idx >= SHN_LORESERVE) {
-                       pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n",
-                                  name, shdr_idx, insn_idx,
-                                  insns[insn_idx].code);
+                       pr_warn("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n",
+                               name, shdr_idx, insn_idx,
+                               insns[insn_idx].code);
                        return -LIBBPF_ERRNO__RELOC;
                }
                if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
-                       pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
-                                  prog->section_name, shdr_idx);
+                       pr_warn("Program '%s' contains unrecognized relo data pointing to section %u\n",
+                               prog->section_name, shdr_idx);
                        return -LIBBPF_ERRNO__RELOC;
                }
 
                if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
                        if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
-                               pr_warning("incorrect bpf_call opcode\n");
+                               pr_warn("incorrect bpf_call opcode\n");
                                return -LIBBPF_ERRNO__RELOC;
                        }
                        prog->reloc_desc[i].type = RELO_CALL;
@@ -1799,8 +1870,8 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
                }
 
                if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
-                       pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
-                                  insn_idx, insns[insn_idx].code);
+                       pr_warn("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
+                               insn_idx, insns[insn_idx].code);
                        return -LIBBPF_ERRNO__RELOC;
                }
 
@@ -1809,13 +1880,13 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
                        type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
                        if (type != LIBBPF_MAP_UNSPEC) {
                                if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) {
-                                       pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
-                                                  name, insn_idx, insns[insn_idx].code);
+                                       pr_warn("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
+                                               name, insn_idx, insns[insn_idx].code);
                                        return -LIBBPF_ERRNO__RELOC;
                                }
                                if (!obj->caps.global_data) {
-                                       pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
-                                                  name, insn_idx);
+                                       pr_warn("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
+                                               name, insn_idx);
                                        return -LIBBPF_ERRNO__RELOC;
                                }
                        }
@@ -1836,8 +1907,8 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
                        }
 
                        if (map_idx >= nr_maps) {
-                               pr_warning("bpf relocation: map_idx %d larger than %d\n",
-                                          (int)map_idx, (int)nr_maps - 1);
+                               pr_warn("bpf relocation: map_idx %d larger than %d\n",
+                                       (int)map_idx, (int)nr_maps - 1);
                                return -LIBBPF_ERRNO__RELOC;
                        }
 
@@ -1897,16 +1968,22 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
                return -errno;
 
        new_fd = open("/", O_RDONLY | O_CLOEXEC);
-       if (new_fd < 0)
+       if (new_fd < 0) {
+               err = -errno;
                goto err_free_new_name;
+       }
 
        new_fd = dup3(fd, new_fd, O_CLOEXEC);
-       if (new_fd < 0)
+       if (new_fd < 0) {
+               err = -errno;
                goto err_close_new_fd;
+       }
 
        err = zclose(map->fd);
-       if (err)
+       if (err) {
+               err = -errno;
                goto err_close_new_fd;
+       }
        free(map->name);
 
        map->fd = new_fd;
@@ -1925,7 +2002,7 @@ err_close_new_fd:
        close(new_fd);
 err_free_new_name:
        free(new_name);
-       return -errno;
+       return err;
 }
 
 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
@@ -1964,8 +2041,8 @@ bpf_object__probe_name(struct bpf_object *obj)
        ret = bpf_load_program_xattr(&attr, NULL, 0);
        if (ret < 0) {
                cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-               pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
-                          __func__, cp, errno);
+               pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
+                       __func__, cp, errno);
                return -errno;
        }
        close(ret);
@@ -2005,8 +2082,8 @@ bpf_object__probe_global_data(struct bpf_object *obj)
        map = bpf_create_map_xattr(&map_attr);
        if (map < 0) {
                cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-               pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n",
-                          __func__, cp, errno);
+               pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+                       __func__, cp, errno);
                return -errno;
        }
 
@@ -2101,6 +2178,66 @@ bpf_object__probe_caps(struct bpf_object *obj)
        return 0;
 }
 
+static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
+{
+       struct bpf_map_info map_info = {};
+       char msg[STRERR_BUFSIZE];
+       __u32 map_info_len;
+
+       map_info_len = sizeof(map_info);
+
+       if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
+               pr_warn("failed to get map info for map FD %d: %s\n",
+                       map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
+               return false;
+       }
+
+       return (map_info.type == map->def.type &&
+               map_info.key_size == map->def.key_size &&
+               map_info.value_size == map->def.value_size &&
+               map_info.max_entries == map->def.max_entries &&
+               map_info.map_flags == map->def.map_flags);
+}
+
+static int
+bpf_object__reuse_map(struct bpf_map *map)
+{
+       char *cp, errmsg[STRERR_BUFSIZE];
+       int err, pin_fd;
+
+       pin_fd = bpf_obj_get(map->pin_path);
+       if (pin_fd < 0) {
+               err = -errno;
+               if (err == -ENOENT) {
+                       pr_debug("found no pinned map to reuse at '%s'\n",
+                                map->pin_path);
+                       return 0;
+               }
+
+               cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+               pr_warn("couldn't retrieve pinned map '%s': %s\n",
+                       map->pin_path, cp);
+               return err;
+       }
+
+       if (!map_is_reuse_compat(map, pin_fd)) {
+               pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
+                       map->pin_path);
+               close(pin_fd);
+               return -EINVAL;
+       }
+
+       err = bpf_map__reuse_fd(map, pin_fd);
+       if (err) {
+               close(pin_fd);
+               return err;
+       }
+       map->pinned = true;
+       pr_debug("reused pinned map at '%s'\n", map->pin_path);
+
+       return 0;
+}
+
 static int
 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 {
@@ -2121,8 +2258,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
                err = bpf_map_freeze(map->fd);
                if (err) {
                        cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-                       pr_warning("Error freezing map(%s) as read-only: %s\n",
-                                  map->name, cp);
+                       pr_warn("Error freezing map(%s) as read-only: %s\n",
+                               map->name, cp);
                        err = 0;
                }
        }
@@ -2143,6 +2280,15 @@ bpf_object__create_maps(struct bpf_object *obj)
                char *cp, errmsg[STRERR_BUFSIZE];
                int *pfd = &map->fd;
 
+               if (map->pin_path) {
+                       err = bpf_object__reuse_map(map);
+                       if (err) {
+                               pr_warn("error reusing pinned map %s\n",
+                                       map->name);
+                               return err;
+                       }
+               }
+
                if (map->fd >= 0) {
                        pr_debug("skip map create (preset) %s: fd=%d\n",
                                 map->name, map->fd);
@@ -2161,8 +2307,8 @@ bpf_object__create_maps(struct bpf_object *obj)
                        if (!nr_cpus)
                                nr_cpus = libbpf_num_possible_cpus();
                        if (nr_cpus < 0) {
-                               pr_warning("failed to determine number of system CPUs: %d\n",
-                                          nr_cpus);
+                               pr_warn("failed to determine number of system CPUs: %d\n",
+                                       nr_cpus);
                                err = nr_cpus;
                                goto err_out;
                        }
@@ -2190,8 +2336,8 @@ bpf_object__create_maps(struct bpf_object *obj)
                                 create_attr.btf_value_type_id)) {
                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-                       pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
-                                  map->name, cp, err);
+                       pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+                               map->name, cp, err);
                        create_attr.btf_fd = 0;
                        create_attr.btf_key_type_id = 0;
                        create_attr.btf_value_type_id = 0;
@@ -2206,8 +2352,8 @@ bpf_object__create_maps(struct bpf_object *obj)
                        err = -errno;
 err_out:
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-                       pr_warning("failed to create map (name: '%s'): %s(%d)\n",
-                                  map->name, cp, err);
+                       pr_warn("failed to create map (name: '%s'): %s(%d)\n",
+                               map->name, cp, err);
                        for (j = 0; j < i; j++)
                                zclose(obj->maps[j].fd);
                        return err;
@@ -2221,6 +2367,15 @@ err_out:
                        }
                }
 
+               if (map->pin_path && !map->pinned) {
+                       err = bpf_map__pin(map, NULL);
+                       if (err) {
+                               pr_warn("failed to auto-pin map name '%s' at '%s'\n",
+                                       map->name, map->pin_path);
+                               return err;
+                       }
+               }
+
                pr_debug("created map %s: fd=%d\n", map->name, *pfd);
        }
 
@@ -2232,8 +2387,8 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,
                        void *btf_prog_info, const char *info_name)
 {
        if (err != -ENOENT) {
-               pr_warning("Error in loading %s for sec %s.\n",
-                          info_name, prog->section_name);
+               pr_warn("Error in loading %s for sec %s.\n",
+                       info_name, prog->section_name);
                return err;
        }
 
@@ -2244,14 +2399,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,
                 * Some info has already been found but has problem
                 * in the last btf_ext reloc. Must have to error out.
                 */
-               pr_warning("Error in relocating %s for sec %s.\n",
-                          info_name, prog->section_name);
+               pr_warn("Error in relocating %s for sec %s.\n",
+                       info_name, prog->section_name);
                return err;
        }
 
        /* Have problem loading the very first info. Ignore the rest. */
-       pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n",
-                  info_name, prog->section_name, info_name);
+       pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
+               info_name, prog->section_name, info_name);
        return 0;
 }
 
@@ -2325,7 +2480,7 @@ static bool str_is_empty(const char *s)
 }
 
 /*
- * Turn bpf_offset_reloc into a low- and high-level spec representation,
+ * Turn bpf_field_reloc into a low- and high-level spec representation,
  * validating correctness along the way, as well as calculating resulting
  * field offset (in bytes), specified by accessor string. Low-level spec
  * captures every single level of nestedness, including traversing anonymous
@@ -2452,8 +2607,8 @@ static int bpf_core_spec_parse(const struct btf *btf,
                                return sz;
                        spec->offset += access_idx * sz;
                } else {
-                       pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
-                                  type_id, spec_str, i, id, btf_kind(t));
+                       pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
+                               type_id, spec_str, i, id, btf_kind(t));
                        return -EINVAL;
                }
        }
@@ -2597,8 +2752,8 @@ recur:
                targ_id = btf_array(targ_type)->type;
                goto recur;
        default:
-               pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n",
-                          btf_kind(local_type), local_id, targ_id);
+               pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+                       btf_kind(local_type), local_id, targ_id);
                return 0;
        }
 }
@@ -2770,26 +2925,54 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 
 /*
  * Patch relocatable BPF instruction.
- * Expected insn->imm value is provided for validation, as well as the new
- * relocated value.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For field existence relocation target spec will be NULL if field is not
+ * found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
  *
  * Currently three kinds of BPF instructions are supported:
  * 1. rX = <imm> (assignment with immediate operand);
  * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. *(rX) = <imm> (indirect memory assignment with immediate operand).
- *
- * If actual insn->imm value is wrong, bail out.
  */
-static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off,
-                              __u32 orig_off, __u32 new_off)
+static int bpf_core_reloc_insn(struct bpf_program *prog,
+                              const struct bpf_field_reloc *relo,
+                              const struct bpf_core_spec *local_spec,
+                              const struct bpf_core_spec *targ_spec)
 {
+       __u32 orig_val, new_val;
        struct bpf_insn *insn;
        int insn_idx;
        __u8 class;
 
-       if (insn_off % sizeof(struct bpf_insn))
+       if (relo->insn_off % sizeof(struct bpf_insn))
                return -EINVAL;
-       insn_idx = insn_off / sizeof(struct bpf_insn);
+       insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+
+       switch (relo->kind) {
+       case BPF_FIELD_BYTE_OFFSET:
+               orig_val = local_spec->offset;
+               if (targ_spec) {
+                       new_val = targ_spec->offset;
+               } else {
+                       pr_warn("prog '%s': patching insn #%d w/ failed reloc, imm %d -> %d\n",
+                               bpf_program__title(prog, false), insn_idx,
+                               orig_val, -1);
+                       new_val = (__u32)-1;
+               }
+               break;
+       case BPF_FIELD_EXISTS:
+               orig_val = 1; /* can't generate EXISTS relo w/o local field */
+               new_val = targ_spec ? 1 : 0;
+               break;
+       default:
+               pr_warn("prog '%s': unknown relo %d at insn #%d'\n",
+                       bpf_program__title(prog, false),
+                       relo->kind, insn_idx);
+               return -EINVAL;
+       }
 
        insn = &prog->insns[insn_idx];
        class = BPF_CLASS(insn->code);
@@ -2797,19 +2980,20 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off,
        if (class == BPF_ALU || class == BPF_ALU64) {
                if (BPF_SRC(insn->code) != BPF_K)
                        return -EINVAL;
-               if (insn->imm != orig_off)
+               if (insn->imm != orig_val)
                        return -EINVAL;
-               insn->imm = new_off;
+               insn->imm = new_val;
                pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n",
                         bpf_program__title(prog, false),
-                        insn_idx, orig_off, new_off);
+                        insn_idx, orig_val, new_val);
        } else {
-               pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
-                          bpf_program__title(prog, false),
-                          insn_idx, insn->code, insn->src_reg, insn->dst_reg,
-                          insn->off, insn->imm);
+               pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
+                       bpf_program__title(prog, false),
+                       insn_idx, insn->code, insn->src_reg, insn->dst_reg,
+                       insn->off, insn->imm);
                return -EINVAL;
        }
+
        return 0;
 }
 
@@ -2895,7 +3079,7 @@ static struct btf *bpf_core_find_kernel_btf(void)
                return btf;
        }
 
-       pr_warning("failed to find valid kernel BTF\n");
+       pr_warn("failed to find valid kernel BTF\n");
        return ERR_PTR(-ESRCH);
 }
 
@@ -2976,7 +3160,7 @@ static void *u32_as_hash_key(__u32 x)
  *    types should be compatible (see bpf_core_fields_are_compat for details).
  * 3. It is supported and expected that there might be multiple flavors
  *    matching the spec. As long as all the specs resolve to the same set of
- *    offsets across all candidates, there is not error. If there is any
+ *    offsets across all candidates, there is no error. If there is any
  *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate
  *    imprefection of BTF deduplication, which can cause slight duplication of
  *    the same BTF type, if some directly or indirectly referenced (by
@@ -2991,12 +3175,12 @@ static void *u32_as_hash_key(__u32 x)
  *    CPU-wise compared to prebuilding a map from all local type names to
  *    a list of candidate type names. It's also sped up by caching resolved
  *    list of matching candidates per each local "root" type ID, that has at
- *    least one bpf_offset_reloc associated with it. This list is shared
+ *    least one bpf_field_reloc associated with it. This list is shared
  *    between multiple relocations for the same type ID and is updated as some
  *    of the candidates are pruned due to structural incompatibility.
  */
-static int bpf_core_reloc_offset(struct bpf_program *prog,
-                                const struct bpf_offset_reloc *relo,
+static int bpf_core_reloc_field(struct bpf_program *prog,
+                                const struct bpf_field_reloc *relo,
                                 int relo_idx,
                                 const struct btf *local_btf,
                                 const struct btf *targ_btf,
@@ -3027,9 +3211,9 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,
 
        err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
        if (err) {
-               pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
-                          prog_name, relo_idx, local_id, local_name, spec_str,
-                          err);
+               pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
+                       prog_name, relo_idx, local_id, local_name, spec_str,
+                       err);
                return -EINVAL;
        }
 
@@ -3040,9 +3224,9 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,
        if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
                cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
                if (IS_ERR(cand_ids)) {
-                       pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
-                                  prog_name, relo_idx, local_id, local_name,
-                                  PTR_ERR(cand_ids));
+                       pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
+                               prog_name, relo_idx, local_id, local_name,
+                               PTR_ERR(cand_ids));
                        return PTR_ERR(cand_ids);
                }
                err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
@@ -3064,8 +3248,8 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,
                bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
                libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
                if (err < 0) {
-                       pr_warning("prog '%s': relo #%d: matching error: %d\n",
-                                  prog_name, relo_idx, err);
+                       pr_warn("prog '%s': relo #%d: matching error: %d\n",
+                               prog_name, relo_idx, err);
                        return err;
                }
                if (err == 0)
@@ -3077,27 +3261,38 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,
                        /* if there are many candidates, they should all
                         * resolve to the same offset
                         */
-                       pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
-                                  prog_name, relo_idx, cand_spec.offset,
-                                  targ_spec.offset);
+                       pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
+                               prog_name, relo_idx, cand_spec.offset,
+                               targ_spec.offset);
                        return -EINVAL;
                }
 
                cand_ids->data[j++] = cand_spec.spec[0].type_id;
        }
 
-       cand_ids->len = j;
-       if (cand_ids->len == 0) {
-               pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
-                          prog_name, relo_idx, local_id, local_name, spec_str);
+       /*
+        * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is
+        * requested, it's expected that we might not find any candidates.
+        * In this case, if field wasn't found in any candidate, the list of
+        * candidates shouldn't change at all, we'll just handle relocating
+        * appropriately, depending on relo's kind.
+        */
+       if (j > 0)
+               cand_ids->len = j;
+
+       if (j == 0 && !prog->obj->relaxed_core_relocs &&
+           relo->kind != BPF_FIELD_EXISTS) {
+               pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
+                       prog_name, relo_idx, local_id, local_name, spec_str);
                return -ESRCH;
        }
 
-       err = bpf_core_reloc_insn(prog, relo->insn_off,
-                                 local_spec.offset, targ_spec.offset);
+       /* bpf_core_reloc_insn should know how to handle missing targ_spec */
+       err = bpf_core_reloc_insn(prog, relo, &local_spec,
+                                 j ? &targ_spec : NULL);
        if (err) {
-               pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
-                          prog_name, relo_idx, relo->insn_off, err);
+               pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
+                       prog_name, relo_idx, relo->insn_off, err);
                return -EINVAL;
        }
 
@@ -3105,10 +3300,10 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,
 }
 
 static int
-bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
+bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
 {
        const struct btf_ext_info_sec *sec;
-       const struct bpf_offset_reloc *rec;
+       const struct bpf_field_reloc *rec;
        const struct btf_ext_info *seg;
        struct hashmap_entry *entry;
        struct hashmap *cand_cache = NULL;
@@ -3122,8 +3317,7 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
        else
                targ_btf = bpf_core_find_kernel_btf();
        if (IS_ERR(targ_btf)) {
-               pr_warning("failed to get target BTF: %ld\n",
-                          PTR_ERR(targ_btf));
+               pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
                return PTR_ERR(targ_btf);
        }
 
@@ -3133,7 +3327,7 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
                goto out;
        }
 
-       seg = &obj->btf_ext->offset_reloc_info;
+       seg = &obj->btf_ext->field_reloc_info;
        for_each_btf_ext_sec(seg, sec) {
                sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
                if (str_is_empty(sec_name)) {
@@ -3142,8 +3336,8 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
                }
                prog = bpf_object__find_program_by_title(obj, sec_name);
                if (!prog) {
-                       pr_warning("failed to find program '%s' for CO-RE offset relocation\n",
-                                  sec_name);
+                       pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
+                               sec_name);
                        err = -EINVAL;
                        goto out;
                }
@@ -3152,11 +3346,11 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
                         sec_name, sec->num_info);
 
                for_each_btf_ext_rec(seg, sec, i, rec) {
-                       err = bpf_core_reloc_offset(prog, rec, i, obj->btf,
-                                                   targ_btf, cand_cache);
+                       err = bpf_core_reloc_field(prog, rec, i, obj->btf,
+                                                  targ_btf, cand_cache);
                        if (err) {
-                               pr_warning("prog '%s': relo #%d: failed to relocate: %d\n",
-                                          sec_name, i, err);
+                               pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
+                                       sec_name, i, err);
                                goto out;
                        }
                }
@@ -3178,8 +3372,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 {
        int err = 0;
 
-       if (obj->btf_ext->offset_reloc_info.len)
-               err = bpf_core_reloc_offsets(obj, targ_btf_path);
+       if (obj->btf_ext->field_reloc_info.len)
+               err = bpf_core_reloc_fields(obj, targ_btf_path);
 
        return err;
 }
@@ -3197,21 +3391,21 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
                return -LIBBPF_ERRNO__RELOC;
 
        if (prog->idx == obj->efile.text_shndx) {
-               pr_warning("relo in .text insn %d into off %d\n",
-                          relo->insn_idx, relo->text_off);
+               pr_warn("relo in .text insn %d into off %d\n",
+                       relo->insn_idx, relo->text_off);
                return -LIBBPF_ERRNO__RELOC;
        }
 
        if (prog->main_prog_cnt == 0) {
                text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
                if (!text) {
-                       pr_warning("no .text section found yet relo into text exist\n");
+                       pr_warn("no .text section found yet relo into text exist\n");
                        return -LIBBPF_ERRNO__RELOC;
                }
                new_cnt = prog->insns_cnt + text->insns_cnt;
                new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
                if (!new_insn) {
-                       pr_warning("oom in prog realloc\n");
+                       pr_warn("oom in prog realloc\n");
                        return -ENOMEM;
                }
 
@@ -3266,8 +3460,8 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
                        map_idx = prog->reloc_desc[i].map_idx;
 
                        if (insn_idx + 1 >= (int)prog->insns_cnt) {
-                               pr_warning("relocation out of range: '%s'\n",
-                                          prog->section_name);
+                               pr_warn("relocation out of range: '%s'\n",
+                                       prog->section_name);
                                return -LIBBPF_ERRNO__RELOC;
                        }
 
@@ -3301,8 +3495,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
        if (obj->btf_ext) {
                err = bpf_object__relocate_core(obj, targ_btf_path);
                if (err) {
-                       pr_warning("failed to perform CO-RE relocations: %d\n",
-                                  err);
+                       pr_warn("failed to perform CO-RE relocations: %d\n",
+                               err);
                        return err;
                }
        }
@@ -3311,8 +3505,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 
                err = bpf_program__relocate(prog, obj);
                if (err) {
-                       pr_warning("failed to relocate '%s'\n",
-                                  prog->section_name);
+                       pr_warn("failed to relocate '%s'\n", prog->section_name);
                        return err;
                }
        }
@@ -3324,7 +3517,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
        int i, err;
 
        if (!obj_elf_valid(obj)) {
-               pr_warning("Internal error: elf object is closed\n");
+               pr_warn("Internal error: elf object is closed\n");
                return -LIBBPF_ERRNO__INTERNAL;
        }
 
@@ -3335,13 +3528,13 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
                struct bpf_program *prog;
 
                if (shdr->sh_type != SHT_REL) {
-                       pr_warning("internal error at %d\n", __LINE__);
+                       pr_warn("internal error at %d\n", __LINE__);
                        return -LIBBPF_ERRNO__INTERNAL;
                }
 
                prog = bpf_object__find_prog_by_idx(obj, idx);
                if (!prog) {
-                       pr_warning("relocation failed: no section(%d)\n", idx);
+                       pr_warn("relocation failed: no section(%d)\n", idx);
                        return -LIBBPF_ERRNO__RELOC;
                }
 
@@ -3389,11 +3582,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
        load_attr.line_info_cnt = prog->line_info_cnt;
        load_attr.log_level = prog->log_level;
        load_attr.prog_flags = prog->prog_flags;
+       load_attr.attach_btf_id = prog->attach_btf_id;
 
 retry_load:
        log_buf = malloc(log_buf_size);
        if (!log_buf)
-               pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
+               pr_warn("Alloc log buffer for bpf loader error, continue without log\n");
 
        ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
 
@@ -3412,16 +3606,16 @@ retry_load:
        }
        ret = -LIBBPF_ERRNO__LOAD;
        cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-       pr_warning("load bpf program failed: %s\n", cp);
+       pr_warn("load bpf program failed: %s\n", cp);
 
        if (log_buf && log_buf[0] != '\0') {
                ret = -LIBBPF_ERRNO__VERIFY;
-               pr_warning("-- BEGIN DUMP LOG ---\n");
-               pr_warning("\n%s\n", log_buf);
-               pr_warning("-- END LOG --\n");
+               pr_warn("-- BEGIN DUMP LOG ---\n");
+               pr_warn("\n%s\n", log_buf);
+               pr_warn("-- END LOG --\n");
        } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
-               pr_warning("Program too large (%zu insns), at most %d insns\n",
-                          load_attr.insns_cnt, BPF_MAXINSNS);
+               pr_warn("Program too large (%zu insns), at most %d insns\n",
+                       load_attr.insns_cnt, BPF_MAXINSNS);
                ret = -LIBBPF_ERRNO__PROG2BIG;
        } else {
                /* Wrong program type? */
@@ -3455,14 +3649,14 @@ bpf_program__load(struct bpf_program *prog,
 
        if (prog->instances.nr < 0 || !prog->instances.fds) {
                if (prog->preprocessor) {
-                       pr_warning("Internal error: can't load program '%s'\n",
-                                  prog->section_name);
+                       pr_warn("Internal error: can't load program '%s'\n",
+                               prog->section_name);
                        return -LIBBPF_ERRNO__INTERNAL;
                }
 
                prog->instances.fds = malloc(sizeof(int));
                if (!prog->instances.fds) {
-                       pr_warning("Not enough memory for BPF fds\n");
+                       pr_warn("Not enough memory for BPF fds\n");
                        return -ENOMEM;
                }
                prog->instances.nr = 1;
@@ -3471,8 +3665,8 @@ bpf_program__load(struct bpf_program *prog,
 
        if (!prog->preprocessor) {
                if (prog->instances.nr != 1) {
-                       pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
-                                  prog->section_name, prog->instances.nr);
+                       pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
+                               prog->section_name, prog->instances.nr);
                }
                err = load_program(prog, prog->insns, prog->insns_cnt,
                                   license, kern_version, &fd);
@@ -3489,8 +3683,8 @@ bpf_program__load(struct bpf_program *prog,
                err = preprocessor(prog, i, prog->insns,
                                   prog->insns_cnt, &result);
                if (err) {
-                       pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
-                                  i, prog->section_name);
+                       pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
+                               i, prog->section_name);
                        goto out;
                }
 
@@ -3508,8 +3702,8 @@ bpf_program__load(struct bpf_program *prog,
                                   license, kern_version, &fd);
 
                if (err) {
-                       pr_warning("Loading the %dth instance of program '%s' failed\n",
-                                       i, prog->section_name);
+                       pr_warn("Loading the %dth instance of program '%s' failed\n",
+                               i, prog->section_name);
                        goto out;
                }
 
@@ -3519,8 +3713,7 @@ bpf_program__load(struct bpf_program *prog,
        }
 out:
        if (err)
-               pr_warning("failed to load program '%s'\n",
-                          prog->section_name);
+               pr_warn("failed to load program '%s'\n", prog->section_name);
        zfree(&prog->insns);
        prog->insns_cnt = 0;
        return err;
@@ -3551,93 +3744,99 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
        return 0;
 }
 
-static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
-{
-       switch (type) {
-       case BPF_PROG_TYPE_SOCKET_FILTER:
-       case BPF_PROG_TYPE_SCHED_CLS:
-       case BPF_PROG_TYPE_SCHED_ACT:
-       case BPF_PROG_TYPE_XDP:
-       case BPF_PROG_TYPE_CGROUP_SKB:
-       case BPF_PROG_TYPE_CGROUP_SOCK:
-       case BPF_PROG_TYPE_LWT_IN:
-       case BPF_PROG_TYPE_LWT_OUT:
-       case BPF_PROG_TYPE_LWT_XMIT:
-       case BPF_PROG_TYPE_LWT_SEG6LOCAL:
-       case BPF_PROG_TYPE_SOCK_OPS:
-       case BPF_PROG_TYPE_SK_SKB:
-       case BPF_PROG_TYPE_CGROUP_DEVICE:
-       case BPF_PROG_TYPE_SK_MSG:
-       case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
-       case BPF_PROG_TYPE_LIRC_MODE2:
-       case BPF_PROG_TYPE_SK_REUSEPORT:
-       case BPF_PROG_TYPE_FLOW_DISSECTOR:
-       case BPF_PROG_TYPE_UNSPEC:
-       case BPF_PROG_TYPE_TRACEPOINT:
-       case BPF_PROG_TYPE_RAW_TRACEPOINT:
-       case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
-       case BPF_PROG_TYPE_PERF_EVENT:
-       case BPF_PROG_TYPE_CGROUP_SYSCTL:
-       case BPF_PROG_TYPE_CGROUP_SOCKOPT:
-               return false;
-       case BPF_PROG_TYPE_KPROBE:
-       default:
-               return true;
-       }
-}
-
-static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
-{
-       if (needs_kver && obj->kern_version == 0) {
-               pr_warning("%s doesn't provide kernel version\n",
-                          obj->path);
-               return -LIBBPF_ERRNO__KVERSION;
-       }
-       return 0;
-}
+static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id);
 
 static struct bpf_object *
-__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
-                  bool needs_kver, int flags)
+__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
+                  struct bpf_object_open_opts *opts)
 {
+       const char *pin_root_path;
+       struct bpf_program *prog;
        struct bpf_object *obj;
+       const char *obj_name;
+       char tmp_name[64];
+       bool relaxed_maps;
        int err;
 
        if (elf_version(EV_CURRENT) == EV_NONE) {
-               pr_warning("failed to init libelf for %s\n", path);
+               pr_warn("failed to init libelf for %s\n",
+                       path ? : "(mem buf)");
                return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
        }
 
-       obj = bpf_object__new(path, obj_buf, obj_buf_sz);
+       if (!OPTS_VALID(opts, bpf_object_open_opts))
+               return ERR_PTR(-EINVAL);
+
+       obj_name = OPTS_GET(opts, object_name, path);
+       if (obj_buf) {
+               if (!obj_name) {
+                       snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+                                (unsigned long)obj_buf,
+                                (unsigned long)obj_buf_sz);
+                       obj_name = tmp_name;
+               }
+               path = obj_name;
+               pr_debug("loading object '%s' from buffer\n", obj_name);
+       }
+
+       obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
        if (IS_ERR(obj))
                return obj;
 
+       obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
+       relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
+       pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
+
        CHECK_ERR(bpf_object__elf_init(obj), err, out);
        CHECK_ERR(bpf_object__check_endianness(obj), err, out);
        CHECK_ERR(bpf_object__probe_caps(obj), err, out);
-       CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out);
+       CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path),
+                 err, out);
        CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
-       CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
-
        bpf_object__elf_finish(obj);
+
+       bpf_object__for_each_program(prog, obj) {
+               enum bpf_prog_type prog_type;
+               enum bpf_attach_type attach_type;
+               __u32 btf_id;
+
+               err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
+                                              &attach_type);
+               if (err == -ESRCH)
+                       /* couldn't guess, but user might manually specify */
+                       continue;
+               if (err)
+                       goto out;
+
+               bpf_program__set_type(prog, prog_type);
+               bpf_program__set_expected_attach_type(prog, attach_type);
+               if (prog_type == BPF_PROG_TYPE_TRACING) {
+                       err = libbpf_attach_btf_id_by_name(prog->section_name, &btf_id);
+                       if (err)
+                               goto out;
+                       prog->attach_btf_id = btf_id;
+               }
+       }
+
        return obj;
 out:
        bpf_object__close(obj);
        return ERR_PTR(err);
 }
 
-struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
-                                           int flags)
+static struct bpf_object *
+__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
 {
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+               .relaxed_maps = flags & MAPS_RELAX_COMPAT,
+       );
+
        /* param validation */
        if (!attr->file)
                return NULL;
 
        pr_debug("loading %s\n", attr->file);
-
-       return __bpf_object__open(attr->file, NULL, 0,
-                                 bpf_prog_type__needs_kver(attr->prog_type),
-                                 flags);
+       return __bpf_object__open(attr->file, NULL, 0, &opts);
 }
 
 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
@@ -3655,25 +3854,42 @@ struct bpf_object *bpf_object__open(const char *path)
        return bpf_object__open_xattr(&attr);
 }
 
-struct bpf_object *bpf_object__open_buffer(void *obj_buf,
-                                          size_t obj_buf_sz,
-                                          const char *name)
+struct bpf_object *
+bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)
 {
-       char tmp_name[64];
+       if (!path)
+               return ERR_PTR(-EINVAL);
 
-       /* param validation */
-       if (!obj_buf || obj_buf_sz <= 0)
-               return NULL;
+       pr_debug("loading %s\n", path);
 
-       if (!name) {
-               snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
-                        (unsigned long)obj_buf,
-                        (unsigned long)obj_buf_sz);
-               name = tmp_name;
-       }
-       pr_debug("loading object '%s' from buffer\n", name);
+       return __bpf_object__open(path, NULL, 0, opts);
+}
+
+struct bpf_object *
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+                    struct bpf_object_open_opts *opts)
+{
+       if (!obj_buf || obj_buf_sz == 0)
+               return ERR_PTR(-EINVAL);
+
+       return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+}
 
-       return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true);
+struct bpf_object *
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+                       const char *name)
+{
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+               .object_name = name,
+               /* wrong default, but backwards-compatible */
+               .relaxed_maps = true,
+       );
+
+       /* returning NULL is wrong, but backwards-compatible */
+       if (!obj_buf || obj_buf_sz == 0)
+               return NULL;
+
+       return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -3704,7 +3920,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
                return -EINVAL;
 
        if (obj->loaded) {
-               pr_warning("object should not be loaded twice\n");
+               pr_warn("object should not be loaded twice\n");
                return -EINVAL;
        }
 
@@ -3717,7 +3933,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        return 0;
 out:
        bpf_object__unload(obj);
-       pr_warning("failed to load object '%s'\n", obj->path);
+       pr_warn("failed to load object '%s'\n", obj->path);
        return err;
 }
 
@@ -3730,6 +3946,28 @@ int bpf_object__load(struct bpf_object *obj)
        return bpf_object__load_xattr(&attr);
 }
 
+static int make_parent_dir(const char *path)
+{
+       char *cp, errmsg[STRERR_BUFSIZE];
+       char *dname, *dir;
+       int err = 0;
+
+       dname = strdup(path);
+       if (dname == NULL)
+               return -ENOMEM;
+
+       dir = dirname(dname);
+       if (mkdir(dir, 0700) && errno != EEXIST)
+               err = -errno;
+
+       free(dname);
+       if (err) {
+               cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+               pr_warn("failed to mkdir %s: %s\n", path, cp);
+       }
+       return err;
+}
+
 static int check_path(const char *path)
 {
        char *cp, errmsg[STRERR_BUFSIZE];
@@ -3747,13 +3985,13 @@ static int check_path(const char *path)
        dir = dirname(dname);
        if (statfs(dir, &st_fs)) {
                cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-               pr_warning("failed to statfs %s: %s\n", dir, cp);
+               pr_warn("failed to statfs %s: %s\n", dir, cp);
                err = -errno;
        }
        free(dname);
 
        if (!err && st_fs.f_type != BPF_FS_MAGIC) {
-               pr_warning("specified path %s is not on BPF FS\n", path);
+               pr_warn("specified path %s is not on BPF FS\n", path);
                err = -EINVAL;
        }
 
@@ -3766,24 +4004,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
        char *cp, errmsg[STRERR_BUFSIZE];
        int err;
 
+       err = make_parent_dir(path);
+       if (err)
+               return err;
+
        err = check_path(path);
        if (err)
                return err;
 
        if (prog == NULL) {
-               pr_warning("invalid program pointer\n");
+               pr_warn("invalid program pointer\n");
                return -EINVAL;
        }
 
        if (instance < 0 || instance >= prog->instances.nr) {
-               pr_warning("invalid prog instance %d of prog %s (max %d)\n",
-                          instance, prog->section_name, prog->instances.nr);
+               pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+                       instance, prog->section_name, prog->instances.nr);
                return -EINVAL;
        }
 
        if (bpf_obj_pin(prog->instances.fds[instance], path)) {
                cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-               pr_warning("failed to pin program: %s\n", cp);
+               pr_warn("failed to pin program: %s\n", cp);
                return -errno;
        }
        pr_debug("pinned program '%s'\n", path);
@@ -3801,13 +4043,13 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
                return err;
 
        if (prog == NULL) {
-               pr_warning("invalid program pointer\n");
+               pr_warn("invalid program pointer\n");
                return -EINVAL;
        }
 
        if (instance < 0 || instance >= prog->instances.nr) {
-               pr_warning("invalid prog instance %d of prog %s (max %d)\n",
-                          instance, prog->section_name, prog->instances.nr);
+               pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+                       instance, prog->section_name, prog->instances.nr);
                return -EINVAL;
        }
 
@@ -3819,36 +4061,25 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
        return 0;
 }
 
-static int make_dir(const char *path)
-{
-       char *cp, errmsg[STRERR_BUFSIZE];
-       int err = 0;
-
-       if (mkdir(path, 0700) && errno != EEXIST)
-               err = -errno;
-
-       if (err) {
-               cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
-               pr_warning("failed to mkdir %s: %s\n", path, cp);
-       }
-       return err;
-}
-
 int bpf_program__pin(struct bpf_program *prog, const char *path)
 {
        int i, err;
 
+       err = make_parent_dir(path);
+       if (err)
+               return err;
+
        err = check_path(path);
        if (err)
                return err;
 
        if (prog == NULL) {
-               pr_warning("invalid program pointer\n");
+               pr_warn("invalid program pointer\n");
                return -EINVAL;
        }
 
        if (prog->instances.nr <= 0) {
-               pr_warning("no instances of prog %s to pin\n",
+               pr_warn("no instances of prog %s to pin\n",
                           prog->section_name);
                return -EINVAL;
        }
@@ -3858,10 +4089,6 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
                return bpf_program__pin_instance(prog, path, 0);
        }
 
-       err = make_dir(path);
-       if (err)
-               return err;
-
        for (i = 0; i < prog->instances.nr; i++) {
                char buf[PATH_MAX];
                int len;
@@ -3910,12 +4137,12 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
                return err;
 
        if (prog == NULL) {
-               pr_warning("invalid program pointer\n");
+               pr_warn("invalid program pointer\n");
                return -EINVAL;
        }
 
        if (prog->instances.nr <= 0) {
-               pr_warning("no instances of prog %s to pin\n",
+               pr_warn("no instances of prog %s to pin\n",
                           prog->section_name);
                return -EINVAL;
        }
@@ -3952,47 +4179,123 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
        char *cp, errmsg[STRERR_BUFSIZE];
        int err;
 
-       err = check_path(path);
-       if (err)
-               return err;
-
        if (map == NULL) {
-               pr_warning("invalid map pointer\n");
+               pr_warn("invalid map pointer\n");
                return -EINVAL;
        }
 
-       if (bpf_obj_pin(map->fd, path)) {
-               cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-               pr_warning("failed to pin map: %s\n", cp);
-               return -errno;
+       if (map->pin_path) {
+               if (path && strcmp(path, map->pin_path)) {
+                       pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+                               bpf_map__name(map), map->pin_path, path);
+                       return -EINVAL;
+               } else if (map->pinned) {
+                       pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
+                                bpf_map__name(map), map->pin_path);
+                       return 0;
+               }
+       } else {
+               if (!path) {
+                       pr_warn("missing a path to pin map '%s' at\n",
+                               bpf_map__name(map));
+                       return -EINVAL;
+               } else if (map->pinned) {
+                       pr_warn("map '%s' already pinned\n", bpf_map__name(map));
+                       return -EEXIST;
+               }
+
+               map->pin_path = strdup(path);
+               if (!map->pin_path) {
+                       err = -errno;
+                       goto out_err;
+               }
        }
 
-       pr_debug("pinned map '%s'\n", path);
+       err = make_parent_dir(map->pin_path);
+       if (err)
+               return err;
+
+       err = check_path(map->pin_path);
+       if (err)
+               return err;
+
+       if (bpf_obj_pin(map->fd, map->pin_path)) {
+               err = -errno;
+               goto out_err;
+       }
+
+       map->pinned = true;
+       pr_debug("pinned map '%s'\n", map->pin_path);
 
        return 0;
+
+out_err:
+       cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+       pr_warn("failed to pin map: %s\n", cp);
+       return err;
 }
 
 int bpf_map__unpin(struct bpf_map *map, const char *path)
 {
        int err;
 
-       err = check_path(path);
-       if (err)
-               return err;
-
        if (map == NULL) {
-               pr_warning("invalid map pointer\n");
+               pr_warn("invalid map pointer\n");
+               return -EINVAL;
+       }
+
+       if (map->pin_path) {
+               if (path && strcmp(path, map->pin_path)) {
+                       pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+                               bpf_map__name(map), map->pin_path, path);
+                       return -EINVAL;
+               }
+               path = map->pin_path;
+       } else if (!path) {
+               pr_warn("no path to unpin map '%s' from\n",
+                       bpf_map__name(map));
                return -EINVAL;
        }
 
+       err = check_path(path);
+       if (err)
+               return err;
+
        err = unlink(path);
        if (err != 0)
                return -errno;
-       pr_debug("unpinned map '%s'\n", path);
 
+       map->pinned = false;
+       pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
+
+       return 0;
+}
+
+int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
+{
+       char *new = NULL;
+
+       if (path) {
+               new = strdup(path);
+               if (!new)
+                       return -errno;
+       }
+
+       free(map->pin_path);
+       map->pin_path = new;
        return 0;
 }
 
+const char *bpf_map__get_pin_path(const struct bpf_map *map)
+{
+       return map->pin_path;
+}
+
+bool bpf_map__is_pinned(const struct bpf_map *map)
+{
+       return map->pinned;
+}
+
 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 {
        struct bpf_map *map;
@@ -4002,29 +4305,32 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
                return -ENOENT;
 
        if (!obj->loaded) {
-               pr_warning("object not yet loaded; load it first\n");
+               pr_warn("object not yet loaded; load it first\n");
                return -ENOENT;
        }
 
-       err = make_dir(path);
-       if (err)
-               return err;
-
        bpf_object__for_each_map(map, obj) {
+               char *pin_path = NULL;
                char buf[PATH_MAX];
-               int len;
 
-               len = snprintf(buf, PATH_MAX, "%s/%s", path,
-                              bpf_map__name(map));
-               if (len < 0) {
-                       err = -EINVAL;
-                       goto err_unpin_maps;
-               } else if (len >= PATH_MAX) {
-                       err = -ENAMETOOLONG;
-                       goto err_unpin_maps;
+               if (path) {
+                       int len;
+
+                       len = snprintf(buf, PATH_MAX, "%s/%s", path,
+                                      bpf_map__name(map));
+                       if (len < 0) {
+                               err = -EINVAL;
+                               goto err_unpin_maps;
+                       } else if (len >= PATH_MAX) {
+                               err = -ENAMETOOLONG;
+                               goto err_unpin_maps;
+                       }
+                       pin_path = buf;
+               } else if (!map->pin_path) {
+                       continue;
                }
 
-               err = bpf_map__pin(map, buf);
+               err = bpf_map__pin(map, pin_path);
                if (err)
                        goto err_unpin_maps;
        }
@@ -4033,17 +4339,10 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 
 err_unpin_maps:
        while ((map = bpf_map__prev(map, obj))) {
-               char buf[PATH_MAX];
-               int len;
-
-               len = snprintf(buf, PATH_MAX, "%s/%s", path,
-                              bpf_map__name(map));
-               if (len < 0)
-                       continue;
-               else if (len >= PATH_MAX)
+               if (!map->pin_path)
                        continue;
 
-               bpf_map__unpin(map, buf);
+               bpf_map__unpin(map, NULL);
        }
 
        return err;
@@ -4058,17 +4357,24 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
                return -ENOENT;
 
        bpf_object__for_each_map(map, obj) {
+               char *pin_path = NULL;
                char buf[PATH_MAX];
-               int len;
 
-               len = snprintf(buf, PATH_MAX, "%s/%s", path,
-                              bpf_map__name(map));
-               if (len < 0)
-                       return -EINVAL;
-               else if (len >= PATH_MAX)
-                       return -ENAMETOOLONG;
+               if (path) {
+                       int len;
+
+                       len = snprintf(buf, PATH_MAX, "%s/%s", path,
+                                      bpf_map__name(map));
+                       if (len < 0)
+                               return -EINVAL;
+                       else if (len >= PATH_MAX)
+                               return -ENAMETOOLONG;
+                       pin_path = buf;
+               } else if (!map->pin_path) {
+                       continue;
+               }
 
-               err = bpf_map__unpin(map, buf);
+               err = bpf_map__unpin(map, pin_path);
                if (err)
                        return err;
        }
@@ -4085,14 +4391,10 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
                return -ENOENT;
 
        if (!obj->loaded) {
-               pr_warning("object not yet loaded; load it first\n");
+               pr_warn("object not yet loaded; load it first\n");
                return -ENOENT;
        }
 
-       err = make_dir(path);
-       if (err)
-               return err;
-
        bpf_object__for_each_program(prog, obj) {
                char buf[PATH_MAX];
                int len;
@@ -4193,6 +4495,7 @@ void bpf_object__close(struct bpf_object *obj)
 
        for (i = 0; i < obj->nr_maps; i++) {
                zfree(&obj->maps[i].name);
+               zfree(&obj->maps[i].pin_path);
                if (obj->maps[i].clear_priv)
                        obj->maps[i].clear_priv(&obj->maps[i],
                                                obj->maps[i].priv);
@@ -4236,7 +4539,7 @@ bpf_object__next(struct bpf_object *prev)
 
 const char *bpf_object__name(const struct bpf_object *obj)
 {
-       return obj ? obj->path : ERR_PTR(-EINVAL);
+       return obj ? obj->name : ERR_PTR(-EINVAL);
 }
 
 unsigned int bpf_object__kversion(const struct bpf_object *obj)
@@ -4286,7 +4589,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
                        &obj->programs[nr_programs - 1];
 
        if (p->obj != obj) {
-               pr_warning("error: program handler doesn't match object\n");
+               pr_warn("error: program handler doesn't match object\n");
                return NULL;
        }
 
@@ -4349,7 +4652,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
        if (needs_copy) {
                title = strdup(title);
                if (!title) {
-                       pr_warning("failed to strdup program title\n");
+                       pr_warn("failed to strdup program title\n");
                        return ERR_PTR(-ENOMEM);
                }
        }
@@ -4371,13 +4674,13 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
                return -EINVAL;
 
        if (prog->instances.nr > 0 || prog->instances.fds) {
-               pr_warning("Can't set pre-processor after loading\n");
+               pr_warn("Can't set pre-processor after loading\n");
                return -EINVAL;
        }
 
        instances_fds = malloc(sizeof(int) * nr_instances);
        if (!instances_fds) {
-               pr_warning("alloc memory failed for fds\n");
+               pr_warn("alloc memory failed for fds\n");
                return -ENOMEM;
        }
 
@@ -4398,21 +4701,26 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
                return -EINVAL;
 
        if (n >= prog->instances.nr || n < 0) {
-               pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
-                          n, prog->section_name, prog->instances.nr);
+               pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
+                       n, prog->section_name, prog->instances.nr);
                return -EINVAL;
        }
 
        fd = prog->instances.fds[n];
        if (fd < 0) {
-               pr_warning("%dth instance of program '%s' is invalid\n",
-                          n, prog->section_name);
+               pr_warn("%dth instance of program '%s' is invalid\n",
+                       n, prog->section_name);
                return -ENOENT;
        }
 
        return fd;
 }
 
+enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+{
+       return prog->type;
+}
+
 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 {
        prog->type = type;
@@ -4446,6 +4754,13 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
+BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
+
+enum bpf_attach_type
+bpf_program__get_expected_attach_type(struct bpf_program *prog)
+{
+       return prog->expected_attach_type;
+}
 
 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
                                           enum bpf_attach_type type)
@@ -4453,19 +4768,23 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
        prog->expected_attach_type = type;
 }
 
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
-       { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \
+       { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }
 
 /* Programs that can NOT be attached. */
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
 
 /* Programs that can be attached. */
 #define BPF_APROG_SEC(string, ptype, atype) \
-       BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
+       BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)
 
 /* Programs that must specify expected attach type at load time. */
 #define BPF_EAPROG_SEC(string, ptype, eatype) \
-       BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
+       BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
+
+/* Programs that use BTF to identify attach point */
+#define BPF_PROG_BTF(string, ptype, eatype) \
+       BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
 
 /* Programs that can be attached but attach type can't be identified by section
  * name. Kept for backward compatibility.
@@ -4477,16 +4796,23 @@ static const struct {
        size_t len;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
-       int is_attachable;
+       bool is_attachable;
+       bool is_attach_btf;
        enum bpf_attach_type attach_type;
 } section_names[] = {
        BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
        BPF_PROG_SEC("kprobe/",                 BPF_PROG_TYPE_KPROBE),
+       BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
        BPF_PROG_SEC("kretprobe/",              BPF_PROG_TYPE_KPROBE),
+       BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
        BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
        BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
        BPF_PROG_SEC("tracepoint/",             BPF_PROG_TYPE_TRACEPOINT),
+       BPF_PROG_SEC("tp/",                     BPF_PROG_TYPE_TRACEPOINT),
        BPF_PROG_SEC("raw_tracepoint/",         BPF_PROG_TYPE_RAW_TRACEPOINT),
+       BPF_PROG_SEC("raw_tp/",                 BPF_PROG_TYPE_RAW_TRACEPOINT),
+       BPF_PROG_BTF("tp_btf/",                 BPF_PROG_TYPE_TRACING,
+                                               BPF_TRACE_RAW_TP),
        BPF_PROG_SEC("xdp",                     BPF_PROG_TYPE_XDP),
        BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
        BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
@@ -4593,14 +4919,54 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
                *expected_attach_type = section_names[i].expected_attach_type;
                return 0;
        }
-       pr_warning("failed to guess program type based on ELF section name '%s'\n", name);
+       pr_warn("failed to guess program type based on ELF section name '%s'\n", name);
        type_names = libbpf_get_type_names(false);
        if (type_names != NULL) {
                pr_info("supported section(type) names are:%s\n", type_names);
                free(type_names);
        }
 
-       return -EINVAL;
+       return -ESRCH;
+}
+
+#define BTF_PREFIX "btf_trace_"
+static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id)
+{
+       struct btf *btf = bpf_core_find_kernel_btf();
+       char raw_tp_btf_name[128] = BTF_PREFIX;
+       char *dst = raw_tp_btf_name + sizeof(BTF_PREFIX) - 1;
+       int ret, i, err = -EINVAL;
+
+       if (IS_ERR(btf)) {
+               pr_warn("vmlinux BTF is not found\n");
+               return -EINVAL;
+       }
+
+       if (!name)
+               goto out;
+
+       for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+               if (!section_names[i].is_attach_btf)
+                       continue;
+               if (strncmp(name, section_names[i].sec, section_names[i].len))
+                       continue;
+               /* prepend "btf_trace_" prefix per kernel convention */
+               strncat(dst, name + section_names[i].len,
+                       sizeof(raw_tp_btf_name) - sizeof(BTF_PREFIX));
+               ret = btf__find_by_name(btf, raw_tp_btf_name);
+               if (ret <= 0) {
+                       pr_warn("%s is not found in vmlinux BTF\n", dst);
+                       goto out;
+               }
+               *btf_id = ret;
+               err = 0;
+               goto out;
+       }
+       pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
+       err = -ESRCH;
+out:
+       btf__free(btf);
+       return err;
 }
 
 int libbpf_attach_type_by_name(const char *name,
@@ -4620,7 +4986,7 @@ int libbpf_attach_type_by_name(const char *name,
                *attach_type = section_names[i].attach_type;
                return 0;
        }
-       pr_warning("failed to guess attach type based on ELF section name '%s'\n", name);
+       pr_warn("failed to guess attach type based on ELF section name '%s'\n", name);
        type_names = libbpf_get_type_names(true);
        if (type_names != NULL) {
                pr_info("attachable section(type) names are:%s\n", type_names);
@@ -4630,15 +4996,6 @@ int libbpf_attach_type_by_name(const char *name,
        return -EINVAL;
 }
 
-static int
-bpf_program__identify_section(struct bpf_program *prog,
-                             enum bpf_prog_type *prog_type,
-                             enum bpf_attach_type *expected_attach_type)
-{
-       return libbpf_prog_type_by_name(prog->section_name, prog_type,
-                                       expected_attach_type);
-}
-
 int bpf_map__fd(const struct bpf_map *map)
 {
        return map ? map->fd : -EINVAL;
@@ -4703,11 +5060,11 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 {
        if (!bpf_map_type__is_map_in_map(map->def.type)) {
-               pr_warning("error: unsupported map type\n");
+               pr_warn("error: unsupported map type\n");
                return -EINVAL;
        }
        if (map->inner_map_fd != -1) {
-               pr_warning("error: inner_map_fd already specified\n");
+               pr_warn("error: inner_map_fd already specified\n");
                return -EINVAL;
        }
        map->inner_map_fd = fd;
@@ -4727,8 +5084,8 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
        e = obj->maps + obj->nr_maps;
 
        if ((m < s) || (m >= e)) {
-               pr_warning("error in %s: map handler doesn't belong to object\n",
-                          __func__);
+               pr_warn("error in %s: map handler doesn't belong to object\n",
+                        __func__);
                return NULL;
        }
 
@@ -4806,8 +5163,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 {
        struct bpf_object_open_attr open_attr = {};
        struct bpf_program *prog, *first_prog = NULL;
-       enum bpf_attach_type expected_attach_type;
-       enum bpf_prog_type prog_type;
        struct bpf_object *obj;
        struct bpf_map *map;
        int err;
@@ -4825,26 +5180,27 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                return -ENOENT;
 
        bpf_object__for_each_program(prog, obj) {
+               enum bpf_attach_type attach_type = attr->expected_attach_type;
                /*
-                * If type is not specified, try to guess it based on
-                * section name.
+                * to preserve backwards compatibility, bpf_prog_load treats
+                * attr->prog_type, if specified, as an override to whatever
+                * bpf_object__open guessed
                 */
-               prog_type = attr->prog_type;
-               prog->prog_ifindex = attr->ifindex;
-               expected_attach_type = attr->expected_attach_type;
-               if (prog_type == BPF_PROG_TYPE_UNSPEC) {
-                       err = bpf_program__identify_section(prog, &prog_type,
-                                                           &expected_attach_type);
-                       if (err < 0) {
-                               bpf_object__close(obj);
-                               return -EINVAL;
-                       }
+               if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
+                       bpf_program__set_type(prog, attr->prog_type);
+                       bpf_program__set_expected_attach_type(prog,
+                                                             attach_type);
+               }
+               if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
+                       /*
+                        * we haven't guessed from section name and user
+                        * didn't provide a fallback type, too bad...
+                        */
+                       bpf_object__close(obj);
+                       return -EINVAL;
                }
 
-               bpf_program__set_type(prog, prog_type);
-               bpf_program__set_expected_attach_type(prog,
-                                                     expected_attach_type);
-
+               prog->prog_ifindex = attr->ifindex;
                prog->log_level = attr->log_level;
                prog->prog_flags = attr->prog_flags;
                if (!first_prog)
@@ -4857,7 +5213,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        }
 
        if (!first_prog) {
-               pr_warning("object file doesn't contain bpf program\n");
+               pr_warn("object file doesn't contain bpf program\n");
                bpf_object__close(obj);
                return -ENOENT;
        }
@@ -4916,14 +5272,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
        int prog_fd, err;
 
        if (pfd < 0) {
-               pr_warning("program '%s': invalid perf event FD %d\n",
-                          bpf_program__title(prog, false), pfd);
+               pr_warn("program '%s': invalid perf event FD %d\n",
+                       bpf_program__title(prog, false), pfd);
                return ERR_PTR(-EINVAL);
        }
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
-               pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
-                          bpf_program__title(prog, false));
+               pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
+                       bpf_program__title(prog, false));
                return ERR_PTR(-EINVAL);
        }
 
@@ -4936,16 +5292,16 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
        if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
                err = -errno;
                free(link);
-               pr_warning("program '%s': failed to attach to pfd %d: %s\n",
-                          bpf_program__title(prog, false), pfd,
+               pr_warn("program '%s': failed to attach to pfd %d: %s\n",
+                       bpf_program__title(prog, false), pfd,
                           libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return ERR_PTR(err);
        }
        if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
                err = -errno;
                free(link);
-               pr_warning("program '%s': failed to enable pfd %d: %s\n",
-                          bpf_program__title(prog, false), pfd,
+               pr_warn("program '%s': failed to enable pfd %d: %s\n",
+                       bpf_program__title(prog, false), pfd,
                           libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return ERR_PTR(err);
        }
@@ -5020,9 +5376,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
        type = uprobe ? determine_uprobe_perf_type()
                      : determine_kprobe_perf_type();
        if (type < 0) {
-               pr_warning("failed to determine %s perf type: %s\n",
-                          uprobe ? "uprobe" : "kprobe",
-                          libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+               pr_warn("failed to determine %s perf type: %s\n",
+                       uprobe ? "uprobe" : "kprobe",
+                       libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
                return type;
        }
        if (retprobe) {
@@ -5030,10 +5386,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
                                 : determine_kprobe_retprobe_bit();
 
                if (bit < 0) {
-                       pr_warning("failed to determine %s retprobe bit: %s\n",
-                                  uprobe ? "uprobe" : "kprobe",
-                                  libbpf_strerror_r(bit, errmsg,
-                                                    sizeof(errmsg)));
+                       pr_warn("failed to determine %s retprobe bit: %s\n",
+                               uprobe ? "uprobe" : "kprobe",
+                               libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
                        return bit;
                }
                attr.config |= 1 << bit;
@@ -5050,9 +5405,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
                      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
        if (pfd < 0) {
                err = -errno;
-               pr_warning("%s perf_event_open() failed: %s\n",
-                          uprobe ? "uprobe" : "kprobe",
-                          libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+               pr_warn("%s perf_event_open() failed: %s\n",
+                       uprobe ? "uprobe" : "kprobe",
+                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return err;
        }
        return pfd;
@@ -5069,20 +5424,20 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
        pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
                                    0 /* offset */, -1 /* pid */);
        if (pfd < 0) {
-               pr_warning("program '%s': failed to create %s '%s' perf event: %s\n",
-                          bpf_program__title(prog, false),
-                          retprobe ? "kretprobe" : "kprobe", func_name,
-                          libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+               pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
+                       bpf_program__title(prog, false),
+                       retprobe ? "kretprobe" : "kprobe", func_name,
+                       libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return ERR_PTR(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
        if (IS_ERR(link)) {
                close(pfd);
                err = PTR_ERR(link);
-               pr_warning("program '%s': failed to attach to %s '%s': %s\n",
-                          bpf_program__title(prog, false),
-                          retprobe ? "kretprobe" : "kprobe", func_name,
-                          libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+               pr_warn("program '%s': failed to attach to %s '%s': %s\n",
+                       bpf_program__title(prog, false),
+                       retprobe ? "kretprobe" : "kprobe", func_name,
+                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return link;
        }
        return link;
@@ -5100,22 +5455,22 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
        pfd = perf_event_open_probe(true /* uprobe */, retprobe,
                                    binary_path, func_offset, pid);
        if (pfd < 0) {
-               pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
-                          bpf_program__title(prog, false),
-                          retprobe ? "uretprobe" : "uprobe",
-                          binary_path, func_offset,
-                          libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+               pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+                       bpf_program__title(prog, false),
+                       retprobe ? "uretprobe" : "uprobe",
+                       binary_path, func_offset,
+                       libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return ERR_PTR(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
        if (IS_ERR(link)) {
                close(pfd);
                err = PTR_ERR(link);
-               pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
-                          bpf_program__title(prog, false),
-                          retprobe ? "uretprobe" : "uprobe",
-                          binary_path, func_offset,
-                          libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+               pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
+                       bpf_program__title(prog, false),
+                       retprobe ? "uretprobe" : "uprobe",
+                       binary_path, func_offset,
+                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return link;
        }
        return link;
@@ -5149,9 +5504,9 @@ static int perf_event_open_tracepoint(const char *tp_category,
 
        tp_id = determine_tracepoint_id(tp_category, tp_name);
        if (tp_id < 0) {
-               pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
-                          tp_category, tp_name,
-                          libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
+               pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
+                       tp_category, tp_name,
+                       libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
                return tp_id;
        }
 
@@ -5163,9 +5518,9 @@ static int perf_event_open_tracepoint(const char *tp_category,
                      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
        if (pfd < 0) {
                err = -errno;
-               pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n",
-                          tp_category, tp_name,
-                          libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+               pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
+                       tp_category, tp_name,
+                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return err;
        }
        return pfd;
@@ -5181,20 +5536,20 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
 
        pfd = perf_event_open_tracepoint(tp_category, tp_name);
        if (pfd < 0) {
-               pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
-                          bpf_program__title(prog, false),
-                          tp_category, tp_name,
-                          libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+               pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+                       bpf_program__title(prog, false),
+                       tp_category, tp_name,
+                       libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return ERR_PTR(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
        if (IS_ERR(link)) {
                close(pfd);
                err = PTR_ERR(link);
-               pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
-                          bpf_program__title(prog, false),
-                          tp_category, tp_name,
-                          libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+               pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
+                       bpf_program__title(prog, false),
+                       tp_category, tp_name,
+                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return link;
        }
        return link;
@@ -5216,8 +5571,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
-               pr_warning("program '%s': can't attach before loaded\n",
-                          bpf_program__title(prog, false));
+               pr_warn("program '%s': can't attach before loaded\n",
+                       bpf_program__title(prog, false));
                return ERR_PTR(-EINVAL);
        }
 
@@ -5230,9 +5585,9 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
        if (pfd < 0) {
                pfd = -errno;
                free(link);
-               pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n",
-                          bpf_program__title(prog, false), tp_name,
-                          libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+               pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
+                       bpf_program__title(prog, false), tp_name,
+                       libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return ERR_PTR(pfd);
        }
        link->fd = pfd;
@@ -5334,7 +5689,7 @@ static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
                return;
        if (cpu_buf->base &&
            munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
-               pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
+               pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
        if (cpu_buf->fd >= 0) {
                ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
                close(cpu_buf->fd);
@@ -5384,8 +5739,8 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
                              -1, PERF_FLAG_FD_CLOEXEC);
        if (cpu_buf->fd < 0) {
                err = -errno;
-               pr_warning("failed to open perf buffer event on cpu #%d: %s\n",
-                          cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+               pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
+                       cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
                goto error;
        }
 
@@ -5395,15 +5750,15 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
        if (cpu_buf->base == MAP_FAILED) {
                cpu_buf->base = NULL;
                err = -errno;
-               pr_warning("failed to mmap perf buffer on cpu #%d: %s\n",
-                          cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+               pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
+                       cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
                goto error;
        }
 
        if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
                err = -errno;
-               pr_warning("failed to enable perf buffer event on cpu #%d: %s\n",
-                          cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+               pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
+                       cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
                goto error;
        }
 
@@ -5463,8 +5818,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
        int err, i;
 
        if (page_cnt & (page_cnt - 1)) {
-               pr_warning("page count should be power of two, but is %zu\n",
-                          page_cnt);
+               pr_warn("page count should be power of two, but is %zu\n",
+                       page_cnt);
                return ERR_PTR(-EINVAL);
        }
 
@@ -5472,14 +5827,14 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
        err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
        if (err) {
                err = -errno;
-               pr_warning("failed to get map info for map FD %d: %s\n",
-                          map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+               pr_warn("failed to get map info for map FD %d: %s\n",
+                       map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
                return ERR_PTR(err);
        }
 
        if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
-               pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
-                          map.name);
+               pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+                       map.name);
                return ERR_PTR(-EINVAL);
        }
 
@@ -5499,8 +5854,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
        pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (pb->epoll_fd < 0) {
                err = -errno;
-               pr_warning("failed to create epoll instance: %s\n",
-                          libbpf_strerror_r(err, msg, sizeof(msg)));
+               pr_warn("failed to create epoll instance: %s\n",
+                       libbpf_strerror_r(err, msg, sizeof(msg)));
                goto error;
        }
 
@@ -5519,13 +5874,13 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
        pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
        if (!pb->events) {
                err = -ENOMEM;
-               pr_warning("failed to allocate events: out of memory\n");
+               pr_warn("failed to allocate events: out of memory\n");
                goto error;
        }
        pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
        if (!pb->cpu_bufs) {
                err = -ENOMEM;
-               pr_warning("failed to allocate buffers: out of memory\n");
+               pr_warn("failed to allocate buffers: out of memory\n");
                goto error;
        }
 
@@ -5548,9 +5903,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
                                          &cpu_buf->fd, 0);
                if (err) {
                        err = -errno;
-                       pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
-                                  cpu, map_key, cpu_buf->fd,
-                                  libbpf_strerror_r(err, msg, sizeof(msg)));
+                       pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
+                               cpu, map_key, cpu_buf->fd,
+                               libbpf_strerror_r(err, msg, sizeof(msg)));
                        goto error;
                }
 
@@ -5559,9 +5914,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
                if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
                              &pb->events[i]) < 0) {
                        err = -errno;
-                       pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
-                                  cpu, cpu_buf->fd,
-                                  libbpf_strerror_r(err, msg, sizeof(msg)));
+                       pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
+                               cpu, cpu_buf->fd,
+                               libbpf_strerror_r(err, msg, sizeof(msg)));
                        goto error;
                }
        }
@@ -5614,7 +5969,7 @@ perf_buffer__process_record(struct perf_event_header *e, void *ctx)
                break;
        }
        default:
-               pr_warning("unknown perf sample type %d\n", e->type);
+               pr_warn("unknown perf sample type %d\n", e->type);
                return LIBBPF_PERF_EVENT_ERROR;
        }
        return LIBBPF_PERF_EVENT_CONT;
@@ -5644,7 +5999,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
 
                err = perf_buffer__process_records(pb, cpu_buf);
                if (err) {
-                       pr_warning("error while processing records: %d\n", err);
+                       pr_warn("error while processing records: %d\n", err);
                        return err;
                }
        }
@@ -5841,13 +6196,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
                v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
                                                   desc->count_offset);
                if (v1 != v2)
-                       pr_warning("%s: mismatch in element count\n", __func__);
+                       pr_warn("%s: mismatch in element count\n", __func__);
 
                v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
                v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
                                                   desc->size_offset);
                if (v1 != v2)
-                       pr_warning("%s: mismatch in rec size\n", __func__);
+                       pr_warn("%s: mismatch in rec size\n", __func__);
        }
 
        /* step 7: update info_len and data_len */
@@ -5915,20 +6270,19 @@ int libbpf_num_possible_cpus(void)
        fd = open(fcpu, O_RDONLY);
        if (fd < 0) {
                error = errno;
-               pr_warning("Failed to open file %s: %s\n",
-                          fcpu, strerror(error));
+               pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error));
                return -error;
        }
        len = read(fd, buf, sizeof(buf));
        close(fd);
        if (len <= 0) {
                error = len ? errno : EINVAL;
-               pr_warning("Failed to read # of possible cpus from %s: %s\n",
-                          fcpu, strerror(error));
+               pr_warn("Failed to read # of possible cpus from %s: %s\n",
+                       fcpu, strerror(error));
                return -error;
        }
        if (len == sizeof(buf)) {
-               pr_warning("File %s size overflow\n", fcpu);
+               pr_warn("File %s size overflow\n", fcpu);
                return -EOVERFLOW;
        }
        buf[len] = '\0';
@@ -5939,8 +6293,8 @@ int libbpf_num_possible_cpus(void)
                        buf[ir] = '\0';
                        n = sscanf(&buf[il], "%u-%u", &start, &end);
                        if (n <= 0) {
-                               pr_warning("Failed to get # CPUs from %s\n",
-                                          &buf[il]);
+                               pr_warn("Failed to get # CPUs from %s\n",
+                                       &buf[il]);
                                return -EINVAL;
                        } else if (n == 1) {
                                end = start;
@@ -5950,7 +6304,7 @@ int libbpf_num_possible_cpus(void)
                }
        }
        if (tmp_cpus <= 0) {
-               pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
+               pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
                return -EINVAL;
        }
 
index e8f7097..6ddc041 100644 (file)
@@ -67,18 +67,79 @@ struct bpf_object_open_attr {
        enum bpf_prog_type prog_type;
 };
 
+/* Helper macro to declare and initialize libbpf options struct
+ *
+ * This dance with uninitialized declaration, followed by memset to zero,
+ * followed by assignment using compound literal syntax is done to preserve
+ * ability to use a nice struct field initialization syntax and **hopefully**
+ * have all the padding bytes initialized to zero. It's not guaranteed though,
+ * when copying the literal, that the compiler won't copy garbage into the literal's padding
+ * bytes, but that's the best way I've found and it seems to work in practice.
+ *
+ * Macro declares an opts struct of the given type and name, zero-initializes
+ * it (including any extra padding) with memset(), and then assigns initial
+ * values provided by users in struct initializer-syntax as varargs.
+ */
+#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...)                               \
+       struct TYPE NAME = ({                                               \
+               memset(&NAME, 0, sizeof(struct TYPE));                      \
+               (struct TYPE) {                                             \
+                       .sz = sizeof(struct TYPE),                          \
+                       __VA_ARGS__                                         \
+               };                                                          \
+       })
+
+struct bpf_object_open_opts {
+       /* size of this struct, for forward/backward compatibility */
+       size_t sz;
+       /* object name override, if provided:
+        * - for object open from file, this will override setting object
+        *   name from file path's base name;
+        * - for object open from memory buffer, this will specify an object
+        *   name and will override default "<addr>-<buf-size>" name;
+        */
+       const char *object_name;
+       /* parse map definitions non-strictly, allowing extra attributes/data */
+       bool relaxed_maps;
+       /* process CO-RE relocations non-strictly, allowing them to fail */
+       bool relaxed_core_relocs;
+       /* maps that set the 'pinning' attribute in their definition will have
+        * their pin_path attribute set to a file in this directory, and be
+        * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
+        */
+       const char *pin_root_path;
+};
+#define bpf_object_open_opts__last_field pin_root_path
+
 LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
 LIBBPF_API struct bpf_object *
+bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts);
+LIBBPF_API struct bpf_object *
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+                    struct bpf_object_open_opts *opts);
+
+/* deprecated bpf_object__open variants */
+LIBBPF_API struct bpf_object *
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+                       const char *name);
+LIBBPF_API struct bpf_object *
 bpf_object__open_xattr(struct bpf_object_open_attr *attr);
-struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
-                                           int flags);
-LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
-                                                     size_t obj_buf_sz,
-                                                     const char *name);
+
 int bpf_object__section_size(const struct bpf_object *obj, const char *name,
                             __u32 *size);
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                                __u32 *off);
+
+enum libbpf_pin_type {
+       LIBBPF_PIN_NONE,
+       /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+       LIBBPF_PIN_BY_NAME,
+};
+
+/* pin_maps and unpin_maps can both be called with a NULL path, in which case
+ * they will use the pin_path attribute of each map (and ignore all maps that
+ * don't have a pin_path set).
+ */
 LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
 LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
                                      const char *path);
@@ -262,8 +323,14 @@ LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
+
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
                                      enum bpf_prog_type type);
+
+LIBBPF_API enum bpf_attach_type
+bpf_program__get_expected_attach_type(struct bpf_program *prog);
 LIBBPF_API void
 bpf_program__set_expected_attach_type(struct bpf_program *prog,
                                      enum bpf_attach_type type);
@@ -276,6 +343,7 @@ LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -335,6 +403,9 @@ LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
 
index d04c7cb..86173cb 100644 (file)
@@ -190,3 +190,16 @@ LIBBPF_0.0.5 {
        global:
                bpf_btf_get_next_id;
 } LIBBPF_0.0.4;
+
+LIBBPF_0.0.6 {
+       global:
+               bpf_map__get_pin_path;
+               bpf_map__is_pinned;
+               bpf_map__set_pin_path;
+               bpf_object__open_file;
+               bpf_object__open_mem;
+               bpf_program__get_expected_attach_type;
+               bpf_program__get_type;
+               bpf_program__is_tracing;
+               bpf_program__set_tracing;
+} LIBBPF_0.0.5;
index 98216a6..bd6f48e 100644 (file)
@@ -59,10 +59,42 @@ do {                                \
        libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__);     \
 } while (0)
 
-#define pr_warning(fmt, ...)   __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__)
+#define pr_warn(fmt, ...)      __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__)
 #define pr_info(fmt, ...)      __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
 #define pr_debug(fmt, ...)     __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
 
+static inline bool libbpf_validate_opts(const char *opts,
+                                       size_t opts_sz, size_t user_sz,
+                                       const char *type_name)
+{
+       if (user_sz < sizeof(size_t)) {
+               pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
+               return false;
+       }
+       if (user_sz > opts_sz) {
+               size_t i;
+
+               for (i = opts_sz; i < user_sz; i++) {
+                       if (opts[i]) {
+                               pr_warn("%s has non-zero extra bytes",
+                                       type_name);
+                               return false;
+                       }
+               }
+       }
+       return true;
+}
+
+#define OPTS_VALID(opts, type)                                               \
+       (!(opts) || libbpf_validate_opts((const char *)opts,                  \
+                                        offsetofend(struct type,             \
+                                                    type##__last_field),     \
+                                        (opts)->sz, #type))
+#define OPTS_HAS(opts, field) \
+       ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field))
+#define OPTS_GET(opts, field, fallback_value) \
+       (OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
+
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
                         const char *str_sec, size_t str_len);
 
@@ -94,7 +126,7 @@ struct btf_ext {
        };
        struct btf_ext_info func_info;
        struct btf_ext_info line_info;
-       struct btf_ext_info offset_reloc_info;
+       struct btf_ext_info field_reloc_info;
        __u32 data_size;
 };
 
@@ -119,13 +151,23 @@ struct bpf_line_info_min {
        __u32   line_col;
 };
 
-/* The minimum bpf_offset_reloc checked by the loader
+/* bpf_field_info_kind encodes which aspect of captured field has to be
+ * adjusted by relocations. Currently supported values are:
+ *   - BPF_FIELD_BYTE_OFFSET: field offset (in bytes);
+ *   - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise);
+ */
+enum bpf_field_info_kind {
+       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
+       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
+};
+
+/* The minimum bpf_field_reloc checked by the loader
  *
- * Offset relocation captures the following data:
+ * Field relocation captures the following data:
  * - insn_off - instruction offset (in bytes) within a BPF program that needs
- *   its insn->imm field to be relocated with actual offset;
+ *   its insn->imm field to be relocated with actual field info;
  * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- *   offset;
+ *   field;
  * - access_str_off - offset into corresponding .BTF string section. String
  *   itself encodes an accessed field using a sequence of field and array
  *   indicies, separated by colon (:). It's conceptually very close to LLVM's
@@ -156,15 +198,16 @@ struct bpf_line_info_min {
  * bpf_probe_read(&dst, sizeof(dst),
  *               __builtin_preserve_access_index(&src->a.b.c));
  *
- * In this case Clang will emit offset relocation recording necessary data to
+ * In this case Clang will emit field relocation recording necessary data to
  * be able to find offset of embedded `a.b.c` field within `src` struct.
  *
  *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
  */
-struct bpf_offset_reloc {
+struct bpf_field_reloc {
        __u32   insn_off;
        __u32   type_id;
        __u32   access_str_off;
+       enum bpf_field_info_kind kind;
 };
 
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
index 4b0b036..a9eb8b3 100644 (file)
@@ -102,6 +102,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
        case BPF_PROG_TYPE_FLOW_DISSECTOR:
        case BPF_PROG_TYPE_CGROUP_SYSCTL:
        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+       case BPF_PROG_TYPE_TRACING:
        default:
                break;
        }
diff --git a/tools/lib/bpf/test_libbpf.c b/tools/lib/bpf/test_libbpf.c
new file mode 100644 (file)
index 0000000..f0eb272
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+
+/* do nothing, just make sure we can link successfully */
+
+int main(int argc, char *argv[])
+{
+       /* libbpf.h */
+       libbpf_set_print(NULL);
+
+       /* bpf.h */
+       bpf_prog_get_fd_by_id(0);
+
+       /* btf.h */
+       btf__new(NULL, 0);
+
+       return 0;
+}
diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp
deleted file mode 100644 (file)
index fc13487..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#include "libbpf.h"
-#include "bpf.h"
-#include "btf.h"
-
-/* do nothing, just make sure we can link successfully */
-
-int main(int argc, char *argv[])
-{
-    /* libbpf.h */
-    libbpf_set_print(NULL);
-
-    /* bpf.h */
-    bpf_prog_get_fd_by_id(0);
-
-    /* btf.h */
-    btf__new(NULL, 0);
-}
index a902838..74d84f3 100644 (file)
@@ -73,6 +73,21 @@ struct xsk_nl_info {
        int fd;
 };
 
+/* Up until and including Linux 5.3 */
+struct xdp_ring_offset_v1 {
+       __u64 producer;
+       __u64 consumer;
+       __u64 desc;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_mmap_offsets_v1 {
+       struct xdp_ring_offset_v1 rx;
+       struct xdp_ring_offset_v1 tx;
+       struct xdp_ring_offset_v1 fr;
+       struct xdp_ring_offset_v1 cr;
+};
+
 int xsk_umem__fd(const struct xsk_umem *umem)
 {
        return umem ? umem->fd : -EINVAL;
@@ -133,6 +148,58 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
        return 0;
 }
 
+static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
+{
+       struct xdp_mmap_offsets_v1 off_v1;
+
+       /* getsockopt on a kernel <= 5.3 has no flags fields.
+        * Copy over the offsets to the correct places in the >=5.4 format
+        * and put the flags where they would have been on that kernel.
+        */
+       memcpy(&off_v1, off, sizeof(off_v1));
+
+       off->rx.producer = off_v1.rx.producer;
+       off->rx.consumer = off_v1.rx.consumer;
+       off->rx.desc = off_v1.rx.desc;
+       off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
+
+       off->tx.producer = off_v1.tx.producer;
+       off->tx.consumer = off_v1.tx.consumer;
+       off->tx.desc = off_v1.tx.desc;
+       off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
+
+       off->fr.producer = off_v1.fr.producer;
+       off->fr.consumer = off_v1.fr.consumer;
+       off->fr.desc = off_v1.fr.desc;
+       off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
+
+       off->cr.producer = off_v1.cr.producer;
+       off->cr.consumer = off_v1.cr.consumer;
+       off->cr.desc = off_v1.cr.desc;
+       off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
+}
+
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+       socklen_t optlen;
+       int err;
+
+       optlen = sizeof(*off);
+       err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
+       if (err)
+               return err;
+
+       if (optlen == sizeof(*off))
+               return 0;
+
+       if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
+               xsk_mmap_offsets_v1(off);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
 int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
                            __u64 size, struct xsk_ring_prod *fill,
                            struct xsk_ring_cons *comp,
@@ -141,7 +208,6 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
        struct xdp_mmap_offsets off;
        struct xdp_umem_reg mr;
        struct xsk_umem *umem;
-       socklen_t optlen;
        void *map;
        int err;
 
@@ -163,6 +229,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
        umem->umem_area = umem_area;
        xsk_set_umem_config(&umem->config, usr_config);
 
+       memset(&mr, 0, sizeof(mr));
        mr.addr = (uintptr_t)umem_area;
        mr.len = size;
        mr.chunk_size = umem->config.frame_size;
@@ -189,8 +256,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
                goto out_socket;
        }
 
-       optlen = sizeof(off);
-       err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       err = xsk_get_mmap_offsets(umem->fd, &off);
        if (err) {
                err = -errno;
                goto out_socket;
@@ -273,33 +339,55 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
        /* This is the C-program:
         * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
         * {
-        *     int index = ctx->rx_queue_index;
+        *     int ret, index = ctx->rx_queue_index;
         *
         *     // A set entry here means that the correspnding queue_id
         *     // has an active AF_XDP socket bound to it.
+        *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
+        *     if (ret > 0)
+        *         return ret;
+        *
+        *     // Fallback for pre-5.3 kernels, not supporting default
+        *     // action in the flags parameter.
         *     if (bpf_map_lookup_elem(&xsks_map, &index))
         *         return bpf_redirect_map(&xsks_map, index, 0);
-        *
         *     return XDP_PASS;
         * }
         */
        struct bpf_insn prog[] = {
-               /* r1 = *(u32 *)(r1 + 16) */
-               BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16),
-               /* *(u32 *)(r10 - 4) = r1 */
-               BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
+               /* r2 = *(u32 *)(r1 + 16) */
+               BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
+               /* *(u32 *)(r10 - 4) = r2 */
+               BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
+               /* r1 = xskmap[] */
+               BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+               /* r3 = XDP_PASS */
+               BPF_MOV64_IMM(BPF_REG_3, 2),
+               /* call bpf_redirect_map */
+               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+               /* if w0 != 0 goto pc+13 */
+               BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
+               /* r2 = r10 */
                BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+               /* r2 += -4 */
                BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+               /* r1 = xskmap[] */
                BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+               /* call bpf_map_lookup_elem */
                BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+               /* r1 = r0 */
                BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-               BPF_MOV32_IMM(BPF_REG_0, 2),
-               /* if r1 == 0 goto +5 */
+               /* r0 = XDP_PASS */
+               BPF_MOV64_IMM(BPF_REG_0, 2),
+               /* if r1 == 0 goto pc+5 */
                BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
                /* r2 = *(u32 *)(r10 - 4) */
-               BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
                BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
-               BPF_MOV32_IMM(BPF_REG_3, 0),
+               /* r1 = xskmap[] */
+               BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+               /* r3 = 0 */
+               BPF_MOV64_IMM(BPF_REG_3, 0),
+               /* call bpf_redirect_map */
                BPF_EMIT_CALL(BPF_FUNC_redirect_map),
                /* The jumps are to this instruction */
                BPF_EXIT_INSN(),
@@ -310,7 +398,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
                                   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
                                   log_buf_size);
        if (prog_fd < 0) {
-               pr_warning("BPF log buffer:\n%s", log_buf);
+               pr_warn("BPF log buffer:\n%s", log_buf);
                return prog_fd;
        }
 
@@ -491,14 +579,13 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
        struct sockaddr_xdp sxdp = {};
        struct xdp_mmap_offsets off;
        struct xsk_socket *xsk;
-       socklen_t optlen;
        int err;
 
        if (!umem || !xsk_ptr || !rx || !tx)
                return -EFAULT;
 
        if (umem->refcount) {
-               pr_warning("Error: shared umems not supported by libbpf.\n");
+               pr_warn("Error: shared umems not supported by libbpf.\n");
                return -EBUSY;
        }
 
@@ -550,8 +637,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
                }
        }
 
-       optlen = sizeof(off);
-       err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       err = xsk_get_mmap_offsets(xsk->fd, &off);
        if (err) {
                err = -errno;
                goto out_socket;
@@ -637,7 +723,6 @@ out_xsk_alloc:
 int xsk_umem__delete(struct xsk_umem *umem)
 {
        struct xdp_mmap_offsets off;
-       socklen_t optlen;
        int err;
 
        if (!umem)
@@ -646,8 +731,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
        if (umem->refcount)
                return -EBUSY;
 
-       optlen = sizeof(off);
-       err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       err = xsk_get_mmap_offsets(umem->fd, &off);
        if (!err) {
                munmap(umem->fill->ring - off.fr.desc,
                       off.fr.desc + umem->config.fill_size * sizeof(__u64));
@@ -665,7 +749,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 {
        size_t desc_sz = sizeof(struct xdp_desc);
        struct xdp_mmap_offsets off;
-       socklen_t optlen;
        int err;
 
        if (!xsk)
@@ -676,8 +759,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
                close(xsk->prog_fd);
        }
 
-       optlen = sizeof(off);
-       err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       err = xsk_get_mmap_offsets(xsk->fd, &off);
        if (!err) {
                if (xsk->rx) {
                        munmap(xsk->rx->ring - off.rx.desc,
index 7470327..4865116 100644 (file)
@@ -7,11 +7,10 @@ FEATURE-DUMP.libbpf
 fixdep
 test_align
 test_dev_cgroup
-test_progs
+/test_progs*
 test_tcpbpf_user
 test_verifier_log
 feature
-test_libbpf_open
 test_sock
 test_sock_addr
 test_sock_fields
@@ -33,9 +32,10 @@ test_tcpnotify_user
 test_libbpf
 test_tcp_check_syncookie_user
 test_sysctl
-alu32
 libbpf.pc
 libbpf.so.*
 test_hashmap
 test_btf_dump
 xdping
+/no_alu32
+/bpf_gcc
index 6889c19..b334a6d 100644 (file)
@@ -2,10 +2,12 @@
 include ../../../../scripts/Kbuild.include
 include ../../../scripts/Makefile.arch
 
-LIBDIR := ../../../lib
+CURDIR := $(abspath .)
+LIBDIR := $(abspath ../../../lib)
 BPFDIR := $(LIBDIR)/bpf
-APIDIR := ../../../include/uapi
-GENDIR := ../../../../include/generated
+TOOLSDIR := $(abspath ../../../include)
+APIDIR := $(TOOLSDIR)/uapi
+GENDIR := $(abspath ../../../../include/generated)
 GENHDR := $(GENDIR)/autoconf.h
 
 ifneq ($(wildcard $(GENHDR)),)
@@ -15,11 +17,10 @@ endif
 CLANG          ?= clang
 LLC            ?= llc
 LLVM_OBJCOPY   ?= llvm-objcopy
-LLVM_READELF   ?= llvm-readelf
-BTF_PAHOLE     ?= pahole
 BPF_GCC                ?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
-         -Dbpf_prog_load=bpf_prog_test_load \
+CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \
+         -I$(GENDIR) -I$(TOOLSDIR) -I$(CURDIR)                         \
+         -Dbpf_prog_load=bpf_prog_test_load                            \
          -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lrt -lpthread
 
@@ -27,33 +28,20 @@ LDLIBS += -lcap -lelf -lrt -lpthread
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
        test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
-       test_cgroup_storage test_select_reuseport test_section_names \
+       test_cgroup_storage test_select_reuseport \
        test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-       test_btf_dump test_cgroup_attach xdping
-
-BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
-TEST_GEN_FILES = $(BPF_OBJ_FILES)
-
-BTF_C_FILES = $(wildcard progs/btf_dump_test_case_*.c)
-TEST_FILES = $(BTF_C_FILES)
-
-# Also test sub-register code-gen if LLVM has eBPF v3 processor support which
-# contains both ALU32 and JMP32 instructions.
-SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \
-                       $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \
-                       $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \
-                       grep 'if w')
-ifneq ($(SUBREG_CODEGEN),)
-TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES))
-endif
+       test_cgroup_attach xdping test_progs-no_alu32
 
+# Also test bpf-gcc, if present
 ifneq ($(BPF_GCC),)
-TEST_GEN_FILES += $(patsubst %.o,bpf_gcc/%.o, $(BPF_OBJ_FILES))
+TEST_GEN_PROGS += test_progs-bpf_gcc
 endif
 
+TEST_GEN_FILES =
+TEST_FILES =
+
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
-       test_libbpf.sh \
        test_xdp_redirect.sh \
        test_xdp_meta.sh \
        test_xdp_veth.sh \
@@ -80,27 +68,33 @@ TEST_PROGS_EXTENDED := with_addr.sh \
        test_xdp_vlan.sh
 
 # Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
        flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
        test_lirc_mode2_user
 
-include ../lib.mk
+TEST_CUSTOM_PROGS = urandom_read
 
-# NOTE: $(OUTPUT) won't get default value if used before lib.mk
-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
-all: $(TEST_CUSTOM_PROGS)
+include ../lib.mk
 
-$(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
+# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# to build individual tests.
+# NOTE: Semicolon at the end is critical to override lib.mk's default static
+# rule for binaries.
+$(notdir $(TEST_GEN_PROGS)                                             \
+        $(TEST_PROGS)                                                  \
+        $(TEST_PROGS_EXTENDED)                                         \
+        $(TEST_GEN_PROGS_EXTENDED)                                     \
+        $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+
+$(OUTPUT)/urandom_read: urandom_read.c
        $(CC) -o $@ $< -Wl,--build-id
 
 $(OUTPUT)/test_stub.o: test_stub.c
-       $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) -c -o $@ $<
+       $(CC) -c $(CFLAGS) -o $@ $<
 
 BPFOBJ := $(OUTPUT)/libbpf.a
 
-$(TEST_GEN_PROGS): $(OUTPUT)/test_stub.o $(BPFOBJ)
-
-$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(OUTPUT)/libbpf.a
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -110,7 +104,6 @@ $(OUTPUT)/test_socket_cookie: cgroup_helpers.c
 $(OUTPUT)/test_sockmap: cgroup_helpers.c
 $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
 $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
-$(OUTPUT)/test_progs: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
 $(OUTPUT)/test_netcnt: cgroup_helpers.c
@@ -126,15 +119,9 @@ force:
 $(BPFOBJ): force
        $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
 
-PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
-
-# Let newer LLVM versions transparently probe the kernel for availability
-# of full BPF instruction set.
-ifeq ($(PROBE),)
-  CPU ?= probe
-else
-  CPU ?= generic
-endif
+BPF_HELPERS := $(BPFDIR)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h)
+$(BPFDIR)/bpf_helper_defs.h:
+       $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ bpf_helper_defs.h
 
 # Get Clang's default includes on this system, as opposed to those seen by
 # '-target bpf'. This fixes "missing" files on some architectures/distros,
@@ -146,9 +133,16 @@ define get_sys_includes
 $(shell $(1) -v -E - </dev/null 2>&1 \
        | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
 endef
+
+# Determine target endianness.
+IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+                       grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
+MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -I. -I./include/uapi -I../../../include/uapi \
-            -I$(OUTPUT)/../usr/include -D__TARGET_ARCH_$(SRCARCH)
+BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)                  \
+            -I. -I./include/uapi -I$(APIDIR)                           \
+            -I$(BPFDIR) -I$(abspath $(OUTPUT)/../usr/include)
 
 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
               -Wno-compare-distinct-pointer-types
@@ -156,167 +150,165 @@ CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
 $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
 
-$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
-$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
-
 $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
-$(OUTPUT)/test_progs.o: flow_dissector_load.h
-
-BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
-BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
-BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
-BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
-                         $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
-                         $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
-                         /bin/rm -f ./llvm_btf_verify.o)
-
-ifneq ($(BTF_LLVM_PROBE),)
-       BPF_CFLAGS += -g
-else
-ifneq ($(BTF_LLC_PROBE),)
-ifneq ($(BTF_PAHOLE_PROBE),)
-ifneq ($(BTF_OBJCOPY_PROBE),)
-       BPF_CFLAGS += -g
-       LLC_FLAGS += -mattr=dwarfris
-       DWARF2BTF = y
-endif
-endif
-endif
-endif
 
-TEST_PROGS_CFLAGS := -I. -I$(OUTPUT)
-TEST_MAPS_CFLAGS := -I. -I$(OUTPUT)
-TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier
-
-ifneq ($(SUBREG_CODEGEN),)
-ALU32_BUILD_DIR = $(OUTPUT)/alu32
-TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32
-$(ALU32_BUILD_DIR):
-       mkdir -p $@
-
-$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read | $(ALU32_BUILD_DIR)
-       cp $< $@
-
-$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\
-                                               $(ALU32_BUILD_DIR)/urandom_read \
-                                               | $(ALU32_BUILD_DIR)
-       $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \
-               -o $(ALU32_BUILD_DIR)/test_progs_32 \
-               test_progs.c test_stub.c cgroup_helpers.c trace_helpers.c prog_tests/*.c \
-               $(OUTPUT)/libbpf.a $(LDLIBS)
-
-$(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H)
-$(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c
-
-$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR)/test_progs_32 \
-                                       | $(ALU32_BUILD_DIR)
-       ($(CLANG) $(BPF_CFLAGS) $(CLANG_CFLAGS) -O2 -target bpf -emit-llvm \
-               -c $< -o - || echo "clang failed") | \
-       $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \
-               -filetype=obj -o $@
-ifeq ($(DWARF2BTF),y)
-       $(BTF_PAHOLE) -J $@
-endif
+# Build BPF object using Clang
+# $1 - input .c file
+# $2 - output .o file
+# $3 - CFLAGS
+# $4 - LDFLAGS
+define CLANG_BPF_BUILD_RULE
+       ($(CLANG) $3 -O2 -target bpf -emit-llvm                         \
+               -c $1 -o - || echo "BPF obj compilation failed") |      \
+       $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+endef
+# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
+define CLANG_NATIVE_BPF_BUILD_RULE
+       ($(CLANG) $3 -O2 -emit-llvm                                     \
+               -c $1 -o - || echo "BPF obj compilation failed") |      \
+       $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+endef
+# Build BPF object using GCC
+define GCC_BPF_BUILD_RULE
+       $(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+endef
+
+# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
+# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
+# Parameters:
+# $1 - test runner base binary name (e.g., test_progs)
+# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+define DEFINE_TEST_RUNNER
+
+TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
+TRUNNER_BINARY := $1$(if $2,-)$2
+TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o,      \
+                                $$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
+TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o,          \
+                                $$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
+TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
+TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
+TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o,            \
+                               $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)))
+
+# Evaluate rules now with extra TRUNNER_XXX variables above already defined
+$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
+
+endef
+
+# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and
+# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with:
+# $1 - test runner base binary name (e.g., test_progs)
+# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+define DEFINE_TEST_RUNNER_RULES
+
+ifeq ($($(TRUNNER_OUTPUT)-dir),)
+$(TRUNNER_OUTPUT)-dir := y
+$(TRUNNER_OUTPUT):
+       mkdir -p $$@
 endif
 
-ifneq ($(BPF_GCC),)
-GCC_SYS_INCLUDES = $(call get_sys_includes,gcc)
-IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
-                       grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
-ifeq ($(IS_LITTLE_ENDIAN),)
-MENDIAN=-mbig-endian
-else
-MENDIAN=-mlittle-endian
+# ensure we set up BPF objects generation rule just once for a given
+# input/output directory combination
+ifeq ($($(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs),)
+$(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
+$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:                            \
+                    $(TRUNNER_BPF_PROGS_DIR)/%.c                       \
+                    $(TRUNNER_BPF_PROGS_DIR)/*.h                       \
+                    $$(BPF_HELPERS) | $(TRUNNER_OUTPUT)
+       $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,                      \
+                                         $(TRUNNER_BPF_CFLAGS),        \
+                                         $(TRUNNER_BPF_LDFLAGS))
 endif
-BPF_GCC_CFLAGS = $(GCC_SYS_INCLUDES) $(MENDIAN)
-BPF_GCC_BUILD_DIR = $(OUTPUT)/bpf_gcc
-TEST_CUSTOM_PROGS += $(BPF_GCC_BUILD_DIR)/test_progs_bpf_gcc
-$(BPF_GCC_BUILD_DIR):
-       mkdir -p $@
-
-$(BPF_GCC_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read | $(BPF_GCC_BUILD_DIR)
-       cp $< $@
-
-$(BPF_GCC_BUILD_DIR)/test_progs_bpf_gcc: $(OUTPUT)/test_progs \
-                                        | $(BPF_GCC_BUILD_DIR)
-       cp $< $@
-
-$(BPF_GCC_BUILD_DIR)/%.o: progs/%.c $(BPF_GCC_BUILD_DIR)/test_progs_bpf_gcc \
-                         | $(BPF_GCC_BUILD_DIR)
-       $(BPF_GCC) $(BPF_CFLAGS) $(BPF_GCC_CFLAGS) -O2 -c $< -o $@
+
+# ensure we set up tests.h header generation rule just once
+ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
+$(TRUNNER_TESTS_DIR)-tests-hdr := y
+$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
+       $$(shell ( cd $(TRUNNER_TESTS_DIR);                             \
+                 echo '/* Generated header, do not edit */';           \
+                 ls *.c 2> /dev/null |                                 \
+                       sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@';      \
+                ) > $$@)
 endif
 
-# Have one program compiled without "-target bpf" to test whether libbpf loads
-# it successfully
-$(OUTPUT)/test_xdp.o: progs/test_xdp.c
-       ($(CLANG) $(BPF_CFLAGS) $(CLANG_CFLAGS) -O2 -emit-llvm -c $< -o - || \
-               echo "clang failed") | \
-       $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
-ifeq ($(DWARF2BTF),y)
-       $(BTF_PAHOLE) -J $@
+# compile individual test files
+# Note: we cd into output directory to ensure embedded BPF object is found
+$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o:                      \
+                     $(TRUNNER_TESTS_DIR)/%.c                          \
+                     $(TRUNNER_EXTRA_HDRS)                             \
+                     $(TRUNNER_BPF_OBJS)                               \
+                     $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+       cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+
+$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:                          \
+                      %.c                                              \
+                      $(TRUNNER_EXTRA_HDRS)                            \
+                      $(TRUNNER_TESTS_HDR)                             \
+                      $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+       $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+
+$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
+ifneq ($2,)
+       # only copy extra resources if in flavored build
+       cp -a $$^ $(TRUNNER_OUTPUT)/
 endif
 
-$(OUTPUT)/%.o: progs/%.c
-       ($(CLANG) $(BPF_CFLAGS) $(CLANG_CFLAGS) -O2 -target bpf -emit-llvm \
-               -c $< -o - || echo "clang failed") | \
-       $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
-ifeq ($(DWARF2BTF),y)
-       $(BTF_PAHOLE) -J $@
+$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)                      \
+                            $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ)           \
+                            | $(TRUNNER_BINARY)-extras
+       $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+
+endef
+
+# Define test_progs test runner.
+TRUNNER_TESTS_DIR := prog_tests
+TRUNNER_BPF_PROGS_DIR := progs
+TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
+                        flow_dissector_load.h
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read                          \
+                      $(wildcard progs/btf_dump_test_case_*.c)
+TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := -I. -I$(OUTPUT) $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_LDFLAGS := -mattr=+alu32
+$(eval $(call DEFINE_TEST_RUNNER,test_progs))
+
+# Define test_progs-no_alu32 test runner.
+TRUNNER_BPF_LDFLAGS :=
+$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
+
+# Define test_progs BPF-GCC-flavored test runner.
+ifneq ($(BPF_GCC),)
+TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
+TRUNNER_BPF_LDFLAGS :=
+$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
 endif
 
-PROG_TESTS_DIR = $(OUTPUT)/prog_tests
-$(PROG_TESTS_DIR):
-       mkdir -p $@
-PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h
-PROG_TESTS_FILES := $(wildcard prog_tests/*.c)
-test_progs.c: $(PROG_TESTS_H)
-$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
-$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_FILES) | $(PROG_TESTS_H)
-$(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR)
-       $(shell ( cd prog_tests/; \
-                 echo '/* Generated header, do not edit */'; \
-                 ls *.c 2> /dev/null | \
-                       sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \
-                ) > $(PROG_TESTS_H))
-
-MAP_TESTS_DIR = $(OUTPUT)/map_tests
-$(MAP_TESTS_DIR):
-       mkdir -p $@
-MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h
-MAP_TESTS_FILES := $(wildcard map_tests/*.c)
-test_maps.c: $(MAP_TESTS_H)
-$(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS)
-$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_FILES) | $(MAP_TESTS_H)
-$(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR)
-       $(shell ( cd map_tests/; \
-                 echo '/* Generated header, do not edit */'; \
-                 echo '#ifdef DECLARE'; \
-                 ls *.c 2> /dev/null | \
-                       sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \
-                 echo '#endif'; \
-                 echo '#ifdef CALL'; \
-                 ls *.c 2> /dev/null | \
-                       sed -e 's@\([^\.]*\)\.c@test_\1();@'; \
-                 echo '#endif' \
-                ) > $(MAP_TESTS_H))
-
-VERIFIER_TESTS_DIR = $(OUTPUT)/verifier
-$(VERIFIER_TESTS_DIR):
-       mkdir -p $@
-VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h
-VERIFIER_TEST_FILES := $(wildcard verifier/*.c)
-test_verifier.c: $(VERIFIER_TESTS_H)
-$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
-$(OUTPUT)/test_verifier: test_verifier.c | $(VERIFIER_TEST_FILES) $(VERIFIER_TESTS_H)
-$(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR)
+# Define test_maps test runner.
+TRUNNER_TESTS_DIR := map_tests
+TRUNNER_BPF_PROGS_DIR := progs
+TRUNNER_EXTRA_SOURCES := test_maps.c
+TRUNNER_EXTRA_FILES :=
+TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
+TRUNNER_BPF_CFLAGS :=
+TRUNNER_BPF_LDFLAGS :=
+$(eval $(call DEFINE_TEST_RUNNER,test_maps))
+
+# Define test_verifier test runner.
+# It is much simpler than test_maps/test_progs and sufficiently different from
+# them (e.g., test.h is using completely pattern), that it's worth just
+# explicitly defining all the rules explicitly.
+verifier/tests.h: verifier/*.c
        $(shell ( cd verifier/; \
                  echo '/* Generated header, do not edit */'; \
                  echo '#ifdef FILL_ARRAY'; \
-                 ls *.c 2> /dev/null | \
-                       sed -e 's@\(.*\)@#include \"\1\"@'; \
+                 ls *.c 2> /dev/null | sed -e 's@\(.*\)@#include \"\1\"@'; \
                  echo '#endif' \
-                ) > $(VERIFIER_TESTS_H))
+               ) > verifier/tests.h)
+$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
+       $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) $(BPF_GCC_BUILD_DIR) \
-       $(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \
-       feature
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)                                    \
+       prog_tests/tests.h map_tests/tests.h verifier/tests.h           \
+       feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
deleted file mode 100644 (file)
index fbe2800..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __BPF_ENDIAN__
-#define __BPF_ENDIAN__
-
-#include <linux/stddef.h>
-#include <linux/swab.h>
-
-/* LLVM's BPF target selects the endianness of the CPU
- * it compiles on, or the user specifies (bpfel/bpfeb),
- * respectively. The used __BYTE_ORDER__ is defined by
- * the compiler, we cannot rely on __BYTE_ORDER from
- * libc headers, since it doesn't reflect the actual
- * requested byte order.
- *
- * Note, LLVM's BPF target has different __builtin_bswapX()
- * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
- * in bpfel and bpfeb case, which means below, that we map
- * to cpu_to_be16(). We could use it unconditionally in BPF
- * case, but better not rely on it, so that this header here
- * can be used from application and BPF program side, which
- * use different targets.
- */
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-# define __bpf_ntohs(x)                        __builtin_bswap16(x)
-# define __bpf_htons(x)                        __builtin_bswap16(x)
-# define __bpf_constant_ntohs(x)       ___constant_swab16(x)
-# define __bpf_constant_htons(x)       ___constant_swab16(x)
-# define __bpf_ntohl(x)                        __builtin_bswap32(x)
-# define __bpf_htonl(x)                        __builtin_bswap32(x)
-# define __bpf_constant_ntohl(x)       ___constant_swab32(x)
-# define __bpf_constant_htonl(x)       ___constant_swab32(x)
-# define __bpf_be64_to_cpu(x)          __builtin_bswap64(x)
-# define __bpf_cpu_to_be64(x)          __builtin_bswap64(x)
-# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
-# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-# define __bpf_ntohs(x)                        (x)
-# define __bpf_htons(x)                        (x)
-# define __bpf_constant_ntohs(x)       (x)
-# define __bpf_constant_htons(x)       (x)
-# define __bpf_ntohl(x)                        (x)
-# define __bpf_htonl(x)                        (x)
-# define __bpf_constant_ntohl(x)       (x)
-# define __bpf_constant_htonl(x)       (x)
-# define __bpf_be64_to_cpu(x)          (x)
-# define __bpf_cpu_to_be64(x)          (x)
-# define __bpf_constant_be64_to_cpu(x)  (x)
-# define __bpf_constant_cpu_to_be64(x)  (x)
-#else
-# error "Fix your compiler's __BYTE_ORDER__?!"
-#endif
-
-#define bpf_htons(x)                           \
-       (__builtin_constant_p(x) ?              \
-        __bpf_constant_htons(x) : __bpf_htons(x))
-#define bpf_ntohs(x)                           \
-       (__builtin_constant_p(x) ?              \
-        __bpf_constant_ntohs(x) : __bpf_ntohs(x))
-#define bpf_htonl(x)                           \
-       (__builtin_constant_p(x) ?              \
-        __bpf_constant_htonl(x) : __bpf_htonl(x))
-#define bpf_ntohl(x)                           \
-       (__builtin_constant_p(x) ?              \
-        __bpf_constant_ntohl(x) : __bpf_ntohl(x))
-#define bpf_cpu_to_be64(x)                     \
-       (__builtin_constant_p(x) ?              \
-        __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
-#define bpf_be64_to_cpu(x)                     \
-       (__builtin_constant_p(x) ?              \
-        __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
-
-#endif /* __BPF_ENDIAN__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
deleted file mode 100644 (file)
index 54a5069..0000000
+++ /dev/null
@@ -1,535 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __BPF_HELPERS__
-#define __BPF_HELPERS__
-
-#define __uint(name, val) int (*name)[val]
-#define __type(name, val) val *name
-
-/* helper macro to print out debug messages */
-#define bpf_printk(fmt, ...)                           \
-({                                                     \
-       char ____fmt[] = fmt;                           \
-       bpf_trace_printk(____fmt, sizeof(____fmt),      \
-                        ##__VA_ARGS__);                \
-})
-
-#ifdef __clang__
-
-/* helper macro to place programs, maps, license in
- * different sections in elf_bpf file. Section names
- * are interpreted by elf_bpf loader
- */
-#define SEC(NAME) __attribute__((section(NAME), used))
-
-/* helper functions called from eBPF programs written in C */
-static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
-       (void *) BPF_FUNC_map_lookup_elem;
-static int (*bpf_map_update_elem)(void *map, const void *key, const void *value,
-                                 unsigned long long flags) =
-       (void *) BPF_FUNC_map_update_elem;
-static int (*bpf_map_delete_elem)(void *map, const void *key) =
-       (void *) BPF_FUNC_map_delete_elem;
-static int (*bpf_map_push_elem)(void *map, const void *value,
-                               unsigned long long flags) =
-       (void *) BPF_FUNC_map_push_elem;
-static int (*bpf_map_pop_elem)(void *map, void *value) =
-       (void *) BPF_FUNC_map_pop_elem;
-static int (*bpf_map_peek_elem)(void *map, void *value) =
-       (void *) BPF_FUNC_map_peek_elem;
-static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) =
-       (void *) BPF_FUNC_probe_read;
-static unsigned long long (*bpf_ktime_get_ns)(void) =
-       (void *) BPF_FUNC_ktime_get_ns;
-static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
-       (void *) BPF_FUNC_trace_printk;
-static void (*bpf_tail_call)(void *ctx, void *map, int index) =
-       (void *) BPF_FUNC_tail_call;
-static unsigned long long (*bpf_get_smp_processor_id)(void) =
-       (void *) BPF_FUNC_get_smp_processor_id;
-static unsigned long long (*bpf_get_current_pid_tgid)(void) =
-       (void *) BPF_FUNC_get_current_pid_tgid;
-static unsigned long long (*bpf_get_current_uid_gid)(void) =
-       (void *) BPF_FUNC_get_current_uid_gid;
-static int (*bpf_get_current_comm)(void *buf, int buf_size) =
-       (void *) BPF_FUNC_get_current_comm;
-static unsigned long long (*bpf_perf_event_read)(void *map,
-                                                unsigned long long flags) =
-       (void *) BPF_FUNC_perf_event_read;
-static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
-       (void *) BPF_FUNC_clone_redirect;
-static int (*bpf_redirect)(int ifindex, int flags) =
-       (void *) BPF_FUNC_redirect;
-static int (*bpf_redirect_map)(void *map, int key, int flags) =
-       (void *) BPF_FUNC_redirect_map;
-static int (*bpf_perf_event_output)(void *ctx, void *map,
-                                   unsigned long long flags, void *data,
-                                   int size) =
-       (void *) BPF_FUNC_perf_event_output;
-static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
-       (void *) BPF_FUNC_get_stackid;
-static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =
-       (void *) BPF_FUNC_probe_write_user;
-static int (*bpf_current_task_under_cgroup)(void *map, int index) =
-       (void *) BPF_FUNC_current_task_under_cgroup;
-static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
-       (void *) BPF_FUNC_skb_get_tunnel_key;
-static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
-       (void *) BPF_FUNC_skb_set_tunnel_key;
-static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
-       (void *) BPF_FUNC_skb_get_tunnel_opt;
-static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
-       (void *) BPF_FUNC_skb_set_tunnel_opt;
-static unsigned long long (*bpf_get_prandom_u32)(void) =
-       (void *) BPF_FUNC_get_prandom_u32;
-static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
-       (void *) BPF_FUNC_xdp_adjust_head;
-static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
-       (void *) BPF_FUNC_xdp_adjust_meta;
-static int (*bpf_get_socket_cookie)(void *ctx) =
-       (void *) BPF_FUNC_get_socket_cookie;
-static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
-                            int optlen) =
-       (void *) BPF_FUNC_setsockopt;
-static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
-                            int optlen) =
-       (void *) BPF_FUNC_getsockopt;
-static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
-       (void *) BPF_FUNC_sock_ops_cb_flags_set;
-static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
-       (void *) BPF_FUNC_sk_redirect_map;
-static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
-       (void *) BPF_FUNC_sk_redirect_hash;
-static int (*bpf_sock_map_update)(void *map, void *key, void *value,
-                                 unsigned long long flags) =
-       (void *) BPF_FUNC_sock_map_update;
-static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
-                                  unsigned long long flags) =
-       (void *) BPF_FUNC_sock_hash_update;
-static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
-                                       void *buf, unsigned int buf_size) =
-       (void *) BPF_FUNC_perf_event_read_value;
-static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
-                                      unsigned int buf_size) =
-       (void *) BPF_FUNC_perf_prog_read_value;
-static int (*bpf_override_return)(void *ctx, unsigned long rc) =
-       (void *) BPF_FUNC_override_return;
-static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
-       (void *) BPF_FUNC_msg_redirect_map;
-static int (*bpf_msg_redirect_hash)(void *ctx,
-                                   void *map, void *key, int flags) =
-       (void *) BPF_FUNC_msg_redirect_hash;
-static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
-       (void *) BPF_FUNC_msg_apply_bytes;
-static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
-       (void *) BPF_FUNC_msg_cork_bytes;
-static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
-       (void *) BPF_FUNC_msg_pull_data;
-static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
-       (void *) BPF_FUNC_msg_push_data;
-static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) =
-       (void *) BPF_FUNC_msg_pop_data;
-static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
-       (void *) BPF_FUNC_bind;
-static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
-       (void *) BPF_FUNC_xdp_adjust_tail;
-static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
-                                    int size, int flags) =
-       (void *) BPF_FUNC_skb_get_xfrm_state;
-static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
-       (void *) BPF_FUNC_sk_select_reuseport;
-static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
-       (void *) BPF_FUNC_get_stack;
-static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
-                            int plen, __u32 flags) =
-       (void *) BPF_FUNC_fib_lookup;
-static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
-                                unsigned int len) =
-       (void *) BPF_FUNC_lwt_push_encap;
-static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
-                                      void *from, unsigned int len) =
-       (void *) BPF_FUNC_lwt_seg6_store_bytes;
-static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
-                                 unsigned int param_len) =
-       (void *) BPF_FUNC_lwt_seg6_action;
-static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
-                                     unsigned int len) =
-       (void *) BPF_FUNC_lwt_seg6_adjust_srh;
-static int (*bpf_rc_repeat)(void *ctx) =
-       (void *) BPF_FUNC_rc_repeat;
-static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
-                            unsigned long long scancode, unsigned int toggle) =
-       (void *) BPF_FUNC_rc_keydown;
-static unsigned long long (*bpf_get_current_cgroup_id)(void) =
-       (void *) BPF_FUNC_get_current_cgroup_id;
-static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
-       (void *) BPF_FUNC_get_local_storage;
-static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
-       (void *) BPF_FUNC_skb_cgroup_id;
-static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
-       (void *) BPF_FUNC_skb_ancestor_cgroup_id;
-static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
-                                            struct bpf_sock_tuple *tuple,
-                                            int size, unsigned long long netns_id,
-                                            unsigned long long flags) =
-       (void *) BPF_FUNC_sk_lookup_tcp;
-static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx,
-                                            struct bpf_sock_tuple *tuple,
-                                            int size, unsigned long long netns_id,
-                                            unsigned long long flags) =
-       (void *) BPF_FUNC_skc_lookup_tcp;
-static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
-                                            struct bpf_sock_tuple *tuple,
-                                            int size, unsigned long long netns_id,
-                                            unsigned long long flags) =
-       (void *) BPF_FUNC_sk_lookup_udp;
-static int (*bpf_sk_release)(struct bpf_sock *sk) =
-       (void *) BPF_FUNC_sk_release;
-static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
-       (void *) BPF_FUNC_skb_vlan_push;
-static int (*bpf_skb_vlan_pop)(void *ctx) =
-       (void *) BPF_FUNC_skb_vlan_pop;
-static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) =
-       (void *) BPF_FUNC_rc_pointer_rel;
-static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
-       (void *) BPF_FUNC_spin_lock;
-static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
-       (void *) BPF_FUNC_spin_unlock;
-static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
-       (void *) BPF_FUNC_sk_fullsock;
-static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
-       (void *) BPF_FUNC_tcp_sock;
-static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
-       (void *) BPF_FUNC_get_listener_sock;
-static int (*bpf_skb_ecn_set_ce)(void *ctx) =
-       (void *) BPF_FUNC_skb_ecn_set_ce;
-static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
-           void *ip, int ip_len, void *tcp, int tcp_len) =
-       (void *) BPF_FUNC_tcp_check_syncookie;
-static int (*bpf_sysctl_get_name)(void *ctx, char *buf,
-                                 unsigned long long buf_len,
-                                 unsigned long long flags) =
-       (void *) BPF_FUNC_sysctl_get_name;
-static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf,
-                                          unsigned long long buf_len) =
-       (void *) BPF_FUNC_sysctl_get_current_value;
-static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf,
-                                      unsigned long long buf_len) =
-       (void *) BPF_FUNC_sysctl_get_new_value;
-static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf,
-                                      unsigned long long buf_len) =
-       (void *) BPF_FUNC_sysctl_set_new_value;
-static int (*bpf_strtol)(const char *buf, unsigned long long buf_len,
-                        unsigned long long flags, long *res) =
-       (void *) BPF_FUNC_strtol;
-static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len,
-                         unsigned long long flags, unsigned long *res) =
-       (void *) BPF_FUNC_strtoul;
-static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
-                                  void *value, __u64 flags) =
-       (void *) BPF_FUNC_sk_storage_get;
-static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
-       (void *)BPF_FUNC_sk_storage_delete;
-static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
-static long long (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *ip,
-                                         int ip_len, void *tcp, int tcp_len) =
-       (void *) BPF_FUNC_tcp_gen_syncookie;
-
-/* llvm builtin functions that eBPF C program may use to
- * emit BPF_LD_ABS and BPF_LD_IND instructions
- */
-struct sk_buff;
-unsigned long long load_byte(void *skb,
-                            unsigned long long off) asm("llvm.bpf.load.byte");
-unsigned long long load_half(void *skb,
-                            unsigned long long off) asm("llvm.bpf.load.half");
-unsigned long long load_word(void *skb,
-                            unsigned long long off) asm("llvm.bpf.load.word");
-
-/* a helper structure used by eBPF C program
- * to describe map attributes to elf_bpf loader
- */
-struct bpf_map_def {
-       unsigned int type;
-       unsigned int key_size;
-       unsigned int value_size;
-       unsigned int max_entries;
-       unsigned int map_flags;
-       unsigned int inner_map_idx;
-       unsigned int numa_node;
-};
-
-#else
-
-#include <bpf-helpers.h>
-
-#endif
-
-#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)         \
-       struct ____btf_map_##name {                             \
-               type_key key;                                   \
-               type_val value;                                 \
-       };                                                      \
-       struct ____btf_map_##name                               \
-       __attribute__ ((section(".maps." #name), used))         \
-               ____btf_map_##name = { }
-
-static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
-       (void *) BPF_FUNC_skb_load_bytes;
-static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
-       (void *) BPF_FUNC_skb_load_bytes_relative;
-static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
-       (void *) BPF_FUNC_skb_store_bytes;
-static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
-       (void *) BPF_FUNC_l3_csum_replace;
-static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
-       (void *) BPF_FUNC_l4_csum_replace;
-static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
-       (void *) BPF_FUNC_csum_diff;
-static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
-       (void *) BPF_FUNC_skb_under_cgroup;
-static int (*bpf_skb_change_head)(void *, int len, int flags) =
-       (void *) BPF_FUNC_skb_change_head;
-static int (*bpf_skb_pull_data)(void *, int len) =
-       (void *) BPF_FUNC_skb_pull_data;
-static unsigned int (*bpf_get_cgroup_classid)(void *ctx) =
-       (void *) BPF_FUNC_get_cgroup_classid;
-static unsigned int (*bpf_get_route_realm)(void *ctx) =
-       (void *) BPF_FUNC_get_route_realm;
-static int (*bpf_skb_change_proto)(void *ctx, __be16 proto, __u64 flags) =
-       (void *) BPF_FUNC_skb_change_proto;
-static int (*bpf_skb_change_type)(void *ctx, __u32 type) =
-       (void *) BPF_FUNC_skb_change_type;
-static unsigned int (*bpf_get_hash_recalc)(void *ctx) =
-       (void *) BPF_FUNC_get_hash_recalc;
-static unsigned long long (*bpf_get_current_task)(void) =
-       (void *) BPF_FUNC_get_current_task;
-static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) =
-       (void *) BPF_FUNC_skb_change_tail;
-static long long (*bpf_csum_update)(void *ctx, __u32 csum) =
-       (void *) BPF_FUNC_csum_update;
-static void (*bpf_set_hash_invalid)(void *ctx) =
-       (void *) BPF_FUNC_set_hash_invalid;
-static int (*bpf_get_numa_node_id)(void) =
-       (void *) BPF_FUNC_get_numa_node_id;
-static int (*bpf_probe_read_str)(void *ctx, __u32 size,
-                                const void *unsafe_ptr) =
-       (void *) BPF_FUNC_probe_read_str;
-static unsigned int (*bpf_get_socket_uid)(void *ctx) =
-       (void *) BPF_FUNC_get_socket_uid;
-static unsigned int (*bpf_set_hash)(void *ctx, __u32 hash) =
-       (void *) BPF_FUNC_set_hash;
-static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
-                                 unsigned long long flags) =
-       (void *) BPF_FUNC_skb_adjust_room;
-
-/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
-#if defined(__TARGET_ARCH_x86)
-       #define bpf_target_x86
-       #define bpf_target_defined
-#elif defined(__TARGET_ARCH_s390)
-       #define bpf_target_s390
-       #define bpf_target_defined
-#elif defined(__TARGET_ARCH_arm)
-       #define bpf_target_arm
-       #define bpf_target_defined
-#elif defined(__TARGET_ARCH_arm64)
-       #define bpf_target_arm64
-       #define bpf_target_defined
-#elif defined(__TARGET_ARCH_mips)
-       #define bpf_target_mips
-       #define bpf_target_defined
-#elif defined(__TARGET_ARCH_powerpc)
-       #define bpf_target_powerpc
-       #define bpf_target_defined
-#elif defined(__TARGET_ARCH_sparc)
-       #define bpf_target_sparc
-       #define bpf_target_defined
-#else
-       #undef bpf_target_defined
-#endif
-
-/* Fall back to what the compiler says */
-#ifndef bpf_target_defined
-#if defined(__x86_64__)
-       #define bpf_target_x86
-#elif defined(__s390__)
-       #define bpf_target_s390
-#elif defined(__arm__)
-       #define bpf_target_arm
-#elif defined(__aarch64__)
-       #define bpf_target_arm64
-#elif defined(__mips__)
-       #define bpf_target_mips
-#elif defined(__powerpc__)
-       #define bpf_target_powerpc
-#elif defined(__sparc__)
-       #define bpf_target_sparc
-#endif
-#endif
-
-#if defined(bpf_target_x86)
-
-#ifdef __KERNEL__
-#define PT_REGS_PARM1(x) ((x)->di)
-#define PT_REGS_PARM2(x) ((x)->si)
-#define PT_REGS_PARM3(x) ((x)->dx)
-#define PT_REGS_PARM4(x) ((x)->cx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->sp)
-#define PT_REGS_FP(x) ((x)->bp)
-#define PT_REGS_RC(x) ((x)->ax)
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->ip)
-#else
-#ifdef __i386__
-/* i386 kernel is built with -mregparm=3 */
-#define PT_REGS_PARM1(x) ((x)->eax)
-#define PT_REGS_PARM2(x) ((x)->edx)
-#define PT_REGS_PARM3(x) ((x)->ecx)
-#define PT_REGS_PARM4(x) 0
-#define PT_REGS_PARM5(x) 0
-#define PT_REGS_RET(x) ((x)->esp)
-#define PT_REGS_FP(x) ((x)->ebp)
-#define PT_REGS_RC(x) ((x)->eax)
-#define PT_REGS_SP(x) ((x)->esp)
-#define PT_REGS_IP(x) ((x)->eip)
-#else
-#define PT_REGS_PARM1(x) ((x)->rdi)
-#define PT_REGS_PARM2(x) ((x)->rsi)
-#define PT_REGS_PARM3(x) ((x)->rdx)
-#define PT_REGS_PARM4(x) ((x)->rcx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->rsp)
-#define PT_REGS_FP(x) ((x)->rbp)
-#define PT_REGS_RC(x) ((x)->rax)
-#define PT_REGS_SP(x) ((x)->rsp)
-#define PT_REGS_IP(x) ((x)->rip)
-#endif
-#endif
-
-#elif defined(bpf_target_s390)
-
-/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_S390 const volatile user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
-#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
-#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
-#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
-#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
-#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
-#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
-
-#elif defined(bpf_target_arm)
-
-#define PT_REGS_PARM1(x) ((x)->uregs[0])
-#define PT_REGS_PARM2(x) ((x)->uregs[1])
-#define PT_REGS_PARM3(x) ((x)->uregs[2])
-#define PT_REGS_PARM4(x) ((x)->uregs[3])
-#define PT_REGS_PARM5(x) ((x)->uregs[4])
-#define PT_REGS_RET(x) ((x)->uregs[14])
-#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->uregs[0])
-#define PT_REGS_SP(x) ((x)->uregs[13])
-#define PT_REGS_IP(x) ((x)->uregs[12])
-
-#elif defined(bpf_target_arm64)
-
-/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_ARM64 const volatile struct user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
-#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
-#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
-#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
-#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
-#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
-#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
-
-#elif defined(bpf_target_mips)
-
-#define PT_REGS_PARM1(x) ((x)->regs[4])
-#define PT_REGS_PARM2(x) ((x)->regs[5])
-#define PT_REGS_PARM3(x) ((x)->regs[6])
-#define PT_REGS_PARM4(x) ((x)->regs[7])
-#define PT_REGS_PARM5(x) ((x)->regs[8])
-#define PT_REGS_RET(x) ((x)->regs[31])
-#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[1])
-#define PT_REGS_SP(x) ((x)->regs[29])
-#define PT_REGS_IP(x) ((x)->cp0_epc)
-
-#elif defined(bpf_target_powerpc)
-
-#define PT_REGS_PARM1(x) ((x)->gpr[3])
-#define PT_REGS_PARM2(x) ((x)->gpr[4])
-#define PT_REGS_PARM3(x) ((x)->gpr[5])
-#define PT_REGS_PARM4(x) ((x)->gpr[6])
-#define PT_REGS_PARM5(x) ((x)->gpr[7])
-#define PT_REGS_RC(x) ((x)->gpr[3])
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->nip)
-
-#elif defined(bpf_target_sparc)
-
-#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
-#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
-#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
-#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
-#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
-#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
-
-/* Should this also be a bpf_target check for the sparc case? */
-#if defined(__arch64__)
-#define PT_REGS_IP(x) ((x)->tpc)
-#else
-#define PT_REGS_IP(x) ((x)->pc)
-#endif
-
-#endif
-
-#if defined(bpf_target_powerpc)
-#define BPF_KPROBE_READ_RET_IP(ip, ctx)                ({ (ip) = (ctx)->link; })
-#define BPF_KRETPROBE_READ_RET_IP              BPF_KPROBE_READ_RET_IP
-#elif defined(bpf_target_sparc)
-#define BPF_KPROBE_READ_RET_IP(ip, ctx)                ({ (ip) = PT_REGS_RET(ctx); })
-#define BPF_KRETPROBE_READ_RET_IP              BPF_KPROBE_READ_RET_IP
-#else
-#define BPF_KPROBE_READ_RET_IP(ip, ctx)                ({                              \
-               bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
-#define BPF_KRETPROBE_READ_RET_IP(ip, ctx)     ({                              \
-               bpf_probe_read(&(ip), sizeof(ip),                               \
-                               (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
-#endif
-
-/*
- * BPF_CORE_READ abstracts away bpf_probe_read() call and captures offset
- * relocation for source address using __builtin_preserve_access_index()
- * built-in, provided by Clang.
- *
- * __builtin_preserve_access_index() takes as an argument an expression of
- * taking an address of a field within struct/union. It makes compiler emit
- * a relocation, which records BTF type ID describing root struct/union and an
- * accessor string which describes exact embedded field that was used to take
- * an address. See detailed description of this relocation format and
- * semantics in comments to struct bpf_offset_reloc in libbpf_internal.h.
- *
- * This relocation allows libbpf to adjust BPF instruction to use correct
- * actual field offset, based on target kernel BTF type that matches original
- * (local) BTF, used to record relocation.
- */
-#define BPF_CORE_READ(dst, src)                                                \
-       bpf_probe_read((dst), sizeof(*(src)),                           \
-                      __builtin_preserve_access_index(src))
-
-#endif
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
new file mode 100644 (file)
index 0000000..6f89887
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_LEGACY__
+#define __BPF_LEGACY__
+
+/*
+ * legacy bpf_map_def with extra fields supported only by bpf_load(), do not
+ * use outside of samples/bpf
+ */
+struct bpf_map_def_legacy {
+       unsigned int type;
+       unsigned int key_size;
+       unsigned int value_size;
+       unsigned int max_entries;
+       unsigned int map_flags;
+       unsigned int inner_map_idx;
+       unsigned int numa_node;
+};
+
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)         \
+       struct ____btf_map_##name {                             \
+               type_key key;                                   \
+               type_val value;                                 \
+       };                                                      \
+       struct ____btf_map_##name                               \
+       __attribute__ ((section(".maps." #name), used))         \
+               ____btf_map_##name = { }
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+unsigned long long load_byte(void *skb,
+                            unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+                            unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+                            unsigned long long off) asm("llvm.bpf.load.word");
+
+#endif
+
index e95c33e..0fb910d 100644 (file)
@@ -41,7 +41,7 @@
  *
  * If successful, 0 is returned.
  */
-int enable_all_controllers(char *cgroup_path)
+static int enable_all_controllers(char *cgroup_path)
 {
        char path[PATH_MAX + 1];
        char buf[PATH_MAX];
@@ -98,7 +98,7 @@ int enable_all_controllers(char *cgroup_path)
  */
 int setup_cgroup_environment(void)
 {
-       char cgroup_workdir[PATH_MAX + 1];
+       char cgroup_workdir[PATH_MAX - 24];
 
        format_cgroup_path(cgroup_workdir, "");
 
index 5ecc267..a83111a 100644 (file)
@@ -1,6 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
+#define EMBED_FILE(NAME, PATH)                                             \
+asm (                                                                      \
+"      .pushsection \".rodata\", \"a\", @progbits              \n"         \
+"      .global "#NAME"_data                                    \n"         \
+#NAME"_data:                                                   \n"         \
+"      .incbin \"" PATH "\"                                    \n"         \
+#NAME"_data_end:                                               \n"         \
+"      .global "#NAME"_size                                    \n"         \
+"      .type "#NAME"_size, @object                             \n"         \
+"      .size "#NAME"_size, 4                                   \n"         \
+"      .align 4,                                               \n"         \
+#NAME"_size:                                                   \n"         \
+"      .int "#NAME"_data_end - "#NAME"_data                    \n"         \
+"      .popsection                                             \n"         \
+);                                                                         \
+extern char NAME##_data[];                                                 \
+extern int NAME##_size;
+
 ssize_t get_base_addr() {
        size_t start;
        char buf[256];
@@ -21,6 +39,8 @@ ssize_t get_base_addr() {
        return -EINVAL;
 }
 
+EMBED_FILE(probe, "test_attach_probe.o");
+
 void test_attach_probe(void)
 {
        const char *kprobe_name = "kprobe/sys_nanosleep";
@@ -29,11 +49,15 @@ void test_attach_probe(void)
        const char *uretprobe_name = "uretprobe/trigger_func";
        const int kprobe_idx = 0, kretprobe_idx = 1;
        const int uprobe_idx = 2, uretprobe_idx = 3;
-       const char *file = "./test_attach_probe.o";
+       const char *obj_name = "attach_probe";
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
+               .object_name = obj_name,
+               .relaxed_maps = true,
+       );
        struct bpf_program *kprobe_prog, *kretprobe_prog;
        struct bpf_program *uprobe_prog, *uretprobe_prog;
        struct bpf_object *obj;
-       int err, prog_fd, duration = 0, res;
+       int err, duration = 0, res;
        struct bpf_link *kprobe_link = NULL;
        struct bpf_link *kretprobe_link = NULL;
        struct bpf_link *uprobe_link = NULL;
@@ -48,11 +72,16 @@ void test_attach_probe(void)
                return;
        uprobe_offset = (size_t)&get_base_addr - base_addr;
 
-       /* load programs */
-       err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
-       if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+       /* open object */
+       obj = bpf_object__open_mem(probe_data, probe_size, &open_opts);
+       if (CHECK(IS_ERR(obj), "obj_open_mem", "err %ld\n", PTR_ERR(obj)))
                return;
 
+       if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
+                 "wrong obj name '%s', expected '%s'\n",
+                 bpf_object__name(obj), obj_name))
+               goto cleanup;
+
        kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
        if (CHECK(!kprobe_prog, "find_probe",
                  "prog '%s' not found\n", kprobe_name))
@@ -70,6 +99,11 @@ void test_attach_probe(void)
                  "prog '%s' not found\n", uretprobe_name))
                goto cleanup;
 
+       /* create maps && load programs */
+       err = bpf_object__load(obj);
+       if (CHECK(err, "obj_load", "err %d\n", err))
+               goto cleanup;
+
        /* load maps */
        results_map_fd = bpf_find_map(__func__, obj, "results_map");
        if (CHECK(results_map_fd < 0, "find_results_map",
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
new file mode 100644 (file)
index 0000000..7390d30
--- /dev/null
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+       vfprintf(ctx, fmt, args);
+}
+
+static struct btf_dump_test_case {
+       const char *name;
+       const char *file;
+       struct btf_dump_opts opts;
+} btf_dump_test_cases[] = {
+       {"btf_dump: syntax", "btf_dump_test_case_syntax", {}},
+       {"btf_dump: ordering", "btf_dump_test_case_ordering", {}},
+       {"btf_dump: padding", "btf_dump_test_case_padding", {}},
+       {"btf_dump: packing", "btf_dump_test_case_packing", {}},
+       {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}},
+       {"btf_dump: multidim", "btf_dump_test_case_multidim", {}},
+       {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}},
+};
+
+static int btf_dump_all_types(const struct btf *btf,
+                             const struct btf_dump_opts *opts)
+{
+       size_t type_cnt = btf__get_nr_types(btf);
+       struct btf_dump *d;
+       int err = 0, id;
+
+       d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
+       if (IS_ERR(d))
+               return PTR_ERR(d);
+
+       for (id = 1; id <= type_cnt; id++) {
+               err = btf_dump__dump_type(d, id);
+               if (err)
+                       goto done;
+       }
+
+done:
+       btf_dump__free(d);
+       return err;
+}
+
+static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
+{
+       char test_file[256], out_file[256], diff_cmd[1024];
+       struct btf *btf = NULL;
+       int err = 0, fd = -1;
+       FILE *f = NULL;
+
+       snprintf(test_file, sizeof(test_file), "%s.o", t->file);
+
+       btf = btf__parse_elf(test_file, NULL);
+       if (CHECK(IS_ERR(btf), "btf_parse_elf",
+           "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+               err = -PTR_ERR(btf);
+               btf = NULL;
+               goto done;
+       }
+
+       snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
+       fd = mkstemp(out_file);
+       if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
+               err = fd;
+               goto done;
+       }
+       f = fdopen(fd, "w");
+       if (CHECK(f == NULL, "open_tmp", "failed to open file: %s(%d)\n",
+                 strerror(errno), errno)) {
+               close(fd);
+               goto done;
+       }
+
+       t->opts.ctx = f;
+       err = btf_dump_all_types(btf, &t->opts);
+       fclose(f);
+       close(fd);
+       if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) {
+               goto done;
+       }
+
+       snprintf(test_file, sizeof(test_file), "progs/%s.c", t->file);
+       if (access(test_file, R_OK) == -1)
+               /*
+                * When the test is run with O=, kselftest copies TEST_FILES
+                * without preserving the directory structure.
+                */
+               snprintf(test_file, sizeof(test_file), "%s.c", t->file);
+       /*
+        * Diff test output and expected test output, contained between
+        * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case.
+        * For expected output lines, everything before '*' is stripped out.
+        * Also lines containing comment start and comment end markers are
+        * ignored.
+        */
+       snprintf(diff_cmd, sizeof(diff_cmd),
+                "awk '/START-EXPECTED-OUTPUT/{out=1;next} "
+                "/END-EXPECTED-OUTPUT/{out=0} "
+                "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */
+                "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'",
+                test_file, out_file);
+       err = system(diff_cmd);
+       if (CHECK(err, "diff",
+                 "differing test output, output=%s, err=%d, diff cmd:\n%s\n",
+                 out_file, err, diff_cmd))
+               goto done;
+
+       remove(out_file);
+
+done:
+       btf__free(btf);
+       return err;
+}
+
+void test_btf_dump() {
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
+               struct btf_dump_test_case *t = &btf_dump_test_cases[i];
+
+               if (!test__start_subtest(t->name))
+                       continue;
+
+                test_btf_dump_case(i, &btf_dump_test_cases[i]);
+       }
+}
index f3863f9..09dfa75 100644 (file)
        .fails = true,                                                  \
 }
 
+#define EXISTENCE_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) {  \
+       .a = 42,                                                        \
+}
+
+#define EXISTENCE_CASE_COMMON(name)                                    \
+       .case_name = #name,                                             \
+       .bpf_obj_file = "test_core_reloc_existence.o",                  \
+       .btf_src_file = "btf__core_reloc_" #name ".o",                  \
+       .relaxed_core_relocs = true                                     \
+
+#define EXISTENCE_ERR_CASE(name) {                                     \
+       EXISTENCE_CASE_COMMON(name),                                    \
+       .fails = true,                                                  \
+}
+
 struct core_reloc_test_case {
        const char *case_name;
        const char *bpf_obj_file;
@@ -183,6 +198,7 @@ struct core_reloc_test_case {
        const char *output;
        int output_len;
        bool fails;
+       bool relaxed_core_relocs;
 };
 
 static struct core_reloc_test_case test_cases[] = {
@@ -193,8 +209,12 @@ static struct core_reloc_test_case test_cases[] = {
                .btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */
                .input = "",
                .input_len = 0,
-               .output = "\1", /* true */
-               .output_len = 1,
+               .output = STRUCT_TO_CHAR_PTR(core_reloc_kernel_output) {
+                       .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+                       .comm = "test_progs",
+                       .comm_len = sizeof("test_progs"),
+               },
+               .output_len = sizeof(struct core_reloc_kernel_output),
        },
 
        /* validate BPF program can use multiple flavors to match against
@@ -279,6 +299,59 @@ static struct core_reloc_test_case test_cases[] = {
                },
                .output_len = sizeof(struct core_reloc_misc_output),
        },
+
+       /* validate field existence checks */
+       {
+               EXISTENCE_CASE_COMMON(existence),
+               .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) {
+                       .a = 1,
+                       .b = 2,
+                       .c = 3,
+                       .arr = { 4 },
+                       .s = { .x = 5 },
+               },
+               .input_len = sizeof(struct core_reloc_existence),
+               .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+                       .a_exists = 1,
+                       .b_exists = 1,
+                       .c_exists = 1,
+                       .arr_exists = 1,
+                       .s_exists = 1,
+                       .a_value = 1,
+                       .b_value = 2,
+                       .c_value = 3,
+                       .arr_value = 4,
+                       .s_value = 5,
+               },
+               .output_len = sizeof(struct core_reloc_existence_output),
+       },
+       {
+               EXISTENCE_CASE_COMMON(existence___minimal),
+               .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
+                       .a = 42,
+               },
+               .input_len = sizeof(struct core_reloc_existence),
+               .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+                       .a_exists = 1,
+                       .b_exists = 0,
+                       .c_exists = 0,
+                       .arr_exists = 0,
+                       .s_exists = 0,
+                       .a_value = 42,
+                       .b_value = 0xff000002u,
+                       .c_value = 0xff000003u,
+                       .arr_value = 0xff000004u,
+                       .s_value = 0xff000005u,
+               },
+               .output_len = sizeof(struct core_reloc_existence_output),
+       },
+
+       EXISTENCE_ERR_CASE(existence__err_int_sz),
+       EXISTENCE_ERR_CASE(existence__err_int_type),
+       EXISTENCE_ERR_CASE(existence__err_int_kind),
+       EXISTENCE_ERR_CASE(existence__err_arr_kind),
+       EXISTENCE_ERR_CASE(existence__err_arr_value_type),
+       EXISTENCE_ERR_CASE(existence__err_struct_type),
 };
 
 struct data {
@@ -301,11 +374,14 @@ void test_core_reloc(void)
 
        for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
                test_case = &test_cases[i];
-
                if (!test__start_subtest(test_case->case_name))
                        continue;
 
-               obj = bpf_object__open(test_case->bpf_obj_file);
+               DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+                       .relaxed_core_relocs = test_case->relaxed_core_relocs,
+               );
+
+               obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
                if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
                          "failed to open '%s': %ld\n",
                          test_case->bpf_obj_file, PTR_ERR(obj)))
@@ -315,7 +391,6 @@ void test_core_reloc(void)
                if (CHECK(!prog, "find_probe",
                          "prog '%s' not found\n", probe_name))
                        goto cleanup;
-               bpf_program__set_type(prog, BPF_PROG_TYPE_RAW_TRACEPOINT);
 
                load_attr.obj = obj;
                load_attr.log_level = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
new file mode 100644 (file)
index 0000000..1f51ba6
--- /dev/null
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that the flow_dissector program can be updated with a single
+ * syscall by attaching a new program that replaces the existing one.
+ *
+ * Corner case - the same program cannot be attached twice.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+
+static bool is_attached(int netns)
+{
+       __u32 cnt;
+       int err;
+
+       err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt);
+       if (CHECK_FAIL(err)) {
+               perror("bpf_prog_query");
+               return true; /* fail-safe */
+       }
+
+       return cnt > 0;
+}
+
+static int load_prog(void)
+{
+       struct bpf_insn prog[] = {
+               BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
+               BPF_EXIT_INSN(),
+       };
+       int fd;
+
+       fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+                             ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+       if (CHECK_FAIL(fd < 0))
+               perror("bpf_load_program");
+
+       return fd;
+}
+
+static void do_flow_dissector_reattach(void)
+{
+       int prog_fd[2] = { -1, -1 };
+       int err;
+
+       prog_fd[0] = load_prog();
+       if (prog_fd[0] < 0)
+               return;
+
+       prog_fd[1] = load_prog();
+       if (prog_fd[1] < 0)
+               goto out_close;
+
+       err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0);
+       if (CHECK_FAIL(err)) {
+               perror("bpf_prog_attach-0");
+               goto out_close;
+       }
+
+       /* Expect success when attaching a different program */
+       err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+       if (CHECK_FAIL(err)) {
+               perror("bpf_prog_attach-1");
+               goto out_detach;
+       }
+
+       /* Expect failure when attaching the same program twice */
+       err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+       if (CHECK_FAIL(!err || errno != EINVAL))
+               perror("bpf_prog_attach-2");
+
+out_detach:
+       err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+       if (CHECK_FAIL(err))
+               perror("bpf_prog_detach");
+
+out_close:
+       close(prog_fd[1]);
+       close(prog_fd[0]);
+}
+
+void test_flow_dissector_reattach(void)
+{
+       int init_net, self_net, err;
+
+       self_net = open("/proc/self/ns/net", O_RDONLY);
+       if (CHECK_FAIL(self_net < 0)) {
+               perror("open(/proc/self/ns/net)");
+               return;
+       }
+
+       init_net = open("/proc/1/ns/net", O_RDONLY);
+       if (CHECK_FAIL(init_net < 0)) {
+               perror("open(/proc/1/ns/net)");
+               goto out_close;
+       }
+
+       err = setns(init_net, CLONE_NEWNET);
+       if (CHECK_FAIL(err)) {
+               perror("setns(/proc/1/ns/net)");
+               goto out_close;
+       }
+
+       if (is_attached(init_net)) {
+               test__skip();
+               printf("Can't test with flow dissector attached to init_net\n");
+               goto out_setns;
+       }
+
+       /* First run tests in root network namespace */
+       do_flow_dissector_reattach();
+
+       /* Then repeat tests in a non-root namespace */
+       err = unshare(CLONE_NEWNET);
+       if (CHECK_FAIL(err)) {
+               perror("unshare(CLONE_NEWNET)");
+               goto out_setns;
+       }
+       do_flow_dissector_reattach();
+
+out_setns:
+       /* Move back to netns we started in. */
+       err = setns(self_net, CLONE_NEWNET);
+       if (CHECK_FAIL(err))
+               perror("setns(/proc/self/ns/net)");
+
+out_close:
+       close(init_net);
+       close(self_net);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
new file mode 100644 (file)
index 0000000..430b50d
--- /dev/null
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+       int ifindex = *(int *)data, duration = 0;
+       struct ipv6_packet *pkt_v6 = data + 4;
+
+       if (ifindex != 1)
+               /* spurious kfree_skb not on loopback device */
+               return;
+       if (CHECK(size != 76, "check_size", "size %u != 76\n", size))
+               return;
+       if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth",
+                 "h_proto %x\n", pkt_v6->eth.h_proto))
+               return;
+       if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip",
+                 "iph.nexthdr %x\n", pkt_v6->iph.nexthdr))
+               return;
+       if (CHECK(pkt_v6->tcp.doff != 5, "check_tcp",
+                 "tcp.doff %x\n", pkt_v6->tcp.doff))
+               return;
+
+       *(bool *)ctx = true;
+}
+
+void test_kfree_skb(void)
+{
+       struct bpf_prog_load_attr attr = {
+               .file = "./kfree_skb.o",
+       };
+
+       struct bpf_object *obj, *obj2 = NULL;
+       struct perf_buffer_opts pb_opts = {};
+       struct perf_buffer *pb = NULL;
+       struct bpf_link *link = NULL;
+       struct bpf_map *perf_buf_map;
+       struct bpf_program *prog;
+       __u32 duration, retval;
+       int err, pkt_fd, kfree_skb_fd;
+       bool passed = false;
+
+       err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &pkt_fd);
+       if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+               return;
+
+       err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd);
+       if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+               goto close_prog;
+
+       prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb");
+       if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
+               goto close_prog;
+       link = bpf_program__attach_raw_tracepoint(prog, NULL);
+       if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+               goto close_prog;
+
+       perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
+       if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+               goto close_prog;
+
+       /* set up perf buffer */
+       pb_opts.sample_cb = on_sample;
+       pb_opts.ctx = &passed;
+       pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+               goto close_prog;
+
+       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+                               NULL, NULL, &retval, &duration);
+       CHECK(err || retval, "ipv6",
+             "err %d errno %d retval %d duration %d\n",
+             err, errno, retval, duration);
+
+       /* read perf buffer */
+       err = perf_buffer__poll(pb, 100);
+       if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+               goto close_prog;
+       /* make sure kfree_skb program was triggered
+        * and it sent expected skb into ring buffer
+        */
+       CHECK_FAIL(!passed);
+close_prog:
+       perf_buffer__free(pb);
+       if (!IS_ERR_OR_NULL(link))
+               bpf_link__destroy(link);
+       bpf_object__close(obj);
+       bpf_object__close(obj2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
new file mode 100644 (file)
index 0000000..5253889
--- /dev/null
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+__u32 get_map_id(struct bpf_object *obj, const char *name)
+{
+       struct bpf_map_info map_info = {};
+       __u32 map_info_len, duration = 0;
+       struct bpf_map *map;
+       int err;
+
+       map_info_len = sizeof(map_info);
+
+       map = bpf_object__find_map_by_name(obj, name);
+       if (CHECK(!map, "find map", "NULL map"))
+               return 0;
+
+       err = bpf_obj_get_info_by_fd(bpf_map__fd(map),
+                                    &map_info, &map_info_len);
+       CHECK(err, "get map info", "err %d errno %d", err, errno);
+       return map_info.id;
+}
+
+void test_pinning(void)
+{
+       const char *file_invalid = "./test_pinning_invalid.o";
+       const char *custpinpath = "/sys/fs/bpf/custom/pinmap";
+       const char *nopinpath = "/sys/fs/bpf/nopinmap";
+       const char *nopinpath2 = "/sys/fs/bpf/nopinmap2";
+       const char *custpath = "/sys/fs/bpf/custom";
+       const char *pinpath = "/sys/fs/bpf/pinmap";
+       const char *file = "./test_pinning.o";
+       __u32 map_id, map_id2, duration = 0;
+       struct stat statbuf = {};
+       struct bpf_object *obj;
+       struct bpf_map *map;
+       int err;
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+               .pin_root_path = custpath,
+       );
+
+       /* check that opening fails with invalid pinning value in map def */
+       obj = bpf_object__open_file(file_invalid, NULL);
+       err = libbpf_get_error(obj);
+       if (CHECK(err != -EINVAL, "invalid open", "err %d errno %d\n", err, errno)) {
+               obj = NULL;
+               goto out;
+       }
+
+       /* open the valid object file  */
+       obj = bpf_object__open_file(file, NULL);
+       err = libbpf_get_error(obj);
+       if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+               obj = NULL;
+               goto out;
+       }
+
+       err = bpf_object__load(obj);
+       if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* check that pinmap was pinned */
+       err = stat(pinpath, &statbuf);
+       if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* check that nopinmap was *not* pinned */
+       err = stat(nopinpath, &statbuf);
+       if (CHECK(!err || errno != ENOENT, "stat nopinpath",
+                 "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* check that nopinmap2 was *not* pinned */
+       err = stat(nopinpath2, &statbuf);
+       if (CHECK(!err || errno != ENOENT, "stat nopinpath2",
+                 "err %d errno %d\n", err, errno))
+               goto out;
+
+       map_id = get_map_id(obj, "pinmap");
+       if (!map_id)
+               goto out;
+
+       bpf_object__close(obj);
+
+       obj = bpf_object__open_file(file, NULL);
+       if (CHECK_FAIL(libbpf_get_error(obj))) {
+               obj = NULL;
+               goto out;
+       }
+
+       err = bpf_object__load(obj);
+       if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* check that same map ID was reused for second load */
+       map_id2 = get_map_id(obj, "pinmap");
+       if (CHECK(map_id != map_id2, "check reuse",
+                 "err %d errno %d id %d id2 %d\n", err, errno, map_id, map_id2))
+               goto out;
+
+       /* should be no-op to re-pin same map */
+       map = bpf_object__find_map_by_name(obj, "pinmap");
+       if (CHECK(!map, "find map", "NULL map"))
+               goto out;
+
+       err = bpf_map__pin(map, NULL);
+       if (CHECK(err, "re-pin map", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* but error to pin at different location */
+       err = bpf_map__pin(map, "/sys/fs/bpf/other");
+       if (CHECK(!err, "pin map different", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* unpin maps with a pin_path set */
+       err = bpf_object__unpin_maps(obj, NULL);
+       if (CHECK(err, "unpin maps", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* and re-pin them... */
+       err = bpf_object__pin_maps(obj, NULL);
+       if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* set pinning path of other map and re-pin all */
+       map = bpf_object__find_map_by_name(obj, "nopinmap");
+       if (CHECK(!map, "find map", "NULL map"))
+               goto out;
+
+       err = bpf_map__set_pin_path(map, custpinpath);
+       if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* should only pin the one unpinned map */
+       err = bpf_object__pin_maps(obj, NULL);
+       if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* check that nopinmap was pinned at the custom path */
+       err = stat(custpinpath, &statbuf);
+       if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* remove the custom pin path to re-test it with auto-pinning below */
+       err = unlink(custpinpath);
+       if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
+               goto out;
+
+       err = rmdir(custpath);
+       if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
+               goto out;
+
+       bpf_object__close(obj);
+
+       /* open the valid object file again */
+       obj = bpf_object__open_file(file, NULL);
+       err = libbpf_get_error(obj);
+       if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+               obj = NULL;
+               goto out;
+       }
+
+       /* swap pin paths of the two maps */
+       bpf_object__for_each_map(map, obj) {
+               if (!strcmp(bpf_map__name(map), "nopinmap"))
+                       err = bpf_map__set_pin_path(map, pinpath);
+               else if (!strcmp(bpf_map__name(map), "pinmap"))
+                       err = bpf_map__set_pin_path(map, NULL);
+               else
+                       continue;
+
+               if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+                       goto out;
+       }
+
+       /* should fail because of map parameter mismatch */
+       err = bpf_object__load(obj);
+       if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno))
+               goto out;
+
+       bpf_object__close(obj);
+
+       /* test auto-pinning at custom path with open opt */
+       obj = bpf_object__open_file(file, &opts);
+       if (CHECK_FAIL(libbpf_get_error(obj))) {
+               obj = NULL;
+               goto out;
+       }
+
+       err = bpf_object__load(obj);
+       if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* check that pinmap was pinned at the custom path */
+       err = stat(custpinpath, &statbuf);
+       if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+               goto out;
+
+out:
+       unlink(pinpath);
+       unlink(nopinpath);
+       unlink(nopinpath2);
+       unlink(custpinpath);
+       rmdir(custpath);
+       if (obj)
+               bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
new file mode 100644 (file)
index 0000000..8a3187d
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_probe_user(void)
+{
+#define kprobe_name "__sys_connect"
+       const char *prog_name = "kprobe/" kprobe_name;
+       const char *obj_file = "./test_probe_user.o";
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
+       int err, results_map_fd, sock_fd, duration = 0;
+       struct sockaddr curr, orig, tmp;
+       struct sockaddr_in *in = (struct sockaddr_in *)&curr;
+       struct bpf_link *kprobe_link = NULL;
+       struct bpf_program *kprobe_prog;
+       struct bpf_object *obj;
+       static const int zero = 0;
+
+       obj = bpf_object__open_file(obj_file, &opts);
+       if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+               return;
+
+       kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
+       if (CHECK(!kprobe_prog, "find_probe",
+                 "prog '%s' not found\n", prog_name))
+               goto cleanup;
+
+       err = bpf_object__load(obj);
+       if (CHECK(err, "obj_load", "err %d\n", err))
+               goto cleanup;
+
+       results_map_fd = bpf_find_map(__func__, obj, "test_pro.bss");
+       if (CHECK(results_map_fd < 0, "find_bss_map",
+                 "err %d\n", results_map_fd))
+               goto cleanup;
+
+       kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
+                                                kprobe_name);
+       if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
+                 "err %ld\n", PTR_ERR(kprobe_link))) {
+               kprobe_link = NULL;
+               goto cleanup;
+       }
+
+       memset(&curr, 0, sizeof(curr));
+       in->sin_family = AF_INET;
+       in->sin_port = htons(5555);
+       in->sin_addr.s_addr = inet_addr("255.255.255.255");
+       memcpy(&orig, &curr, sizeof(curr));
+
+       sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+       if (CHECK(sock_fd < 0, "create_sock_fd", "err %d\n", sock_fd))
+               goto cleanup;
+
+       connect(sock_fd, &curr, sizeof(curr));
+       close(sock_fd);
+
+       err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp);
+       if (CHECK(err, "get_kprobe_res",
+                 "failed to get kprobe res: %d\n", err))
+               goto cleanup;
+
+       in = (struct sockaddr_in *)&tmp;
+       if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res",
+                 "wrong kprobe res from probe read: %s:%u\n",
+                 inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
+               goto cleanup;
+
+       memset(&tmp, 0xab, sizeof(tmp));
+
+       in = (struct sockaddr_in *)&curr;
+       if (CHECK(memcmp(&curr, &tmp, sizeof(tmp)), "check_kprobe_res",
+                 "wrong kprobe res from probe write: %s:%u\n",
+                 inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
+               goto cleanup;
+cleanup:
+       bpf_link__destroy(kprobe_link);
+       bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
new file mode 100644 (file)
index 0000000..d90acc1
--- /dev/null
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+struct bss {
+       unsigned did_run;
+       unsigned iters;
+       unsigned sum;
+};
+
+struct rdonly_map_subtest {
+       const char *subtest_name;
+       const char *prog_name;
+       unsigned exp_iters;
+       unsigned exp_sum;
+};
+
+void test_rdonly_maps(void)
+{
+       const char *prog_name_skip_loop = "raw_tracepoint/sys_enter:skip_loop";
+       const char *prog_name_part_loop = "raw_tracepoint/sys_enter:part_loop";
+       const char *prog_name_full_loop = "raw_tracepoint/sys_enter:full_loop";
+       const char *file = "test_rdonly_maps.o";
+       struct rdonly_map_subtest subtests[] = {
+               { "skip loop", prog_name_skip_loop, 0, 0 },
+               { "part loop", prog_name_part_loop, 3, 2 + 3 + 4 },
+               { "full loop", prog_name_full_loop, 4, 2 + 3 + 4 + 5 },
+       };
+       int i, err, zero = 0, duration = 0;
+       struct bpf_link *link = NULL;
+       struct bpf_program *prog;
+       struct bpf_map *bss_map;
+       struct bpf_object *obj;
+       struct bss bss;
+
+       obj = bpf_object__open_file(file, NULL);
+       if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+               return;
+
+       err = bpf_object__load(obj);
+       if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+               goto cleanup;
+
+       bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss");
+       if (CHECK(!bss_map, "find_bss_map", "failed\n"))
+               goto cleanup;
+
+       for (i = 0; i < ARRAY_SIZE(subtests); i++) {
+               const struct rdonly_map_subtest *t = &subtests[i];
+
+               if (!test__start_subtest(t->subtest_name))
+                       continue;
+
+               prog = bpf_object__find_program_by_title(obj, t->prog_name);
+               if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
+                         t->prog_name))
+                       goto cleanup;
+
+               memset(&bss, 0, sizeof(bss));
+               err = bpf_map_update_elem(bpf_map__fd(bss_map), &zero, &bss, 0);
+               if (CHECK(err, "set_bss", "failed to set bss data: %d\n", err))
+                       goto cleanup;
+
+               link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+               if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
+                         t->prog_name, PTR_ERR(link))) {
+                       link = NULL;
+                       goto cleanup;
+               }
+
+               /* trigger probe */
+               usleep(1);
+
+               bpf_link__destroy(link);
+               link = NULL;
+
+               err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, &bss);
+               if (CHECK(err, "get_bss", "failed to get bss data: %d\n", err))
+                       goto cleanup;
+               if (CHECK(bss.did_run == 0, "check_run",
+                         "prog '%s' didn't run?\n", t->prog_name))
+                       goto cleanup;
+               if (CHECK(bss.iters != t->exp_iters, "check_iters",
+                         "prog '%s' iters: %d, expected: %d\n",
+                         t->prog_name, bss.iters, t->exp_iters))
+                       goto cleanup;
+               if (CHECK(bss.sum != t->exp_sum, "check_sum",
+                         "prog '%s' sum: %d, expected: %d\n",
+                         t->prog_name, bss.sum, t->exp_sum))
+                       goto cleanup;
+       }
+
+cleanup:
+       bpf_link__destroy(link);
+       bpf_object__close(obj);
+}
index 5c78e2b..fc0d7f4 100644 (file)
@@ -3,16 +3,26 @@
 
 void test_reference_tracking(void)
 {
-       const char *file = "./test_sk_lookup_kern.o";
+       const char *file = "test_sk_lookup_kern.o";
+       const char *obj_name = "ref_track";
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
+               .object_name = obj_name,
+               .relaxed_maps = true,
+       );
        struct bpf_object *obj;
        struct bpf_program *prog;
        __u32 duration = 0;
        int err = 0;
 
-       obj = bpf_object__open(file);
+       obj = bpf_object__open_file(file, &open_opts);
        if (CHECK_FAIL(IS_ERR(obj)))
                return;
 
+       if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
+                 "wrong obj name '%s', expected '%s'\n",
+                 bpf_object__name(obj), obj_name))
+               goto cleanup;
+
        bpf_object__for_each_program(prog, obj) {
                const char *title;
 
@@ -21,7 +31,8 @@ void test_reference_tracking(void)
                if (strstr(title, ".text") != NULL)
                        continue;
 
-               bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
+               if (!test__start_subtest(title))
+                       continue;
 
                /* Expect verifier failure if test name has 'fail' */
                if (strstr(title, "fail") != NULL) {
@@ -35,5 +46,7 @@ void test_reference_tracking(void)
                }
                CHECK(err, title, "\n");
        }
+
+cleanup:
        bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
new file mode 100644 (file)
index 0000000..9d9351d
--- /dev/null
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+#include <test_progs.h>
+
+static int duration = 0;
+
+struct sec_name_test {
+       const char sec_name[32];
+       struct {
+               int rc;
+               enum bpf_prog_type prog_type;
+               enum bpf_attach_type expected_attach_type;
+       } expected_load;
+       struct {
+               int rc;
+               enum bpf_attach_type attach_type;
+       } expected_attach;
+};
+
+static struct sec_name_test tests[] = {
+       {"InvAliD", {-ESRCH, 0, 0}, {-EINVAL, 0} },
+       {"cgroup", {-ESRCH, 0, 0}, {-EINVAL, 0} },
+       {"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} },
+       {"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+       {"uprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+       {"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+       {"uretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+       {"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} },
+       {"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} },
+       {"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
+       {"tp/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
+       {
+               "raw_tracepoint/",
+               {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0},
+               {-EINVAL, 0},
+       },
+       {"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} },
+       {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+       {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
+       {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
+       {"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
+       {"lwt_xmit", {0, BPF_PROG_TYPE_LWT_XMIT, 0}, {-EINVAL, 0} },
+       {"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} },
+       {
+               "cgroup_skb/ingress",
+               {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+               {0, BPF_CGROUP_INET_INGRESS},
+       },
+       {
+               "cgroup_skb/egress",
+               {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+               {0, BPF_CGROUP_INET_EGRESS},
+       },
+       {"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} },
+       {
+               "cgroup/sock",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
+               {0, BPF_CGROUP_INET_SOCK_CREATE},
+       },
+       {
+               "cgroup/post_bind4",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND},
+               {0, BPF_CGROUP_INET4_POST_BIND},
+       },
+       {
+               "cgroup/post_bind6",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND},
+               {0, BPF_CGROUP_INET6_POST_BIND},
+       },
+       {
+               "cgroup/dev",
+               {0, BPF_PROG_TYPE_CGROUP_DEVICE, 0},
+               {0, BPF_CGROUP_DEVICE},
+       },
+       {"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} },
+       {
+               "sk_skb/stream_parser",
+               {0, BPF_PROG_TYPE_SK_SKB, 0},
+               {0, BPF_SK_SKB_STREAM_PARSER},
+       },
+       {
+               "sk_skb/stream_verdict",
+               {0, BPF_PROG_TYPE_SK_SKB, 0},
+               {0, BPF_SK_SKB_STREAM_VERDICT},
+       },
+       {"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} },
+       {"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} },
+       {"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} },
+       {
+               "flow_dissector",
+               {0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0},
+               {0, BPF_FLOW_DISSECTOR},
+       },
+       {
+               "cgroup/bind4",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND},
+               {0, BPF_CGROUP_INET4_BIND},
+       },
+       {
+               "cgroup/bind6",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND},
+               {0, BPF_CGROUP_INET6_BIND},
+       },
+       {
+               "cgroup/connect4",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT},
+               {0, BPF_CGROUP_INET4_CONNECT},
+       },
+       {
+               "cgroup/connect6",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT},
+               {0, BPF_CGROUP_INET6_CONNECT},
+       },
+       {
+               "cgroup/sendmsg4",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
+               {0, BPF_CGROUP_UDP4_SENDMSG},
+       },
+       {
+               "cgroup/sendmsg6",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
+               {0, BPF_CGROUP_UDP6_SENDMSG},
+       },
+       {
+               "cgroup/recvmsg4",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG},
+               {0, BPF_CGROUP_UDP4_RECVMSG},
+       },
+       {
+               "cgroup/recvmsg6",
+               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG},
+               {0, BPF_CGROUP_UDP6_RECVMSG},
+       },
+       {
+               "cgroup/sysctl",
+               {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
+               {0, BPF_CGROUP_SYSCTL},
+       },
+       {
+               "cgroup/getsockopt",
+               {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT},
+               {0, BPF_CGROUP_GETSOCKOPT},
+       },
+       {
+               "cgroup/setsockopt",
+               {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
+               {0, BPF_CGROUP_SETSOCKOPT},
+       },
+};
+
+static void test_prog_type_by_name(const struct sec_name_test *test)
+{
+       enum bpf_attach_type expected_attach_type;
+       enum bpf_prog_type prog_type;
+       int rc;
+
+       rc = libbpf_prog_type_by_name(test->sec_name, &prog_type,
+                                     &expected_attach_type);
+
+       CHECK(rc != test->expected_load.rc, "check_code",
+             "prog: unexpected rc=%d for %s", rc, test->sec_name);
+
+       if (rc)
+               return;
+
+       CHECK(prog_type != test->expected_load.prog_type, "check_prog_type",
+             "prog: unexpected prog_type=%d for %s",
+             prog_type, test->sec_name);
+
+       CHECK(expected_attach_type != test->expected_load.expected_attach_type,
+             "check_attach_type", "prog: unexpected expected_attach_type=%d for %s",
+             expected_attach_type, test->sec_name);
+}
+
+static void test_attach_type_by_name(const struct sec_name_test *test)
+{
+       enum bpf_attach_type attach_type;
+       int rc;
+
+       rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
+
+       CHECK(rc != test->expected_attach.rc, "check_ret",
+             "attach: unexpected rc=%d for %s", rc, test->sec_name);
+
+       if (rc)
+               return;
+
+       CHECK(attach_type != test->expected_attach.attach_type,
+             "check_attach_type", "attach: unexpected attach_type=%d for %s",
+             attach_type, test->sec_name);
+}
+
+void test_section_names(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+               struct sec_name_test *test = &tests[i];
+
+               test_prog_type_by_name(test);
+               test_attach_type_by_name(test);
+       }
+}
index e95baa3..a2eb8db 100644 (file)
@@ -10,6 +10,7 @@ void test_skb_ctx(void)
                .cb[3] = 4,
                .cb[4] = 5,
                .priority = 6,
+               .tstamp = 7,
        };
        struct bpf_prog_test_run_attr tattr = {
                .data_in = &pkt_v4,
@@ -86,4 +87,8 @@ void test_skb_ctx(void)
                   "ctx_out_priority",
                   "skb->priority == %d, expected %d\n",
                   skb.priority, 7);
+       CHECK_ATTR(skb.tstamp != 8,
+                  "ctx_out_tstamp",
+                  "skb->tstamp == %lld, expected %d\n",
+                  skb.tstamp, 8);
 }
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c
new file mode 100644 (file)
index 0000000..0b62315
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
new file mode 100644 (file)
index 0000000..dd0ffa5
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_arr_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
new file mode 100644 (file)
index 0000000..bc83372
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_arr_value_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
new file mode 100644 (file)
index 0000000..917bec4
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_int_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
new file mode 100644 (file)
index 0000000..6ec7e6e
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_int_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
new file mode 100644 (file)
index 0000000..7bbcacf
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_int_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
new file mode 100644 (file)
index 0000000..f384dd3
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_struct_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c
new file mode 100644 (file)
index 0000000..aec2dec
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___minimal x) {}
index 3a62119..35c5128 100644 (file)
@@ -62,6 +62,10 @@ struct padded_a_lot {
  *     long: 64;
  *     long: 64;
  *     int b;
+ *     long: 32;
+ *     long: 64;
+ *     long: 64;
+ *     long: 64;
  *};
  *
  */
@@ -95,7 +99,6 @@ struct zone_padding {
 struct zone {
        int a;
        short b;
-       short: 16;
        struct zone_padding __pad__;
 };
 
index f686a81..f5939d9 100644 (file)
@@ -1,5 +1,14 @@
 #include <stdint.h>
 #include <stdbool.h>
+/*
+ * KERNEL
+ */
+
+struct core_reloc_kernel_output {
+       int valid[10];
+       char comm[sizeof("test_progs")];
+       int comm_len;
+};
 
 /*
  * FLAVORS
@@ -665,3 +674,59 @@ struct core_reloc_misc_extensible {
        int c;
        int d;
 };
+
+/*
+ * EXISTENCE
+ */
+struct core_reloc_existence_output {
+       int a_exists;
+       int a_value;
+       int b_exists;
+       int b_value;
+       int c_exists;
+       int c_value;
+       int arr_exists;
+       int arr_value;
+       int s_exists;
+       int s_value;
+};
+
+struct core_reloc_existence {
+       int a;
+       struct {
+               int b;
+       };
+       int c;
+       int arr[1];
+       struct {
+               int x;
+       } s;
+};
+
+struct core_reloc_existence___minimal {
+       int a;
+};
+
+struct core_reloc_existence___err_wrong_int_sz {
+       short a;
+};
+
+struct core_reloc_existence___err_wrong_int_type {
+       int b[1];
+};
+
+struct core_reloc_existence___err_wrong_int_kind {
+       struct{ int x; } c;
+};
+
+struct core_reloc_existence___err_wrong_arr_kind {
+       int arr;
+};
+
+struct core_reloc_existence___err_wrong_arr_value_type {
+       short arr[1];
+};
+
+struct core_reloc_existence___err_wrong_struct_type {
+       int s;
+};
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
new file mode 100644 (file)
index 0000000..489319e
--- /dev/null
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+char _license[] SEC("license") = "GPL";
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
+#define _(P) (__builtin_preserve_access_index(P))
+
+/* define a few structs that the bpf program needs to access */
+struct callback_head {
+       struct callback_head *next;
+       void (*func)(struct callback_head *head);
+};
+struct dev_ifalias {
+       struct callback_head rcuhead;
+};
+
+struct net_device /* same as kernel's struct net_device */ {
+       int ifindex;
+       struct dev_ifalias *ifalias;
+};
+
+typedef struct {
+        int counter;
+} atomic_t;
+typedef struct refcount_struct {
+        atomic_t refs;
+} refcount_t;
+
+struct sk_buff {
+       /* field names and sizes should match those in the kernel */
+       unsigned int len, data_len;
+       __u16 mac_len, hdr_len, queue_mapping;
+       struct net_device *dev;
+       /* order of the fields doesn't matter */
+       refcount_t users;
+       unsigned char *data;
+       char __pkt_type_offset[0];
+};
+
+/* copy arguments from
+ * include/trace/events/skb.h:
+ * TRACE_EVENT(kfree_skb,
+ *         TP_PROTO(struct sk_buff *skb, void *location),
+ *
+ * into struct below:
+ */
+struct trace_kfree_skb {
+       struct sk_buff *skb;
+       void *location;
+};
+
+SEC("tp_btf/kfree_skb")
+int trace_kfree_skb(struct trace_kfree_skb *ctx)
+{
+       struct sk_buff *skb = ctx->skb;
+       struct net_device *dev;
+       int ifindex;
+       struct callback_head *ptr;
+       void *func;
+       int users;
+       unsigned char *data;
+       unsigned short pkt_data;
+       char pkt_type;
+
+       __builtin_preserve_access_index(({
+               users = skb->users.refs.counter;
+               data = skb->data;
+               dev = skb->dev;
+               ifindex = dev->ifindex;
+               ptr = dev->ifalias->rcuhead.next;
+               func = ptr->func;
+       }));
+
+       bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
+       pkt_type &= 7;
+
+       /* read eth proto */
+       bpf_probe_read_kernel(&pkt_data, sizeof(pkt_data), data + 12);
+
+       bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
+       bpf_printk("skb->len %d users %d pkt_type %x\n",
+                  _(skb->len), users, pkt_type);
+       bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping));
+       bpf_printk("dev->ifindex %d data %llx pkt_data %x\n",
+                  ifindex, data, pkt_data);
+
+       if (users != 1 || pkt_data != bpf_htons(0x86dd) || ifindex != 1)
+               /* raw tp ignores return value */
+               return 0;
+
+       /* send first 72 bytes of the packet to user space */
+       bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU,
+                      &ifindex, sizeof(ifindex));
+       return 0;
+}
index 7cdb7f8..40ac722 100644 (file)
@@ -7,6 +7,7 @@
 #include <stdbool.h>
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 char _license[] SEC("license") = "GPL";
 
index 9b2f808..bb80f29 100644 (file)
@@ -7,6 +7,7 @@
 #include <stdbool.h>
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 char _license[] SEC("license") = "GPL";
 
index d727657..2b9165a 100644 (file)
@@ -7,6 +7,7 @@
 #include <stdbool.h>
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_tracing.h"
 
 char _license[] SEC("license") = "GPL";
 
index 003fe10..71d383c 100644 (file)
@@ -72,9 +72,9 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
        void* thread_state;
        int key;
 
-       bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
-       bpf_probe_read(&thread_state, sizeof(thread_state),
-                      tls_base + 0x310 + key * 0x10 + 0x08);
+       bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
+       bpf_probe_read_user(&thread_state, sizeof(thread_state),
+                           tls_base + 0x310 + key * 0x10 + 0x08);
        return thread_state;
 }
 
@@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
                                           FrameData *frame, Symbol *symbol)
 {
        // read data from PyFrameObject
-       bpf_probe_read(&frame->f_back,
-                      sizeof(frame->f_back),
-                      frame_ptr + pidData->offsets.PyFrameObject_back);
-       bpf_probe_read(&frame->f_code,
-                      sizeof(frame->f_code),
-                      frame_ptr + pidData->offsets.PyFrameObject_code);
+       bpf_probe_read_user(&frame->f_back,
+                           sizeof(frame->f_back),
+                           frame_ptr + pidData->offsets.PyFrameObject_back);
+       bpf_probe_read_user(&frame->f_code,
+                           sizeof(frame->f_code),
+                           frame_ptr + pidData->offsets.PyFrameObject_code);
 
        // read data from PyCodeObject
        if (!frame->f_code)
                return false;
-       bpf_probe_read(&frame->co_filename,
-                      sizeof(frame->co_filename),
-                      frame->f_code + pidData->offsets.PyCodeObject_filename);
-       bpf_probe_read(&frame->co_name,
-                      sizeof(frame->co_name),
-                      frame->f_code + pidData->offsets.PyCodeObject_name);
+       bpf_probe_read_user(&frame->co_filename,
+                           sizeof(frame->co_filename),
+                           frame->f_code + pidData->offsets.PyCodeObject_filename);
+       bpf_probe_read_user(&frame->co_name,
+                           sizeof(frame->co_name),
+                           frame->f_code + pidData->offsets.PyCodeObject_name);
        // read actual names into symbol
        if (frame->co_filename)
-               bpf_probe_read_str(&symbol->file,
-                                  sizeof(symbol->file),
-                                  frame->co_filename + pidData->offsets.String_data);
+               bpf_probe_read_user_str(&symbol->file,
+                                       sizeof(symbol->file),
+                                       frame->co_filename +
+                                       pidData->offsets.String_data);
        if (frame->co_name)
-               bpf_probe_read_str(&symbol->name,
-                                  sizeof(symbol->name),
-                                  frame->co_name + pidData->offsets.String_data);
+               bpf_probe_read_user_str(&symbol->name,
+                                       sizeof(symbol->name),
+                                       frame->co_name +
+                                       pidData->offsets.String_data);
        return true;
 }
 
@@ -174,9 +176,9 @@ static __always_inline int __on_event(struct pt_regs *ctx)
        event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
 
        void* thread_state_current = (void*)0;
-       bpf_probe_read(&thread_state_current,
-                      sizeof(thread_state_current),
-                      (void*)(long)pidData->current_state_addr);
+       bpf_probe_read_user(&thread_state_current,
+                           sizeof(thread_state_current),
+                           (void*)(long)pidData->current_state_addr);
 
        struct task_struct* task = (struct task_struct*)bpf_get_current_task();
        void* tls_base = (void*)task;
@@ -188,11 +190,13 @@ static __always_inline int __on_event(struct pt_regs *ctx)
        if (pidData->use_tls) {
                uint64_t pthread_created;
                uint64_t pthread_self;
-               bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
+               bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
+                                   tls_base + 0x10);
 
-               bpf_probe_read(&pthread_created,
-                              sizeof(pthread_created),
-                              thread_state + pidData->offsets.PyThreadState_thread);
+               bpf_probe_read_user(&pthread_created,
+                                   sizeof(pthread_created),
+                                   thread_state +
+                                   pidData->offsets.PyThreadState_thread);
                event->pthread_match = pthread_created == pthread_self;
        } else {
                event->pthread_match = 1;
@@ -204,9 +208,10 @@ static __always_inline int __on_event(struct pt_regs *ctx)
                Symbol sym = {};
                int cur_cpu = bpf_get_smp_processor_id();
 
-               bpf_probe_read(&frame_ptr,
-                              sizeof(frame_ptr),
-                              thread_state + pidData->offsets.PyThreadState_frame);
+               bpf_probe_read_user(&frame_ptr,
+                                   sizeof(frame_ptr),
+                                   thread_state +
+                                   pidData->offsets.PyThreadState_frame);
 
                int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
                if (symbol_counter == NULL)
index 9a3d1c7..1bafbb9 100644 (file)
@@ -14,13 +14,12 @@ struct sockopt_sk {
        __u8 val;
 };
 
-struct bpf_map_def SEC("maps") socket_storage_map = {
-       .type = BPF_MAP_TYPE_SK_STORAGE,
-       .key_size = sizeof(int),
-       .value_size = sizeof(struct sockopt_sk),
-       .map_flags = BPF_F_NO_PREALLOC,
-};
-BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk);
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, struct sockopt_sk);
+} socket_storage_map SEC(".maps");
 
 SEC("cgroup/getsockopt")
 int _getsockopt(struct bpf_sockopt *ctx)
index 067eb62..4bf16e0 100644 (file)
@@ -98,7 +98,7 @@ struct strobe_map_raw {
        /*
         * having volatile doesn't change anything on BPF side, but clang
         * emits warnings for passing `volatile const char *` into
-        * bpf_probe_read_str that expects just `const char *`
+        * bpf_probe_read_user_str that expects just `const char *`
         */
        const char* tag;
        /*
@@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
        dtv_t *dtv;
        void *tls_ptr;
 
-       bpf_probe_read(&tls_index, sizeof(struct tls_index),
-                      (void *)loc->offset);
+       bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
+                           (void *)loc->offset);
        /* valid module index is always positive */
        if (tls_index.module > 0) {
                /* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
-               bpf_probe_read(&dtv, sizeof(dtv),
-                              &((struct tcbhead *)tls_base)->dtv);
+               bpf_probe_read_user(&dtv, sizeof(dtv),
+                                   &((struct tcbhead *)tls_base)->dtv);
                dtv += tls_index.module;
        } else {
                dtv = NULL;
        }
-       bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
+       bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
        /* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
        return tls_ptr && tls_ptr != (void *)-1
                ? tls_ptr + tls_index.offset
@@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
        if (!location)
                return;
 
-       bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+       bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
        data->int_vals[idx] = value->val;
        if (value->header.len)
                data->int_vals_set_mask |= (1 << idx);
@@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
        if (!location)
                return 0;
 
-       bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
-       len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+       bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+       len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
        /*
-        * if bpf_probe_read_str returns error (<0), due to casting to
+        * if bpf_probe_read_user_str returns error (<0), due to casting to
         * unsinged int, it will become big number, so next check is
         * sufficient to check for errors AND prove to BPF verifier, that
-        * bpf_probe_read_str won't return anything bigger than
+        * bpf_probe_read_user_str won't return anything bigger than
         * STROBE_MAX_STR_LEN
         */
        if (len > STROBE_MAX_STR_LEN)
@@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
        if (!location)
                return payload;
 
-       bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
-       if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
+       bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+       if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
                return payload;
 
        descr->id = map.id;
@@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
                data->req_meta_valid = 1;
        }
 
-       len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
+       len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
        if (len <= STROBE_MAX_STR_LEN) {
                descr->tag_len = len;
                payload += len;
@@ -418,15 +418,15 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
                        break;
 
                descr->key_lens[i] = 0;
-               len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
-                                        map.entries[i].key);
+               len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+                                             map.entries[i].key);
                if (len <= STROBE_MAX_STR_LEN) {
                        descr->key_lens[i] = len;
                        payload += len;
                }
                descr->val_lens[i] = 0;
-               len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
-                                        map.entries[i].val);
+               len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+                                             map.entries[i].val);
                if (len <= STROBE_MAX_STR_LEN) {
                        descr->val_lens[i] = len;
                        payload += len;
index 233bdcb..2cf813a 100644 (file)
@@ -13,13 +13,12 @@ struct tcp_rtt_storage {
        __u32 icsk_retransmits;
 };
 
-struct bpf_map_def SEC("maps") socket_storage_map = {
-       .type = BPF_MAP_TYPE_SK_STORAGE,
-       .key_size = sizeof(int),
-       .value_size = sizeof(struct tcp_rtt_storage),
-       .map_flags = BPF_F_NO_PREALLOC,
-};
-BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage);
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, struct tcp_rtt_storage);
+} socket_storage_map SEC(".maps");
 
 SEC("sockops")
 int _sockops(struct bpf_sock_ops *ctx)
index 63a8dfe..534621e 100644 (file)
@@ -49,4 +49,3 @@ int handle_uprobe_return(struct pt_regs *ctx)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
index e5c79fe..763c514 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2018 Facebook */
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
 
 int _version SEC("version") = 1;
 
index 5ee3622..96f9e84 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2018 Facebook */
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
+#include "bpf_legacy.h"
 
 int _version SEC("version") = 1;
 
index bf67f0f..96b1f5f 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -31,6 +32,8 @@ struct core_reloc_arrays {
        struct core_reloc_arrays_substruct d[1][2];
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_arrays(void *ctx)
 {
@@ -38,16 +41,16 @@ int test_core_arrays(void *ctx)
        struct core_reloc_arrays_output *out = (void *)&data.out;
 
        /* in->a[2] */
-       if (BPF_CORE_READ(&out->a2, &in->a[2]))
+       if (CORE_READ(&out->a2, &in->a[2]))
                return 1;
        /* in->b[1][2][3] */
-       if (BPF_CORE_READ(&out->b123, &in->b[1][2][3]))
+       if (CORE_READ(&out->b123, &in->b[1][2][3]))
                return 1;
        /* in->c[1].c */
-       if (BPF_CORE_READ(&out->c1c, &in->c[1].c))
+       if (CORE_READ(&out->c1c, &in->c[1].c))
                return 1;
        /* in->d[0][0].d */
-       if (BPF_CORE_READ(&out->d00d, &in->d[0][0].d))
+       if (CORE_READ(&out->d00d, &in->d[0][0].d))
                return 1;
 
        return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
new file mode 100644 (file)
index 0000000..c3cac95
--- /dev/null
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include "bpf_helpers.h"
+#include "bpf_core_read.h"
+
+char _license[] SEC("license") = "GPL";
+
+static volatile struct data {
+       char in[256];
+       char out[256];
+} data;
+
+struct core_reloc_existence_output {
+       int a_exists;
+       int a_value;
+       int b_exists;
+       int b_value;
+       int c_exists;
+       int c_value;
+       int arr_exists;
+       int arr_value;
+       int s_exists;
+       int s_value;
+};
+
+struct core_reloc_existence {
+       struct {
+               int x;
+       } s;
+       int arr[1];
+       int a;
+       struct {
+               int b;
+       };
+       int c;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_existence(void *ctx)
+{
+       struct core_reloc_existence *in = (void *)&data.in;
+       struct core_reloc_existence_output *out = (void *)&data.out;
+
+       out->a_exists = bpf_core_field_exists(in->a);
+       if (bpf_core_field_exists(in->a))
+               out->a_value = BPF_CORE_READ(in, a);
+       else
+               out->a_value = 0xff000001u;
+
+       out->b_exists = bpf_core_field_exists(in->b);
+       if (bpf_core_field_exists(in->b))
+               out->b_value = BPF_CORE_READ(in, b);
+       else
+               out->b_value = 0xff000002u;
+
+       out->c_exists = bpf_core_field_exists(in->c);
+       if (bpf_core_field_exists(in->c))
+               out->c_value = BPF_CORE_READ(in, c);
+       else
+               out->c_value = 0xff000003u;
+
+       out->arr_exists = bpf_core_field_exists(in->arr);
+       if (bpf_core_field_exists(in->arr))
+               out->arr_value = BPF_CORE_READ(in, arr[0]);
+       else
+               out->arr_value = 0xff000004u;
+
+       out->s_exists = bpf_core_field_exists(in->s);
+       if (bpf_core_field_exists(in->s))
+               out->s_value = BPF_CORE_READ(in, s.x);
+       else
+               out->s_value = 0xff000005u;
+
+       return 0;
+}
+
index 9fda73e..71fd7ce 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -39,6 +40,8 @@ struct core_reloc_flavors___weird {
        };
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_flavors(void *ctx)
 {
@@ -48,13 +51,13 @@ int test_core_flavors(void *ctx)
        struct core_reloc_flavors *out = (void *)&data.out;
 
        /* read a using weird layout */
-       if (BPF_CORE_READ(&out->a, &in_weird->a))
+       if (CORE_READ(&out->a, &in_weird->a))
                return 1;
        /* read b using reversed layout */
-       if (BPF_CORE_READ(&out->b, &in_rev->b))
+       if (CORE_READ(&out->b, &in_rev->b))
                return 1;
        /* read c using original layout */
-       if (BPF_CORE_READ(&out->c, &in_orig->c))
+       if (CORE_READ(&out->c, &in_orig->c))
                return 1;
 
        return 0;
index d99233c..ad5c3f5 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,20 +24,22 @@ struct core_reloc_ints {
        int64_t         s64_field;
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_ints(void *ctx)
 {
        struct core_reloc_ints *in = (void *)&data.in;
        struct core_reloc_ints *out = (void *)&data.out;
 
-       if (BPF_CORE_READ(&out->u8_field, &in->u8_field) ||
-           BPF_CORE_READ(&out->s8_field, &in->s8_field) ||
-           BPF_CORE_READ(&out->u16_field, &in->u16_field) ||
-           BPF_CORE_READ(&out->s16_field, &in->s16_field) ||
-           BPF_CORE_READ(&out->u32_field, &in->u32_field) ||
-           BPF_CORE_READ(&out->s32_field, &in->s32_field) ||
-           BPF_CORE_READ(&out->u64_field, &in->u64_field) ||
-           BPF_CORE_READ(&out->s64_field, &in->s64_field))
+       if (CORE_READ(&out->u8_field, &in->u8_field) ||
+           CORE_READ(&out->s8_field, &in->s8_field) ||
+           CORE_READ(&out->u16_field, &in->u16_field) ||
+           CORE_READ(&out->s16_field, &in->s16_field) ||
+           CORE_READ(&out->u32_field, &in->u32_field) ||
+           CORE_READ(&out->s32_field, &in->s32_field) ||
+           CORE_READ(&out->u64_field, &in->u64_field) ||
+           CORE_READ(&out->s64_field, &in->s64_field))
                return 1;
 
        return 0;
index 37e02aa..a4b5e05 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -12,24 +13,79 @@ static volatile struct data {
        char out[256];
 } data;
 
+struct core_reloc_kernel_output {
+       int valid[10];
+       /* we have test_progs[-flavor], so cut flavor part */
+       char comm[sizeof("test_progs")];
+       int comm_len;
+};
+
 struct task_struct {
        int pid;
        int tgid;
+       char comm[16];
+       struct task_struct *group_leader;
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_kernel(void *ctx)
 {
        struct task_struct *task = (void *)bpf_get_current_task();
+       struct core_reloc_kernel_output *out = (void *)&data.out;
        uint64_t pid_tgid = bpf_get_current_pid_tgid();
+       uint32_t real_tgid = (uint32_t)pid_tgid;
        int pid, tgid;
 
-       if (BPF_CORE_READ(&pid, &task->pid) ||
-           BPF_CORE_READ(&tgid, &task->tgid))
+       if (CORE_READ(&pid, &task->pid) ||
+           CORE_READ(&tgid, &task->tgid))
                return 1;
 
        /* validate pid + tgid matches */
-       data.out[0] = (((uint64_t)pid << 32) | tgid) == pid_tgid;
+       out->valid[0] = (((uint64_t)pid << 32) | tgid) == pid_tgid;
+
+       /* test variadic BPF_CORE_READ macros */
+       out->valid[1] = BPF_CORE_READ(task,
+                                     tgid) == real_tgid;
+       out->valid[2] = BPF_CORE_READ(task,
+                                     group_leader,
+                                     tgid) == real_tgid;
+       out->valid[3] = BPF_CORE_READ(task,
+                                     group_leader, group_leader,
+                                     tgid) == real_tgid;
+       out->valid[4] = BPF_CORE_READ(task,
+                                     group_leader, group_leader, group_leader,
+                                     tgid) == real_tgid;
+       out->valid[5] = BPF_CORE_READ(task,
+                                     group_leader, group_leader, group_leader,
+                                     group_leader,
+                                     tgid) == real_tgid;
+       out->valid[6] = BPF_CORE_READ(task,
+                                     group_leader, group_leader, group_leader,
+                                     group_leader, group_leader,
+                                     tgid) == real_tgid;
+       out->valid[7] = BPF_CORE_READ(task,
+                                     group_leader, group_leader, group_leader,
+                                     group_leader, group_leader, group_leader,
+                                     tgid) == real_tgid;
+       out->valid[8] = BPF_CORE_READ(task,
+                                     group_leader, group_leader, group_leader,
+                                     group_leader, group_leader, group_leader,
+                                     group_leader,
+                                     tgid) == real_tgid;
+       out->valid[9] = BPF_CORE_READ(task,
+                                     group_leader, group_leader, group_leader,
+                                     group_leader, group_leader, group_leader,
+                                     group_leader, group_leader,
+                                     tgid) == real_tgid;
+
+       /* test BPF_CORE_READ_STR_INTO() returns correct code and contents */
+       out->comm_len = BPF_CORE_READ_STR_INTO(
+               &out->comm, task,
+               group_leader, group_leader, group_leader, group_leader,
+               group_leader, group_leader, group_leader, group_leader,
+               comm);
 
        return 0;
 }
index c59984b..1a36b08 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -32,6 +33,8 @@ struct core_reloc_misc_extensible {
        int b;
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_misc(void *ctx)
 {
@@ -41,15 +44,15 @@ int test_core_misc(void *ctx)
        struct core_reloc_misc_output *out = (void *)&data.out;
 
        /* record two different relocations with the same accessor string */
-       if (BPF_CORE_READ(&out->a, &in_a->a1) ||        /* accessor: 0:0 */
-           BPF_CORE_READ(&out->b, &in_b->b1))          /* accessor: 0:0 */
+       if (CORE_READ(&out->a, &in_a->a1) ||            /* accessor: 0:0 */
+           CORE_READ(&out->b, &in_b->b1))              /* accessor: 0:0 */
                return 1;
 
        /* Validate relocations capture array-only accesses for structs with
         * fixed header, but with potentially extendable tail. This will read
         * first 4 bytes of 2nd element of in_ext array of potentially
         * variably sized struct core_reloc_misc_extensible. */ 
-       if (BPF_CORE_READ(&out->c, &in_ext[2]))         /* accessor: 2 */
+       if (CORE_READ(&out->c, &in_ext[2]))             /* accessor: 2 */
                return 1;
 
        return 0;
index f98b942..3199faf 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -41,20 +42,22 @@ struct core_reloc_mods {
        core_reloc_mods_substruct_t h;
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_mods(void *ctx)
 {
        struct core_reloc_mods *in = (void *)&data.in;
        struct core_reloc_mods_output *out = (void *)&data.out;
 
-       if (BPF_CORE_READ(&out->a, &in->a) ||
-           BPF_CORE_READ(&out->b, &in->b) ||
-           BPF_CORE_READ(&out->c, &in->c) ||
-           BPF_CORE_READ(&out->d, &in->d) ||
-           BPF_CORE_READ(&out->e, &in->e[2]) ||
-           BPF_CORE_READ(&out->f, &in->f[1]) ||
-           BPF_CORE_READ(&out->g, &in->g.x) ||
-           BPF_CORE_READ(&out->h, &in->h.y))
+       if (CORE_READ(&out->a, &in->a) ||
+           CORE_READ(&out->b, &in->b) ||
+           CORE_READ(&out->c, &in->c) ||
+           CORE_READ(&out->d, &in->d) ||
+           CORE_READ(&out->e, &in->e[2]) ||
+           CORE_READ(&out->f, &in->f[1]) ||
+           CORE_READ(&out->g, &in->g.x) ||
+           CORE_READ(&out->h, &in->h.y))
                return 1;
 
        return 0;
index 3ca30ce..98238cb 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -30,15 +31,17 @@ struct core_reloc_nesting {
        } b;
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_nesting(void *ctx)
 {
        struct core_reloc_nesting *in = (void *)&data.in;
        struct core_reloc_nesting *out = (void *)&data.out;
 
-       if (BPF_CORE_READ(&out->a.a.a, &in->a.a.a))
+       if (CORE_READ(&out->a.a.a, &in->a.a.a))
                return 1;
-       if (BPF_CORE_READ(&out->b.b.b, &in->b.b.b))
+       if (CORE_READ(&out->b.b.b, &in->b.b.b))
                return 1;
 
        return 0;
index add52f2..4f3ecb9 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -25,17 +26,19 @@ struct core_reloc_primitives {
        int (*f)(const char *);
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_primitives(void *ctx)
 {
        struct core_reloc_primitives *in = (void *)&data.in;
        struct core_reloc_primitives *out = (void *)&data.out;
 
-       if (BPF_CORE_READ(&out->a, &in->a) ||
-           BPF_CORE_READ(&out->b, &in->b) ||
-           BPF_CORE_READ(&out->c, &in->c) ||
-           BPF_CORE_READ(&out->d, &in->d) ||
-           BPF_CORE_READ(&out->f, &in->f))
+       if (CORE_READ(&out->a, &in->a) ||
+           CORE_READ(&out->b, &in->b) ||
+           CORE_READ(&out->c, &in->c) ||
+           CORE_READ(&out->d, &in->d) ||
+           CORE_READ(&out->f, &in->f))
                return 1;
 
        return 0;
index 526b7dd..27f602f 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include "bpf_helpers.h"
+#include "bpf_core_read.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -16,13 +17,15 @@ struct core_reloc_ptr_as_arr {
        int a;
 };
 
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
 SEC("raw_tracepoint/sys_enter")
 int test_core_ptr_as_arr(void *ctx)
 {
        struct core_reloc_ptr_as_arr *in = (void *)&data.in;
        struct core_reloc_ptr_as_arr *out = (void *)&data.out;
 
-       if (BPF_CORE_READ(&out->a, &in[2].a))
+       if (CORE_READ(&out->a, &in[2].a))
                return 1;
 
        return 0;
index f8ffa3f..6a4a8f5 100644 (file)
@@ -47,12 +47,11 @@ struct {
  * issue and avoid complicated C programming massaging.
  * This is an acceptable workaround since there is one entry here.
  */
-typedef __u64 raw_stack_trace_t[2 * MAX_STACK_RAWTP];
 struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
        __uint(max_entries, 1);
        __type(key, __u32);
-       __type(value, raw_stack_trace_t);
+       __type(value, __u64[2 * MAX_STACK_RAWTP]);
 } rawdata_map SEC(".maps");
 
 SEC("raw_tracepoint/sys_enter")
@@ -100,4 +99,3 @@ int bpf_prog1(void *ctx)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
index 876c27d..07c09ca 100644 (file)
@@ -22,4 +22,3 @@ int handle_sys_nanosleep_entry(struct pt_regs *ctx)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c
new file mode 100644 (file)
index 0000000..f69a4a5
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+       __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+} nopinmap SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+       __uint(pinning, LIBBPF_PIN_NONE);
+} nopinmap2 SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
new file mode 100644 (file)
index 0000000..51b38ab
--- /dev/null
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+       __uint(pinning, 2); /* invalid */
+} nopinmap3 SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
new file mode 100644 (file)
index 0000000..1871e2e
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+
+#include <netinet/in.h>
+
+#include "bpf_helpers.h"
+#include "bpf_tracing.h"
+
+static struct sockaddr_in old;
+
+SEC("kprobe/__sys_connect")
+int handle_sys_connect(struct pt_regs *ctx)
+{
+       void *ptr = (void *)PT_REGS_PARM2(ctx);
+       struct sockaddr_in new;
+
+       bpf_probe_read_user(&old, sizeof(old), ptr);
+       __builtin_memset(&new, 0xab, sizeof(new));
+       bpf_probe_write_user(ptr, &new, sizeof(new));
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
new file mode 100644 (file)
index 0000000..0e014d3
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Politecnico di Torino
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct {
+       __uint(type, MAP_TYPE);
+       __uint(max_entries, 32);
+       __uint(map_flags, 0);
+       __uint(key_size, 0);
+       __uint(value_size, sizeof(__u32));
+} map_in SEC(".maps");
+
+struct {
+       __uint(type, MAP_TYPE);
+       __uint(max_entries, 32);
+       __uint(map_flags, 0);
+       __uint(key_size, 0);
+       __uint(value_size, sizeof(__u32));
+} map_out SEC(".maps");
+
+SEC("test")
+int _test(struct __sk_buff *skb)
+{
+       void *data_end = (void *)(long)skb->data_end;
+       void *data = (void *)(long)skb->data;
+       struct ethhdr *eth = (struct ethhdr *)(data);
+       __u32 value;
+       int err;
+
+       if (eth + 1 > data_end)
+               return TC_ACT_SHOT;
+
+       struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+       if (iph + 1 > data_end)
+               return TC_ACT_SHOT;
+
+       err = bpf_map_pop_elem(&map_in, &value);
+       if (err)
+               return TC_ACT_SHOT;
+
+       iph->daddr = value;
+
+       err = bpf_map_push_elem(&map_out, &iph->saddr, 0);
+       if (err)
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
new file mode 100644 (file)
index 0000000..52d94e8
--- /dev/null
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+static volatile const struct {
+       unsigned a[4];
+       /*
+        * if the struct's size is multiple of 16, compiler will put it into
+        * .rodata.cst16 section, which is not recognized by libbpf; work
+        * around this by ensuring we don't have 16-aligned struct
+        */
+       char _y;
+} rdonly_values = { .a = {2, 3, 4, 5} };
+
+static volatile struct {
+       unsigned did_run;
+       unsigned iters;
+       unsigned sum;
+} res;
+
+SEC("raw_tracepoint/sys_enter:skip_loop")
+int skip_loop(struct pt_regs *ctx)
+{
+       /* prevent compiler to optimize everything out */
+       unsigned * volatile p = (void *)&rdonly_values.a;
+       unsigned iters = 0, sum = 0;
+
+       /* we should never enter this loop */
+       while (*p & 1) {
+               iters++;
+               sum += *p;
+               p++;
+       }
+       res.did_run = 1;
+       res.iters = iters;
+       res.sum = sum;
+       return 0;
+}
+
+SEC("raw_tracepoint/sys_enter:part_loop")
+int part_loop(struct pt_regs *ctx)
+{
+       /* prevent compiler to optimize everything out */
+       unsigned * volatile p = (void *)&rdonly_values.a;
+       unsigned iters = 0, sum = 0;
+
+       /* validate verifier can derive loop termination */
+       while (*p < 5) {
+               iters++;
+               sum += *p;
+               p++;
+       }
+       res.did_run = 1;
+       res.iters = iters;
+       res.sum = sum;
+       return 0;
+}
+
+SEC("raw_tracepoint/sys_enter:full_loop")
+int full_loop(struct pt_regs *ctx)
+{
+       /* prevent compiler to optimize everything out */
+       unsigned * volatile p = (void *)&rdonly_values.a;
+       int i = sizeof(rdonly_values.a) / sizeof(rdonly_values.a[0]);
+       unsigned iters = 0, sum = 0;
+
+       /* validate verifier can allow full loop as well */
+       while (i > 0 ) {
+               iters++;
+               sum += *p;
+               p++;
+               i--;
+       }
+       res.did_run = 1;
+       res.iters = iters;
+       res.sum = sum;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index e21cd73..cb49ccb 100644 (file)
@@ -53,7 +53,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
        return result;
 }
 
-SEC("sk_lookup_success")
+SEC("classifier/sk_lookup_success")
 int bpf_sk_lookup_test0(struct __sk_buff *skb)
 {
        void *data_end = (void *)(long)skb->data_end;
@@ -78,7 +78,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
        return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
 }
 
-SEC("sk_lookup_success_simple")
+SEC("classifier/sk_lookup_success_simple")
 int bpf_sk_lookup_test1(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -90,7 +90,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("fail_use_after_free")
+SEC("classifier/fail_use_after_free")
 int bpf_sk_lookup_uaf(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -105,7 +105,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
        return family;
 }
 
-SEC("fail_modify_sk_pointer")
+SEC("classifier/fail_modify_sk_pointer")
 int bpf_sk_lookup_modptr(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -120,7 +120,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("fail_modify_sk_or_null_pointer")
+SEC("classifier/fail_modify_sk_or_null_pointer")
 int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -134,7 +134,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("fail_no_release")
+SEC("classifier/fail_no_release")
 int bpf_sk_lookup_test2(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -143,7 +143,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("fail_release_twice")
+SEC("classifier/fail_release_twice")
 int bpf_sk_lookup_test3(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -155,7 +155,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("fail_release_unchecked")
+SEC("classifier/fail_release_unchecked")
 int bpf_sk_lookup_test4(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb)
        bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("fail_no_release_subcall")
+SEC("classifier/fail_no_release_subcall")
 int bpf_sk_lookup_test5(struct __sk_buff *skb)
 {
        lookup_no_release(skb);
index 7a80960..2a9f4c7 100644 (file)
@@ -16,6 +16,7 @@ int process(struct __sk_buff *skb)
                skb->cb[i]++;
        }
        skb->priority++;
+       skb->tstamp++;
 
        return 0;
 }
index fa0be3e..3b7e1dc 100644 (file)
@@ -74,4 +74,3 @@ int oncpu(struct sched_switch_args *ctx)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
index c8c595d..87b7d93 100644 (file)
@@ -38,7 +38,7 @@
 #include <sys/socket.h>
 #include "bpf_helpers.h"
 
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})
 #define TCP_ESTATS_MAGIC 0xBAADBEEF
 
 /* This test case needs "sock" and "pt_regs" data structure.
diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c
deleted file mode 100644 (file)
index 6e75dd3..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <linux/err.h>
-#include <btf.h>
-
-#define CHECK(condition, format...) ({                                 \
-       int __ret = !!(condition);                                      \
-       if (__ret) {                                                    \
-               fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);     \
-               fprintf(stderr, format);                                \
-       }                                                               \
-       __ret;                                                          \
-})
-
-void btf_dump_printf(void *ctx, const char *fmt, va_list args)
-{
-       vfprintf(ctx, fmt, args);
-}
-
-struct btf_dump_test_case {
-       const char *name;
-       struct btf_dump_opts opts;
-} btf_dump_test_cases[] = {
-       {.name = "btf_dump_test_case_syntax", .opts = {}},
-       {.name = "btf_dump_test_case_ordering", .opts = {}},
-       {.name = "btf_dump_test_case_padding", .opts = {}},
-       {.name = "btf_dump_test_case_packing", .opts = {}},
-       {.name = "btf_dump_test_case_bitfields", .opts = {}},
-       {.name = "btf_dump_test_case_multidim", .opts = {}},
-       {.name = "btf_dump_test_case_namespacing", .opts = {}},
-};
-
-static int btf_dump_all_types(const struct btf *btf,
-                             const struct btf_dump_opts *opts)
-{
-       size_t type_cnt = btf__get_nr_types(btf);
-       struct btf_dump *d;
-       int err = 0, id;
-
-       d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
-       if (IS_ERR(d))
-               return PTR_ERR(d);
-
-       for (id = 1; id <= type_cnt; id++) {
-               err = btf_dump__dump_type(d, id);
-               if (err)
-                       goto done;
-       }
-
-done:
-       btf_dump__free(d);
-       return err;
-}
-
-int test_btf_dump_case(int n, struct btf_dump_test_case *test_case)
-{
-       char test_file[256], out_file[256], diff_cmd[1024];
-       struct btf *btf = NULL;
-       int err = 0, fd = -1;
-       FILE *f = NULL;
-
-       fprintf(stderr, "Test case #%d (%s): ", n, test_case->name);
-
-       snprintf(test_file, sizeof(test_file), "%s.o", test_case->name);
-
-       btf = btf__parse_elf(test_file, NULL);
-       if (CHECK(IS_ERR(btf),
-           "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
-               err = -PTR_ERR(btf);
-               btf = NULL;
-               goto done;
-       }
-
-       snprintf(out_file, sizeof(out_file),
-                "/tmp/%s.output.XXXXXX", test_case->name);
-       fd = mkstemp(out_file);
-       if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) {
-               err = fd;
-               goto done;
-       }
-       f = fdopen(fd, "w");
-       if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n",
-                 strerror(errno), errno)) {
-               close(fd);
-               goto done;
-       }
-
-       test_case->opts.ctx = f;
-       err = btf_dump_all_types(btf, &test_case->opts);
-       fclose(f);
-       close(fd);
-       if (CHECK(err, "failure during C dumping: %d\n", err)) {
-               goto done;
-       }
-
-       snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name);
-       if (access(test_file, R_OK) == -1)
-               /*
-                * When the test is run with O=, kselftest copies TEST_FILES
-                * without preserving the directory structure.
-                */
-               snprintf(test_file, sizeof(test_file), "%s.c",
-                       test_case->name);
-       /*
-        * Diff test output and expected test output, contained between
-        * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case.
-        * For expected output lines, everything before '*' is stripped out.
-        * Also lines containing comment start and comment end markers are
-        * ignored. 
-        */
-       snprintf(diff_cmd, sizeof(diff_cmd),
-                "awk '/START-EXPECTED-OUTPUT/{out=1;next} "
-                "/END-EXPECTED-OUTPUT/{out=0} "
-                "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */
-                "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'",
-                test_file, out_file);
-       err = system(diff_cmd);
-       if (CHECK(err,
-                 "differing test output, output=%s, err=%d, diff cmd:\n%s\n",
-                 out_file, err, diff_cmd))
-               goto done;
-
-       remove(out_file);
-       fprintf(stderr, "OK\n");
-
-done:
-       btf__free(btf);
-       return err;
-}
-
-int main() {
-       int test_case_cnt, i, err, failed = 0;
-
-       test_case_cnt = sizeof(btf_dump_test_cases) /
-                       sizeof(btf_dump_test_cases[0]);
-
-       for (i = 0; i < test_case_cnt; i++) {
-               err = test_btf_dump_case(i, &btf_dump_test_cases[i]);
-               if (err)
-                       failed++;
-       }
-
-       fprintf(stderr, "%d tests succeeded, %d tests failed.\n",
-               test_case_cnt - failed, failed);
-
-       return failed;
-}
index e2d0619..a8485ae 100755 (executable)
@@ -18,19 +18,55 @@ fi
 # this is the case and run it with in_netns.sh if it is being run in the root
 # namespace.
 if [[ -z $(ip netns identify $$) ]]; then
+       err=0
+       if bpftool="$(which bpftool)"; then
+               echo "Testing global flow dissector..."
+
+               $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \
+                       type flow_dissector
+
+               if ! unshare --net $bpftool prog attach pinned \
+                       /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+                       echo "Unexpected unsuccessful attach in namespace" >&2
+                       err=1
+               fi
+
+               $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \
+                       flow_dissector
+
+               if unshare --net $bpftool prog attach pinned \
+                       /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+                       echo "Unexpected successful attach in namespace" >&2
+                       err=1
+               fi
+
+               if ! $bpftool prog detach pinned \
+                       /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+                       echo "Failed to detach flow dissector" >&2
+                       err=1
+               fi
+
+               rm -rf /sys/fs/bpf/flow
+       else
+               echo "Skipping root flow dissector test, bpftool not found" >&2
+       fi
+
+       # Run the rest of the tests in a net namespace.
        ../net/in_netns.sh "$0" "$@"
-       exit $?
-fi
+       err=$(( $err + $? ))
 
-# Determine selftest success via shell exit code
-exit_handler()
-{
-       if (( $? == 0 )); then
+       if (( $err == 0 )); then
                echo "selftests: $TESTNAME [PASS]";
        else
                echo "selftests: $TESTNAME [FAILED]";
        fi
 
+       exit $err
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
        set +e
 
        # Cleanup
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
deleted file mode 100755 (executable)
index 2989b2e..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-export TESTNAME=test_libbpf
-
-# Determine selftest success via shell exit code
-exit_handler()
-{
-       if [ $? -eq 0 ]; then
-               echo "selftests: $TESTNAME [PASS]";
-       else
-               echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
-               echo "selftests: $TESTNAME [FAILED]";
-       fi
-}
-
-libbpf_open_file()
-{
-       LAST_LOADED=$1
-       if [ -n "$VERBOSE" ]; then
-           ./test_libbpf_open $1
-       else
-           ./test_libbpf_open --quiet $1
-       fi
-}
-
-# Exit script immediately (well catched by trap handler) if any
-# program/thing exits with a non-zero status.
-set -e
-
-# (Use 'trap -l' to list meaning of numbers)
-trap exit_handler 0 2 3 6 9
-
-libbpf_open_file test_l4lb.o
-
-# Load a program with BPF-to-BPF calls
-libbpf_open_file test_l4lb_noinline.o
-
-# Load a program compiled without the "-target bpf" flag
-libbpf_open_file test_xdp.o
-
-# Success
-exit 0
diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c
deleted file mode 100644 (file)
index 9e9db20..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
- */
-static const char *__doc__ =
-       "Libbpf test program for loading BPF ELF object files";
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-#include <bpf/libbpf.h>
-#include <getopt.h>
-
-#include "bpf_rlimit.h"
-
-static const struct option long_options[] = {
-       {"help",        no_argument,            NULL, 'h' },
-       {"debug",       no_argument,            NULL, 'D' },
-       {"quiet",       no_argument,            NULL, 'q' },
-       {0, 0, NULL,  0 }
-};
-
-static void usage(char *argv[])
-{
-       int i;
-
-       printf("\nDOCUMENTATION:\n%s\n\n", __doc__);
-       printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]);
-       printf(" Listing options:\n");
-       for (i = 0; long_options[i].name != 0; i++) {
-               printf(" --%-12s", long_options[i].name);
-               printf(" short-option: -%c",
-                      long_options[i].val);
-               printf("\n");
-       }
-       printf("\n");
-}
-
-static bool debug = 0;
-static int libbpf_debug_print(enum libbpf_print_level level,
-                             const char *fmt, va_list args)
-{
-       if (level == LIBBPF_DEBUG && !debug)
-               return 0;
-
-       fprintf(stderr, "[%d] ", level);
-       return vfprintf(stderr, fmt, args);
-}
-
-#define EXIT_FAIL_LIBBPF EXIT_FAILURE
-#define EXIT_FAIL_OPTION 2
-
-int test_walk_progs(struct bpf_object *obj, bool verbose)
-{
-       struct bpf_program *prog;
-       int cnt = 0;
-
-       bpf_object__for_each_program(prog, obj) {
-               cnt++;
-               if (verbose)
-                       printf("Prog (count:%d) section_name: %s\n", cnt,
-                              bpf_program__title(prog, false));
-       }
-       return 0;
-}
-
-int test_walk_maps(struct bpf_object *obj, bool verbose)
-{
-       struct bpf_map *map;
-       int cnt = 0;
-
-       bpf_object__for_each_map(map, obj) {
-               cnt++;
-               if (verbose)
-                       printf("Map (count:%d) name: %s\n", cnt,
-                              bpf_map__name(map));
-       }
-       return 0;
-}
-
-int test_open_file(char *filename, bool verbose)
-{
-       struct bpf_object *bpfobj = NULL;
-       long err;
-
-       if (verbose)
-               printf("Open BPF ELF-file with libbpf: %s\n", filename);
-
-       /* Load BPF ELF object file and check for errors */
-       bpfobj = bpf_object__open(filename);
-       err = libbpf_get_error(bpfobj);
-       if (err) {
-               char err_buf[128];
-               libbpf_strerror(err, err_buf, sizeof(err_buf));
-               if (verbose)
-                       printf("Unable to load eBPF objects in file '%s': %s\n",
-                              filename, err_buf);
-               return EXIT_FAIL_LIBBPF;
-       }
-       test_walk_progs(bpfobj, verbose);
-       test_walk_maps(bpfobj, verbose);
-
-       if (verbose)
-               printf("Close BPF ELF-file with libbpf: %s\n",
-                      bpf_object__name(bpfobj));
-       bpf_object__close(bpfobj);
-
-       return 0;
-}
-
-int main(int argc, char **argv)
-{
-       char filename[1024] = { 0 };
-       bool verbose = 1;
-       int longindex = 0;
-       int opt;
-
-       libbpf_set_print(libbpf_debug_print);
-
-       /* Parse commands line args */
-       while ((opt = getopt_long(argc, argv, "hDq",
-                                 long_options, &longindex)) != -1) {
-               switch (opt) {
-               case 'D':
-                       debug = 1;
-                       break;
-               case 'q': /* Use in scripting mode */
-                       verbose = 0;
-                       break;
-               case 'h':
-               default:
-                       usage(argv);
-                       return EXIT_FAIL_OPTION;
-               }
-       }
-       if (optind >= argc) {
-               usage(argv);
-               printf("ERROR: Expected BPF_FILE argument after options\n");
-               return EXIT_FAIL_OPTION;
-       }
-       snprintf(filename, sizeof(filename), "%s", argv[optind]);
-
-       return test_open_file(filename, verbose);
-}
index e1f1bec..02eae1e 100644 (file)
@@ -1142,7 +1142,6 @@ out_sockmap:
 #define MAPINMAP_PROG "./test_map_in_map.o"
 static void test_map_in_map(void)
 {
-       struct bpf_program *prog;
        struct bpf_object *obj;
        struct bpf_map *map;
        int mim_fd, fd, err;
@@ -1179,9 +1178,6 @@ static void test_map_in_map(void)
                goto out_map_in_map;
        }
 
-       bpf_object__for_each_program(prog, obj) {
-               bpf_program__set_xdp(prog);
-       }
        bpf_object__load(obj);
 
        map = bpf_object__find_map_by_name(obj, "mim_array");
@@ -1717,9 +1713,9 @@ static void run_all_tests(void)
        test_map_in_map();
 }
 
-#define DECLARE
+#define DEFINE_TEST(name) extern void test_##name(void);
 #include <map_tests/tests.h>
-#undef DECLARE
+#undef DEFINE_TEST
 
 int main(void)
 {
@@ -1731,9 +1727,9 @@ int main(void)
        map_flags = BPF_F_NO_PREALLOC;
        run_all_tests();
 
-#define CALL
+#define DEFINE_TEST(name) test_##name();
 #include <map_tests/tests.h>
-#undef CALL
+#undef DEFINE_TEST
 
        printf("test_maps: OK, %d SKIPPED\n", skips);
        return 0;
index af75a1c..a05a807 100644 (file)
@@ -20,7 +20,7 @@ struct prog_test_def {
        bool tested;
        bool need_cgroup_cleanup;
 
-       const char *subtest_name;
+       char *subtest_name;
        int subtest_num;
 
        /* store counts before subtest started */
@@ -81,16 +81,17 @@ void test__end_subtest()
        fprintf(env.stdout, "#%d/%d %s:%s\n",
               test->test_num, test->subtest_num,
               test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
+
+       free(test->subtest_name);
+       test->subtest_name = NULL;
 }
 
 bool test__start_subtest(const char *name)
 {
        struct prog_test_def *test = env.test;
 
-       if (test->subtest_name) {
+       if (test->subtest_name)
                test__end_subtest();
-               test->subtest_name = NULL;
-       }
 
        test->subtest_num++;
 
@@ -104,7 +105,13 @@ bool test__start_subtest(const char *name)
        if (!should_run(&env.subtest_selector, test->subtest_num, name))
                return false;
 
-       test->subtest_name = name;
+       test->subtest_name = strdup(name);
+       if (!test->subtest_name) {
+               fprintf(env.stderr,
+                       "Subtest #%d: failed to copy subtest name!\n",
+                       test->subtest_num);
+               return false;
+       }
        env.test->old_error_cnt = env.test->error_cnt;
 
        return true;
@@ -306,7 +313,7 @@ void *spin_lock_thread(void *arg)
 }
 
 /* extern declarations for test funcs */
-#define DEFINE_TEST(name) extern void test_##name();
+#define DEFINE_TEST(name) extern void test_##name(void);
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
 
@@ -518,6 +525,33 @@ static void stdio_restore(void)
 #endif
 }
 
+/*
+ * Determine if test_progs is running as a "flavored" test runner and switch
+ * into corresponding sub-directory to load correct BPF objects.
+ *
+ * This is done by looking at executable name. If it contains "-flavor"
+ * suffix, then we are running as a flavored test runner.
+ */
+int cd_flavor_subdir(const char *exec_name)
+{
+       /* General form of argv[0] passed here is:
+        * some/path/to/test_progs[-flavor], where -flavor part is optional.
+        * First cut out "test_progs[-flavor]" part, then extract "flavor"
+        * part, if it's there.
+        */
+       const char *flavor = strrchr(exec_name, '/');
+
+       if (!flavor)
+               return 0;
+       flavor++;
+       flavor = strrchr(flavor, '-');
+       if (!flavor)
+               return 0;
+       flavor++;
+       printf("Switching to flavor '%s' subdirectory...\n", flavor);
+       return chdir(flavor);
+}
+
 int main(int argc, char **argv)
 {
        static const struct argp argp = {
@@ -531,6 +565,10 @@ int main(int argc, char **argv)
        if (err)
                return err;
 
+       err = cd_flavor_subdir(argv[0]);
+       if (err)
+               return err;
+
        libbpf_set_print(libbpf_print_fn);
 
        srand(time(NULL));
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
deleted file mode 100644 (file)
index 0e014d3..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (c) 2018 Politecnico di Torino
-#include <stddef.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
-
-int _version SEC("version") = 1;
-
-struct {
-       __uint(type, MAP_TYPE);
-       __uint(max_entries, 32);
-       __uint(map_flags, 0);
-       __uint(key_size, 0);
-       __uint(value_size, sizeof(__u32));
-} map_in SEC(".maps");
-
-struct {
-       __uint(type, MAP_TYPE);
-       __uint(max_entries, 32);
-       __uint(map_flags, 0);
-       __uint(key_size, 0);
-       __uint(value_size, sizeof(__u32));
-} map_out SEC(".maps");
-
-SEC("test")
-int _test(struct __sk_buff *skb)
-{
-       void *data_end = (void *)(long)skb->data_end;
-       void *data = (void *)(long)skb->data;
-       struct ethhdr *eth = (struct ethhdr *)(data);
-       __u32 value;
-       int err;
-
-       if (eth + 1 > data_end)
-               return TC_ACT_SHOT;
-
-       struct iphdr *iph = (struct iphdr *)(eth + 1);
-
-       if (iph + 1 > data_end)
-               return TC_ACT_SHOT;
-
-       err = bpf_map_pop_elem(&map_in, &value);
-       if (err)
-               return TC_ACT_SHOT;
-
-       iph->daddr = value;
-
-       err = bpf_map_push_elem(&map_out, &iph->saddr, 0);
-       if (err)
-               return TC_ACT_SHOT;
-
-       return TC_ACT_OK;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
deleted file mode 100644 (file)
index 29833ae..0000000
+++ /dev/null
@@ -1,233 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <err.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_util.h"
-
-struct sec_name_test {
-       const char sec_name[32];
-       struct {
-               int rc;
-               enum bpf_prog_type prog_type;
-               enum bpf_attach_type expected_attach_type;
-       } expected_load;
-       struct {
-               int rc;
-               enum bpf_attach_type attach_type;
-       } expected_attach;
-};
-
-static struct sec_name_test tests[] = {
-       {"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} },
-       {"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} },
-       {"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} },
-       {"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
-       {"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
-       {"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} },
-       {"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} },
-       {"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
-       {
-               "raw_tracepoint/",
-               {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0},
-               {-EINVAL, 0},
-       },
-       {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
-       {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
-       {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
-       {"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
-       {"lwt_xmit", {0, BPF_PROG_TYPE_LWT_XMIT, 0}, {-EINVAL, 0} },
-       {"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} },
-       {
-               "cgroup_skb/ingress",
-               {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
-               {0, BPF_CGROUP_INET_INGRESS},
-       },
-       {
-               "cgroup_skb/egress",
-               {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
-               {0, BPF_CGROUP_INET_EGRESS},
-       },
-       {"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} },
-       {
-               "cgroup/sock",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
-               {0, BPF_CGROUP_INET_SOCK_CREATE},
-       },
-       {
-               "cgroup/post_bind4",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND},
-               {0, BPF_CGROUP_INET4_POST_BIND},
-       },
-       {
-               "cgroup/post_bind6",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND},
-               {0, BPF_CGROUP_INET6_POST_BIND},
-       },
-       {
-               "cgroup/dev",
-               {0, BPF_PROG_TYPE_CGROUP_DEVICE, 0},
-               {0, BPF_CGROUP_DEVICE},
-       },
-       {"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} },
-       {
-               "sk_skb/stream_parser",
-               {0, BPF_PROG_TYPE_SK_SKB, 0},
-               {0, BPF_SK_SKB_STREAM_PARSER},
-       },
-       {
-               "sk_skb/stream_verdict",
-               {0, BPF_PROG_TYPE_SK_SKB, 0},
-               {0, BPF_SK_SKB_STREAM_VERDICT},
-       },
-       {"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} },
-       {"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} },
-       {"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} },
-       {
-               "flow_dissector",
-               {0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0},
-               {0, BPF_FLOW_DISSECTOR},
-       },
-       {
-               "cgroup/bind4",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND},
-               {0, BPF_CGROUP_INET4_BIND},
-       },
-       {
-               "cgroup/bind6",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND},
-               {0, BPF_CGROUP_INET6_BIND},
-       },
-       {
-               "cgroup/connect4",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT},
-               {0, BPF_CGROUP_INET4_CONNECT},
-       },
-       {
-               "cgroup/connect6",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT},
-               {0, BPF_CGROUP_INET6_CONNECT},
-       },
-       {
-               "cgroup/sendmsg4",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
-               {0, BPF_CGROUP_UDP4_SENDMSG},
-       },
-       {
-               "cgroup/sendmsg6",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
-               {0, BPF_CGROUP_UDP6_SENDMSG},
-       },
-       {
-               "cgroup/recvmsg4",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG},
-               {0, BPF_CGROUP_UDP4_RECVMSG},
-       },
-       {
-               "cgroup/recvmsg6",
-               {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG},
-               {0, BPF_CGROUP_UDP6_RECVMSG},
-       },
-       {
-               "cgroup/sysctl",
-               {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
-               {0, BPF_CGROUP_SYSCTL},
-       },
-       {
-               "cgroup/getsockopt",
-               {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT},
-               {0, BPF_CGROUP_GETSOCKOPT},
-       },
-       {
-               "cgroup/setsockopt",
-               {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
-               {0, BPF_CGROUP_SETSOCKOPT},
-       },
-};
-
-static int test_prog_type_by_name(const struct sec_name_test *test)
-{
-       enum bpf_attach_type expected_attach_type;
-       enum bpf_prog_type prog_type;
-       int rc;
-
-       rc = libbpf_prog_type_by_name(test->sec_name, &prog_type,
-                                     &expected_attach_type);
-
-       if (rc != test->expected_load.rc) {
-               warnx("prog: unexpected rc=%d for %s", rc, test->sec_name);
-               return -1;
-       }
-
-       if (rc)
-               return 0;
-
-       if (prog_type != test->expected_load.prog_type) {
-               warnx("prog: unexpected prog_type=%d for %s", prog_type,
-                     test->sec_name);
-               return -1;
-       }
-
-       if (expected_attach_type != test->expected_load.expected_attach_type) {
-               warnx("prog: unexpected expected_attach_type=%d for %s",
-                     expected_attach_type, test->sec_name);
-               return -1;
-       }
-
-       return 0;
-}
-
-static int test_attach_type_by_name(const struct sec_name_test *test)
-{
-       enum bpf_attach_type attach_type;
-       int rc;
-
-       rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
-
-       if (rc != test->expected_attach.rc) {
-               warnx("attach: unexpected rc=%d for %s", rc, test->sec_name);
-               return -1;
-       }
-
-       if (rc)
-               return 0;
-
-       if (attach_type != test->expected_attach.attach_type) {
-               warnx("attach: unexpected attach_type=%d for %s", attach_type,
-                     test->sec_name);
-               return -1;
-       }
-
-       return 0;
-}
-
-static int run_test_case(const struct sec_name_test *test)
-{
-       if (test_prog_type_by_name(test))
-               return -1;
-       if (test_attach_type_by_name(test))
-               return -1;
-       return 0;
-}
-
-static int run_tests(void)
-{
-       int passes = 0;
-       int fails = 0;
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(tests); ++i) {
-               if (run_test_case(&tests[i]))
-                       ++fails;
-               else
-                       ++passes;
-       }
-       printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
-       return fails ? -1 : 0;
-}
-
-int main(int argc, char **argv)
-{
-       return run_tests();
-}
index a320e38..7aff907 100644 (file)
@@ -120,6 +120,29 @@ static struct sysctl_test tests[] = {
                .newval = "(none)", /* same as default, should fail anyway */
                .result = OP_EPERM,
        },
+       {
+               .descr = "ctx:write sysctl:write read ok narrow",
+               .insns = {
+                       /* u64 w = (u16)write & 1; */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+                       BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
+                                   offsetof(struct bpf_sysctl, write)),
+#else
+                       BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
+                                   offsetof(struct bpf_sysctl, write) + 2),
+#endif
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1),
+                       /* return 1 - w; */
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+                       BPF_EXIT_INSN(),
+               },
+               .attach_type = BPF_CGROUP_SYSCTL,
+               .sysctl = "kernel/domainname",
+               .open_flags = O_WRONLY,
+               .newval = "(none)", /* same as default, should fail anyway */
+               .result = OP_EPERM,
+       },
        {
                .descr = "ctx:write sysctl:read write reject",
                .insns = {
index 1fc4e61..1af3718 100644 (file)
        .prog_type = BPF_PROG_TYPE_XDP,
        .retval = 55,
 },
+{
+       "taken loop with back jump to 1st insn, 2",
+       .insns = {
+       BPF_MOV64_IMM(BPF_REG_1, 10),
+       BPF_MOV64_IMM(BPF_REG_2, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
+       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+       BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, -3),
+       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+       BPF_EXIT_INSN(),
+       },
+       .result = ACCEPT,
+       .prog_type = BPF_PROG_TYPE_XDP,
+       .retval = 55,
+},
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh
new file mode 100644 (file)
index 0000000..f7c168d
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+       local should_fail=$1; shift
+       local target
+
+       target=$(devlink_resource_size_get span_agents)
+
+       if ((! should_fail)); then
+               echo $target
+       else
+               echo $((target + 1))
+       fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
new file mode 100755 (executable)
index 0000000..2b5f4f7
--- /dev/null
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+current_test=""
+
+cleanup()
+{
+       pre_cleanup
+       if [ ! -z $current_test ]; then
+               ${current_test}_cleanup
+       fi
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="tc_flower mirror_gre"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+       source ${current_test}_scale.sh
+
+       num_netifs_var=${current_test^^}_NUM_NETIFS
+       num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+       for should_fail in 0 1; do
+               RET=0
+               target=$(${current_test}_get_target "$should_fail")
+               ${current_test}_setup_prepare
+               setup_wait $num_netifs
+               ${current_test}_test "$target" "$should_fail"
+               ${current_test}_cleanup
+               if [[ "$should_fail" -eq 0 ]]; then
+                       log_test "'$current_test' $target"
+               else
+                       log_test "'$current_test' overflow $target"
+               fi
+       done
+done
+current_test=""
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
new file mode 100644 (file)
index 0000000..a079522
--- /dev/null
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+       local should_fail=$1; shift
+
+       # The driver associates a counter with each tc filter, which means the
+       # number of supported filters is bounded by the number of available
+       # counters.
+       # Currently, the driver supports 12K (12,288) flow counters and six of
+       # these are used for multicast routing.
+       local target=12282
+
+       if ((! should_fail)); then
+               echo $target
+       else
+               echo $((target + 1))
+       fi
+}
index 8d2186c..f7c168d 100644 (file)
@@ -4,10 +4,13 @@ source ../mirror_gre_scale.sh
 mirror_gre_get_target()
 {
        local should_fail=$1; shift
+       local target
+
+       target=$(devlink_resource_size_get span_agents)
 
        if ((! should_fail)); then
-               echo 3
+               echo $target
        else
-               echo 4
+               echo $((target + 1))
        fi
 }
index 1158373..ee89cd2 100755 (executable)
@@ -3,7 +3,9 @@
 
 lib_dir=$(dirname $0)/../../../net/forwarding
 
-ALL_TESTS="fw_flash_test params_test regions_test"
+ALL_TESTS="fw_flash_test params_test regions_test reload_test \
+          netns_reload_test resource_test dev_info_test \
+          empty_reporter_test dummy_reporter_test"
 NUM_NETIFS=0
 source $lib_dir/lib.sh
 
@@ -142,6 +144,290 @@ regions_test()
        log_test "regions test"
 }
 
+reload_test()
+{
+       RET=0
+
+       devlink dev reload $DL_HANDLE
+       check_err $? "Failed to reload"
+
+       echo "y"> $DEBUGFS_DIR/fail_reload
+       check_err $? "Failed to setup devlink reload to fail"
+
+       devlink dev reload $DL_HANDLE
+       check_fail $? "Unexpected success of devlink reload"
+
+       echo "n"> $DEBUGFS_DIR/fail_reload
+       check_err $? "Failed to setup devlink reload not to fail"
+
+       devlink dev reload $DL_HANDLE
+       check_err $? "Failed to reload after set not to fail"
+
+       echo "y"> $DEBUGFS_DIR/dont_allow_reload
+       check_err $? "Failed to forbid devlink reload"
+
+       devlink dev reload $DL_HANDLE
+       check_fail $? "Unexpected success of devlink reload"
+
+       echo "n"> $DEBUGFS_DIR/dont_allow_reload
+       check_err $? "Failed to re-enable devlink reload"
+
+       devlink dev reload $DL_HANDLE
+       check_err $? "Failed to reload after re-enable"
+
+       log_test "reload test"
+}
+
+netns_reload_test()
+{
+       RET=0
+
+       ip netns add testns1
+       check_err $? "Failed add netns \"testns1\""
+       ip netns add testns2
+       check_err $? "Failed add netns \"testns2\""
+
+       devlink dev reload $DL_HANDLE netns testns1
+       check_err $? "Failed to reload into netns \"testns1\""
+
+       devlink -N testns1 dev reload $DL_HANDLE netns testns2
+       check_err $? "Failed to reload from netns \"testns1\" into netns \"testns2\""
+
+       ip netns del testns2
+       ip netns del testns1
+
+       log_test "netns reload test"
+}
+
+DUMMYDEV="dummytest"
+
+res_val_get()
+{
+       local netns=$1
+       local parentname=$2
+       local name=$3
+       local type=$4
+
+       cmd_jq "devlink -N $netns resource show $DL_HANDLE -j" \
+              ".[][][] | select(.name == \"$parentname\").resources[] \
+               | select(.name == \"$name\").$type"
+}
+
+resource_test()
+{
+       RET=0
+
+       ip netns add testns1
+       check_err $? "Failed add netns \"testns1\""
+       ip netns add testns2
+       check_err $? "Failed add netns \"testns2\""
+
+       devlink dev reload $DL_HANDLE netns testns1
+       check_err $? "Failed to reload into netns \"testns1\""
+
+       # Create dummy dev to add the address and routes on.
+
+       ip -n testns1 link add name $DUMMYDEV type dummy
+       check_err $? "Failed create dummy device"
+       ip -n testns1 link set $DUMMYDEV up
+       check_err $? "Failed bring up dummy device"
+       ip -n testns1 a a 192.0.1.1/24 dev $DUMMYDEV
+       check_err $? "Failed add an IP address to dummy device"
+
+       local occ=$(res_val_get testns1 IPv4 fib occ)
+       local limit=$((occ+1))
+
+       # Set fib size limit to handle one another route only.
+
+       devlink -N testns1 resource set $DL_HANDLE path IPv4/fib size $limit
+       check_err $? "Failed to set IPv4/fib resource size"
+       local size_new=$(res_val_get testns1 IPv4 fib size_new)
+       [ "$size_new" -eq "$limit" ]
+       check_err $? "Unexpected \"size_new\" value (got $size_new, expected $limit)"
+
+       devlink -N testns1 dev reload $DL_HANDLE
+       check_err $? "Failed to reload"
+       local size=$(res_val_get testns1 IPv4 fib size)
+       [ "$size" -eq "$limit" ]
+       check_err $? "Unexpected \"size\" value (got $size, expected $limit)"
+
+       # Insert 2 routes, the first is going to be inserted,
+       # the second is expected to fail to be inserted.
+
+       ip -n testns1 r a 192.0.2.0/24 via 192.0.1.2
+       check_err $? "Failed to add route"
+
+       ip -n testns1 r a 192.0.3.0/24 via 192.0.1.2
+       check_fail $? "Unexpected successful route add over limit"
+
+       # Now create another dummy in second network namespace and
+       # insert two routes. That is over the limit of the netdevsim
+       # instance in the first namespace. Move the netdevsim instance
+       # into the second namespace and expect it to fail.
+
+       ip -n testns2 link add name $DUMMYDEV type dummy
+       check_err $? "Failed create dummy device"
+       ip -n testns2 link set $DUMMYDEV up
+       check_err $? "Failed bring up dummy device"
+       ip -n testns2 a a 192.0.1.1/24 dev $DUMMYDEV
+       check_err $? "Failed add an IP address to dummy device"
+       ip -n testns2 r a 192.0.2.0/24 via 192.0.1.2
+       check_err $? "Failed to add route"
+       ip -n testns2 r a 192.0.3.0/24 via 192.0.1.2
+       check_err $? "Failed to add route"
+
+       devlink -N testns1 dev reload $DL_HANDLE netns testns2
+       check_fail $? "Unexpected successful reload from netns \"testns1\" into netns \"testns2\""
+
+       ip netns del testns2
+       ip netns del testns1
+
+       log_test "resource test"
+}
+
+info_get()
+{
+       local name=$1
+
+       cmd_jq "devlink dev info $DL_HANDLE -j" ".[][][\"$name\"]" "-e"
+}
+
+dev_info_test()
+{
+       RET=0
+
+       driver=$(info_get "driver")
+       check_err $? "Failed to get driver name"
+       [ "$driver" == "netdevsim" ]
+       check_err $? "Unexpected driver name $driver"
+
+       log_test "dev_info test"
+}
+
+empty_reporter_test()
+{
+       RET=0
+
+       devlink health show $DL_HANDLE reporter empty >/dev/null
+       check_err $? "Failed show empty reporter"
+
+       devlink health dump show $DL_HANDLE reporter empty >/dev/null
+       check_err $? "Failed show dump of empty reporter"
+
+       devlink health diagnose $DL_HANDLE reporter empty >/dev/null
+       check_err $? "Failed diagnose empty reporter"
+
+       devlink health recover $DL_HANDLE reporter empty
+       check_err $? "Failed recover empty reporter"
+
+       log_test "empty reporter test"
+}
+
+check_reporter_info()
+{
+       local name=$1
+       local expected_state=$2
+       local expected_error=$3
+       local expected_recover=$4
+       local expected_grace_period=$5
+       local expected_auto_recover=$6
+
+       local show=$(devlink health show $DL_HANDLE reporter $name -j | jq -e -r ".[][][]")
+       check_err $? "Failed show $name reporter"
+
+       local state=$(echo $show | jq -r ".state")
+       [ "$state" == "$expected_state" ]
+       check_err $? "Unexpected \"state\" value (got $state, expected $expected_state)"
+
+       local error=$(echo $show | jq -r ".error")
+       [ "$error" == "$expected_error" ]
+       check_err $? "Unexpected \"error\" value (got $error, expected $expected_error)"
+
+       local recover=`echo $show | jq -r ".recover"`
+       [ "$recover" == "$expected_recover" ]
+       check_err $? "Unexpected \"recover\" value (got $recover, expected $expected_recover)"
+
+       local grace_period=$(echo $show | jq -r ".grace_period")
+       check_err $? "Failed get $name reporter grace_period"
+       [ "$grace_period" == "$expected_grace_period" ]
+       check_err $? "Unexpected \"grace_period\" value (got $grace_period, expected $expected_grace_period)"
+
+       local auto_recover=$(echo $show | jq -r ".auto_recover")
+       [ "$auto_recover" == "$expected_auto_recover" ]
+       check_err $? "Unexpected \"auto_recover\" value (got $auto_recover, expected $expected_auto_recover)"
+}
+
+dummy_reporter_test()
+{
+       RET=0
+
+       check_reporter_info dummy healthy 0 0 0 false
+
+       local BREAK_MSG="foo bar"
+       echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+       check_err $? "Failed to break dummy reporter"
+
+       check_reporter_info dummy error 1 0 0 false
+
+       local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
+       check_err $? "Failed show dump of dummy reporter"
+
+       local dump_break_msg=$(echo $dump | jq -r ".break_message")
+       [ "$dump_break_msg" == "$BREAK_MSG" ]
+       check_err $? "Unexpected dump break message value (got $dump_break_msg, expected $BREAK_MSG)"
+
+       devlink health dump clear $DL_HANDLE reporter dummy
+       check_err $? "Failed clear dump of dummy reporter"
+
+       devlink health recover $DL_HANDLE reporter dummy
+       check_err $? "Failed recover dummy reporter"
+
+       check_reporter_info dummy healthy 1 1 0 false
+
+       devlink health set $DL_HANDLE reporter dummy auto_recover true
+       check_err $? "Failed to set dummy reporter auto_recover option"
+
+       check_reporter_info dummy healthy 1 1 0 true
+
+       echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+       check_err $? "Failed to break dummy reporter"
+
+       check_reporter_info dummy healthy 2 2 0 true
+
+       local diagnose=$(devlink health diagnose $DL_HANDLE reporter dummy -j -p)
+       check_err $? "Failed show diagnose of dummy reporter"
+
+       local rcvrd_break_msg=$(echo $diagnose | jq -r ".recovered_break_message")
+       [ "$rcvrd_break_msg" == "$BREAK_MSG" ]
+       check_err $? "Unexpected recovered break message value (got $rcvrd_break_msg, expected $BREAK_MSG)"
+
+       devlink health set $DL_HANDLE reporter dummy grace_period 10
+       check_err $? "Failed to set dummy reporter grace_period option"
+
+       check_reporter_info dummy healthy 2 2 10 true
+
+       echo "Y"> $DEBUGFS_DIR/health/fail_recover
+       check_err $? "Failed set dummy reporter recovery to fail"
+
+       echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+       check_fail $? "Unexpected success of dummy reporter break"
+
+       check_reporter_info dummy error 3 2 10 true
+
+       devlink health recover $DL_HANDLE reporter dummy
+       check_fail $? "Unexpected success of dummy reporter recover"
+
+       echo "N"> $DEBUGFS_DIR/health/fail_recover
+       check_err $? "Failed set dummy reporter recovery to be successful"
+
+       devlink health recover $DL_HANDLE reporter dummy
+       check_err $? "Failed recover dummy reporter"
+
+       check_reporter_info dummy healthy 3 3 10 true
+
+       log_test "dummy reporter test"
+}
+
 setup_prepare()
 {
        modprobe netdevsim
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh
new file mode 100755 (executable)
index 0000000..7effd35
--- /dev/null
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="check_devlink_test check_ports_test"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+BUS_ADDR=10
+PORT_COUNT=4
+DEV_NAME=netdevsim$BUS_ADDR
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
+DL_HANDLE=netdevsim/$DEV_NAME
+NETNS_NAME=testns1
+
+port_netdev_get()
+{
+       local port_index=$1
+
+       cmd_jq "devlink -N $NETNS_NAME port show -j" \
+              ".[][\"$DL_HANDLE/$port_index\"].netdev" "-e"
+}
+
+check_ports_test()
+{
+       RET=0
+
+       for i in $(seq 0 $(expr $PORT_COUNT - 1)); do
+               netdev_name=$(port_netdev_get $i)
+               check_err $? "Failed to get netdev name for port $DL_HANDLE/$i"
+               ip -n $NETNS_NAME link show $netdev_name &> /dev/null
+               check_err $? "Failed to find netdev $netdev_name"
+       done
+
+       log_test "check ports test"
+}
+
+check_devlink_test()
+{
+       RET=0
+
+       devlink -N $NETNS_NAME dev show $DL_HANDLE &> /dev/null
+       check_err $? "Failed to show devlink instance"
+
+       log_test "check devlink test"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim
+	ip netns add $NETNS_NAME
+	ip netns exec $NETNS_NAME \
+		echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do sleep 0.1; done # poll gently instead of busy-spinning at 100% CPU
+}
+
+cleanup()
+{
+       pre_cleanup
+       echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device
+       ip netns del $NETNS_NAME
+       modprobe -r netdevsim
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
index 85c587a..8b48ec5 100644 (file)
@@ -254,6 +254,7 @@ cmd_jq()
 {
        local cmd=$1
        local jq_exp=$2
+       local jq_opts=$3
        local ret
        local output
 
@@ -263,7 +264,11 @@ cmd_jq()
        if [[ $ret -ne 0 ]]; then
                return $ret
        fi
-       output=$(echo $output | jq -r "$jq_exp")
+       output=$(echo $output | jq -r $jq_opts "$jq_exp")
+       ret=$?
+       if [[ $ret -ne 0 ]]; then
+               return $ret
+       fi
        echo $output
        # return success only in case of non-empty output
        [ ! -z "$output" ]
index 4144984..de1032b 100644 (file)
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-       conntrack_icmp_related.sh nft_flowtable.sh
+       conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755 (executable)
index 0000000..c3b8f90
--- /dev/null
@@ -0,0 +1,228 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--------------------------------------------------------------+
+#                      |                                       |
+#         ns0          |         ns1                           |
+#      -----------     |     -----------    -----------        |
+#      | veth01  | --------- | veth10  |    | veth12  |        |
+#      -----------    peer   -----------    -----------        |
+#           |          |                        |              |
+#      -----------     |                        |              |
+#      |  br0    |     |-----------------  peer |--------------|
+#      -----------     |                        |              |
+#           |          |                        |              |
+#      ----------     peer   ----------      -----------       |
+#      |  veth02 | --------- |  veth20 |     | veth21  |       |
+#      ----------      |     ----------      -----------       |
+#                      |         ns2                           |
+#                      |                                       |
+#--------------------------------------------------------------+
+#
+# We assume that all network drivers are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+       modprobe -q ip_vs
+       if [ $? -ne 0 ]; then
+               echo "skip: could not run test without ipvs module"
+               exit $ksft_skip
+       fi
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+       echo "SKIP: Could not run test without ipvsadm"
+       exit $ksft_skip
+fi
+
+setup() {
+       ip netns add ns0
+       ip netns add ns1
+       ip netns add ns2
+
+       ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+       ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+       ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+       ip netns exec ns0 ip link set veth01 up
+       ip netns exec ns0 ip link set veth02 up
+       ip netns exec ns0 ip link add br0 type bridge
+       ip netns exec ns0 ip link set veth01 master br0
+       ip netns exec ns0 ip link set veth02 master br0
+       ip netns exec ns0 ip link set br0 up
+       ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+       ip netns exec ns1 ip link set lo up
+       ip netns exec ns1 ip link set veth10 up
+       ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+       ip netns exec ns1 ip link set veth12 up
+       ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+       ip netns exec ns2 ip link set lo up
+       ip netns exec ns2 ip link set veth21 up
+       ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+       ip netns exec ns2 ip link set veth20 up
+       ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+
+       sleep 1
+
+       dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+       for i in 0 1 2
+       do
+               ip netns del ns$i > /dev/null 2>&1
+       done
+
+       if [ -f "${outfile}" ]; then
+               rm "${outfile}"
+       fi
+       if [ -f "${infile}" ]; then
+               rm "${infile}"
+       fi
+}
+
+server_listen() {
+	ip netns exec ns2 nc -l -p "${port}" > "${outfile}" & # use $port, not a hardcoded 8080, so it stays in sync with client_connect
+	server_pid=$!
+	sleep 0.2
+}
+
+client_connect() {
+       ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}"
+}
+
+verify_data() {
+       wait "${server_pid}"
+       cmp "$infile" "$outfile" 2>/dev/null
+}
+
+test_service() {
+       server_listen
+       client_connect
+       verify_data
+}
+
+
+test_dr() {
+       ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+       ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+       ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+       ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+       ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+       # avoid incorrect arp response
+       ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+       ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+       # avoid reverse route lookup
+       ip netns exec ns2 sysctl -qw  net.ipv4.conf.all.rp_filter=0
+       ip netns exec ns2 sysctl -qw  net.ipv4.conf.veth21.rp_filter=0
+       ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+       test_service
+}
+
+test_nat() {
+       ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+       ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+       ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+       ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+       ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+       ip netns exec ns2 ip link del veth20
+       ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+       test_service
+}
+
+test_tun() {
+       ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+       ip netns exec ns1 modprobe ipip
+       ip netns exec ns1 ip link set tunl0 up
+       ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+       ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+       ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+       ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+       ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+       ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+       ip netns exec ns2 modprobe ipip
+       ip netns exec ns2 ip link set tunl0 up
+       ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+       ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+       ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+       ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+       ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+       ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+       test_service
+}
+
+run_tests() {
+	local errors=0 # explicit integer init; an empty value makes $(( $errors + $? )) rely on unary-plus parsing
+
+	echo "Testing DR mode..."
+	cleanup
+	setup
+	test_dr
+	errors=$(( $errors + $? ))
+
+	echo "Testing NAT mode..."
+	cleanup
+	setup
+	test_nat
+	errors=$(( $errors + $? ))
+
+	echo "Testing Tunnel mode..."
+	cleanup
+	setup
+	test_tun
+	errors=$(( $errors + $? ))
+
+	return $errors
+}
+
+trap cleanup EXIT
+
+run_tests
+
+if [ $? -ne 0 ]; then
+       echo -e "$(basename $0): ${RED}FAIL${NC}"
+       exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
+exit 0
index 7c55196..477bc61 100644 (file)
@@ -1,3 +1,12 @@
+#
+# Core Netfilter Configuration
+#
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_LABELS=y
+CONFIG_NF_NAT=m
+
 CONFIG_NET_SCHED=y
 
 #
@@ -42,6 +51,7 @@ CONFIG_NET_ACT_CTINFO=m
 CONFIG_NET_ACT_SKBMOD=m
 CONFIG_NET_ACT_IFE=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
 CONFIG_NET_ACT_MPLS=m
 CONFIG_NET_IFE_SKBMARK=m
 CONFIG_NET_IFE_SKBPRIO=m
index ddabb2f..88ec134 100644 (file)
         "teardown": [
             "$TC actions flush action csum"
         ]
+    },
+    {
+        "id": "eaf0",
+        "name": "Add csum iph action with no_percpu flag",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum iph no_percpu",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action csum",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*no_percpu",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
     }
 ]
index 62b82fe..4202e95 100644 (file)
             "$TC actions flush action ct"
         ]
     },
+    {
+        "id": "e38c",
+        "name": "Add simple ct action with cookie",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct index 42 cookie deadbeef",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct zone 0 pipe.*index 42 ref.*cookie deadbeef",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
     {
         "id": "9f20",
         "name": "Add ct clear action",
             "$TC actions flush action ct"
         ]
     },
+    {
+        "id": "0bc1",
+        "name": "Add ct clear action with cookie of max length",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct clear index 42 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct clear pipe.*index 42 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
     {
         "id": "5bea",
         "name": "Try ct with zone",
         "teardown": [
             "$TC actions flush action ct"
         ]
+    },
+    {
+        "id": "2faa",
+        "name": "Try ct with mark + mask and cookie",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct mark 0x42/0xf0 index 42 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct mark 66/0xf0 zone 0 pipe.*index 42 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "3991",
+        "name": "Add simple ct action with no_percpu flag",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct no_percpu",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct zone 0 pipe.*no_percpu",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
     }
 ]
index 814b7a8..b24494c 100644 (file)
         "teardown": [
             "$TC actions flush action gact"
         ]
+    },
+    {
+        "id": "95ad",
+        "name": "Add gact pass action with no_percpu flag",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pass no_percpu",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action pass.*no_percpu",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
     }
 ]
index 2232b21..12a2fe0 100644 (file)
         "matchPattern": "^[ \t]+index [0-9]+ ref",
         "matchCount": "0",
         "teardown": []
+    },
+    {
+        "id": "31e3",
+        "name": "Add mirred mirror to egress action with no_percpu flag",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred egress mirror dev lo no_percpu",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*no_percpu",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
     }
 ]
index e31a080..866f0ef 100644 (file)
             "$TC actions flush action mpls"
         ]
     },
+    {
+        "id": "09d2",
+        "name": "Add mpls dec_ttl action with opcode and cookie",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl pipe index 8 cookie aabbccddeeff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl pipe.*index 8 ref.*cookie aabbccddeeff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "c170",
+        "name": "Add mpls dec_ttl action with opcode and cookie of max length",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl continue index 8 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl continue.*index 8 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
     {
         "id": "9118",
         "name": "Add mpls dec_ttl action with invalid opcode",
             "$TC actions flush action mpls"
         ]
     },
+    {
+        "id": "91fb",
+        "name": "Add mpls pop action with ip proto and cookie",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4 cookie 12345678",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*protocol.*ip.*pipe.*ref 1.*cookie 12345678",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
     {
         "id": "92fe",
         "name": "Add mpls pop action with mpls proto",
             "$TC actions flush action mpls"
         ]
     },
+    {
+        "id": "7c34",
+        "name": "Add mpls push action with label, tc ttl and cookie of max length",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push label 20 tc 3 ttl 128 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*tc.*3.*ttl.*128.*pipe.*ref 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
     {
         "id": "16eb",
         "name": "Add mpls push action with label and bos",
             "$TC actions flush action mpls"
         ]
     },
+    {
+        "id": "77c1",
+        "name": "Add mpls mod action with mpls ttl and cookie",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod ttl 128 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*128.*pipe.*ref 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
     {
         "id": "b80f",
         "name": "Add mpls mod action with mpls max ttl",
             "$TC actions flush action mpls"
         ]
     },
+    {
+        "id": "95a9",
+        "name": "Replace existing mpls push action with new label, tc, ttl and cookie",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mpls push label 20 tc 3 ttl 128 index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2"
+        ],
+        "cmdUnderTest": "$TC actions replace action mpls push label 30 tc 2 ttl 125 pipe index 1 cookie aa11bb22cc33",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mpls index 1",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*30 tc 2 ttl 125 pipe.*index 1.*cookie aa11bb22cc33",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
     {
         "id": "6cce",
         "name": "Delete mpls pop action",
index 0d319f1..6035956 100644 (file)
             "$TC actions flush action pedit"
         ]
     },
+    {
+        "id": "a5a7",
+        "name": "Add pedit action with LAYERED_OP eth set src",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 2.*key #0  at eth\\+4: val 00001122 mask ffff0000.*key #1  at eth\\+8: val 33445566 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
     {
         "id": "86d4",
         "name": "Add pedit action with LAYERED_OP eth set src & dst",
             "$TC actions flush action pedit"
         ]
     },
+    {
+        "id": "f8a9",
+        "name": "Add pedit action with LAYERED_OP eth set dst",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set 11:22:33:44:55:66",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 2.*key #0  at eth\\+0: val 11223344 mask 00000000.*key #1  at eth\\+4: val 55660000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
     {
         "id": "c715",
         "name": "Add pedit action with LAYERED_OP eth set src (INVALID)",
             "$TC actions flush action pedit"
         ]
     },
+    {
+        "id": "8131",
+        "name": "Add pedit action with LAYERED_OP eth set dst (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set %e:11:m2:33:x4:-5",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
     {
         "id": "ba22",
         "name": "Add pedit action with LAYERED_OP eth type set/clear sequence",
             "$TC actions flush action pedit"
         ]
     },
+    {
+        "id": "dec4",
+        "name": "Add pedit action with LAYERED_OP eth set type (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth type set 0xabcdef",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 1.*key #0  at eth+12: val ",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ab06",
+        "name": "Add pedit action with LAYERED_OP eth add type",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth type add 0x1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 1.*key #0  at eth\\+12: add 00010000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "918d",
+        "name": "Add pedit action with LAYERED_OP eth invert src",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth src invert",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 2.*key #0  at eth\\+4: val 0000ff00 mask ffff0000.*key #1  at eth\\+8: val 00000000 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "a8d4",
+        "name": "Add pedit action with LAYERED_OP eth invert dst",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth dst invert",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 2.*key #0  at eth\\+0: val ff000000 mask 00000000.*key #1  at eth\\+4: val 00000000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "ee13",
+        "name": "Add pedit action with LAYERED_OP eth invert type",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth type invert",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 1.*key #0  at eth\\+12: val ffff0000 mask ffffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "7588",
+        "name": "Add pedit action with LAYERED_OP ip set src",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip src set 1.1.1.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 1.*key #0  at 12: val 01010101 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "0fa7",
+        "name": "Add pedit action with LAYERED_OP ip set dst",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip dst set 2.2.2.2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 1.*key #0  at 16: val 02020202 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
     {
         "id": "5810",
         "name": "Add pedit action with LAYERED_OP ip set src & dst",
             "$TC actions flush action pedit"
         ]
     },
+    {
+        "id": "cc8a",
+        "name": "Add pedit action with LAYERED_OP ip set tos",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip tos set 0x4 continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action continue keys 1.*key #0  at 0: val 00040000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "7a17",
+        "name": "Add pedit action with LAYERED_OP ip set precedence",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip precedence set 3 jump 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action jump 2 keys 1.*key #0  at 0: val 00030000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "c3b6",
+        "name": "Add pedit action with LAYERED_OP ip add tos",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip tos add 0x1 pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 1.*key #0  at ipv4\\+0: add 00010000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "43d3",
+        "name": "Add pedit action with LAYERED_OP ip add precedence",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip precedence add 0x1 pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pipe keys 1.*key #0  at ipv4\\+0: add 00010000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "438e",
+        "name": "Add pedit action with LAYERED_OP ip clear tos",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip tos clear continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action continue keys 1.*key #0  at 0: val 00000000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "6b1b",
+        "name": "Add pedit action with LAYERED_OP ip clear precedence",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip precedence clear jump 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action jump 2 keys 1.*key #0  at 0: val 00000000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "824a",
+        "name": "Add pedit action with LAYERED_OP ip invert tos",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip tos invert pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pipe keys 1.*key #0  at 0: val 00ff0000 mask ffffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "106f",
+        "name": "Add pedit action with LAYERED_OP ip invert precedence",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip precedence invert reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action reclassify keys 1.*key #0  at 0: val 00ff0000 mask ffffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
     {
         "id": "6829",
         "name": "Add pedit action with LAYERED_OP beyond ip set dport & sport",
             "$TC actions flush action pedit"
         ]
     },
+    {
+        "id": "815c",
+        "name": "Add pedit action with LAYERED_OP ip6 set src",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 src set 2001:0db8:0:f101::1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 4.*key #0  at ipv6\\+8: val 20010db8 mask 00000000.*key #1  at ipv6\\+12: val 0000f101 mask 00000000.*key #2  at ipv6\\+16: val 00000000 mask 00000000.*key #3  at ipv6\\+20: val 00000001 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "4dae",
+        "name": "Add pedit action with LAYERED_OP ip6 set dst",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 dst set 2001:0db8:0:f101::1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "action order [0-9]+:  pedit action pass keys 4.*key #0  at ipv6\\+24: val 20010db8 mask 00000000.*key #1  at ipv6\\+28: val 0000f101 mask 00000000.*key #2  at ipv6\\+32: val 00000000 mask 00000000.*key #3  at ipv6\\+36: val 00000001 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
     {
         "id": "fc1f",
         "name": "Add pedit action with LAYERED_OP ip6 set src & dst",
             "$TC actions flush action pedit"
         ]
     }
-
 ]
index 28453a4..fbeb919 100644 (file)
         "teardown": [
             "$TC actions flush action tunnel_key"
         ]
+    },
+    {
+        "id": "0cd2",
+        "name": "Add tunnel_key set action with no_percpu flag",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 no_percpu",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*no_percpu",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
     }
 ]
index 6503b1c..41d7832 100644 (file)
         "matchPattern": "^[ \t]+index [0-9]+ ref",
         "matchCount": "0",
         "teardown": []
+    },
+    {
+        "id": "1a3d",
+        "name": "Add vlan pop action with no_percpu flag",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop no_percpu",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*no_percpu",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
     }
 ]