Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
author David S. Miller <davem@davemloft.net>
Wed, 11 Aug 2021 09:22:26 +0000 (10:22 +0100)
committer David S. Miller <davem@davemloft.net>
Wed, 11 Aug 2021 09:22:26 +0000 (10:22 +0100)
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Use nfnetlink_unicast() instead of netlink_unicast() in nft_compat.

2) Remove call to nf_ct_l4proto_find() in flowtable offload timeout
   fixup.

3) CLUSTERIP registers ARP hook on demand, from Florian.

4) Use clusterip_net to store pernet warning, also from Florian.

5) Remove struct netns_xt, from Florian Westphal.

6) Enable ebtables hooks in initns on demand, from Florian.

7) Allow filtering of conntrack netlink dumps by status bits,
   from Florian Westphal.

8) Register x_tables hooks in initns on demand, from Florian.

9) Remove queue_handler from per-netns structure, again from Florian.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
996 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/arm64/tagged-address-abi.rst
Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
Documentation/devicetree/bindings/net/qcom,ipa.yaml
Documentation/devicetree/bindings/sound/renesas,rsnd.yaml
Documentation/networking/bonding.rst
Documentation/networking/filter.rst
Documentation/networking/netdev-FAQ.rst
Documentation/networking/operstates.rst
Documentation/trace/histogram.rst
Documentation/virt/kvm/api.rst
MAINTAINERS
Makefile
arch/alpha/Kconfig
arch/alpha/boot/bootp.c
arch/alpha/boot/bootpz.c
arch/alpha/boot/misc.c
arch/alpha/configs/defconfig
arch/alpha/include/asm/compiler.h
arch/alpha/include/asm/syscall.h
arch/alpha/include/uapi/asm/socket.h
arch/alpha/kernel/osf_sys.c
arch/alpha/kernel/perf_event.c
arch/alpha/kernel/process.c
arch/alpha/kernel/setup.c
arch/alpha/kernel/smp.c
arch/alpha/kernel/sys_nautilus.c
arch/alpha/kernel/traps.c
arch/alpha/math-emu/math.c
arch/arm/Kconfig
arch/arm/mach-davinci/Kconfig
arch/arm/mach-rpc/riscpc.c
arch/arm/net/bpf_jit_32.c
arch/arm64/boot/dts/freescale/imx8mp.dtsi
arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
arch/arm64/boot/dts/qcom/ipq8074.dtsi
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/boot/dts/qcom/msm8998.dtsi
arch/arm64/boot/dts/qcom/qcs404-evb.dtsi
arch/arm64/boot/dts/qcom/qcs404.dtsi
arch/arm64/boot/dts/qcom/sc7180.dtsi
arch/arm64/boot/dts/qcom/sc7280.dtsi
arch/arm64/boot/dts/qcom/sdm845.dtsi
arch/arm64/boot/dts/qcom/sm8150.dtsi
arch/arm64/kvm/mmu.c
arch/arm64/net/bpf_jit_comp.c
arch/h8300/Kconfig.cpu
arch/ia64/Kconfig
arch/m68k/Kconfig
arch/m68k/Kconfig.machine
arch/m68k/coldfire/m525x.c
arch/mips/Kconfig
arch/mips/include/uapi/asm/socket.h
arch/mips/net/ebpf_jit.c
arch/nds32/mm/mmap.c
arch/parisc/Kconfig
arch/parisc/include/uapi/asm/socket.h
arch/powerpc/Kconfig
arch/powerpc/kernel/vdso64/Makefile
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_p9_entry.c
arch/powerpc/kvm/book3s_rtas.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/net/bpf_jit_comp32.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/platforms/pasemi/idle.c
arch/powerpc/platforms/pseries/setup.c
arch/riscv/include/asm/efi.h
arch/riscv/kernel/stacktrace.c
arch/riscv/lib/uaccess.S
arch/riscv/mm/init.c
arch/riscv/net/bpf_jit_comp32.c
arch/riscv/net/bpf_jit_comp64.c
arch/s390/boot/compressed/Makefile
arch/s390/boot/compressed/clz_ctz.c [new file with mode: 0644]
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/include/asm/kvm_host.h
arch/s390/kernel/vdso32/vdso32.lds.S
arch/s390/kernel/vdso64/vdso64.lds.S
arch/s390/kvm/diag.c
arch/s390/kvm/kvm-s390.c
arch/s390/net/bpf_jit_comp.c
arch/sh/Kconfig
arch/sparc/Kconfig
arch/sparc/include/uapi/asm/socket.h
arch/sparc/net/bpf_jit_comp_64.c
arch/x86/Kconfig
arch/x86/kernel/jump_label.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/ioapic.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/svm/svm_onhyperv.h
arch/x86/kvm/trace.h
arch/x86/kvm/x86.c
arch/x86/net/bpf_jit_comp.c
arch/x86/net/bpf_jit_comp32.c
arch/xtensa/Kconfig
block/blk-iocost.c
block/blk-mq-sched.c
block/genhd.c
drivers/acpi/Kconfig
drivers/acpi/dptf/dptf_pch_fivr.c
drivers/acpi/resource.c
drivers/acpi/utils.c
drivers/acpi/x86/s2idle.c
drivers/ata/libata-sff.c
drivers/atm/horizon.c
drivers/base/auxiliary.c
drivers/base/core.c
drivers/block/loop.c
drivers/block/rbd.c
drivers/bus/fsl-mc/fsl-mc-bus.c
drivers/bus/mhi/core/internal.h
drivers/bus/mhi/core/main.c
drivers/bus/mhi/pci_generic.c
drivers/clk/clk-devres.c
drivers/clk/clk-stm32f4.c
drivers/clk/hisilicon/Kconfig
drivers/clk/qcom/clk-smd-rpm.c
drivers/clk/tegra/clk-sdmmc-mux.c
drivers/firmware/efi/dev-path-parser.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/efi-stub-helper.c
drivers/firmware/efi/mokvar-table.c
drivers/firmware/efi/tpm.c
drivers/gpio/gpio-mpc8xxx.c
drivers/gpio/gpio-tqmx86.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h
drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h
drivers/gpu/drm/amd/pm/inc/smu_types.h
drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/drm_ioctl.c
drivers/gpu/drm/i915/display/intel_bios.c
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gvt/handlers.c
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/intel_device_info.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
drivers/gpu/drm/msm/dp/dp_catalog.c
drivers/gpu/drm/msm/dp/dp_ctrl.c
drivers/gpu/drm/msm/dp/dp_display.c
drivers/gpu/drm/msm/msm_iommu.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/gpu/drm/ttm/ttm_device.c
drivers/gpu/drm/ttm/ttm_module.c
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/hid/Kconfig
drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
drivers/hid/hid-apple.c
drivers/hid/hid-asus.c
drivers/hid/hid-ft260.c
drivers/hid/intel-ish-hid/ishtp-hid-client.c
drivers/hid/intel-ish-hid/ishtp-hid.h
drivers/hid/intel-ish-hid/ishtp/bus.c
drivers/hid/usbhid/Kconfig
drivers/hid/wacom_wac.c
drivers/i2c/busses/i2c-mpc.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_res.c
drivers/infiniband/hw/bnxt_re/qplib_res.h
drivers/infiniband/hw/irdma/ctrl.c
drivers/infiniband/hw/irdma/hw.c
drivers/infiniband/hw/irdma/main.c
drivers/infiniband/hw/irdma/type.h
drivers/infiniband/hw/irdma/uk.c
drivers/infiniband/hw/irdma/verbs.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/ib_rep.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/std_types.c
drivers/infiniband/sw/rxe/rxe_mr.c
drivers/media/common/videobuf2/videobuf2-core.c
drivers/media/pci/intel/ipu3/cio2-bridge.c
drivers/media/pci/ngene/ngene-core.c
drivers/media/pci/ngene/ngene.h
drivers/media/platform/atmel/Kconfig
drivers/media/platform/atmel/Makefile
drivers/media/platform/atmel/atmel-isc-base.c
drivers/media/usb/dvb-usb-v2/rtl28xxu.c
drivers/misc/eeprom/at24.c
drivers/net/Kconfig
drivers/net/Makefile
drivers/net/Space.c
drivers/net/appletalk/Kconfig
drivers/net/appletalk/ltpc.c
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_netlink.c
drivers/net/bonding/bond_options.c
drivers/net/bonding/bond_procfs.c
drivers/net/bonding/bond_sysfs.c
drivers/net/can/flexcan.c
drivers/net/can/spi/hi311x.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/can/usb/ems_usb.c
drivers/net/can/usb/esd_usb2.c
drivers/net/can/usb/mcba_usb.c
drivers/net/can/usb/peak_usb/pcan_usb.c
drivers/net/can/usb/usb_8dev.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/qca/ar9331.c
drivers/net/dsa/sja1105/sja1105.h
drivers/net/dsa/sja1105/sja1105_dynamic_config.c
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/ethernet/3com/3c509.c
drivers/net/ethernet/3com/3c515.c
drivers/net/ethernet/3com/Kconfig
drivers/net/ethernet/8390/Kconfig
drivers/net/ethernet/8390/apne.c
drivers/net/ethernet/8390/ax88796.c
drivers/net/ethernet/8390/ne.c
drivers/net/ethernet/8390/smc-ultra.c
drivers/net/ethernet/8390/wd.c
drivers/net/ethernet/8390/xsurf100.c
drivers/net/ethernet/amd/Kconfig
drivers/net/ethernet/amd/atarilance.c
drivers/net/ethernet/amd/lance.c
drivers/net/ethernet/amd/mvme147.c
drivers/net/ethernet/amd/ni65.c
drivers/net/ethernet/amd/sun3lance.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/cirrus/Kconfig
drivers/net/ethernet/cirrus/cs89x0.c
drivers/net/ethernet/dec/tulip/media.c
drivers/net/ethernet/dec/tulip/winbond-840.c
drivers/net/ethernet/freescale/dpaa2/Makefile
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
drivers/net/ethernet/freescale/dpaa2/dpsw.h
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/hisilicon/Kconfig
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
drivers/net/ethernet/huawei/hinic/hinic_devlink.c
drivers/net/ethernet/huawei/hinic/hinic_devlink.h
drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
drivers/net/ethernet/huawei/hinic/hinic_main.c
drivers/net/ethernet/i825xx/82596.c
drivers/net/ethernet/i825xx/sun3_82586.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/ice/ice_devlink.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/af/cgx.c
drivers/net/ethernet/marvell/octeontx2/af/common.h
drivers/net/ethernet/marvell/octeontx2/af/mbox.h
drivers/net/ethernet/marvell/octeontx2/af/npc.h
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/prestera/prestera_devlink.c
drivers/net/ethernet/marvell/prestera/prestera_devlink.h
drivers/net/ethernet/marvell/prestera/prestera_main.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/devlink.h
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/channels.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/channels.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lag.h
drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/microchip/sparx5/Kconfig
drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
drivers/net/ethernet/mscc/ocelot_vsc7514.c
drivers/net/ethernet/natsemi/jazzsonic.c
drivers/net/ethernet/natsemi/natsemi.c
drivers/net/ethernet/natsemi/xtsonic.c
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfp_net_main.c
drivers/net/ethernet/pensando/ionic/ionic_devlink.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_phc.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/qlogic/qed/qed_devlink.c
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_filter.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qla3xxx.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
drivers/net/ethernet/sis/sis900.c
drivers/net/ethernet/smsc/Kconfig
drivers/net/ethernet/smsc/smc9194.c
drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/ti/Kconfig
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/ethernet/ti/am65-cpsw-nuss.h
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpsw_new.c
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/ipa/gsi.c
drivers/net/ipa/gsi.h
drivers/net/ipa/ipa.h
drivers/net/ipa/ipa_clock.c
drivers/net/ipa/ipa_clock.h
drivers/net/ipa/ipa_data-v4.9.c
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_main.c
drivers/net/ipa/ipa_modem.c
drivers/net/mhi/Makefile [deleted file]
drivers/net/mhi/mhi.h [deleted file]
drivers/net/mhi/net.c [deleted file]
drivers/net/mhi/proto_mbim.c [deleted file]
drivers/net/mhi_net.c [new file with mode: 0644]
drivers/net/netdevsim/bus.c
drivers/net/netdevsim/dev.c
drivers/net/netdevsim/fib.c
drivers/net/netdevsim/netdevsim.h
drivers/net/phy/broadcom.c
drivers/net/phy/micrel.c
drivers/net/phy/mscc/mscc_ptp.c
drivers/net/usb/ipheth.c
drivers/net/usb/lan78xx.c
drivers/net/usb/pegasus.c
drivers/net/usb/usbnet.c
drivers/net/virtio_net.c
drivers/net/vrf.c
drivers/net/wan/Kconfig
drivers/net/wan/Makefile
drivers/net/wan/hostess_sv11.c
drivers/net/wan/sbni.c [deleted file]
drivers/net/wan/sbni.h [deleted file]
drivers/net/wireless/virt_wifi.c
drivers/net/wwan/Kconfig
drivers/net/wwan/Makefile
drivers/net/wwan/iosm/iosm_ipc_mmio.h
drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
drivers/net/wwan/iosm/iosm_ipc_wwan.c
drivers/net/wwan/mhi_wwan_ctrl.c
drivers/net/wwan/mhi_wwan_mbim.c [new file with mode: 0644]
drivers/nfc/nfcsim.c
drivers/nfc/s3fwrn5/firmware.c
drivers/nvme/host/core.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/trace.h
drivers/pcmcia/i82092.c
drivers/platform/x86/amd-pmc.c
drivers/platform/x86/gigabyte-wmi.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/think-lmi.c
drivers/platform/x86/think-lmi.h
drivers/platform/x86/wireless-hotkey.c
drivers/ptp/Kconfig
drivers/ptp/ptp_ocp.c
drivers/s390/net/Kconfig
drivers/s390/net/qeth_l2_main.c
drivers/scsi/arm/acornscsi.c
drivers/scsi/arm/fas216.c
drivers/scsi/device_handler/scsi_dh_rdac.c
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvfc.h
drivers/scsi/megaraid/megaraid_mm.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/pm8001/pm8001_sas.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_iscsi.c
drivers/scsi/sr.c
drivers/staging/qlge/qlge_main.c
drivers/target/target_core_sbc.c
drivers/target/target_core_transport.c
drivers/usb/class/cdc-wdm.c
drivers/usb/core/devio.c
drivers/usb/core/hub.c
drivers/usb/core/quirks.c
drivers/usb/dwc2/core.h
drivers/usb/dwc2/core_intr.c
drivers/usb/dwc2/gadget.c
drivers/usb/dwc2/hcd.c
drivers/usb/dwc2/params.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/ep0.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/u_serial.c
drivers/usb/gadget/udc/tegra-xudc.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/max3421-hcd.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci-renesas.c
drivers/usb/host/xhci-pci.c
drivers/usb/phy/phy.c
drivers/usb/renesas_usbhs/fifo.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/option.c
drivers/usb/storage/unusual_uas.h
drivers/usb/typec/stusb160x.c
drivers/usb/typec/tipd/core.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
fs/Kconfig.binfmt
fs/Makefile
fs/binfmt_em86.c [deleted file]
fs/block_dev.c
fs/btrfs/backref.c
fs/btrfs/backref.h
fs/btrfs/compression.c
fs/btrfs/delayed-ref.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/zoned.c
fs/ceph/mds_client.c
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dfs_cache.c
fs/cifs/dfs_cache.h
fs/cifs/file.c
fs/cifs/fs_context.c
fs/cifs/smb2ops.c
fs/ext2/dir.c
fs/ext2/ext2.h
fs/ext2/namei.c
fs/fs-writeback.c
fs/hugetlbfs/inode.c
fs/internal.h
fs/io-wq.c
fs/io_uring.c
fs/ocfs2/file.c
fs/pipe.c
fs/reiserfs/stree.c
fs/reiserfs/super.c
fs/userfaultfd.c
fs/xfs/libxfs/xfs_log_format.h
fs/xfs/xfs_buf_item_recover.c
fs/xfs/xfs_inode_item_recover.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_trace.h
include/acpi/acpi_bus.h
include/drm/drm_ioctl.h
include/linux/blkdev.h
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/bpf_verifier.h
include/linux/filter.h
include/linux/fs_context.h
include/linux/fsl/mc.h
include/linux/highmem.h
include/linux/if_bridge.h
include/linux/igmp.h
include/linux/intel-ish-client-if.h
include/linux/memblock.h
include/linux/mhi.h
include/linux/mlx5/driver.h
include/linux/mlx5/eswitch.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mm_types.h
include/linux/netdevice.h
include/linux/sched.h
include/linux/skbuff.h
include/linux/skmsg.h
include/net/Space.h
include/net/act_api.h
include/net/af_unix.h
include/net/ax88796.h
include/net/bluetooth/hci_core.h
include/net/bond_3ad.h
include/net/bond_options.h
include/net/bonding.h
include/net/compat.h
include/net/devlink.h
include/net/dsa.h
include/net/flow_offload.h
include/net/if_inet6.h
include/net/inet_hashtables.h
include/net/ip6_route.h
include/net/llc_pdu.h
include/net/netns/xfrm.h
include/net/nfc/nci_core.h
include/net/page_pool.h
include/net/pkt_cls.h
include/net/rtnetlink.h
include/net/sch_generic.h
include/net/sctp/structs.h
include/net/sock.h
include/net/switchdev.h
include/net/tcp.h
include/sound/soc.h
include/uapi/asm-generic/socket.h
include/uapi/linux/can/j1939.h
include/uapi/linux/idxd.h
include/uapi/linux/if_link.h
include/uapi/linux/in.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/socket.h
include/uapi/rdma/irdma-abi.h
init/main.c
kernel/bpf/bpf_iter.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/devmap.c
kernel/bpf/disasm.c
kernel/bpf/helpers.c
kernel/bpf/local_storage.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup-v1.c
kernel/dma/ops_helpers.c
kernel/fork.c
kernel/smpboot.c
kernel/time/posix-cpu-timers.c
kernel/time/timer.c
kernel/trace/bpf_trace.c
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_events_synth.c
kernel/trace/trace_hwlat.c
kernel/trace/trace_synth.h
kernel/tracepoint.c
kernel/ucount.c
kernel/workqueue.c
lib/Kconfig
lib/Kconfig.debug
lib/test_bpf.c
mm/backing-dev.c
mm/kfence/core.c
mm/kfence/kfence_test.c
mm/memblock.c
mm/memcontrol.c
mm/memory.c
mm/migrate.c
mm/mmap_lock.c
mm/page_alloc.c
mm/secretmem.c
mm/slab.h
mm/slub.c
net/ax25/ax25_ip.c
net/ax25/ax25_out.c
net/ax25/ax25_route.c
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/distributed-arp-table.c
net/batman-adv/gateway_client.c
net/batman-adv/multicast.c
net/batman-adv/originator.c
net/batman-adv/translation-table.c
net/bluetooth/hci_core.c
net/bluetooth/hci_sock.c
net/bluetooth/hci_sysfs.c
net/bpf/test_run.c
net/bridge/br.c
net/bridge/br_fdb.c
net/bridge/br_if.c
net/bridge/br_ioctl.c
net/bridge/br_private.h
net/bridge/br_switchdev.c
net/can/j1939/j1939-priv.h
net/can/j1939/socket.c
net/can/j1939/transport.c
net/can/raw.c
net/core/dev.c
net/core/dev_ioctl.c
net/core/devlink.c
net/core/drop_monitor.c
net/core/dst.c
net/core/filter.c
net/core/flow_dissector.c
net/core/neighbour.c
net/core/page_pool.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/skmsg.c
net/core/sock.c
net/decnet/dn_dev.c
net/decnet/dn_fib.c
net/decnet/dn_route.c
net/dsa/Kconfig
net/dsa/dsa.c
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/port.c
net/dsa/slave.c
net/dsa/tag_ar9331.c
net/dsa/tag_brcm.c
net/dsa/tag_dsa.c
net/dsa/tag_gswip.c
net/dsa/tag_hellcreek.c
net/dsa/tag_ksz.c
net/dsa/tag_lan9303.c
net/dsa/tag_mtk.c
net/dsa/tag_ocelot.c
net/dsa/tag_ocelot_8021q.c
net/dsa/tag_qca.c
net/dsa/tag_rtl4_a.c
net/dsa/tag_sja1105.c
net/dsa/tag_trailer.c
net/dsa/tag_xrs700x.c
net/ethernet/eth.c
net/ethtool/ioctl.c
net/ethtool/netlink.c
net/ethtool/netlink.h
net/ieee802154/nl-phy.c
net/ieee802154/nl802154.c
net/ieee802154/socket.c
net/ipv4/devinet.c
net/ipv4/fib_semantics.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel.c
net/ipv4/route.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_offload.c
net/ipv4/udp_offload.c
net/ipv6/addrconf.c
net/ipv6/exthdrs.c
net/ipv6/ip6_output.c
net/ipv6/ip6mr.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/route.c
net/iucv/af_iucv.c
net/iucv/iucv.c
net/llc/af_llc.c
net/llc/llc_s_ac.c
net/mac80211/cfg.c
net/mac80211/ieee80211_i.h
net/mac80211/mlme.c
net/mac80211/rx.c
net/mac80211/tx.c
net/mctp/af_mctp.c
net/mptcp/pm_netlink.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_offload.c
net/netfilter/nf_queue.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_hook.c
net/netfilter/nft_last.c
net/netfilter/nft_nat.c
net/netlabel/netlabel_unlabeled.c
net/netrom/nr_loopback.c
net/netrom/nr_route.c
net/nfc/nci/core.c
net/nfc/nci/hci.c
net/packet/af_packet.c
net/phonet/af_phonet.c
net/phonet/pn_dev.c
net/phonet/socket.c
net/qrtr/mhi.c
net/qrtr/qrtr.c
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_ct.c
net/sched/act_ctinfo.c
net/sched/act_gact.c
net/sched/act_gate.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_mpls.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_basic.c
net/sched/cls_bpf.c
net/sched/cls_cgroup.c
net/sched/cls_flow.c
net/sched/cls_flower.c
net/sched/cls_fw.c
net/sched/cls_matchall.c
net/sched/cls_route.c
net/sched/cls_rsvp.h
net/sched/cls_tcindex.c
net/sched/cls_u32.c
net/sched/sch_generic.c
net/sched/sch_taprio.c
net/sctp/auth.c
net/sctp/input.c
net/sctp/ipv6.c
net/sctp/sm_statefuns.c
net/sctp/transport.c
net/smc/smc_core.c
net/smc/smc_ib.c
net/smc/smc_pnet.c
net/switchdev/switchdev.c
net/tipc/crypto.c
net/tipc/socket.c
net/unix/Kconfig
net/unix/af_unix.c
net/unix/unix_bpf.c
net/vmw_vsock/virtio_transport_common.c
net/wireless/nl80211.c
net/wireless/scan.c
net/xfrm/xfrm_compat.c
net/xfrm/xfrm_ipcomp.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_user.c
samples/bpf/.gitignore
samples/bpf/test_override_return.sh
samples/bpf/tracex7_user.c
samples/bpf/xdp1_kern.c
samples/bpf/xdp2_kern.c
samples/bpf/xdp_redirect_cpu_user.c
samples/bpf/xdpsock_user.c
scripts/recordmcount.pl
scripts/tracing/draw_functrace.py
security/selinux/ss/policydb.c
sound/core/pcm_native.c
sound/hda/intel-dsp-config.c
sound/isa/sb/sb16_csp.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp-da7219-max98357a.c
sound/soc/codecs/Kconfig
sound/soc/codecs/rt5631.c
sound/soc/codecs/rt5682.c
sound/soc/codecs/tlv320aic31xx.c
sound/soc/codecs/tlv320aic31xx.h
sound/soc/codecs/tlv320aic32x4.c
sound/soc/codecs/wcd938x.c
sound/soc/codecs/wm_adsp.c
sound/soc/intel/boards/sof_sdw_max98373.c
sound/soc/soc-pcm.c
sound/soc/sof/intel/pci-tgl.c
sound/soc/tegra/tegra_pcm.c
sound/soc/ti/j721e-evm.c
sound/usb/mixer.c
sound/usb/quirks.c
tools/bpf/bpftool/Documentation/bpftool-btf.rst
tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
tools/bpf/bpftool/Documentation/bpftool-feature.rst
tools/bpf/bpftool/Documentation/bpftool-gen.rst
tools/bpf/bpftool/Documentation/bpftool-iter.rst
tools/bpf/bpftool/Documentation/bpftool-link.rst
tools/bpf/bpftool/Documentation/bpftool-map.rst
tools/bpf/bpftool/Documentation/bpftool-net.rst
tools/bpf/bpftool/Documentation/bpftool-perf.rst
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
tools/bpf/bpftool/Documentation/bpftool.rst
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/btf_dumper.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/feature.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/iter.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/perf.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/bpf/resolve_btfids/main.c
tools/include/uapi/linux/if_link.h
tools/lib/bpf/Build
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/relo_core.c [new file with mode: 0644]
tools/lib/bpf/relo_core.h [new file with mode: 0644]
tools/perf/util/bpf-event.c
tools/perf/util/bpf_counter.c
tools/perf/util/cs-etm.c
tools/perf/util/map.c
tools/perf/util/pmu.c
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/README.rst
tools/testing/selftests/bpf/netcnt_common.h
tools/testing/selftests/bpf/network_helpers.c
tools/testing/selftests/bpf/network_helpers.h
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/btf_dump.c
tools/testing/selftests/bpf/prog_tests/core_autosize.c
tools/testing/selftests/bpf/prog_tests/core_reloc.c
tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
tools/testing/selftests/bpf/prog_tests/netcnt.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/pinning.c
tools/testing/selftests/bpf/prog_tests/reference_tracking.c
tools/testing/selftests/bpf/prog_tests/tc_redirect.c
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/get_func_ip_test.c
tools/testing/selftests/bpf/progs/netcnt_prog.c
tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
tools/testing/selftests/bpf/progs/xdp_tx.c
tools/testing/selftests/bpf/test_bpftool_synctypes.py [new file with mode: 0755]
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_netcnt.c [deleted file]
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_tc_tunnel.sh
tools/testing/selftests/bpf/test_xdp_veth.sh
tools/testing/selftests/bpf/verifier/value_ptr_arith.c
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/get-reg-list.c
tools/testing/selftests/kvm/access_tracking_perf_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/include/x86_64/hyperv.h
tools/testing/selftests/kvm/steal_time.c
tools/testing/selftests/kvm/x86_64/hyperv_clock.c
tools/testing/selftests/kvm/x86_64/hyperv_features.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/af_unix/Makefile [new file with mode: 0644]
tools/testing/selftests/net/af_unix/test_unix_oob.c [new file with mode: 0644]
tools/testing/selftests/net/gro.c [new file with mode: 0644]
tools/testing/selftests/net/gro.sh [new file with mode: 0755]
tools/testing/selftests/net/ipsec.c
tools/testing/selftests/net/psock_snd.sh
tools/testing/selftests/net/setup_loopback.sh [new file with mode: 0755]
tools/testing/selftests/net/toeplitz.c [new file with mode: 0644]
tools/testing/selftests/net/toeplitz.sh [new file with mode: 0755]
tools/testing/selftests/net/toeplitz_client.sh [new file with mode: 0755]
tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json [new file with mode: 0644]
tools/testing/selftests/tc-testing/tdc_config.py
tools/testing/selftests/vm/userfaultfd.c
virt/kvm/kvm_main.c

index bdb2200..ee0569a 100644 (file)
        sa1100ir        [NET]
                        See drivers/net/irda/sa1100_ir.c.
 
-       sbni=           [NET] Granch SBNI12 leased line adapter
-
        sched_verbose   [KNL] Enables verbose scheduler debug messages.
 
        schedstats=     [KNL,X86] Enable or disable scheduled statistics.
index 459e6b6..0c9120e 100644 (file)
@@ -45,14 +45,24 @@ how the user addresses are used by the kernel:
 
 1. User addresses not accessed by the kernel but used for address space
    management (e.g. ``mprotect()``, ``madvise()``). The use of valid
-   tagged pointers in this context is allowed with the exception of
-   ``brk()``, ``mmap()`` and the ``new_address`` argument to
-   ``mremap()`` as these have the potential to alias with existing
-   user addresses.
-
-   NOTE: This behaviour changed in v5.6 and so some earlier kernels may
-   incorrectly accept valid tagged pointers for the ``brk()``,
-   ``mmap()`` and ``mremap()`` system calls.
+   tagged pointers in this context is allowed with these exceptions:
+
+   - ``brk()``, ``mmap()`` and the ``new_address`` argument to
+     ``mremap()`` as these have the potential to alias with existing
+      user addresses.
+
+     NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+     incorrectly accept valid tagged pointers for the ``brk()``,
+     ``mmap()`` and ``mremap()`` system calls.
+
+   - The ``range.start``, ``start`` and ``dst`` arguments to the
+     ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
+     ``userfaultfd()``, as fault addresses subsequently obtained by reading
+     the file descriptor will be untagged, which may otherwise confuse
+     tag-unaware programs.
+
+     NOTE: This behaviour changed in v5.14 and so some earlier kernels may
+     incorrectly accept valid tagged pointers for this system call.
 
 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
    relaxation is disabled by default and the application thread needs to
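
A hedged C sketch of the registration step this new exception governs
(illustrative, not part of the patch; the caller is assumed to have already
stripped the tag bits from the start address):

  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/userfaultfd.h>

  /* Register a range with userfaultfd.  Per the rule above, the start
   * address handed to UFFDIO_REGISTER must be untagged; fault addresses
   * later read from the fd arrive untagged as well. */
  static int register_range(int uffd, unsigned long untagged_start,
                            unsigned long len)
  {
          struct uffdio_register reg = {
                  .range.start = untagged_start,  /* caller strips tag bits */
                  .range.len   = len,
                  .mode        = UFFDIO_REGISTER_MODE_MISSING,
          };

          if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1) {
                  perror("UFFDIO_REGISTER");
                  return -1;
          }
          return 0;
  }
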
index a7b5807..fb547e2 100644 (file)
@@ -104,6 +104,12 @@ properties:
           maximum: 32
     maxItems: 1
 
+  power-domains:
+    description:
+      Power domain provider node and an args specifier containing
+      the can device id value.
+    maxItems: 1
+
   can-transceiver:
     $ref: can-transceiver.yaml#
 
index 4853ab7..ed88ba4 100644 (file)
@@ -87,18 +87,16 @@ properties:
       - const: ipa-setup-ready
 
   interconnects:
-    minItems: 2
     items:
-      - description: Path leading to system memory
-      - description: Path between the AP and IPA config space
-      - description: Path leading to internal memory
+      - description: Interconnect path between IPA and main memory
+      - description: Interconnect path between IPA and internal memory
+      - description: Interconnect path between IPA and the AP subsystem
 
   interconnect-names:
-    minItems: 2
     items:
       - const: memory
-      - const: config
       - const: imem
+      - const: config
 
   qcom,smem-states:
     $ref: /schemas/types.yaml#/definitions/phandle-array
@@ -209,11 +207,11 @@ examples:
 
                 interconnects =
                         <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_EBI1>,
-                        <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>,
-                        <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>;
+                        <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>,
+                        <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>;
                 interconnect-names = "memory",
-                                     "config",
-                                     "imem";
+                                     "imem",
+                                     "config";
 
                 qcom,smem-states = <&ipa_smp2p_out 0>,
                                    <&ipa_smp2p_out 1>;
index ee936d1..c2930d6 100644 (file)
@@ -114,7 +114,7 @@ properties:
 
   ports:
     $ref: /schemas/graph.yaml#/properties/ports
-    properties:
+    patternProperties:
       port(@[0-9a-f]+)?:
         $ref: audio-graph-port.yaml#
         unevaluatedProperties: false
index 62f2aab..31cfd7d 100644 (file)
@@ -501,6 +501,18 @@ fail_over_mac
        This option was added in bonding version 3.2.0.  The "follow"
        policy was added in bonding version 3.3.0.
 
+lacp_active
+       Option specifying whether to send LACPDU frames periodically.
+
+       off or 0
+               LACPDU frames act as "speak when spoken to".
+
+       on or 1
+               LACPDU frames are sent along the configured links
+               periodically. See lacp_rate for more details.
+
+       The default is on.
+
 lacp_rate
 
        Option specifying the rate in which we'll ask our link partner
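
Since bond_sysfs.c is among the files touched above, the new option is
presumably also visible through the bonding sysfs tree; a hedged C sketch
(the sysfs path is an assumption, verify against your kernel):

  #include <stdio.h>

  /* Print a bond's lacp_active setting; the path below mirrors how other
   * bonding options are exposed and is assumed, not taken from the patch. */
  static int show_lacp_active(const char *bond)
  {
          char path[128], buf[32];
          FILE *f;

          snprintf(path, sizeof(path),
                   "/sys/class/net/%s/bonding/lacp_active", bond);
          f = fopen(path, "r");
          if (!f)
                  return -1;
          if (fgets(buf, sizeof(buf), f))
                  printf("%s: lacp_active = %s", bond, buf);
          fclose(f);
          return 0;
  }
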
index 3e2221f..5f13905 100644 (file)
@@ -320,13 +320,6 @@ Examples for low-level BPF:
   ret #-1
   drop: ret #0
 
-**(Accelerated) VLAN w/ id 10**::
-
-  ld vlan_tci
-  jneq #10, drop
-  ret #-1
-  drop: ret #0
-
 **icmp random packet sampling, 1 in 4**::
 
   ldh [12]
@@ -358,6 +351,22 @@ Examples for low-level BPF:
   bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
   good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
 
+Examples for low-level BPF extension:
+
+**Packet for interface index 13**::
+
+  ld ifidx
+  jneq #13, drop
+  ret #-1
+  drop: ret #0
+
+**(Accelerated) VLAN w/ id 10**::
+
+  ld vlan_tci
+  jneq #10, drop
+  ret #-1
+  drop: ret #0
+
 The above example code can be placed into a file (here called "foo"), and
 then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
 and cls_bpf understands and can directly be loaded with. Example with above
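
To make the loading step concrete, here is a hedged C sketch of attaching
classic BPF opcodes to a socket; the single entry in the array is a
placeholder, not real output of "bpf_asm -c foo":

  #include <stdio.h>
  #include <sys/socket.h>
  #include <linux/filter.h>

  /* Attach a classic BPF program to a socket.  The one-entry program
   * below is a placeholder equivalent to "ret #0" (drop everything). */
  static int attach_cbpf(int fd)
  {
          static struct sock_filter code[] = {
                  { 0x06, 0, 0, 0x00000000 },     /* ret #0 */
          };
          struct sock_fprog prog = {
                  .len    = sizeof(code) / sizeof(code[0]),
                  .filter = code,
          };

          if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                         &prog, sizeof(prog)) == -1) {
                  perror("SO_ATTACH_FILTER");
                  return -1;
          }
          return 0;
  }
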
index 91b2cf7..e26532f 100644 (file)
@@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance
 gets overloaded very easily and netdev@vger really doesn't need more
 traffic if we can help it.
 
+netdevsim is great, can I extend it for my out-of-tree tests?
+-------------------------------------------------------------
+
+No, `netdevsim` is a test vehicle solely for upstream tests.
+(Please add your tests under tools/testing/selftests/.)
+
+We also give no guarantees that `netdevsim` won't change in the future
+in a way which would break what would normally be considered uAPI.
+
+Is netdevsim considered a "user" of an API?
+-------------------------------------------
+
+Linux kernel has a long standing rule that no API should be added unless
+it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are
+strongly encouraged when adding new APIs, but `netdevsim` in itself
+is **not** considered a use case/user.
+
 Any other tips to help ensure my net/net-next patch gets OK'd?
 --------------------------------------------------------------
 Attention to detail.  Re-read your own work as if you were the
index 9c918f7..1ee2141 100644 (file)
@@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3):
  state (f.e. VLAN).
 
 IF_OPER_TESTING (4):
- Unused in current kernel.
+ Interface is in testing mode, for example executing driver self-tests
+ or media (cable) test. It can't be used for normal traffic until tests
+ complete.
 
 IF_OPER_DORMANT (5):
  Interface is L1 up, but waiting for an external event, f.e. for a
@@ -111,7 +113,7 @@ it as lower layer.
 
 Note that for certain kind of soft-devices, which are not managing any
 real hardware, it is possible to set this bit from userspace.  One
-should use TVL IFLA_CARRIER to do so.
+should use TLV IFLA_CARRIER to do so.
 
 netif_carrier_ok() can be used to query that bit.
 
index b71e09f..f99be80 100644 (file)
@@ -191,7 +191,7 @@ Documentation written by Tom Zanussi
                                 with the event, in nanoseconds.  May be
                                modified by .usecs to have timestamps
                                interpreted as microseconds.
-    cpu                    int  the cpu on which the event occurred.
+    common_cpu             int  the cpu on which the event occurred.
     ====================== ==== =======================================
 
 Extended error information
index c7b165c..dae68e6 100644 (file)
@@ -855,7 +855,7 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
 use PPIs designated for specific cpus.  The irq field is interpreted
 like this::
 
-  bits:  |  31 ... 28  | 27 ... 24 | 23  ... 16 | 15 ... 0 |
+  bits:  |  31 ... 28  | 27 ... 24 | 23  ... 16 | 15 ... 0 |
   field: | vcpu2_index | irq_type  | vcpu_index |  irq_id  |
 
 The irq_type field has the following values:
@@ -2149,10 +2149,10 @@ prior to calling the KVM_RUN ioctl.
 Errors:
 
   ======   ============================================================
-  ENOENT   no such register
-  EINVAL   invalid register ID, or no such register or used with VMs in
+  ENOENT   no such register
+  EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
-  EPERM    (arm64) register access not allowed before vcpu finalization
+  EPERM    (arm64) register access not allowed before vcpu finalization
   ======   ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -2590,10 +2590,10 @@ following id bit patterns::
 Errors include:
 
   ======== ============================================================
-  ENOENT   no such register
-  EINVAL   invalid register ID, or no such register or used with VMs in
+  ENOENT   no such register
+  EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
-  EPERM    (arm64) register access not allowed before vcpu finalization
+  EPERM    (arm64) register access not allowed before vcpu finalization
   ======== ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -3112,13 +3112,13 @@ current state.  "addr" is ignored.
 Errors:
 
   ======     =================================================================
-  EINVAL     the target is unknown, or the combination of features is invalid.
-  ENOENT     a features bit specified is unknown.
+  EINVAL     the target is unknown, or the combination of features is invalid.
+  ENOENT     a features bit specified is unknown.
   ======     =================================================================
 
 This tells KVM what type of CPU to present to the guest, and what
-optional features it should have.  This will cause a reset of the cpu
-registers to their initial values.  If this is not called, KVM_RUN will
+optional features it should have.  This will cause a reset of the cpu
+registers to their initial values.  If this is not called, KVM_RUN will
 return ENOEXEC for that vcpu.
 
 The initial values are defined as:
@@ -3239,8 +3239,8 @@ VCPU matching underlying host.
 Errors:
 
   =====      ==============================================================
-  E2BIG      the reg index list is too big to fit in the array specified by
-             the user (the number required will be written into n).
+  E2BIG      the reg index list is too big to fit in the array specified by
+             the user (the number required will be written into n).
   =====      ==============================================================
 
 ::
@@ -3288,7 +3288,7 @@ specific device.
 ARM/arm64 divides the id field into two parts, a device id and an
 address type id specific to the individual device::
 
-  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
+  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
   field: |        0x00000000      |     device id   |  addr type id  |
 
 ARM/arm64 currently only require this when using the in-kernel GIC
@@ -7049,7 +7049,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
 
-8.28 KVM_CAP_ENFORCE_PV_CPUID
+8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
 -----------------------------
 
 Architectures: x86
index 73beb91..41fcfdb 100644 (file)
@@ -445,7 +445,7 @@ F:  drivers/platform/x86/wmi.c
 F:     include/uapi/linux/wmi.h
 
 ACRN HYPERVISOR SERVICE MODULE
-M:     Shuo Liu <shuo.a.liu@intel.com>
+M:     Fei Li <fei1.li@intel.com>
 L:     acrn-dev@lists.projectacrn.org (subscribers-only)
 S:     Supported
 W:     https://projectacrn.org
@@ -7859,9 +7859,9 @@ S:        Maintained
 F:     drivers/input/touchscreen/goodix.c
 
 GOOGLE ETHERNET DRIVERS
-M:     Catherine Sullivan <csully@google.com>
-R:     Sagi Shahar <sagis@google.com>
-R:     Jon Olson <jonolson@google.com>
+M:     Jeroen de Borst <jeroendb@google.com>
+R:     Catherine Sullivan <csully@google.com>
+R:     David Awogbemila <awogbemila@google.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     Documentation/networking/device_drivers/ethernet/google/gve.rst
@@ -11347,6 +11347,12 @@ L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/phy/mxl-gpy.c
 
+MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER
+R:     Yasushi SHOJI <yashi@spacecubics.com>
+L:     linux-can@vger.kernel.org
+S:     Maintained
+F:     drivers/net/can/usb/mcba_usb.c
+
 MCAN MMIO DEVICE DRIVER
 M:     Chandrasekar Ramakrishnan <rcsekar@samsung.com>
 L:     linux-can@vger.kernel.org
@@ -15488,6 +15494,8 @@ M:      Pan, Xinhui <Xinhui.Pan@amd.com>
 L:     amd-gfx@lists.freedesktop.org
 S:     Supported
 T:     git https://gitlab.freedesktop.org/agd5f/linux.git
+B:     https://gitlab.freedesktop.org/drm/amd/-/issues
+C:     irc://irc.oftc.net/radeon
 F:     drivers/gpu/drm/amd/
 F:     drivers/gpu/drm/radeon/
 F:     include/uapi/drm/amdgpu_drm.h
@@ -19143,7 +19151,7 @@ M:      Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-usb@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/phy/hisilicon,hi3670-usb3.yaml
-F:     drivers/phy/hisilicon/phy-kirin970-usb3.c
+F:     drivers/phy/hisilicon/phy-hi3670-usb3.c
 
 USB ISP116X DRIVER
 M:     Olav Kongas <ok@artecdesign.ee>
@@ -19821,6 +19829,14 @@ L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/ptp/ptp_vmw.c
 
+VMWARE VMCI DRIVER
+M:     Jorgen Hansen <jhansen@vmware.com>
+M:     Vishnu Dasa <vdasa@vmware.com>
+L:     linux-kernel@vger.kernel.org
+L:     pv-drivers@vmware.com (private)
+S:     Maintained
+F:     drivers/misc/vmw_vmci/
+
 VMWARE VMMOUSE SUBDRIVER
 M:     "VMware Graphics" <linux-graphics-maintainer@vmware.com>
 M:     "VMware, Inc." <pv-drivers@vmware.com>
index e4f5895..b6ee64d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc4
 NAME = Opossums on Parade
 
 # *DOCUMENTATION*
@@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \
 PHONY += scripts_basic
 scripts_basic:
        $(Q)$(MAKE) $(build)=scripts/basic
-       $(Q)rm -f .tmp_quiet_recordmcount
 
 PHONY += outputmakefile
 ifdef building_out_of_srctree
index 77d3280..6c50877 100644 (file)
@@ -14,7 +14,6 @@ config ALPHA
        select PCI_SYSCALL if PCI
        select HAVE_AOUT
        select HAVE_ASM_MODVERSIONS
-       select HAVE_IDE
        select HAVE_PCSPKR_PLATFORM
        select HAVE_PERF_EVENTS
        select NEED_DMA_MAP_STATE
@@ -532,7 +531,7 @@ config SMP
          will run faster if you say N here.
 
          See also the SMP-HOWTO available at
-         <http://www.tldp.org/docs.html#howto>.
+         <https://www.tldp.org/docs.html#howto>.
 
          If you don't know what to do here, say N.
 
index 00266e6..b4faba2 100644 (file)
@@ -23,7 +23,7 @@
 #include "ksize.h"
 
 extern unsigned long switch_to_osf_pal(unsigned long nr,
-       struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
+       struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa,
        unsigned long *vptb);
 
 extern void move_stack(unsigned long new_stack);
index 43af718..90a2b34 100644 (file)
@@ -200,7 +200,7 @@ extern char _end;
        START_ADDR      KSEG address of the entry point of kernel code.
 
        ZERO_PGE        KSEG address of page full of zeroes, but 
-                       upon entry to kerne cvan be expected
+                       upon entry to kernel, it can be expected
                        to hold the parameter list and possible
                        INTRD information.
 
index d651922..325d4dd 100644 (file)
@@ -30,7 +30,7 @@ extern long srm_printk(const char *, ...)
      __attribute__ ((format (printf, 1, 2)));
 
 /*
- * gzip delarations
+ * gzip declarations
  */
 #define OF(args)  args
 #define STATIC static
index dd2dd9f..7f1ca30 100644 (file)
@@ -70,3 +70,4 @@ CONFIG_DEBUG_INFO=y
 CONFIG_ALPHA_LEGACY_START_ADDRESS=y
 CONFIG_MATHEMU=y
 CONFIG_CRYPTO_HMAC=y
+CONFIG_DEVTMPFS=y
index 5159ba2..ae64595 100644 (file)
@@ -4,15 +4,4 @@
 
 #include <uapi/asm/compiler.h>
 
-/* Some idiots over in <linux/compiler.h> thought inline should imply
-   always_inline.  This breaks stuff.  We'll include this file whenever
-   we run into such problems.  */
-
-#include <linux/compiler.h>
-#undef inline
-#undef __inline__
-#undef __inline
-#undef __always_inline
-#define __always_inline                inline __attribute__((always_inline))
-
 #endif /* __ALPHA_COMPILER_H */
index 11c688c..f21baba 100644 (file)
@@ -9,4 +9,10 @@ static inline int syscall_get_arch(struct task_struct *task)
        return AUDIT_ARCH_ALPHA;
 }
 
+static inline long syscall_get_return_value(struct task_struct *task,
+                                           struct pt_regs *regs)
+{
+       return regs->r0;
+}
+
 #endif /* _ASM_ALPHA_SYSCALL_H */
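
A hedged sketch of how generic code would consume the helper this hunk adds
(the hook below is illustrative, not part of the patch); on Alpha the
syscall result lives in regs->r0:

  #include <linux/sched.h>
  #include <linux/ptrace.h>
  #include <linux/printk.h>
  #include <asm/syscall.h>

  /* Illustrative audit/trace-style exit hook: read the syscall result
   * through the arch helper instead of poking regs->r0 directly. */
  static void report_syscall_exit(struct task_struct *task,
                                  struct pt_regs *regs)
  {
          long ret = syscall_get_return_value(task, regs);

          pr_debug("syscall returned %ld\n", ret);
  }
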
index 6b3daba..1dd9baf 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index d5367a1..d31167e 100644 (file)
@@ -834,7 +834,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
                        return -EFAULT;
                state = &current_thread_info()->ieee_state;
 
-               /* Update softare trap enable bits.  */
+               /* Update software trap enable bits.  */
+               /* Update software trap enable bits.  */
                *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK);
 
                /* Update the real fpcr.  */
@@ -854,7 +854,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
                state = &current_thread_info()->ieee_state;
                exc &= IEEE_STATUS_MASK;
 
-               /* Update softare trap enable bits.  */
+               /* Update software trap enable bits.  */
+               /* Update software trap enable bits.  */
                swcr = (*state & IEEE_SW_MASK) | exc;
                *state |= exc;
 
index e7a59d9..efcf732 100644 (file)
@@ -574,7 +574,7 @@ static void alpha_pmu_start(struct perf_event *event, int flags)
  * Check that CPU performance counters are supported.
  * - currently support EV67 and later CPUs.
  * - actually some later revisions of the EV6 have the same PMC model as the
- *     EV67 but we don't do suffiently deep CPU detection to detect them.
+ *     EV67 but we don't do sufficiently deep CPU detection to detect them.
  *     Bad luck to the very few people who might have one, I guess.
  */
 static int supported_cpu(void)
index ef0c08e..a5123ea 100644 (file)
@@ -256,7 +256,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                childstack->r26 = (unsigned long) ret_from_kernel_thread;
                childstack->r9 = usp;   /* function */
                childstack->r10 = kthread_arg;
-               childregs->hae = alpha_mv.hae_cache,
+               childregs->hae = alpha_mv.hae_cache;
                childti->pcb.usp = 0;
                return 0;
        }
index 7d56c21..b4fbbba 100644 (file)
@@ -319,18 +319,19 @@ setup_memory(void *kernel_end)
                       i, cluster->usage, cluster->start_pfn,
                       cluster->start_pfn + cluster->numpages);
 
-               /* Bit 0 is console/PALcode reserved.  Bit 1 is
-                  non-volatile memory -- we might want to mark
-                  this for later.  */
-               if (cluster->usage & 3)
-                       continue;
-
                end = cluster->start_pfn + cluster->numpages;
                if (end > max_low_pfn)
                        max_low_pfn = end;
 
                memblock_add(PFN_PHYS(cluster->start_pfn),
                             cluster->numpages << PAGE_SHIFT);
+
+               /* Bit 0 is console/PALcode reserved.  Bit 1 is
+                  non-volatile memory -- we might want to mark
+                  this for later.  */
+               if (cluster->usage & 3)
+                       memblock_reserve(PFN_PHYS(cluster->start_pfn),
+                                        cluster->numpages << PAGE_SHIFT);
        }
 
        /*
index 4b2575f..cb64e47 100644 (file)
@@ -582,7 +582,7 @@ void
 smp_send_stop(void)
 {
        cpumask_t to_whom;
-       cpumask_copy(&to_whom, cpu_possible_mask);
+       cpumask_copy(&to_whom, cpu_online_mask);
        cpumask_clear_cpu(smp_processor_id(), &to_whom);
 #ifdef DEBUG_IPI_MSG
        if (hard_smp_processor_id() != boot_cpu_id)
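
The one-line fix narrows the IPI target set from every possible CPU to
those actually online; a hedged sketch of the resulting cpumask pattern
(the function name is illustrative):

  #include <linux/smp.h>
  #include <linux/cpumask.h>

  /* Build "everyone online except me"; cpu_possible_mask would also
   * include CPUs that were never brought up and must not be IPI'd. */
  static void broadcast_targets(cpumask_t *to_whom)
  {
          cpumask_copy(to_whom, cpu_online_mask);
          cpumask_clear_cpu(smp_processor_id(), to_whom);
  }
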
index 53adf43..96fd6ff 100644 (file)
@@ -212,7 +212,7 @@ nautilus_init_pci(void)
 
        /* Use default IO. */
        pci_add_resource(&bridge->windows, &ioport_resource);
-       /* Irongate PCI memory aperture, calculate requred size before
+       /* Irongate PCI memory aperture, calculate required size before
+       /* Irongate PCI memory aperture, calculate required size before
           setting it up. */
        pci_add_resource(&bridge->windows, &irongate_mem);
 
index 921d4b6..5398f98 100644 (file)
@@ -730,7 +730,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
        long error;
 
        /* Check the UAC bits to decide what the user wants us to do
-          with the unaliged access.  */
+          with the unaligned access.  */
+          with the unaligned access.  */
 
        if (!(current_thread_info()->status & TS_UAC_NOPRINT)) {
                if (__ratelimit(&ratelimit)) {
index d568cd9..f7cef66 100644 (file)
@@ -65,7 +65,7 @@ static long (*save_emul) (unsigned long pc);
 long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
 long do_alpha_fp_emul(unsigned long);
 
-int init_module(void)
+static int alpha_fp_emul_init_module(void)
 {
        save_emul_imprecise = alpha_fp_emul_imprecise;
        save_emul = alpha_fp_emul;
@@ -73,12 +73,14 @@ int init_module(void)
        alpha_fp_emul = do_alpha_fp_emul;
        return 0;
 }
+module_init(alpha_fp_emul_init_module);
 
-void cleanup_module(void)
+static void alpha_fp_emul_cleanup_module(void)
 {
        alpha_fp_emul_imprecise = save_emul_imprecise;
        alpha_fp_emul = save_emul;
 }
+module_exit(alpha_fp_emul_cleanup_module);
 
 #undef  alpha_fp_emul_imprecise
 #define alpha_fp_emul_imprecise                do_alpha_fp_emul_imprecise
@@ -401,3 +403,5 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask)
 egress:
        return si_code;
 }
+
+EXPORT_SYMBOL(__udiv_qrnnd);
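
Renaming the entry points away from the legacy init_module()/cleanup_module() names and registering them via module_init()/module_exit() is the modern pattern; it also lets the functions be static. A minimal module skeleton in that style (a hypothetical demo module, not the math emulator):

	#include <linux/init.h>
	#include <linux/module.h>

	static int __init demo_init(void)
	{
		pr_info("demo: loaded\n");
		return 0;		/* non-zero aborts the module load */
	}
	module_init(demo_init);

	static void __exit demo_exit(void)
	{
		pr_info("demo: unloaded\n");
	}
	module_exit(demo_exit);

	MODULE_LICENSE("GPL");
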
index 82f908f..2fb7012 100644 (file)
@@ -95,7 +95,6 @@ config ARM
        select HAVE_FUNCTION_TRACER if !XIP_KERNEL
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
-       select HAVE_IDE if PCI || ISA || PCMCIA
        select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZ4
@@ -361,7 +360,6 @@ config ARCH_FOOTBRIDGE
        bool "FootBridge"
        select CPU_SA110
        select FOOTBRIDGE
-       select HAVE_IDE
        select NEED_MACH_IO_H if !MMU
        select NEED_MACH_MEMORY_H
        help
@@ -430,7 +428,6 @@ config ARCH_PXA
        select GENERIC_IRQ_MULTI_HANDLER
        select GPIO_PXA
        select GPIOLIB
-       select HAVE_IDE
        select IRQ_DOMAIN
        select PLAT_PXA
        select SPARSE_IRQ
@@ -446,7 +443,6 @@ config ARCH_RPC
        select ARM_HAS_SG_CHAIN
        select CPU_SA110
        select FIQ
-       select HAVE_IDE
        select HAVE_PATA_PLATFORM
        select ISA_DMA_API
        select LEGACY_TIMER_TICK
@@ -469,7 +465,6 @@ config ARCH_SA1100
        select CPU_SA1100
        select GENERIC_IRQ_MULTI_HANDLER
        select GPIOLIB
-       select HAVE_IDE
        select IRQ_DOMAIN
        select ISA
        select NEED_MACH_MEMORY_H
@@ -505,7 +500,6 @@ config ARCH_OMAP1
        select GENERIC_IRQ_CHIP
        select GENERIC_IRQ_MULTI_HANDLER
        select GPIOLIB
-       select HAVE_IDE
        select HAVE_LEGACY_CLK
        select IRQ_DOMAIN
        select NEED_MACH_IO_H if PCCARD
index de11030..1d3aef8 100644 (file)
@@ -9,7 +9,6 @@ menuconfig ARCH_DAVINCI
        select PM_GENERIC_DOMAINS_OF if PM && OF
        select REGMAP_MMIO
        select RESET_CONTROLLER
-       select HAVE_IDE
        select PINCTRL_SINGLE
 
 if ARCH_DAVINCI
index d23970b..f70fb9c 100644 (file)
@@ -49,6 +49,7 @@ static int __init parse_tag_acorn(const struct tag *tag)
                fallthrough;    /* ??? */
        case 256:
                vram_size += PAGE_SIZE * 256;
+               break;
        default:
                break;
        }
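
The added break ensures the case 256 arm no longer falls through into default:. Falling through is legal C but almost always accidental, which is why the kernel annotates the intentional cases with fallthrough; and builds with -Wimplicit-fallthrough. The shape of the fixed switch, sketched:

	switch (pages) {
	case 512:
		vram_size += PAGE_SIZE * 256;
		fallthrough;	/* intentional: 512 pages adds both halves */
	case 256:
		vram_size += PAGE_SIZE * 256;
		break;		/* the fix: stop before default: */
	default:
		break;
	}
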
index 897634d..a951276 100644 (file)
@@ -1602,6 +1602,9 @@ exit:
                rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
                emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
                break;
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
        case BPF_ST | BPF_MEM | BPF_H:
index ca38d0d..f4eaab3 100644 (file)
                        };
 
                        flexcan1: can@308c0000 {
-                               compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+                               compatible = "fsl,imx8mp-flexcan";
                                reg = <0x308c0000 0x10000>;
                                interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MP_CLK_IPG_ROOT>,
                        };
 
                        flexcan2: can@308d0000 {
-                               compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+                               compatible = "fsl,imx8mp-flexcan";
                                reg = <0x308d0000 0x10000>;
                                interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MP_CLK_IPG_ROOT>,
index 0686923..51e1709 100644 (file)
        status = "okay";
        extcon = <&usb2_id>;
 
-       usb@7600000 {
+       dwc3@7600000 {
                extcon = <&usb2_id>;
                dr_mode = "otg";
                maximum-speed = "high-speed";
        status = "okay";
        extcon = <&usb3_id>;
 
-       usb@6a00000 {
+       dwc3@6a00000 {
                extcon = <&usb3_id>;
                dr_mode = "otg";
        };
index 95d6cb8..f39bc10 100644 (file)
                        resets = <&gcc GCC_USB0_BCR>;
                        status = "disabled";
 
-                       dwc_0: usb@8a00000 {
+                       dwc_0: dwc3@8a00000 {
                                compatible = "snps,dwc3";
                                reg = <0x8a00000 0xcd00>;
                                interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>;
                        resets = <&gcc GCC_USB1_BCR>;
                        status = "disabled";
 
-                       dwc_1: usb@8c00000 {
+                       dwc_1: dwc3@8c00000 {
                                compatible = "snps,dwc3";
                                reg = <0x8c00000 0xcd00>;
                                interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
index 0e1bc46..78c55ca 100644 (file)
                        power-domains = <&gcc USB30_GDSC>;
                        status = "disabled";
 
-                       usb@6a00000 {
+                       dwc3@6a00000 {
                                compatible = "snps,dwc3";
                                reg = <0x06a00000 0xcc00>;
                                interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>;
                        qcom,select-utmi-as-pipe-clk;
                        status = "disabled";
 
-                       usb@7600000 {
+                       dwc3@7600000 {
                                compatible = "snps,dwc3";
                                reg = <0x07600000 0xcc00>;
                                interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>;
index 6f294f9..e9d3ce2 100644 (file)
 
                        resets = <&gcc GCC_USB_30_BCR>;
 
-                       usb3_dwc3: usb@a800000 {
+                       usb3_dwc3: dwc3@a800000 {
                                compatible = "snps,dwc3";
                                reg = <0x0a800000 0xcd00>;
                                interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
index f8a5530..a80c578 100644 (file)
 &usb3 {
        status = "okay";
 
-       usb@7580000 {
+       dwc3@7580000 {
                dr_mode = "host";
        };
 };
index 9c4be02..339790b 100644 (file)
                        assigned-clock-rates = <19200000>, <200000000>;
                        status = "disabled";
 
-                       usb@7580000 {
+                       dwc3@7580000 {
                                compatible = "snps,dwc3";
                                reg = <0x07580000 0xcd00>;
                                interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
                        assigned-clock-rates = <19200000>, <133333333>;
                        status = "disabled";
 
-                       usb@78c0000 {
+                       dwc3@78c0000 {
                                compatible = "snps,dwc3";
                                reg = <0x078c0000 0xcc00>;
                                interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
index 7af551a..a9a052f 100644 (file)
                        no-map;
                };
 
-               ipa_fw_mem: memory@8b700000 {
-                       reg = <0 0x8b700000 0 0x10000>;
-                       no-map;
-               };
-
                rmtfs_mem: memory@94600000 {
                        compatible = "qcom,rmtfs-mem";
                        reg = <0x0 0x94600000 0x0 0x200000>;
                                        <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3 0>;
                        interconnect-names = "usb-ddr", "apps-usb";
 
-                       usb_1_dwc3: usb@a600000 {
+                       usb_1_dwc3: dwc3@a600000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a600000 0 0xe000>;
                                interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
index 5eb2b58..a8c274a 100644 (file)
@@ -7,7 +7,6 @@
 
 #include <dt-bindings/clock/qcom,gcc-sc7280.h>
 #include <dt-bindings/clock/qcom,rpmh.h>
-#include <dt-bindings/interconnect/qcom,sc7280.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/mailbox/qcom-ipcc.h>
 #include <dt-bindings/power/qcom-aoss-qmp.h>
                        no-map;
                        reg = <0x0 0x80b00000 0x0 0x100000>;
                };
-
-               ipa_fw_mem: memory@8b700000 {
-                       reg = <0 0x8b700000 0 0x10000>;
-                       no-map;
-               };
        };
 
        cpus {
                        qcom,bcm-voters = <&apps_bcm_voter>;
                };
 
-               ipa: ipa@1e40000 {
-                       compatible = "qcom,sc7280-ipa";
-
-                       iommus = <&apps_smmu 0x480 0x0>,
-                                <&apps_smmu 0x482 0x0>;
-                       reg = <0 0x1e40000 0 0x8000>,
-                             <0 0x1e50000 0 0x4ad0>,
-                             <0 0x1e04000 0 0x23000>;
-                       reg-names = "ipa-reg",
-                                   "ipa-shared",
-                                   "gsi";
-
-                       interrupts-extended = <&intc 0 654 IRQ_TYPE_EDGE_RISING>,
-                                             <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>,
-                                             <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
-                                             <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>;
-                       interrupt-names = "ipa",
-                                         "gsi",
-                                         "ipa-clock-query",
-                                         "ipa-setup-ready";
-
-                       clocks = <&rpmhcc RPMH_IPA_CLK>;
-                       clock-names = "core";
-
-                       interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>,
-                                       <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_IPA_CFG 0>;
-                       interconnect-names = "memory",
-                                            "config";
-
-                       qcom,smem-states = <&ipa_smp2p_out 0>,
-                                          <&ipa_smp2p_out 1>;
-                       qcom,smem-state-names = "ipa-clock-enabled-valid",
-                                               "ipa-clock-enabled";
-
-                       status = "disabled";
-               };
-
                tcsr_mutex: hwlock@1f40000 {
                        compatible = "qcom,tcsr-mutex", "syscon";
                        reg = <0 0x01f40000 0 0x40000>;
index 1796ae8..0a86fe7 100644 (file)
                                        <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>;
                        interconnect-names = "usb-ddr", "apps-usb";
 
-                       usb_1_dwc3: usb@a600000 {
+                       usb_1_dwc3: dwc3@a600000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a600000 0 0xcd00>;
                                interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
                                        <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_1 0>;
                        interconnect-names = "usb-ddr", "apps-usb";
 
-                       usb_2_dwc3: usb@a800000 {
+                       usb_2_dwc3: dwc3@a800000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a800000 0 0xcd00>;
                                interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
index 612dda0..eef9d79 100644 (file)
 
                        resets = <&gcc GCC_USB30_PRIM_BCR>;
 
-                       usb_1_dwc3: usb@a600000 {
+                       usb_1_dwc3: dwc3@a600000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a600000 0 0xcd00>;
                                interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
index 3155c9e..0625bf2 100644 (file)
@@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                vma_shift = get_vma_page_shift(vma, hva);
        }
 
-       shared = (vma->vm_flags & VM_PFNMAP);
+       shared = (vma->vm_flags & VM_SHARED);
 
        switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
index dccf98a..41c23f4 100644 (file)
@@ -823,6 +823,19 @@ emit_cond_jmp:
                        return ret;
                break;
 
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               /*
+                * Nothing required here.
+                *
+                * In case of arm64, we rely on the firmware mitigation of
+                * Speculative Store Bypass as controlled via the ssbd kernel
+                * parameter. Whenever the mitigation is enabled, it works
+                * for all of the kernel code with no need to provide any
+                * additional instructions.
+                */
+               break;
+
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
        case BPF_ST | BPF_MEM | BPF_H:
index b5e14d5..c30baa0 100644 (file)
@@ -44,7 +44,6 @@ config H8300_H8MAX
        bool "H8MAX"
        select H83069
        select RAMKERNEL
-       select HAVE_IDE
        help
          H8MAX Evaluation Board Support
          More Information. (Japanese Only)
index cf425c2..4993c7a 100644 (file)
@@ -25,7 +25,6 @@ config IA64
        select HAVE_ASM_MODVERSIONS
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_EXIT_THREAD
-       select HAVE_IDE
        select HAVE_KPROBES
        select HAVE_KRETPROBES
        select HAVE_FTRACE_MCOUNT_RECORD
index 96989ad..d632a1d 100644 (file)
@@ -23,7 +23,6 @@ config M68K
        select HAVE_DEBUG_BUGVERBOSE
        select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
        select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
-       select HAVE_IDE
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_UID16
        select MMU_GATHER_NO_RANGE if MMU
index d964c1f..6a07a68 100644 (file)
@@ -33,6 +33,7 @@ config MAC
        depends on MMU
        select MMU_MOTOROLA if MMU
        select HAVE_ARCH_NVRAM_OPS
+       select HAVE_PATA_PLATFORM
        select LEGACY_TIMER_TICK
        help
          This option enables support for the Apple Macintosh series of
index 2c4d2ca..4853751 100644 (file)
@@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK);
 DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
 
 static struct clk_lookup m525x_clk_lookup[] = {
-       CLKDEV_INIT(NULL, "pll.0", &pll),
+       CLKDEV_INIT(NULL, "pll.0", &clk_pll),
        CLKDEV_INIT(NULL, "sys.0", &clk_sys),
        CLKDEV_INIT("mcftmr.0", NULL, &clk_sys),
        CLKDEV_INIT("mcftmr.1", NULL, &clk_sys),
index cee6087..6dfb27d 100644 (file)
@@ -71,7 +71,6 @@ config MIPS
        select HAVE_FUNCTION_TRACER
        select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO
-       select HAVE_IDE
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK
        select HAVE_IRQ_TIME_ACCOUNTING
index cdf404a..1eaf6a1 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
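
These uapi hunks assign per-architecture numbers to the new SO_BUF_LOCK socket option, which lets userspace lock sk_sndbuf/sk_rcvbuf against kernel auto-tuning. A hedged userspace sketch follows; the flag values match what this series adds to include/uapi/linux/socket.h and are defined locally in case the installed libc headers predate the option:

	#include <stdio.h>
	#include <sys/socket.h>

	#ifndef SO_BUF_LOCK
	#define SO_BUF_LOCK 72		/* asm-generic/mips value; parisc and
					 * sparc use 0x4046/0x0051 as above */
	#endif
	#define SOCK_SNDBUF_LOCK 1	/* lock sk_sndbuf */
	#define SOCK_RCVBUF_LOCK 2	/* lock sk_rcvbuf */

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		int lock = SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;

		/* Pin both buffer sizes against kernel auto-tuning. */
		if (fd < 0 ||
		    setsockopt(fd, SOL_SOCKET, SO_BUF_LOCK, &lock, sizeof(lock)))
			perror("SO_BUF_LOCK");
		return 0;
	}
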
index 939dd06..3a73e93 100644 (file)
@@ -1355,6 +1355,9 @@ jeq_common:
                }
                break;
 
+       case BPF_ST | BPF_NOSPEC: /* speculation barrier */
+               break;
+
        case BPF_ST | BPF_B | BPF_MEM:
        case BPF_ST | BPF_H | BPF_MEM:
        case BPF_ST | BPF_W | BPF_MEM:
index c206b31..1bdf5e7 100644 (file)
@@ -59,7 +59,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index bde9907..4f8c1fb 100644 (file)
@@ -3,7 +3,6 @@ config PARISC
        def_bool y
        select ARCH_32BIT_OFF_T if !64BIT
        select ARCH_MIGHT_HAVE_PC_PARPORT
-       select HAVE_IDE
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_SYSCALL_TRACEPOINTS
index 5b5351c..8baaad5 100644 (file)
 
 #define SO_NETNS_COOKIE                0x4045
 
+#define SO_BUF_LOCK            0x4046
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index d01e340..663766f 100644 (file)
@@ -220,7 +220,6 @@ config PPC
        select HAVE_HARDLOCKUP_DETECTOR_ARCH    if PPC_BOOK3S_64 && SMP
        select HAVE_HARDLOCKUP_DETECTOR_PERF    if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
        select HAVE_HW_BREAKPOINT               if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
-       select HAVE_IDE
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK
        select HAVE_IRQ_TIME_ACCOUNTING
index 2813e3f..3c5baaa 100644 (file)
@@ -27,6 +27,13 @@ KASAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
        -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
+
+# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true
+# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is
+# compiler generated. To avoid breaking Go, tell GCC not to use r30. Impact on code
+# generation is minimal; it will just use r29 instead.
+ccflags-y += $(call cc-option, -ffixed-r30)
+
 asflags-y := -D__VDSO64__ -s
 
 targets += vdso64.lds

index 1d1fcc2..085fb8e 100644 (file)
@@ -2697,8 +2697,10 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
                HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
                if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
                        vcpu->arch.hfscr |= HFSCR_TM;
+#endif
        }
        if (cpu_has_feature(CPU_FTR_TM_COMP))
                vcpu->arch.hfscr |= HFSCR_TM;
index 8543ad5..898f942 100644 (file)
@@ -302,6 +302,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        if (vcpu->kvm->arch.l1_ptcr == 0)
                return H_NOT_AVAILABLE;
 
+       if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+               return H_BAD_MODE;
+
        /* copy parameters in */
        hv_ptr = kvmppc_get_gpr(vcpu, 4);
        regs_ptr = kvmppc_get_gpr(vcpu, 5);
@@ -322,6 +325,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        if (l2_hv.vcpu_token >= NR_CPUS)
                return H_PARAMETER;
 
+       /*
+        * L1 must have set up a suspended state to enter the L2 in a
+        * transactional state, and only in that case. These have to be
+        * filtered out here to prevent causing a TM Bad Thing in the
+        * host HRFID. We could synthesize a TM Bad Thing back to the L1
+        * here, but there doesn't seem to be much point.
+        */
+       if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+               if (!MSR_TM_ACTIVE(l2_regs.msr))
+                       return H_BAD_MODE;
+       } else {
+               if (l2_regs.msr & MSR_TS_MASK)
+                       return H_BAD_MODE;
+               if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+                       return H_BAD_MODE;
+       }
+
        /* translate lpid */
        l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
        if (!l2)
index 83f592e..961b3d7 100644 (file)
@@ -317,6 +317,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         */
        mtspr(SPRN_HDEC, hdec);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tm_return_to_guest:
+#endif
        mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
        mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
        mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
@@ -415,11 +418,23 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
                 * is in real suspend mode and is trying to transition to
                 * transactional mode.
                 */
-               if (local_paca->kvm_hstate.fake_suspend &&
+               if (!local_paca->kvm_hstate.fake_suspend &&
                                (vcpu->arch.shregs.msr & MSR_TS_S)) {
                        if (kvmhv_p9_tm_emulation_early(vcpu)) {
-                               /* Prevent it being handled again. */
-                               trap = 0;
+                               /*
+                                * Go straight back into the guest with the
+                                * new NIP/MSR as set by TM emulation.
+                                */
+                               mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+                               mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
+
+                               /*
+                                * tm_return_to_guest re-loads SRR0/1, DAR,
+                                * DSISR after RI is cleared, in case they had
+                                * been clobbered by a MCE.
+                                */
+                               __mtmsrd(0, 1); /* clear RI */
+                               goto tm_return_to_guest;
                        }
                }
 #endif
@@ -499,6 +514,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         * If we are in real mode, only switch MMU on after the MMU is
         * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
         */
+       if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+           vcpu->arch.shregs.msr & MSR_TS_MASK)
+               msr |= MSR_TS_S;
+
        __mtmsrd(msr, 0);
 
        end_timing(vcpu);
index c5e6775..0f847f1 100644 (file)
@@ -242,6 +242,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
         * value so we can restore it on the way out.
         */
        orig_rets = args.rets;
+       if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+               /*
+                * Don't overflow our args array: ensure there is room for
+                * at least rets[0] (even if the call specifies 0 nret).
+                *
+                * Each handler must then check for the correct nargs and nret
+                * values, but they may always return failure in rets[0].
+                */
+               rc = -EINVAL;
+               goto fail;
+       }
        args.rets = &args.args[be32_to_cpu(args.nargs)];
 
        mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
@@ -269,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
 fail:
        /*
         * We only get here if the guest has called RTAS with a bogus
-        * args pointer. That means we can't get to the args, and so we
-        * can't fail the RTAS call. So fail right out to userspace,
-        * which should kill the guest.
+        * args pointer or nargs/nret values that would overflow the
+        * array. That means we can't get to the args, and so we can't
+        * fail the RTAS call. So fail right out to userspace, which
+        * should kill the guest.
+        *
+        * SLOF should actually pass the hcall return value from the
+        * rtas handler call in r3, so enter_rtas could be modified to
+        * return a failure indication in r3 and we could return such
+        * errors to the guest rather than failing to host userspace.
+        * However old guests that don't test for failure could then
+        * continue silently after errors, so for now we won't do this.
         */
        return rc;
 }
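
The added check rejects guest-supplied nargs values before they are used to index the fixed args[] array; without it, args.rets could point past the end of the structure. The underlying pattern, validating an untrusted count against the array size before forming a pointer from it, sketched with a hypothetical helper and size:

	#define RTAS_ARGS_MAX 16		/* hypothetical array size */

	u32 args[RTAS_ARGS_MAX];
	u32 nargs = read_untrusted_u32();	/* guest/user controlled */
	u32 *rets;

	if (nargs >= ARRAY_SIZE(args))		/* >= keeps room for rets[0] */
		return -EINVAL;
	rets = &args[nargs];			/* provably inside args[] */
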
index be33b53..b4e6f70 100644 (file)
@@ -2048,9 +2048,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        {
                struct kvm_enable_cap cap;
                r = -EFAULT;
-               vcpu_load(vcpu);
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        goto out;
+               vcpu_load(vcpu);
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                vcpu_put(vcpu);
                break;
@@ -2074,9 +2074,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        case KVM_DIRTY_TLB: {
                struct kvm_dirty_tlb dirty;
                r = -EFAULT;
-               vcpu_load(vcpu);
                if (copy_from_user(&dirty, argp, sizeof(dirty)))
                        goto out;
+               vcpu_load(vcpu);
                r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
                vcpu_put(vcpu);
                break;
index 34bb158..beb12cb 100644 (file)
@@ -737,6 +737,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        }
                        break;
 
+               /*
+                * BPF_ST NOSPEC (speculation barrier)
+                */
+               case BPF_ST | BPF_NOSPEC:
+                       break;
+
                /*
                 * BPF_ST(X)
                 */
index de85958..b87a63d 100644 (file)
@@ -627,6 +627,12 @@ emit_clear:
                        }
                        break;
 
+               /*
+                * BPF_ST NOSPEC (speculation barrier)
+                */
+               case BPF_ST | BPF_NOSPEC:
+                       break;
+
                /*
                 * BPF_ST(X)
                 */
index 9b88e3c..534b031 100644 (file)
@@ -42,6 +42,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
        switch (regs->msr & SRR1_WAKEMASK) {
        case SRR1_WAKEDEC:
                set_dec(1);
+               break;
        case SRR1_WAKEEE:
                /*
                 * Handle these when interrupts get re-enabled and we take
index 631a0d5..6b08866 100644 (file)
@@ -77,7 +77,7 @@
 #include "../../../../drivers/pci/pci.h"
 
 DEFINE_STATIC_KEY_FALSE(shared_processor);
-EXPORT_SYMBOL_GPL(shared_processor);
+EXPORT_SYMBOL(shared_processor);
 
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
index 6d98cd9..7b3483b 100644 (file)
@@ -27,10 +27,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
 #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
 
-/* Load initrd at enough distance from DRAM start */
+/* Load initrd anywhere in system RAM */
 static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
 {
-       return image_addr + SZ_256M;
+       return ULONG_MAX;
 }
 
 #define alloc_screen_info(x...)                (&screen_info)
index ff467b9..ac75936 100644 (file)
@@ -132,8 +132,12 @@ unsigned long get_wchan(struct task_struct *task)
 {
        unsigned long pc = 0;
 
-       if (likely(task && task != current && !task_is_running(task)))
+       if (likely(task && task != current && !task_is_running(task))) {
+               if (!try_get_task_stack(task))
+                       return 0;
                walk_stackframe(task, NULL, save_wchan, &pc);
+               put_task_stack(task);
+       }
        return pc;
 }
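
A task's stack can be freed concurrently once the task exits, so walking it requires pinning. try_get_task_stack() takes a reference, failing soft if the stack is already gone, and put_task_stack() drops it. The general shape:

	if (!try_get_task_stack(task))
		return 0;		/* stack already freed: bail out */

	/* ... safe to walk the task's stack frames here ... */

	put_task_stack(task);		/* drop reference; stack may be freed */
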
 
index bceb062..63bc691 100644 (file)
@@ -30,23 +30,23 @@ ENTRY(__asm_copy_from_user)
         * t0 - end of uncopied dst
         */
        add     t0, a0, a2
-       bgtu    a0, t0, 5f
 
        /*
         * Use byte copy only if too small.
+        * SZREG holds 4 for RV32 and 8 for RV64
         */
-       li      a3, 8*SZREG /* size must be larger than size in word_copy */
+       li      a3, 9*SZREG /* size must be larger than size in word_copy */
        bltu    a2, a3, .Lbyte_copy_tail
 
        /*
-        * Copy first bytes until dst is align to word boundary.
+        * Copy first bytes until dst is aligned to word boundary.
         * a0 - start of dst
         * t1 - start of aligned dst
         */
        addi    t1, a0, SZREG-1
        andi    t1, t1, ~(SZREG-1)
        /* dst is already aligned, skip */
-       beq     a0, t1, .Lskip_first_bytes
+       beq     a0, t1, .Lskip_align_dst
 1:
        /* a5 - one byte for copying data */
        fixup lb      a5, 0(a1), 10f
@@ -55,7 +55,7 @@ ENTRY(__asm_copy_from_user)
        addi    a0, a0, 1       /* dst */
        bltu    a0, t1, 1b      /* t1 - start of aligned dst */
 
-.Lskip_first_bytes:
+.Lskip_align_dst:
        /*
         * Now dst is aligned.
         * Use shift-copy if src is misaligned.
@@ -72,10 +72,9 @@ ENTRY(__asm_copy_from_user)
         *
         * a0 - start of aligned dst
         * a1 - start of aligned src
-        * a3 - a1 & mask:(SZREG-1)
         * t0 - end of aligned dst
         */
-       addi    t0, t0, -(8*SZREG-1) /* not to over run */
+       addi    t0, t0, -(8*SZREG) /* not to overrun */
 2:
        fixup REG_L   a4,        0(a1), 10f
        fixup REG_L   a5,    SZREG(a1), 10f
@@ -97,7 +96,7 @@ ENTRY(__asm_copy_from_user)
        addi    a1, a1, 8*SZREG
        bltu    a0, t0, 2b
 
-       addi    t0, t0, 8*SZREG-1 /* revert to original value */
+       addi    t0, t0, 8*SZREG /* revert to original value */
        j       .Lbyte_copy_tail
 
 .Lshift_copy:
@@ -107,7 +106,7 @@ ENTRY(__asm_copy_from_user)
         * For misaligned copy we still perform aligned word copy, but
         * we need to use the value fetched from the previous iteration and
         * do some shifts.
-        * This is safe because reading less than a word size.
+        * This is safe because reading is less than a word size.
         *
         * a0 - start of aligned dst
         * a1 - start of src
@@ -117,7 +116,7 @@ ENTRY(__asm_copy_from_user)
         */
        /* calculating aligned word boundary for dst */
        andi    t1, t0, ~(SZREG-1)
-       /* Converting unaligned src to aligned arc */
+       /* Converting unaligned src to aligned src */
        andi    a1, a1, ~(SZREG-1)
 
        /*
@@ -125,11 +124,11 @@ ENTRY(__asm_copy_from_user)
         * t3 - prev shift
         * t4 - current shift
         */
-       slli    t3, a3, LGREG
+       slli    t3, a3, 3 /* converting bytes in a3 to bits */
        li      a5, SZREG*8
        sub     t4, a5, t3
 
-       /* Load the first word to combine with seceond word */
+       /* Load the first word to combine with second word */
        fixup REG_L   a5, 0(a1), 10f
 
 3:
@@ -161,7 +160,7 @@ ENTRY(__asm_copy_from_user)
         * a1 - start of remaining src
         * t0 - end of remaining dst
         */
-       bgeu    a0, t0, 5f
+       bgeu    a0, t0, .Lout_copy_user  /* check if end of copy */
 4:
        fixup lb      a5, 0(a1), 10f
        addi    a1, a1, 1       /* src */
@@ -169,7 +168,7 @@ ENTRY(__asm_copy_from_user)
        addi    a0, a0, 1       /* dst */
        bltu    a0, t0, 4b      /* t0 - end of dst */
 
-5:
+.Lout_copy_user:
        /* Disable access to user memory */
        csrc CSR_STATUS, t6
        li      a0, 0
index 269fc64..a14bf39 100644 (file)
@@ -127,10 +127,17 @@ void __init mem_init(void)
 }
 
 /*
- * The default maximal physical memory size is -PAGE_OFFSET,
- * limit the memory size via mem.
+ * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernels,
+ * whereas for 64-bit kernels, the end of the virtual address space is occupied
+ * by the modules/BPF/kernel mappings which reduces the available size of the
+ * linear mapping.
+ * Limit the memory size via mem.
  */
+#ifdef CONFIG_64BIT
+static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
+#else
 static phys_addr_t memory_limit = -PAGE_OFFSET;
+#endif
 
 static int __init early_mem(char *p)
 {
@@ -152,7 +159,7 @@ static void __init setup_bootmem(void)
 {
        phys_addr_t vmlinux_end = __pa_symbol(&_end);
        phys_addr_t vmlinux_start = __pa_symbol(&_start);
-       phys_addr_t max_mapped_addr = __pa(~(ulong)0);
+       phys_addr_t __maybe_unused max_mapped_addr;
        phys_addr_t dram_end;
 
 #ifdef CONFIG_XIP_KERNEL
@@ -175,14 +182,21 @@ static void __init setup_bootmem(void)
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
        dram_end = memblock_end_of_DRAM();
+
+#ifndef CONFIG_64BIT
        /*
         * memblock allocator is not aware of the fact that last 4K bytes of
         * the addressable memory can not be mapped because of IS_ERR_VALUE
         * macro. Make sure that last 4k bytes are not usable by memblock
-        * if end of dram is equal to maximum addressable memory.
+        * if end of dram is equal to maximum addressable memory.  For 64-bit
+        * kernel, this problem can't happen here as the end of the virtual
+        * address space is occupied by the kernel mapping then this check must
+        * be done in create_kernel_page_table.
         */
+       max_mapped_addr = __pa(~(ulong)0);
        if (max_mapped_addr == (dram_end - 1))
                memblock_set_current_limit(max_mapped_addr - 4096);
+#endif
 
        min_low_pfn = PFN_UP(memblock_start_of_DRAM());
        max_low_pfn = max_pfn = PFN_DOWN(dram_end);
@@ -570,6 +584,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
        BUG_ON((kernel_map.phys_addr % map_size) != 0);
 
+#ifdef CONFIG_64BIT
+       /*
+        * The last 4K bytes of the addressable memory can not be mapped because
+        * of IS_ERR_VALUE macro.
+        */
+       BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
+#endif
+
        pt_ops.alloc_pte = alloc_pte_early;
        pt_ops.get_pte_virt = get_pte_virt_early;
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -709,6 +731,8 @@ static void __init setup_vm_final(void)
                if (start <= __pa(PAGE_OFFSET) &&
                    __pa(PAGE_OFFSET) < end)
                        start = __pa(PAGE_OFFSET);
+               if (end >= __pa(PAGE_OFFSET) + memory_limit)
+                       end = __pa(PAGE_OFFSET) + memory_limit;
 
                map_size = best_map_size(start, end - start);
                for (pa = start; pa < end; pa += map_size) {
index 81de865..e649742 100644 (file)
@@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
                        return -1;
                break;
 
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
+
        case BPF_ST | BPF_MEM | BPF_B:
        case BPF_ST | BPF_MEM | BPF_H:
        case BPF_ST | BPF_MEM | BPF_W:
index 87e3bf5..3af4131 100644 (file)
@@ -939,6 +939,10 @@ out_be:
                emit_ld(rd, 0, RV_REG_T1, ctx);
                break;
 
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
+
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_B:
                emit_imm(RV_REG_T1, imm, ctx);
index 660c799..e30d3fd 100644 (file)
@@ -11,6 +11,7 @@ UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
 
 obj-y  := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
 obj-all := $(obj-y) piggy.o syms.o
 targets        := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c
new file mode 100644 (file)
index 0000000..c3ebf24
--- /dev/null
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/clz_ctz.c"
index 7de253f..b881840 100644 (file)
@@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m
 CONFIG_L2TP_V3=y
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
 CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
index b671642..1667a3c 100644 (file)
@@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m
 CONFIG_L2TP_V3=y
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
 CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
index 9b4473f..161a9e1 100644 (file)
@@ -445,15 +445,15 @@ struct kvm_vcpu_stat {
        u64 instruction_sigp_init_cpu_reset;
        u64 instruction_sigp_cpu_reset;
        u64 instruction_sigp_unknown;
-       u64 diagnose_10;
-       u64 diagnose_44;
-       u64 diagnose_9c;
-       u64 diagnose_9c_ignored;
-       u64 diagnose_9c_forward;
-       u64 diagnose_258;
-       u64 diagnose_308;
-       u64 diagnose_500;
-       u64 diagnose_other;
+       u64 instruction_diagnose_10;
+       u64 instruction_diagnose_44;
+       u64 instruction_diagnose_9c;
+       u64 diag_9c_ignored;
+       u64 diag_9c_forward;
+       u64 instruction_diagnose_258;
+       u64 instruction_diagnose_308;
+       u64 instruction_diagnose_500;
+       u64 instruction_diagnose_other;
        u64 pfault_sync;
 };
 
index bff50b6..edf5ff1 100644 (file)
@@ -51,6 +51,7 @@ SECTIONS
 
        .rela.dyn ALIGN(8) : { *(.rela.dyn) }
        .got ALIGN(8)   : { *(.got .toc) }
+       .got.plt ALIGN(8) : { *(.got.plt) }
 
        _end = .;
        PROVIDE(end = .);
index d4fb336..4461ea1 100644 (file)
@@ -51,6 +51,7 @@ SECTIONS
 
        .rela.dyn ALIGN(8) : { *(.rela.dyn) }
        .got ALIGN(8)   : { *(.got .toc) }
+       .got.plt ALIGN(8) : { *(.got.plt) }
 
        _end = .;
        PROVIDE(end = .);
index 02c146f..807fa9d 100644 (file)
@@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
        start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
        end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
-       vcpu->stat.diagnose_10++;
+       vcpu->stat.instruction_diagnose_10++;
 
        if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
            || start < 2 * PAGE_SIZE)
@@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
        VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
                   vcpu->run->s.regs.gprs[rx]);
-       vcpu->stat.diagnose_258++;
+       vcpu->stat.instruction_diagnose_258++;
        if (vcpu->run->s.regs.gprs[rx] & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
        rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
        VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
-       vcpu->stat.diagnose_44++;
+       vcpu->stat.instruction_diagnose_44++;
        kvm_vcpu_on_spin(vcpu, true);
        return 0;
 }
@@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
        int tid;
 
        tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
-       vcpu->stat.diagnose_9c++;
+       vcpu->stat.instruction_diagnose_9c++;
 
        /* yield to self */
        if (tid == vcpu->vcpu_id)
@@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
                VCPU_EVENT(vcpu, 5,
                           "diag time slice end directed to %d: yield forwarded",
                           tid);
-               vcpu->stat.diagnose_9c_forward++;
+               vcpu->stat.diag_9c_forward++;
                return 0;
        }
 
@@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
        return 0;
 no_yield:
        VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
-       vcpu->stat.diagnose_9c_ignored++;
+       vcpu->stat.diag_9c_ignored++;
        return 0;
 }
 
@@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
        unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
 
        VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
-       vcpu->stat.diagnose_308++;
+       vcpu->stat.instruction_diagnose_308++;
        switch (subcode) {
        case 3:
                vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
        int ret;
 
-       vcpu->stat.diagnose_500++;
+       vcpu->stat.instruction_diagnose_500++;
        /* No virtio-ccw notification? Get out quickly. */
        if (!vcpu->kvm->arch.css_support ||
            (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
@@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
        case 0x500:
                return __diag_virtio_hypercall(vcpu);
        default:
-               vcpu->stat.diagnose_other++;
+               vcpu->stat.instruction_diagnose_other++;
                return -EOPNOTSUPP;
        }
 }
index b655a7d..4527ac7 100644 (file)
@@ -163,15 +163,15 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
-       STATS_DESC_COUNTER(VCPU, diagnose_10),
-       STATS_DESC_COUNTER(VCPU, diagnose_44),
-       STATS_DESC_COUNTER(VCPU, diagnose_9c),
-       STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored),
-       STATS_DESC_COUNTER(VCPU, diagnose_9c_forward),
-       STATS_DESC_COUNTER(VCPU, diagnose_258),
-       STATS_DESC_COUNTER(VCPU, diagnose_308),
-       STATS_DESC_COUNTER(VCPU, diagnose_500),
-       STATS_DESC_COUNTER(VCPU, diagnose_other),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
+       STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
+       STATS_DESC_COUNTER(VCPU, diag_9c_forward),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
 };
 static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
index 2ae419f..8841926 100644 (file)
@@ -1153,6 +1153,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
                        break;
                }
                break;
+       /*
+        * BPF_NOSPEC (speculation barrier)
+        */
+       case BPF_ST | BPF_NOSPEC:
+               break;
        /*
         * BPF_ST(X)
         */
index 45a0549..b683b69 100644 (file)
@@ -39,7 +39,6 @@ config SUPERH
        select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_HW_BREAKPOINT
-       select HAVE_IDE if HAS_IOPORT_MAP
        select HAVE_IOREMAP_PROT if MMU && !X2TLB
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
index c5fa793..f0c0f95 100644 (file)
@@ -19,7 +19,6 @@ config SPARC
        select OF
        select OF_PROMTREE
        select HAVE_ASM_MODVERSIONS
-       select HAVE_IDE
        select HAVE_ARCH_KGDB if !SMP || SPARC64
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_SECCOMP if SPARC64
index 92675dc..e80ee86 100644 (file)
 
 #define SO_NETNS_COOKIE          0x0050
 
+#define SO_BUF_LOCK              0x0051
+
 #if !defined(__KERNEL__)
 
 
index 4b8d3c6..9a2f20c 100644 (file)
@@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                        return 1;
                break;
        }
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
        case BPF_ST | BPF_MEM | BPF_H:
index 4927065..88fb922 100644 (file)
@@ -202,7 +202,6 @@ config X86
        select HAVE_FUNCTION_TRACER
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT
-       select HAVE_IDE
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK       if X86_64
        select HAVE_IRQ_TIME_ACCOUNTING
index 674906f..68f091b 100644 (file)
@@ -79,9 +79,10 @@ __jump_label_patch(struct jump_entry *entry, enum jump_label_type type)
        return (struct jump_label_patch){.code = code, .size = size};
 }
 
-static inline void __jump_label_transform(struct jump_entry *entry,
-                                         enum jump_label_type type,
-                                         int init)
+static __always_inline void
+__jump_label_transform(struct jump_entry *entry,
+                      enum jump_label_type type,
+                      int init)
 {
        const struct jump_label_patch jlp = __jump_label_patch(entry, type);
 
index b07592c..0b38f94 100644 (file)
@@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 
 static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
 {
+       trace_kvm_hv_hypercall_done(result);
        kvm_hv_hypercall_set_result(vcpu, result);
        ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
@@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
+       struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
        struct kvm_hv_hcall hc;
        u64 ret = HV_STATUS_SUCCESS;
 
@@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
        hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
        hc.rep = !!(hc.rep_cnt || hc.rep_idx);
 
-       if (hc.fast && is_xmm_fast_hypercall(&hc))
-               kvm_hv_hypercall_read_xmm(&hc);
-
        trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
                               hc.ingpa, hc.outgpa);
 
-       if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) {
+       if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
                ret = HV_STATUS_ACCESS_DENIED;
                goto hypercall_complete;
        }
 
+       if (hc.fast && is_xmm_fast_hypercall(&hc)) {
+               if (unlikely(hv_vcpu->enforce_cpuid &&
+                            !(hv_vcpu->cpuid_cache.features_edx &
+                              HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
+                       kvm_queue_exception(vcpu, UD_VECTOR);
+                       return 1;
+               }
+
+               kvm_hv_hypercall_read_xmm(&hc);
+       }
+
        switch (hc.code) {
        case HVCALL_NOTIFY_LONG_SPIN_WAIT:
                if (unlikely(hc.rep)) {
index 698969e..ff005fe 100644 (file)
@@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 {
        ioapic->rtc_status.pending_eoi = 0;
-       bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+       bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
 }
 
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
index 6604017..11e4065 100644 (file)
@@ -43,13 +43,13 @@ struct kvm_vcpu;
 
 struct dest_map {
        /* vcpu bitmap where IRQ has been sent */
-       DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+       DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
 
        /*
         * Vector sent to a given vcpu, only valid when
         * the vcpu's bit in map is set
         */
-       u8 vectors[KVM_MAX_VCPU_ID];
+       u8 vectors[KVM_MAX_VCPU_ID + 1];
 };
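
Both hunks fix the same off-by-one: vcpu IDs run from 0 to KVM_MAX_VCPU_ID inclusive, so a bitmap or array indexed by ID needs KVM_MAX_VCPU_ID + 1 slots. Sizing by an inclusive maximum, sketched with a hypothetical constant:

	#define MAX_ID 1023			/* highest VALID id, inclusive */

	static DECLARE_BITMAP(sent, MAX_ID + 1);/* ids 0..MAX_ID need 1024 bits */
	static u8 vectors[MAX_ID + 1];

	static void record(unsigned int id, u8 vec)
	{
		if (id > MAX_ID)		/* note: >, not >= */
			return;
		__set_bit(id, sent);
		vectors[id] = vec;
	}
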
 
 
index 66f7f5b..c4f4fa2 100644 (file)
@@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt)
  * aggregate version in order to make the slab shrinker
  * faster
  */
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
 {
        kvm->arch.n_used_mmu_pages += nr;
        percpu_counter_add(&kvm_total_used_mmu_pages, nr);
index 1d01da6..a8ad78a 100644 (file)
@@ -646,7 +646,7 @@ out:
 void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       struct vmcb *vmcb = svm->vmcb;
+       struct vmcb *vmcb = svm->vmcb01.ptr;
        bool activated = kvm_vcpu_apicv_active(vcpu);
 
        if (!enable_apicv)
index 3bd09c5..61738ff 100644 (file)
@@ -515,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
         * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
         * avic_physical_id.
         */
-       WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+       WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
 
        /* Copied from vmcb01.  msrpm_base can be overwritten later.  */
        svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
@@ -702,8 +702,8 @@ out:
 }
 
 /* Copy state save area fields which are handled by VMRUN */
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
-                         struct vmcb_save_area *to_save)
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+                         struct vmcb_save_area *from_save)
 {
        to_save->es = from_save->es;
        to_save->cs = from_save->cs;
@@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
        to_save->cpl = 0;
 }
 
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
 {
        to_vmcb->save.fs = from_vmcb->save.fs;
        to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1385,7 +1385,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 
        svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
 
-       svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
+       svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
        nested_load_control_from_vmcb12(svm, ctl);
 
        svm_switch_vmcb(svm, &svm->nested.vmcb02);
index 6710d9e..7fbce34 100644 (file)
@@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock);
 unsigned int max_sev_asid;
 static unsigned int min_sev_asid;
 static unsigned long sev_me_mask;
+static unsigned int nr_asids;
 static unsigned long *sev_asid_bitmap;
 static unsigned long *sev_reclaim_asid_bitmap;
 
@@ -78,11 +79,11 @@ struct enc_region {
 /* Called with the sev_bitmap_lock held, or on shutdown  */
 static int sev_flush_asids(int min_asid, int max_asid)
 {
-       int ret, pos, error = 0;
+       int ret, asid, error = 0;
 
        /* Check if there are any ASIDs to reclaim before performing a flush */
-       pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
-       if (pos >= max_asid)
+       asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
+       if (asid > max_asid)
                return -EBUSY;
 
        /*
@@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
 
        /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
        bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
-                  max_sev_asid);
-       bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+                  nr_asids);
+       bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
 
        return true;
 }
 
 static int sev_asid_new(struct kvm_sev_info *sev)
 {
-       int pos, min_asid, max_asid, ret;
+       int asid, min_asid, max_asid, ret;
        bool retry = true;
        enum misc_res_type type;
 
@@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
         * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
         * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
         */
-       min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+       min_asid = sev->es_active ? 1 : min_sev_asid;
        max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
-       pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
-       if (pos >= max_asid) {
+       asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+       if (asid > max_asid) {
                if (retry && __sev_recycle_asids(min_asid, max_asid)) {
                        retry = false;
                        goto again;
@@ -157,11 +158,11 @@ again:
                goto e_uncharge;
        }
 
-       __set_bit(pos, sev_asid_bitmap);
+       __set_bit(asid, sev_asid_bitmap);
 
        mutex_unlock(&sev_bitmap_lock);
 
-       return pos + 1;
+       return asid;
 e_uncharge:
        misc_cg_uncharge(type, sev->misc_cg, 1);
        put_misc_cg(sev->misc_cg);
@@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm)
 static void sev_asid_free(struct kvm_sev_info *sev)
 {
        struct svm_cpu_data *sd;
-       int cpu, pos;
+       int cpu;
        enum misc_res_type type;
 
        mutex_lock(&sev_bitmap_lock);
 
-       pos = sev->asid - 1;
-       __set_bit(pos, sev_reclaim_asid_bitmap);
+       __set_bit(sev->asid, sev_reclaim_asid_bitmap);
 
        for_each_possible_cpu(cpu) {
                sd = per_cpu(svm_data, cpu);
-               sd->sev_vmcbs[pos] = NULL;
+               sd->sev_vmcbs[sev->asid] = NULL;
        }
 
        mutex_unlock(&sev_bitmap_lock);
@@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void)
        min_sev_asid = edx;
        sev_me_mask = 1UL << (ebx & 0x3f);
 
-       /* Initialize SEV ASID bitmaps */
-       sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+       /*
+        * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
+        * even though it's never used, so that the bitmap is indexed by the
+        * actual ASID.
+        */
+       nr_asids = max_sev_asid + 1;
+       sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
        if (!sev_asid_bitmap)
                goto out;
 
-       sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+       sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
        if (!sev_reclaim_asid_bitmap) {
                bitmap_free(sev_asid_bitmap);
                sev_asid_bitmap = NULL;
@@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void)
                return;
 
        /* No need to take sev_bitmap_lock, all VMs have been destroyed. */
-       sev_flush_asids(0, max_sev_asid);
+       sev_flush_asids(1, max_sev_asid);
 
        bitmap_free(sev_asid_bitmap);
        bitmap_free(sev_reclaim_asid_bitmap);
@@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
        if (!sev_enabled)
                return 0;
 
-       sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
+       sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
        if (!sd->sev_vmcbs)
                return -ENOMEM;
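
The SEV rework above drops the 0-based "pos = asid - 1" translation and indexes the bitmaps by the ASID itself, spending one unused bit on ASID 0. With nr_asids = max_sev_asid + 1, the searches read naturally; note that find_next_zero_bit() takes an exclusive size, so scanning the valid range min_asid..max_asid becomes:

	/* size argument is exclusive, hence max_asid + 1 */
	asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
	if (asid > max_asid)			/* valid range exhausted */
		goto busy;

	__set_bit(asid, sev_asid_bitmap);	/* no +1/-1 conversions anywhere */
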
 
index 664d20f..e8ccab5 100644 (file)
@@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
                goto error_free_vmsa_page;
        }
 
-       svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
        svm->vmcb01.ptr = page_address(vmcb01_page);
        svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
 
@@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        svm_switch_vmcb(svm, &svm->vmcb01);
        init_vmcb(vcpu);
 
+       svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
        svm_init_osvw(vcpu);
        vcpu->arch.microcode_version = 0x01000065;
 
@@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm)
 {
        struct vmcb_control_area *control;
 
-       /* The following fields are ignored when AVIC is enabled */
-       WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+       /*
+        * The following fields are ignored when AVIC is enabled
+        */
+       WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+
        svm_set_intercept(svm, INTERCEPT_VINTR);
 
        /*
@@ -2147,11 +2150,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
        ret = kvm_skip_emulated_instruction(vcpu);
 
        if (vmload) {
-               nested_svm_vmloadsave(vmcb12, svm->vmcb);
+               svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
                svm->sysenter_eip_hi = 0;
                svm->sysenter_esp_hi = 0;
-       } else
-               nested_svm_vmloadsave(svm->vmcb, vmcb12);
+       } else {
+               svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+       }
 
        kvm_vcpu_unmap(vcpu, &map, true);
 
@@ -4344,8 +4348,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 
                BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
 
-               svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
-                                    map_save.hva + 0x400);
+               svm_copy_vmrun_state(map_save.hva + 0x400,
+                                    &svm->vmcb01.ptr->save);
 
                kvm_vcpu_unmap(vcpu, &map_save, true);
        }
@@ -4393,8 +4397,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
                                         &map_save) == -EINVAL)
                                return 1;
 
-                       svm_copy_vmrun_state(map_save.hva + 0x400,
-                                            &svm->vmcb01.ptr->save);
+                       svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+                                            map_save.hva + 0x400);
 
                        kvm_vcpu_unmap(vcpu, &map_save, true);
                }
index 7e20907..bd0fe94 100644 (file)
@@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm);
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct kvm_vcpu *vcpu);
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
-                         struct vmcb_save_area *to_save);
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+                         struct vmcb_save_area *from_save);
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
index 9b9a55a..c53b8bf 100644 (file)
@@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
         * as we mark it dirty unconditionally towards end of vcpu
         * init phase.
         */
-       if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+       if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
            hve->hv_enlightenments_control.msr_bitmap)
                vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
 }
index b484141..03ebe36 100644 (file)
@@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall,
                  __entry->outgpa)
 );
 
+TRACE_EVENT(kvm_hv_hypercall_done,
+       TP_PROTO(u64 result),
+       TP_ARGS(result),
+
+       TP_STRUCT__entry(
+               __field(__u64, result)
+       ),
+
+       TP_fast_assign(
+               __entry->result = result;
+       ),
+
+       TP_printk("result 0x%llx", __entry->result)
+);
+
 /*
  * Tracepoint for Xen hypercall.
  */
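
Each TRACE_EVENT(name, ...) definition generates a trace_<name>() call site; for the event added above, a usage sketch (the actual call is wired up at the hypercall return path elsewhere in this series):

        /* fires "result 0x%llx" when the kvm_hv_hypercall_done event is enabled */
        trace_kvm_hv_hypercall_done(result);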
index a4fd106..e5d5c5e 100644 (file)
@@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                break;
        case MSR_KVM_ASYNC_PF_ACK:
-               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
                        return 1;
                if (data & 0x1) {
                        vcpu->arch.apf.pageready_pending = false;
@@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = vcpu->arch.apf.msr_int_val;
                break;
        case MSR_KVM_ASYNC_PF_ACK:
-               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
                        return 1;
 
                msr_info->data = 0;
@@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
 {
-       return kvm_arch_interrupt_allowed(vcpu) &&
-               kvm_cpu_accept_dm_intr(vcpu);
+       /*
+        * Do not cause an interrupt window exit if an exception
+        * is pending or an event needs reinjection; userspace
+        * might want to inject the interrupt manually using KVM_SET_REGS
+        * or KVM_SET_SREGS.  For that to work, we must be at an
+        * instruction boundary with no events half-injected.
+        */
+       return (kvm_arch_interrupt_allowed(vcpu) &&
+               kvm_cpu_accept_dm_intr(vcpu) &&
+               !kvm_event_needs_reinjection(vcpu) &&
+               !vcpu->arch.exception.pending);
 }
 
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
index 333650b..0fe6aac 100644 (file)
@@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        }
                        break;
 
+                       /* speculation barrier */
+               case BPF_ST | BPF_NOSPEC:
+                       if (boot_cpu_has(X86_FEATURE_XMM2))
+                               /* Emit 'lfence' */
+                               EMIT3(0x0F, 0xAE, 0xE8);
+                       break;
+
                        /* ST: *(u8*)(dst_reg + off) = imm */
                case BPF_ST | BPF_MEM | BPF_B:
                        if (is_ereg(dst_reg))
index 3da88de..3bfda5f 100644 (file)
@@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        i++;
                        break;
                }
+               /* speculation barrier */
+               case BPF_ST | BPF_NOSPEC:
+                       if (boot_cpu_has(X86_FEATURE_XMM2))
+                               /* Emit 'lfence' */
+                               EMIT3(0x0F, 0xAE, 0xE8);
+                       break;
                /* ST: *(u8*)(dst_reg + off) = imm */
                case BPF_ST | BPF_MEM | BPF_H:
                case BPF_ST | BPF_MEM | BPF_B:
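
In both JIT hunks above, the emitted bytes 0x0F, 0xAE, 0xE8 are the machine encoding of lfence; the X86_FEATURE_XMM2 (SSE2) check gates the emission because lfence was introduced with SSE2. The equivalent at the C level, as a sketch:

        /* what the JIT-emitted bytes execute, expressed as inline asm */
        asm volatile("lfence" ::: "memory");    /* serialize speculative loads */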
index 2332b21..3878880 100644 (file)
@@ -327,7 +327,6 @@ config XTENSA_PLATFORM_ISS
 
 config XTENSA_PLATFORM_XT2000
        bool "XT2000"
-       select HAVE_IDE
        help
          XT2000 is the name of Tensilica's feature-rich emulation platform.
          This hardware is capable of running a full Linux distribution.
index c2d6bc8..5fac375 100644 (file)
@@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
                return -1;
 
        iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
+       wait->committed = true;
 
        /*
         * autoremove_wake_function() removes the wait entry only when it
-        * actually changed the task state.  We want the wait always
-        * removed.  Remove explicitly and use default_wake_function().
+        * actually changed the task state. We want the wait always removed.
+        * Remove explicitly and use default_wake_function(). Note that the
+        * order of operations is important as finish_wait() tests whether
+        * @wq_entry is removed without grabbing the lock.
         */
-       list_del_init(&wq_entry->entry);
-       wait->committed = true;
-
        default_wake_function(wq_entry, mode, flags, key);
+       list_del_init_careful(&wq_entry->entry);
        return 0;
 }
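
The reordering above pairs with the lock-free check in finish_wait(); a simplified waiter-side sketch of the generic pattern (condensed, not part of this patch):

        for (;;) {
                prepare_to_wait(&wq_head, &wq_entry, TASK_UNINTERRUPTIBLE);
                if (condition)          /* e.g. wait->committed set by the waker */
                        break;
                schedule();
        }
        /*
         * finish_wait() checks list_empty_careful(&wq_entry.entry) without
         * taking the lock; that is what list_del_init_careful() pairs with.
         */
        finish_wait(&wq_head, &wq_entry);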
 
index c838d81..0f006ca 100644 (file)
@@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
        percpu_ref_put(&q->q_usage_counter);
 }
 
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
-                                  struct blk_mq_hw_ctx *hctx,
-                                  unsigned int hctx_idx)
-{
-       if (hctx->sched_tags) {
-               blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
-               blk_mq_free_rq_map(hctx->sched_tags, set->flags);
-               hctx->sched_tags = NULL;
-       }
-}
-
 static int blk_mq_sched_alloc_tags(struct request_queue *q,
                                   struct blk_mq_hw_ctx *hctx,
                                   unsigned int hctx_idx)
@@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
                return -ENOMEM;
 
        ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
-       if (ret)
-               blk_mq_sched_free_tags(set, hctx, hctx_idx);
+       if (ret) {
+               blk_mq_free_rq_map(hctx->sched_tags, set->flags);
+               hctx->sched_tags = NULL;
+       }
 
        return ret;
 }
index af4d2ab..298ee78 100644 (file)
@@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev)
        disk_release_events(disk);
        kfree(disk->random);
        xa_destroy(&disk->part_tbl);
-       bdput(disk->part0);
        if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
                blk_put_queue(disk->queue);
-       kfree(disk);
+       bdput(disk->part0);     /* frees the disk */
 }
 struct class block_class = {
        .name           = "block",
index 9d872ea..8f9940f 100644 (file)
@@ -370,7 +370,7 @@ config ACPI_TABLE_UPGRADE
 config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD
        bool "Override ACPI tables from built-in initrd"
        depends on ACPI_TABLE_UPGRADE
-       depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION=""
+       depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION_NONE
        help
          This option provides functionality to override arbitrary ACPI tables
          from built-in uncompressed initrd.
index 5fca182..550b908 100644 (file)
@@ -9,6 +9,42 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
+struct pch_fivr_resp {
+       u64 status;
+       u64 result;
+};
+
+static int pch_fivr_read(acpi_handle handle, char *method, struct pch_fivr_resp *fivr_resp)
+{
+       struct acpi_buffer resp = { sizeof(struct pch_fivr_resp), fivr_resp};
+       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct acpi_buffer format = { sizeof("NN"), "NN" };
+       union acpi_object *obj;
+       acpi_status status;
+       int ret = -EFAULT;
+
+       status = acpi_evaluate_object(handle, method, NULL, &buffer);
+       if (ACPI_FAILURE(status))
+               return ret;
+
+       obj = buffer.pointer;
+       if (!obj || obj->type != ACPI_TYPE_PACKAGE)
+               goto release_buffer;
+
+       status = acpi_extract_package(obj, &format, &resp);
+       if (ACPI_FAILURE(status))
+               goto release_buffer;
+
+       if (fivr_resp->status)
+               goto release_buffer;
+
+       ret = 0;
+
+release_buffer:
+       kfree(buffer.pointer);
+       return ret;
+}
+
 /*
  * Presentation of attributes which are defined for INT1045
  * They are:
@@ -23,15 +59,14 @@ static ssize_t name##_show(struct device *dev,\
                           char *buf)\
 {\
        struct acpi_device *acpi_dev = dev_get_drvdata(dev);\
-       unsigned long long val;\
-       acpi_status status;\
+       struct pch_fivr_resp fivr_resp;\
+       int status;\
 \
-       status = acpi_evaluate_integer(acpi_dev->handle, #method,\
-                                      NULL, &val);\
-       if (ACPI_SUCCESS(status))\
-               return sprintf(buf, "%d\n", (int)val);\
-       else\
-               return -EINVAL;\
+       status = pch_fivr_read(acpi_dev->handle, #method, &fivr_resp);\
+       if (status)\
+               return status;\
+\
+       return sprintf(buf, "%llu\n", fivr_resp.result);\
 }
 
 #define PCH_FIVR_STORE(name, method) \
index dc01fb5..ee78a21 100644 (file)
@@ -423,13 +423,6 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
        }
 }
 
-static bool irq_is_legacy(struct acpi_resource_irq *irq)
-{
-       return irq->triggering == ACPI_EDGE_SENSITIVE &&
-               irq->polarity == ACPI_ACTIVE_HIGH &&
-               irq->shareable == ACPI_EXCLUSIVE;
-}
-
 /**
  * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
  * @ares: Input ACPI resource object.
@@ -468,7 +461,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
                }
                acpi_dev_get_irqresource(res, irq->interrupts[index],
                                         irq->triggering, irq->polarity,
-                                        irq->shareable, irq_is_legacy(irq));
+                                        irq->shareable, true);
                break;
        case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
                ext_irq = &ares->data.extended_irq;
index e7ddd28..d5cedff 100644 (file)
@@ -860,11 +860,9 @@ EXPORT_SYMBOL(acpi_dev_present);
  * Return the next match of ACPI device if another matching device was present
  * at the moment of invocation, or NULL otherwise.
  *
- * FIXME: The function does not tolerate the sudden disappearance of @adev, e.g.
- * in the case of a hotplug event. That said, the caller should ensure that
- * this will never happen.
- *
  * The caller is responsible for invoking acpi_dev_put() on the returned device.
+ * On the other hand, the function invokes acpi_dev_put() on the given @adev,
+ * assuming that its reference counter had been increased beforehand.
  *
  * See additional information in acpi_dev_present() as well.
  */
@@ -880,6 +878,7 @@ acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const cha
        match.hrv = hrv;
 
        dev = bus_find_device(&acpi_bus_type, start, &match, acpi_dev_match_cb);
+       acpi_dev_put(adev);
        return dev ? to_acpi_device(dev) : NULL;
 }
 EXPORT_SYMBOL(acpi_dev_get_next_match_dev);
index 1c50780..fbdbef0 100644 (file)
@@ -378,19 +378,25 @@ static int lps0_device_attach(struct acpi_device *adev,
                 * AMDI0006:
                 * - should use rev_id 0x0
                 * - function mask = 0x3: Should use Microsoft method
+                * AMDI0007:
+                * - Should use rev_id 0x2
+                * - Should only use AMD method
                 */
                const char *hid = acpi_device_hid(adev);
-               rev_id = 0;
+               rev_id = strcmp(hid, "AMDI0007") ? 0 : 2;
                lps0_dsm_func_mask = validate_dsm(adev->handle,
                                        ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
                lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle,
-                                       ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id,
+                                       ACPI_LPS0_DSM_UUID_MICROSOFT, 0,
                                        &lps0_dsm_guid_microsoft);
                if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") ||
                                                 !strcmp(hid, "AMDI0005"))) {
                        lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1;
                        acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n",
                                          ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask);
+               } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) {
+                       lps0_dsm_func_mask_microsoft = -EINVAL;
+                       acpi_handle_debug(adev->handle, "_DSM Using AMD method\n");
                }
        } else {
                rev_id = 1;
index ae7189d..b71ea4a 100644 (file)
@@ -637,6 +637,20 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf,
 }
 EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
 
+static void ata_pio_xfer(struct ata_queued_cmd *qc, struct page *page,
+               unsigned int offset, size_t xfer_size)
+{
+       bool do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
+       unsigned char *buf;
+
+       buf = kmap_atomic(page);
+       qc->ap->ops->sff_data_xfer(qc, buf + offset, xfer_size, do_write);
+       kunmap_atomic(buf);
+
+       if (!do_write && !PageSlab(page))
+               flush_dcache_page(page);
+}
+
 /**
  *     ata_pio_sector - Transfer a sector of data.
  *     @qc: Command on going
@@ -648,11 +662,9 @@ EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
  */
 static void ata_pio_sector(struct ata_queued_cmd *qc)
 {
-       int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
        struct ata_port *ap = qc->ap;
        struct page *page;
        unsigned int offset;
-       unsigned char *buf;
 
        if (!qc->cursg) {
                qc->curbytes = qc->nbytes;
@@ -670,13 +682,20 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 
        DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
 
-       /* do the actual data transfer */
-       buf = kmap_atomic(page);
-       ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, do_write);
-       kunmap_atomic(buf);
+       /*
+        * Split the transfer when it crosses a page boundary.  Note that the
+        * split still has to be dword aligned like all ATA data transfers.
+        */
+       WARN_ON_ONCE(offset % 4);
+       if (offset + qc->sect_size > PAGE_SIZE) {
+               unsigned int split_len = PAGE_SIZE - offset;
 
-       if (!do_write && !PageSlab(page))
-               flush_dcache_page(page);
+               ata_pio_xfer(qc, page, offset, split_len);
+               ata_pio_xfer(qc, nth_page(page, 1), 0,
+                            qc->sect_size - split_len);
+       } else {
+               ata_pio_xfer(qc, page, offset, qc->sect_size);
+       }
 
        qc->curbytes += qc->sect_size;
        qc->cursg_ofs += qc->sect_size;
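
A worked example of the split, assuming PAGE_SIZE == 4096 and the usual 512-byte sector:

        /*
         * offset = 3840, sect_size = 512:
         *   offset + sect_size = 4352 > 4096, so the transfer is split into
         *   split_len = 4096 - 3840 = 256 bytes from this page, plus
         *   sect_size - split_len = 256 bytes from nth_page(page, 1).
         * The starting offset is dword aligned, as WARN_ON_ONCE(offset % 4)
         * asserts.
         */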
index 4f2951c..d0e67ec 100644 (file)
@@ -2167,10 +2167,10 @@ static int hrz_open (struct atm_vcc *atm_vcc)
   
   // Part of the job is done by atm_pcr_goal which gives us a PCR
   // specification which says: EITHER grab the maximum available PCR
-  // (and perhaps a lower bound which we musn't pass), OR grab this
+  // (and perhaps a lower bound which we must not pass), OR grab this
   // amount, rounding down if you have to (and perhaps a lower bound
-  // which we musn't pass) OR grab this amount, rounding up if you
-  // have to (and perhaps an upper bound which we musn't pass). If any
+  // which we must not pass) OR grab this amount, rounding up if you
+  // have to (and perhaps an upper bound which we must not pass). If any
   // bounds ARE passed we fail. Note that rounding is only rounding to
   // match device limitations, we do not round down to satisfy
   // bandwidth availability even if this would not violate any given
index adc199d..6a30264 100644 (file)
@@ -231,6 +231,8 @@ EXPORT_SYMBOL_GPL(auxiliary_find_device);
 int __auxiliary_driver_register(struct auxiliary_driver *auxdrv,
                                struct module *owner, const char *modname)
 {
+       int ret;
+
        if (WARN_ON(!auxdrv->probe) || WARN_ON(!auxdrv->id_table))
                return -EINVAL;
 
@@ -246,7 +248,11 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv,
        auxdrv->driver.bus = &auxiliary_bus_type;
        auxdrv->driver.mod_name = modname;
 
-       return driver_register(&auxdrv->driver);
+       ret = driver_register(&auxdrv->driver);
+       if (ret)
+               kfree(auxdrv->driver.name);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(__auxiliary_driver_register);
 
index cadcade..f636049 100644 (file)
@@ -574,8 +574,10 @@ static void devlink_remove_symlinks(struct device *dev,
                return;
        }
 
-       snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
-       sysfs_remove_link(&con->kobj, buf);
+       if (device_is_registered(con)) {
+               snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
+               sysfs_remove_link(&con->kobj, buf);
+       }
        snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
        sysfs_remove_link(&sup->kobj, buf);
        kfree(buf);
index f37b9e3..f0cdff0 100644 (file)
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+/**
+ * loop_global_lock_killable() - take locks for safe loop_validate_file() test
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo is about to bind another "struct loop_device", false otherwise
+ *
+ * Returns 0 on success, -EINTR otherwise.
+ *
+ * Since loop_validate_file() traverses other "struct loop_device" instances
+ * when is_loop_device() is true, we need a global lock for serializing
+ * concurrent loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
+ */
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+       int err;
+
+       if (global) {
+               err = mutex_lock_killable(&loop_validate_mutex);
+               if (err)
+                       return err;
+       }
+       err = mutex_lock_killable(&lo->lo_mutex);
+       if (err && global)
+               mutex_unlock(&loop_validate_mutex);
+       return err;
+}
+
+/**
+ * loop_global_unlock() - release locks taken by loop_global_lock_killable()
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo was about to bind another "struct loop_device", false otherwise
+ */
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+       mutex_unlock(&lo->lo_mutex);
+       if (global)
+               mutex_unlock(&loop_validate_mutex);
+}
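
Usage pattern for the pair introduced above, as a sketch (note the error path: on failure, loop_global_lock_killable() has already released the global mutex itself):

        is_loop = is_loop_device(file);
        err = loop_global_lock_killable(lo, is_loop);
        if (err)
                return err;             /* no locks held here */
        /* ... validate and reconfigure under lo->lo_mutex ... */
        loop_global_unlock(lo, is_loop);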
 
 static int max_part;
 static int part_shift;
@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
        while (is_loop_device(f)) {
                struct loop_device *l;
 
+               lockdep_assert_held(&loop_validate_mutex);
                if (f->f_mapping->host->i_rdev == bdev->bd_dev)
                        return -EBADF;
 
                l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
-               if (l->lo_state != Lo_bound) {
+               if (l->lo_state != Lo_bound)
                        return -EINVAL;
-               }
+               /* Order wrt setting lo->lo_backing_file in loop_configure(). */
+               rmb();
                f = l->lo_backing_file;
        }
        if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
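
The rmb() above pairs with the wmb() that loop_configure() gains later in this patch; sketched side by side:

        /*
         *   loop_configure()                  loop_validate_file()
         *   lo->lo_backing_file = file;       if (l->lo_state != Lo_bound)
         *   ...                                       return -EINVAL;
         *   wmb();                            rmb();
         *   lo->lo_state = Lo_bound;          f = l->lo_backing_file;
         *
         * A reader that observes Lo_bound is thus guaranteed to also observe
         * the backing file published before the wmb().
         */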
@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
                          unsigned int arg)
 {
-       struct file     *file = NULL, *old_file;
-       int             error;
-       bool            partscan;
+       struct file *file = fget(arg);
+       struct file *old_file;
+       int error;
+       bool partscan;
+       bool is_loop;
 
-       error = mutex_lock_killable(&lo->lo_mutex);
+       if (!file)
+               return -EBADF;
+       is_loop = is_loop_device(file);
+       error = loop_global_lock_killable(lo, is_loop);
        if (error)
-               return error;
+               goto out_putf;
        error = -ENXIO;
        if (lo->lo_state != Lo_bound)
                goto out_err;
@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
                goto out_err;
 
-       error = -EBADF;
-       file = fget(arg);
-       if (!file)
-               goto out_err;
-
        error = loop_validate_file(file, bdev);
        if (error)
                goto out_err;
@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        loop_update_dio(lo);
        blk_mq_unfreeze_queue(lo->lo_queue);
        partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, is_loop);
+
+       /*
+        * Flush loop_validate_file() before fput(), because l->lo_backing_file
+        * might still point at old_file, which might hold the last reference.
+        */
+       if (!is_loop) {
+               mutex_lock(&loop_validate_mutex);
+               mutex_unlock(&loop_validate_mutex);
+       }
        /*
         * We must drop file reference outside of lo_mutex as dropping
         * the file ref can take open_mutex which creates circular locking
@@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        return 0;
 
 out_err:
-       mutex_unlock(&lo->lo_mutex);
-       if (file)
-               fput(file);
+       loop_global_unlock(lo, is_loop);
+out_putf:
+       fput(file);
        return error;
 }
 
@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
                          struct block_device *bdev,
                          const struct loop_config *config)
 {
-       struct file     *file;
-       struct inode    *inode;
+       struct file *file = fget(config->fd);
+       struct inode *inode;
        struct address_space *mapping;
-       int             error;
-       loff_t          size;
-       bool            partscan;
-       unsigned short  bsize;
+       int error;
+       loff_t size;
+       bool partscan;
+       unsigned short bsize;
+       bool is_loop;
+
+       if (!file)
+               return -EBADF;
+       is_loop = is_loop_device(file);
 
        /* This is safe, since we have a reference from open(). */
        __module_get(THIS_MODULE);
 
-       error = -EBADF;
-       file = fget(config->fd);
-       if (!file)
-               goto out;
-
        /*
         * If we don't hold exclusive handle for the device, upgrade to it
         * here to avoid changing device under exclusive owner.
@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
                        goto out_putf;
        }
 
-       error = mutex_lock_killable(&lo->lo_mutex);
+       error = loop_global_lock_killable(lo, is_loop);
        if (error)
                goto out_bdev;
 
@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
        size = get_loop_size(lo, file);
        loop_set_size(lo, size);
 
+       /* Order wrt reading lo_state in loop_validate_file(). */
+       wmb();
+
        lo->lo_state = Lo_bound;
        if (part_shift)
                lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
         * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
         */
        bdgrab(bdev);
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, is_loop);
        if (partscan)
                loop_reread_partitions(lo);
        if (!(mode & FMODE_EXCL))
@@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
        return 0;
 
 out_unlock:
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, is_loop);
 out_bdev:
        if (!(mode & FMODE_EXCL))
                bd_abort_claiming(bdev, loop_configure);
 out_putf:
        fput(file);
-out:
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
        return error;
@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
        int lo_number;
        struct loop_worker *pos, *worker;
 
+       /*
+        * Flush loop_configure() and loop_change_fd(). It is acceptable for
+        * loop_validate_file() to succeed, because the actual clear operation
+        * has not started yet.
+        */
+       mutex_lock(&loop_validate_mutex);
+       mutex_unlock(&loop_validate_mutex);
+       /*
+        * loop_validate_file() now fails because l->lo_state != Lo_bound
+        * became visible.
+        */
+
        mutex_lock(&lo->lo_mutex);
        if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
                err = -ENXIO;
index 531d390..90b947c 100644 (file)
@@ -4100,8 +4100,6 @@ again:
 
 static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
 {
-       bool need_wait;
-
        dout("%s rbd_dev %p\n", __func__, rbd_dev);
        lockdep_assert_held_write(&rbd_dev->lock_rwsem);
 
@@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
         */
        rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
        rbd_assert(!completion_done(&rbd_dev->releasing_wait));
-       need_wait = !list_empty(&rbd_dev->running_list);
-       downgrade_write(&rbd_dev->lock_rwsem);
-       if (need_wait)
-               wait_for_completion(&rbd_dev->releasing_wait);
-       up_read(&rbd_dev->lock_rwsem);
+       if (list_empty(&rbd_dev->running_list))
+               return true;
+
+       up_write(&rbd_dev->lock_rwsem);
+       wait_for_completion(&rbd_dev->releasing_wait);
 
        down_write(&rbd_dev->lock_rwsem);
        if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
@@ -4203,15 +4201,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
        if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
                down_write(&rbd_dev->lock_rwsem);
                if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
-                       /*
-                        * we already know that the remote client is
-                        * the owner
-                        */
-                       up_write(&rbd_dev->lock_rwsem);
-                       return;
+                       dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
+                            __func__, rbd_dev, cid.gid, cid.handle);
+               } else {
+                       rbd_set_owner_cid(rbd_dev, &cid);
                }
-
-               rbd_set_owner_cid(rbd_dev, &cid);
                downgrade_write(&rbd_dev->lock_rwsem);
        } else {
                down_read(&rbd_dev->lock_rwsem);
@@ -4236,14 +4230,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
        if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
                down_write(&rbd_dev->lock_rwsem);
                if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
-                       dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n",
+                       dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
                             __func__, rbd_dev, cid.gid, cid.handle,
                             rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
-                       up_write(&rbd_dev->lock_rwsem);
-                       return;
+               } else {
+                       rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
                }
-
-               rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
                downgrade_write(&rbd_dev->lock_rwsem);
        } else {
                down_read(&rbd_dev->lock_rwsem);
@@ -4951,6 +4943,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
                disk->minors = RBD_MINORS_PER_MAJOR;
        }
        disk->fops = &rbd_bd_ops;
+       disk->private_data = rbd_dev;
 
        blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
        /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
index 09c8ab5..b3691de 100644 (file)
@@ -914,7 +914,8 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev)
 }
 EXPORT_SYMBOL_GPL(fsl_mc_device_remove);
 
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+                                         u16 if_id)
 {
        struct fsl_mc_device *mc_bus_dev, *endpoint;
        struct fsl_mc_obj_desc endpoint_desc = {{ 0 }};
@@ -925,6 +926,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
        mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
        strcpy(endpoint1.type, mc_dev->obj_desc.type);
        endpoint1.id = mc_dev->obj_desc.id;
+       endpoint1.if_id = if_id;
 
        err = dprc_get_connection(mc_bus_dev->mc_io, 0,
                                  mc_bus_dev->mc_handle,
index 5b9ea66..bc239a1 100644 (file)
@@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
                      struct image_info *img_info);
 void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
 int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
-                       struct mhi_chan *mhi_chan);
+                       struct mhi_chan *mhi_chan, unsigned int flags);
 int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
                       struct mhi_chan *mhi_chan);
 void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
index 22acde1..8444823 100644 (file)
@@ -773,11 +773,18 @@ static void mhi_process_cmd_completion(struct mhi_controller *mhi_cntrl,
        cmd_pkt = mhi_to_virtual(mhi_ring, ptr);
 
        chan = MHI_TRE_GET_CMD_CHID(cmd_pkt);
-       mhi_chan = &mhi_cntrl->mhi_chan[chan];
-       write_lock_bh(&mhi_chan->lock);
-       mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
-       complete(&mhi_chan->completion);
-       write_unlock_bh(&mhi_chan->lock);
+
+       if (chan < mhi_cntrl->max_chan &&
+           mhi_cntrl->mhi_chan[chan].configured) {
+               mhi_chan = &mhi_cntrl->mhi_chan[chan];
+               write_lock_bh(&mhi_chan->lock);
+               mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
+               complete(&mhi_chan->completion);
+               write_unlock_bh(&mhi_chan->lock);
+       } else {
+               dev_err(&mhi_cntrl->mhi_dev->dev,
+                       "Completion packet for invalid channel ID: %d\n", chan);
+       }
 
        mhi_del_ring_element(mhi_cntrl, mhi_ring);
 }
@@ -1423,7 +1430,7 @@ exit_unprepare_channel:
 }
 
 int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
-                       struct mhi_chan *mhi_chan)
+                       struct mhi_chan *mhi_chan, unsigned int flags)
 {
        int ret = 0;
        struct device *dev = &mhi_chan->mhi_dev->dev;
@@ -1448,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
        if (ret)
                goto error_pm_state;
 
+       if (mhi_chan->dir == DMA_FROM_DEVICE)
+               mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS);
+
        /* Pre-allocate buffer for xfer ring */
        if (mhi_chan->pre_alloc) {
                int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
@@ -1603,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan)
 }
 
 /* Move channel to start state */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags)
 {
        int ret, dir;
        struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
@@ -1614,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
                if (!mhi_chan)
                        continue;
 
-               ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
+               ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags);
                if (ret)
                        goto error_open_chan;
        }
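
With the new flags argument, a client that wants the MHI core to pre-allocate its inbound buffers passes MHI_CH_INBOUND_ALLOC_BUFS (the flag introduced by this series); a caller-side sketch:

        /* pre-allocate RX buffers for every DMA_FROM_DEVICE channel */
        ret = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
        if (ret)
                return ret;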
index 19413da..b33b9d7 100644 (file)
@@ -33,6 +33,8 @@
  * @bar_num: PCI base address register to use for MHI MMIO register space
  * @dma_data_width: DMA transfer word size (32 or 64 bits)
  * @mru_default: default MRU size for MBIM network packets
+ * @sideband_wake: set for devices that use a dedicated sideband GPIO for
+ *                wakeup instead of inband wake support (such as sdx24)
  */
 struct mhi_pci_dev_info {
        const struct mhi_controller_config *config;
@@ -42,6 +44,7 @@ struct mhi_pci_dev_info {
        unsigned int bar_num;
        unsigned int dma_data_width;
        unsigned int mru_default;
+       bool sideband_wake;
 };
 
 #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
@@ -74,6 +77,22 @@ struct mhi_pci_dev_info {
                .doorbell_mode_switch = false,          \
        }
 
+#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \
+       {                                               \
+               .num = ch_num,                          \
+               .name = ch_name,                        \
+               .num_elements = el_count,               \
+               .event_ring = ev_ring,                  \
+               .dir = DMA_FROM_DEVICE,                 \
+               .ee_mask = BIT(MHI_EE_AMSS),            \
+               .pollcfg = 0,                           \
+               .doorbell = MHI_DB_BRST_DISABLE,        \
+               .lpm_notify = false,                    \
+               .offload_channel = false,               \
+               .doorbell_mode_switch = false,          \
+               .auto_queue = true,                     \
+       }
+
 #define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \
        {                                       \
                .num_elements = el_count,       \
@@ -212,7 +231,7 @@ static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = {
        MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
        MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
        MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
-       MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0),
+       MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0),
        MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0),
        MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0),
        MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
@@ -244,7 +263,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx65_info = {
        .edl = "qcom/sdx65m/edl.mbn",
        .config = &modem_qcom_v1_mhiv_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = false,
 };
 
 static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
@@ -254,7 +274,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
        .config = &modem_qcom_v1_mhiv_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
        .dma_data_width = 32,
-       .mru_default = 32768
+       .mru_default = 32768,
+       .sideband_wake = false,
 };
 
 static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = {
@@ -262,7 +283,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = {
        .edl = "qcom/prog_firehose_sdx24.mbn",
        .config = &modem_qcom_v1_mhiv_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = true,
 };
 
 static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = {
@@ -304,7 +326,8 @@ static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = {
        .edl = "qcom/prog_firehose_sdx24.mbn",
        .config = &modem_quectel_em1xx_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = true,
 };
 
 static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = {
@@ -342,7 +365,8 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = {
        .edl = "qcom/sdx55m/edl.mbn",
        .config = &modem_foxconn_sdx55_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = false,
 };
 
 static const struct pci_device_id mhi_pci_id_table[] = {
@@ -643,11 +667,14 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        mhi_cntrl->status_cb = mhi_pci_status_cb;
        mhi_cntrl->runtime_get = mhi_pci_runtime_get;
        mhi_cntrl->runtime_put = mhi_pci_runtime_put;
-       mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
-       mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
-       mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
        mhi_cntrl->mru = info->mru_default;
 
+       if (info->sideband_wake) {
+               mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
+               mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
+               mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
+       }
+
        err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
        if (err)
                return err;
index be16076..f9d5b73 100644 (file)
@@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks,
 }
 EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional);
 
+static void devm_clk_bulk_release_all(struct device *dev, void *res)
+{
+       struct clk_bulk_devres *devres = res;
+
+       clk_bulk_put_all(devres->num_clks, devres->clks);
+}
+
 int __must_check devm_clk_bulk_get_all(struct device *dev,
                                       struct clk_bulk_data **clks)
 {
        struct clk_bulk_devres *devres;
        int ret;
 
-       devres = devres_alloc(devm_clk_bulk_release,
+       devres = devres_alloc(devm_clk_bulk_release_all,
                              sizeof(*devres), GFP_KERNEL);
        if (!devres)
                return -ENOMEM;
index 18117ce..5c75e3d 100644 (file)
@@ -526,7 +526,7 @@ struct stm32f4_pll {
 
 struct stm32f4_pll_post_div_data {
        int idx;
-       u8 pll_num;
+       int pll_idx;
        const char *name;
        const char *parent;
        u8 flag;
@@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = {
 
 #define MAX_POST_DIV 3
 static const struct stm32f4_pll_post_div_data  post_div_data[MAX_POST_DIV] = {
-       { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q",
+       { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q",
                CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL},
 
-       { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q",
+       { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q",
                CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL },
 
-       { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
+       { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
                STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table },
 };
 
@@ -1774,7 +1774,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
                                post_div->width,
                                post_div->flag_div,
                                post_div->div_table,
-                               clks[post_div->pll_num],
+                               clks[post_div->pll_idx],
                                &stm32f4_clk_lock);
 
                if (post_div->idx != NO_IDX)
index 5ecc37a..c1ec75a 100644 (file)
@@ -18,6 +18,7 @@ config COMMON_CLK_HI3519
 config COMMON_CLK_HI3559A
        bool "Hi3559A Clock Driver"
        depends on ARCH_HISI || COMPILE_TEST
+       select RESET_HISI
        default ARCH_HISI
        help
          Build the clock driver for hi3559a.
index 800b2fe..b2c142f 100644 (file)
@@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK,
 
 static struct clk_smd_rpm *msm8936_clks[] = {
        [RPM_SMD_PCNOC_CLK]             = &msm8916_pcnoc_clk,
-       [RPM_SMD_PCNOC_A_CLK]           = &msm8916_pcnoc_clk,
+       [RPM_SMD_PCNOC_A_CLK]           = &msm8916_pcnoc_a_clk,
        [RPM_SMD_SNOC_CLK]              = &msm8916_snoc_clk,
        [RPM_SMD_SNOC_A_CLK]            = &msm8916_snoc_a_clk,
        [RPM_SMD_BIMC_CLK]              = &msm8916_bimc_clk,
index 316912d..4f2c330 100644 (file)
@@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw)
        gate_ops->disable(gate_hw);
 }
 
+static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw)
+{
+       struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw);
+       const struct clk_ops *gate_ops = sdmmc_mux->gate_ops;
+       struct clk_hw *gate_hw = &sdmmc_mux->gate.hw;
+
+       gate_ops->disable_unused(gate_hw);
+}
+
 static void clk_sdmmc_mux_restore_context(struct clk_hw *hw)
 {
        struct clk_hw *parent = clk_hw_get_parent(hw);
@@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = {
        .is_enabled = clk_sdmmc_mux_is_enabled,
        .enable = clk_sdmmc_mux_enable,
        .disable = clk_sdmmc_mux_disable,
+       .disable_unused = clk_sdmmc_mux_disable_unused,
        .restore_context = clk_sdmmc_mux_restore_context,
 };
 
index 10d4457..eb9c65f 100644 (file)
@@ -34,7 +34,6 @@ static long __init parse_acpi_path(const struct efi_dev_path *node,
                        break;
                if (!adev->pnp.unique_id && node->acpi.uid == 0)
                        break;
-               acpi_dev_put(adev);
        }
        if (!adev)
                return -ENODEV;
index 4b7ee3f..847f33f 100644 (file)
@@ -896,6 +896,7 @@ static int __init efi_memreserve_map_root(void)
 static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
 {
        struct resource *res, *parent;
+       int ret;
 
        res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
        if (!res)
@@ -908,7 +909,17 @@ static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
 
        /* we expect a conflict with a 'System RAM' region */
        parent = request_resource_conflict(&iomem_resource, res);
-       return parent ? request_resource(parent, res) : 0;
+       ret = parent ? request_resource(parent, res) : 0;
+
+       /*
+        * Given that efi_mem_reserve_iomem() can be called at any
+        * time, only call memblock_reserve() if the architecture
+        * keeps the infrastructure around.
+        */
+       if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret)
+               memblock_reserve(addr, size);
+
+       return ret;
 }
 
 int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
index aa8da0a..ae87dde 100644 (file)
@@ -630,8 +630,8 @@ efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image,
  * @image:     EFI loaded image protocol
  * @load_addr: pointer to loaded initrd
  * @load_size: size of loaded initrd
- * @soft_limit:        preferred size of allocated memory for loading the initrd
- * @hard_limit:        minimum size of allocated memory
+ * @soft_limit:        preferred address for loading the initrd
+ * @hard_limit:        upper limit address for loading the initrd
  *
  * Return:     status code
  */
index d8bc013..38722d2 100644 (file)
@@ -180,7 +180,10 @@ void __init efi_mokvar_table_init(void)
                pr_err("EFI MOKvar config table is not valid\n");
                return;
        }
-       efi_mem_reserve(efi.mokvar_table, map_size_needed);
+
+       if (md.type == EFI_BOOT_SERVICES_DATA)
+               efi_mem_reserve(efi.mokvar_table, map_size_needed);
+
        efi_mokvar_table_size = map_size_needed;
 }
 
index c1955d3..8f66567 100644 (file)
@@ -62,9 +62,11 @@ int __init efi_tpm_eventlog_init(void)
        tbl_size = sizeof(*log_tbl) + log_tbl->size;
        memblock_reserve(efi.tpm_log, tbl_size);
 
-       if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR ||
-           log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) {
-               pr_warn(FW_BUG "TPM Final Events table missing or invalid\n");
+       if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) {
+               pr_info("TPM Final Events table not present\n");
+               goto out;
+       } else if (log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) {
+               pr_warn(FW_BUG "TPM Final Events table invalid\n");
                goto out;
        }
 
index 4b9157a..50b321a 100644 (file)
@@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 
        ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn,
                               mpc8xxx_gpio_irq_cascade,
-                              IRQF_SHARED, "gpio-cascade",
+                              IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade",
                               mpc8xxx_gc);
        if (ret) {
                dev_err(&pdev->dev,
index 5022e0a..0f5d17f 100644 (file)
@@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
        struct resource *res;
        int ret, irq;
 
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0)
+       irq = platform_get_irq_optional(pdev, 0);
+       if (irq < 0 && irq != -ENXIO)
                return irq;
 
        res = platform_get_resource(pdev, IORESOURCE_IO, 0);
@@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 
        pm_runtime_enable(&pdev->dev);
 
-       if (irq) {
+       if (irq > 0) {
                struct irq_chip *irq_chip = &gpio->irq_chip;
                u8 irq_status;
 
index c0316ea..8ac6eb9 100644 (file)
@@ -619,6 +619,13 @@ struct amdgpu_video_codec_info {
        u32 max_level;
 };
 
+#define codec_info_build(type, width, height, level) \
+                        .codec_type = type,\
+                        .max_width = width,\
+                        .max_height = height,\
+                        .max_pixels_per_frame = height * width,\
+                        .max_level = level,
+
 struct amdgpu_video_codecs {
        const u32 codec_count;
        const struct amdgpu_video_codec_info *codec_array;
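
Expanded, one array entry built with the new macro is equivalent to the designated-initializer block it replaces in nv.c below (codec index abbreviated here for brevity):

        /* {codec_info_build(IDX_HEVC, 4096, 2304, 0)} expands to: */
        {
                .codec_type = IDX_HEVC,
                .max_width = 4096,
                .max_height = 2304,
                .max_pixels_per_frame = 2304 * 4096,
                .max_level = 0,
        },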
index 84a1b4b..6cc0d4f 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/power_supply.h>
 #include <linux/pm_runtime.h>
+#include <linux/suspend.h>
 #include <acpi/video.h>
 #include <acpi/actbl.h>
 
@@ -1042,7 +1043,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
 #if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE)
        if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
                if (adev->flags & AMD_IS_APU)
-                       return true;
+                       return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
        }
 #endif
        return false;
index d303e88..f3fd5ec 100644 (file)
@@ -3504,13 +3504,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        r = amdgpu_device_get_job_timeout_settings(adev);
        if (r) {
                dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-               goto failed_unmap;
+               return r;
        }
 
        /* early init functions */
        r = amdgpu_device_ip_early_init(adev);
        if (r)
-               goto failed_unmap;
+               return r;
 
        /* doorbell bar mapping and doorbell index init*/
        amdgpu_device_doorbell_init(adev);
@@ -3736,10 +3736,6 @@ release_ras_con:
 failed:
        amdgpu_vf_error_trans_all(adev);
 
-failed_unmap:
-       iounmap(adev->rmmio);
-       adev->rmmio = NULL;
-
        return r;
 }
 
index abb9288..361b86b 100644 (file)
@@ -1190,6 +1190,10 @@ static const struct pci_device_id pciidlist[] = {
        /* Van Gogh */
        {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU},
 
+       /* Yellow Carp */
+       {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+       {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+
        /* Navy_Flounder */
        {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
        {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
index d0d9bc4..854fc49 100644 (file)
@@ -255,6 +255,15 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
        if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
                return -EPERM;
 
+       /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings
+        * for debugger access to invisible VRAM. Should have used MAP_SHARED
+        * instead. Clearing VM_MAYWRITE prevents the mapping from ever
+        * becoming writable and makes is_cow_mapping(vm_flags) false.
+        */
+       if (is_cow_mapping(vma->vm_flags) &&
+           !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+               vma->vm_flags &= ~VM_MAYWRITE;
+
        return drm_gem_ttm_mmap(obj, vma);
 }
 
index f5e9c02..a64b2c7 100644 (file)
@@ -3300,6 +3300,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
 };
@@ -3379,6 +3380,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
@@ -3445,6 +3447,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG,  0x00000020, 0x00000020)
index 94a2c07..94d029d 100644 (file)
 #include "smuio_v11_0.h"
 #include "smuio_v11_0_6.h"
 
-#define codec_info_build(type, width, height, level) \
-                        .codec_type = type,\
-                        .max_width = width,\
-                        .max_height = height,\
-                        .max_pixels_per_frame = height * width,\
-                        .max_level = level,
-
 static const struct amd_ip_funcs nv_common_ip_funcs;
 
 /* Navi */
 static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
 };
 
 static const struct amdgpu_video_codecs nv_video_codecs_encode =
@@ -101,55 +82,13 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode =
 /* Navi1x */
 static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs nv_video_codecs_decode =
@@ -161,62 +100,14 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =
 /* Sienna Cichlid */
 static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs sc_video_codecs_decode =
@@ -228,80 +119,20 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode =
 /* SRIOV Sienna Cichlid, not const since data is controlled by host */
 static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
 };
 
 static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };
 
 static struct amdgpu_video_codecs sriov_sc_video_codecs_encode =
@@ -333,6 +164,19 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {
        .codec_array = NULL,
 };
 
+/* Yellow Carp */
+static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+};
+
+static const struct amdgpu_video_codecs yc_video_codecs_decode = {
+       .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array),
+       .codec_array = yc_video_codecs_decode_array,
+};
+
 static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
                                 const struct amdgpu_video_codecs **codecs)
 {
@@ -353,12 +197,17 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
        case CHIP_NAVY_FLOUNDER:
        case CHIP_DIMGREY_CAVEFISH:
        case CHIP_VANGOGH:
-       case CHIP_YELLOW_CARP:
                if (encode)
                        *codecs = &nv_video_codecs_encode;
                else
                        *codecs = &sc_video_codecs_decode;
                return 0;
+       case CHIP_YELLOW_CARP:
+               if (encode)
+                       *codecs = &nv_video_codecs_encode;
+               else
+                       *codecs = &yc_video_codecs_decode;
+               return 0;
        case CHIP_BEIGE_GOBY:
                if (encode)
                        *codecs = &bg_video_codecs_encode;
@@ -1387,7 +1236,10 @@ static int nv_common_early_init(void *handle)
                        AMD_PG_SUPPORT_VCN |
                        AMD_PG_SUPPORT_VCN_DPG |
                        AMD_PG_SUPPORT_JPEG;
-               adev->external_rev_id = adev->rev_id + 0x01;
+               if (adev->pdev->device == 0x1681)
+                       adev->external_rev_id = adev->rev_id + 0x19;
+               else
+                       adev->external_rev_id = adev->rev_id + 0x01;
                break;
        default:
                /* FIXME: not supported yet */
index 618e5b6..536d41f 100644 (file)
@@ -67,7 +67,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
 
        err = psp_init_asd_microcode(psp, chip_name);
        if (err)
-               goto out;
+               return err;
 
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
        err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
@@ -80,7 +80,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
        } else {
                err = amdgpu_ucode_validate(adev->psp.ta_fw);
                if (err)
-                       goto out2;
+                       goto out;
 
                ta_hdr = (const struct ta_firmware_header_v1_0 *)
                                 adev->psp.ta_fw->data;
@@ -105,10 +105,9 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
 
        return 0;
 
-out2:
+out:
        release_firmware(adev->psp.ta_fw);
        adev->psp.ta_fw = NULL;
-out:
        if (err) {
                dev_err(adev->dev,
                        "psp v12.0: Failed to load firmware \"%s\"\n",
index b024364..b7d350b 100644 (file)
 /* Vega, Raven, Arcturus */
 static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
 };
 
 static const struct amdgpu_video_codecs vega_video_codecs_encode =
@@ -113,48 +101,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
 /* Vega */
 static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 };
 
 static const struct amdgpu_video_codecs vega_video_codecs_decode =
@@ -166,55 +118,13 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
 /* Raven */
 static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
 };
 
 static const struct amdgpu_video_codecs rv_video_codecs_decode =
@@ -226,55 +136,13 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
 /* Renoir, Arcturus */
 static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs rn_video_codecs_decode =
index d3a2a5f..b53f49a 100644 (file)
@@ -2429,9 +2429,9 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
        max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
        min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
 
-       if (caps->ext_caps->bits.oled == 1 ||
+       if (caps->ext_caps->bits.oled == 1 /*||
            caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
-           caps->ext_caps->bits.hdr_aux_backlight_control == 1)
+           caps->ext_caps->bits.hdr_aux_backlight_control == 1*/)
                caps->aux_support = true;
 
        if (amdgpu_backlight == 0)
index 6e0c5c6..a5331b9 100644 (file)
@@ -197,7 +197,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
 
        REG_UPDATE(DENTIST_DISPCLK_CNTL,
                        DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider);
-//     REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100);
+       REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000);
        REG_UPDATE(DENTIST_DISPCLK_CNTL,
                        DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider);
        REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100);
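
For context: in dc's register helpers, REG_WAIT(reg, field, value, delay_us,
tries) polls until the field reads the expected value, so re-enabling the
DISPCLK wait with (50, 1000) allows up to ~50 ms instead of the ~0.5 ms the
commented-out call would have allowed. A generic sketch of that polling shape
(illustrative only; userspace usleep() stands in for the kernel delay):

    #include <stdbool.h>
    #include <unistd.h>

    /* Poll until read_field() returns want, sleeping delay_us between
     * attempts, and give up after max_tries polls. */
    static bool poll_field(unsigned int (*read_field)(void), unsigned int want,
                           unsigned int delay_us, unsigned int max_tries)
    {
            for (unsigned int i = 0; i < max_tries; i++) {
                    if (read_field() == want)
                            return true;
                    usleep(delay_us);
            }
            return false;
    }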
index 513676a..af7004b 100644 (file)
@@ -190,6 +190,10 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
                        &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz,
                        &num_levels);
 
+       /* SOCCLK */
+       dcn3_init_single_clock(clk_mgr, PPCLK_SOCCLK,
+                                       &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz,
+                                       &num_levels);
        // DPREFCLK ???
 
        /* DISPCLK */
index 7b7d884..4a4894e 100644 (file)
 
 #include "dc_dmub_srv.h"
 
+#include "yellow_carp_offset.h"
+
+#define regCLK1_CLK_PLL_REQ                    0x0237
+#define regCLK1_CLK_PLL_REQ_BASE_IDX           0
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT    0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT   0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT   0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK      0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK     0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK     0xFFFF0000L
+
+#define REG(reg_name) \
+       (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+
 #define TO_CLK_MGR_DCN31(clk_mgr)\
        container_of(clk_mgr, struct clk_mgr_dcn31, base)
 
@@ -124,10 +139,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
         * also if safe to lower is false, we just go in the higher state
         */
        if (safe_to_lower) {
-               if (new_clocks->z9_support == DCN_Z9_SUPPORT_ALLOW &&
-                               new_clocks->z9_support != clk_mgr_base->clks.z9_support) {
+               if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+                               new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
                        dcn31_smu_set_Z9_support(clk_mgr, true);
-                       clk_mgr_base->clks.z9_support = new_clocks->z9_support;
+                       clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
                }
 
                if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
@@ -148,10 +163,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
                        }
                }
        } else {
-               if (new_clocks->z9_support == DCN_Z9_SUPPORT_DISALLOW &&
-                               new_clocks->z9_support != clk_mgr_base->clks.z9_support) {
+               if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
+                               new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
                        dcn31_smu_set_Z9_support(clk_mgr, false);
-                       clk_mgr_base->clks.z9_support = new_clocks->z9_support;
+                       clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
                }
 
                if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
@@ -229,7 +244,32 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
 {
-       return 0;
+       /* get FbMult value */
+       struct fixed31_32 pll_req;
+       unsigned int fbmult_frac_val = 0;
+       unsigned int fbmult_int_val = 0;
+
+       /*
+        * The register value of fbmult is in 8.16 format; convert it to 31.32
+        * to leverage the fixed-point operations available in the driver.
+        */
+
+       REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part */
+       REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */
+
+       pll_req = dc_fixpt_from_int(fbmult_int_val);
+
+       /*
+        * since the fractional part is only 16 bits in the register definition but 32 bits
+        * in our fixed-point definition, we need to shift left by 16 to obtain the correct value
+        */
+       pll_req.value |= fbmult_frac_val << 16;
+
+       /* multiply by REFCLK period */
+       pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+       /* integer part is now VCO frequency in kHz */
+       return dc_fixpt_floor(pll_req);
 }
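
The conversion above can be sanity-checked with plain integer math. A
standalone sketch that mirrors the 8.16 fixed-point arithmetic without the
dc_fixpt helpers, using the 48000 kHz DFS reference this patch sets in
dcn31_clk_mgr_construct (FbMult = 100.5 is an arbitrary example value):

    #include <stdint.h>
    #include <stdio.h>

    /* fbmult arrives as an 8-bit integer part and a 16-bit fraction;
     * combine them in 16.16, multiply by the reference clock, and floor
     * by dropping the fractional bits. */
    static uint32_t vco_freq_khz(uint32_t fb_int, uint32_t fb_frac,
                                 uint32_t ref_khz)
    {
            uint64_t fbmult_16_16 = ((uint64_t)fb_int << 16) | fb_frac;

            return (uint32_t)((fbmult_16_16 * ref_khz) >> 16);
    }

    int main(void)
    {
            /* 100.5 * 48000 kHz = 4824000 kHz */
            printf("%u\n", vco_freq_khz(100, 0x8000, 48000));
            return 0;
    }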
 
 static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base)
@@ -246,7 +286,7 @@ static void dcn31_init_clocks(struct clk_mgr *clk_mgr)
        clk_mgr->clks.p_state_change_support = true;
        clk_mgr->clks.prev_p_state_change_support = true;
        clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
-       clk_mgr->clks.z9_support = DCN_Z9_SUPPORT_UNKNOWN;
+       clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
 }
 
 static bool dcn31_are_clock_states_equal(struct dc_clocks *a,
@@ -260,7 +300,7 @@ static bool dcn31_are_clock_states_equal(struct dc_clocks *a,
                return false;
        else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
                return false;
-       else if (a->z9_support != b->z9_support)
+       else if (a->zstate_support != b->zstate_support)
                return false;
        else if (a->dtbclk_en != b->dtbclk_en)
                return false;
@@ -592,6 +632,7 @@ void dcn31_clk_mgr_construct(
        clk_mgr->base.dprefclk_ss_percentage = 0;
        clk_mgr->base.dprefclk_ss_divider = 1000;
        clk_mgr->base.ss_on_dprefclk = false;
+       clk_mgr->base.dfs_ref_freq_khz = 48000;
 
        clk_mgr->smu_wm_set.wm_set = (struct dcn31_watermarks *)dm_helpers_allocate_gpu_mem(
                                clk_mgr->base.base.ctx,
index cc21cf7..f8f1005 100644 (file)
 #define __DCN31_CLK_MGR_H__
 #include "clk_mgr_internal.h"
 
-//CLK1_CLK_PLL_REQ
-#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT                                                                   0x0
-#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT                                                                  0xc
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT                                                                  0x10
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK                                                                     0x000001FFL
-#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK                                                                    0x0000F000L
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK                                                                    0xFFFF0000L
-//CLK1_CLK0_DFS_CNTL
-#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT                                                               0x0
-#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK                                                                 0x0000007FL
-/*DPREF clock related*/
-#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-
-//CLK3_0_CLK3_CLK_PLL_REQ
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT                                                            0x0
-#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT                                                           0xc
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT                                                           0x10
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK                                                              0x000001FFL
-#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK                                                             0x0000F000L
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK                                                             0xFFFF0000L
-
-#define mmCLK0_CLK3_DFS_CNTL                            0x16C60
-#define mmCLK00_CLK0_CLK3_DFS_CNTL                      0x16C60
-#define mmCLK01_CLK0_CLK3_DFS_CNTL                      0x16E60
-#define mmCLK02_CLK0_CLK3_DFS_CNTL                      0x17060
-#define mmCLK03_CLK0_CLK3_DFS_CNTL                      0x17260
-
-#define mmCLK0_CLK_PLL_REQ                              0x16C10
-#define mmCLK00_CLK0_CLK_PLL_REQ                        0x16C10
-#define mmCLK01_CLK0_CLK_PLL_REQ                        0x16E10
-#define mmCLK02_CLK0_CLK_PLL_REQ                        0x17010
-#define mmCLK03_CLK0_CLK_PLL_REQ                        0x17210
-
-#define mmCLK1_CLK_PLL_REQ                              0x1B00D
-#define mmCLK10_CLK1_CLK_PLL_REQ                        0x1B00D
-#define mmCLK11_CLK1_CLK_PLL_REQ                        0x1B20D
-#define mmCLK12_CLK1_CLK_PLL_REQ                        0x1B40D
-#define mmCLK13_CLK1_CLK_PLL_REQ                        0x1B60D
-
-#define mmCLK2_CLK_PLL_REQ                              0x17E0D
-
-/*AMCLK*/
-#define mmCLK11_CLK1_CLK0_DFS_CNTL                      0x1B23F
-#define mmCLK11_CLK1_CLK_PLL_REQ                        0x1B20D
-#endif
-
 struct dcn31_watermarks;
 
 struct dcn31_smu_watermark_set {
index 6da226b..9fb8c46 100644 (file)
@@ -1820,8 +1820,7 @@ bool perform_link_training_with_retries(
                                         */
                                        panel_mode = DP_PANEL_MODE_DEFAULT;
                                }
-                       } else
-                               panel_mode = DP_PANEL_MODE_DEFAULT;
+                       }
                }
 #endif
 
@@ -4650,7 +4649,10 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link)
                }
        }
 
-       if (link->dpcd_caps.panel_mode_edp) {
+       if (link->dpcd_caps.panel_mode_edp &&
+               (link->connector_signal == SIGNAL_TYPE_EDP ||
+                (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
+                 link->is_internal_display))) {
                return DP_PANEL_MODE_EDP;
        }
 
@@ -4914,9 +4916,7 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link)
 {
        uint32_t default_backlight;
 
-       if (link &&
-               (link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
-               link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) {
+       if (link && link->dpcd_sink_ext_caps.bits.oled == 1) {
                if (!dc_link_read_default_bl_aux(link, &default_backlight))
                        default_backlight = 150000;
                // if < 5 nits or > 5000, it might be wrong readback
index a6a6724..1596f6b 100644 (file)
@@ -1062,7 +1062,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
         * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
         * did not show such problems, so this seems to be the exception.
         */
-       if (plane_state->ctx->dce_version != DCE_VERSION_11_0)
+       if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
                pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
        else
                pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
index 45640f1..8dcea8f 100644 (file)
@@ -354,10 +354,10 @@ enum dcn_pwr_state {
 };
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-enum dcn_z9_support_state {
-       DCN_Z9_SUPPORT_UNKNOWN,
-       DCN_Z9_SUPPORT_ALLOW,
-       DCN_Z9_SUPPORT_DISALLOW,
+enum dcn_zstate_support_state {
+       DCN_ZSTATE_SUPPORT_UNKNOWN,
+       DCN_ZSTATE_SUPPORT_ALLOW,
+       DCN_ZSTATE_SUPPORT_DISALLOW,
 };
 #endif
 /*
@@ -378,7 +378,7 @@ struct dc_clocks {
        int dramclk_khz;
        bool p_state_change_support;
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-       enum dcn_z9_support_state z9_support;
+       enum dcn_zstate_support_state zstate_support;
        bool dtbclk_en;
 #endif
        enum dcn_pwr_state pwr_state;
index df6539e..0464a8f 100644 (file)
@@ -636,6 +636,7 @@ struct dce_hwseq_registers {
        uint32_t ODM_MEM_PWR_CTRL3;
        uint32_t DMU_MEM_PWR_CNTL;
        uint32_t MMHUBBUB_MEM_PWR_CNTL;
+       uint32_t DCHUBBUB_ARB_HOSTVM_CNTL;
 };
  /* set field name */
 #define HWS_SF(blk_name, reg_name, field_name, post_fix)\
@@ -1110,7 +1111,8 @@ struct dce_hwseq_registers {
        type DOMAIN_POWER_FORCEON;\
        type DOMAIN_POWER_GATE;\
        type DOMAIN_PGFSM_PWR_STATUS;\
-       type HPO_HDMISTREAMCLK_G_GATE_DIS;
+       type HPO_HDMISTREAMCLK_G_GATE_DIS;\
+       type DISABLE_HOSTVM_FORCE_ALLOW_PSTATE;
 
 struct dce_hwseq_shift {
        HWSEQ_REG_FIELD_LIST(uint8_t)
index 673b93f..cb9767d 100644 (file)
@@ -217,6 +217,8 @@ static void dpp1_dscl_set_lb(
        const struct line_buffer_params *lb_params,
        enum lb_memory_config mem_size_config)
 {
+       uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */
+
        /* LB */
        if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) {
                /* DSCL caps: pixel data processed in fixed format */
@@ -239,9 +241,12 @@ static void dpp1_dscl_set_lb(
                        LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
        }
 
+       if (dpp->base.caps->max_lb_partitions == 31)
+               max_partitions = 31;
+
        REG_SET_2(LB_MEMORY_CTRL, 0,
                MEMORY_CONFIG, mem_size_config,
-               LB_MAX_PARTITIONS, 63);
+               LB_MAX_PARTITIONS, max_partitions);
 }
 
 static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
index 1b05a37..b173fa3 100644 (file)
@@ -2093,8 +2093,10 @@ int dcn20_populate_dml_pipes_from_context(
                                - timing->v_border_bottom;
                pipes[pipe_cnt].pipe.dest.htotal = timing->h_total;
                pipes[pipe_cnt].pipe.dest.vtotal = v_total;
-               pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable;
-               pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable;
+               pipes[pipe_cnt].pipe.dest.hactive =
+                       timing->h_addressable + timing->h_border_left + timing->h_border_right;
+               pipes[pipe_cnt].pipe.dest.vactive =
+                       timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
                pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE;
                pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0;
                if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
@@ -3079,6 +3081,37 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
        return false;
 }
 
+static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+       int plane_count;
+       int i;
+
+       plane_count = 0;
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               if (context->res_ctx.pipe_ctx[i].plane_state)
+                       plane_count++;
+       }
+
+       /*
+        * Zstate is allowed in the following scenarios:
+        *      1. Single eDP with PSR enabled
+        *      2. 0 planes (no memory requests)
+        *      3. Single eDP without PSR but with a stutter period > 5 ms
+        */
+       if (plane_count == 0)
+               return DCN_ZSTATE_SUPPORT_ALLOW;
+       else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+               struct dc_link *link = context->streams[0]->sink->link;
+
+               if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled)
+                               || context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
+                       return DCN_ZSTATE_SUPPORT_ALLOW;
+               else
+                       return DCN_ZSTATE_SUPPORT_DISALLOW;
+       } else
+               return DCN_ZSTATE_SUPPORT_DISALLOW;
+}
+
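
A standalone restatement of the three rules above (parameter names are
hypothetical, not the driver's types):

    #include <stdbool.h>
    #include <stdio.h>

    static bool zstate_allowed(int plane_count, bool single_edp_stream,
                               bool psr_on_link0, double stutter_period_us)
    {
            if (plane_count == 0)
                    return true;    /* no memory requests */
            if (single_edp_stream &&
                (psr_on_link0 || stutter_period_us > 5000.0))
                    return true;    /* PSR enabled, or stutter period > 5 ms */
            return false;
    }

    int main(void)
    {
            printf("%d\n", zstate_allowed(1, true, false, 6000.0)); /* 1 */
            printf("%d\n", zstate_allowed(2, false, false, 0.0));   /* 0 */
            return 0;
    }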
 void dcn20_calculate_dlg_params(
                struct dc *dc, struct dc_state *context,
                display_e2e_pipe_params_st *pipes,
@@ -3086,7 +3119,6 @@ void dcn20_calculate_dlg_params(
                int vlevel)
 {
        int i, pipe_idx;
-       int plane_count;
 
        /* Writeback MCIF_WB arbitration parameters */
        dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
@@ -3102,17 +3134,7 @@ void dcn20_calculate_dlg_params(
                                                        != dm_dram_clock_change_unsupported;
        context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
 
-       context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ?
-                       DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW;
-
-       plane_count = 0;
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (context->res_ctx.pipe_ctx[i].plane_state)
-                       plane_count++;
-       }
-
-       if (plane_count == 0)
-               context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW;
+       context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
 
        context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
 
index f3d98e3..bf0a198 100644 (file)
@@ -109,6 +109,7 @@ struct _vcs_dpi_ip_params_st dcn2_1_ip = {
        .max_page_table_levels = 4,
        .pte_chunk_size_kbytes = 2,
        .meta_chunk_size_kbytes = 2,
+       .min_meta_chunk_size_bytes = 256,
        .writeback_chunk_size_kbytes = 2,
        .line_buffer_size_bits = 789504,
        .is_line_buffer_bpp_fixed = 0,
index 2140b75..23a52d4 100644 (file)
@@ -383,13 +383,6 @@ bool dpp3_get_optimal_number_of_taps(
        int min_taps_y, min_taps_c;
        enum lb_memory_config lb_config;
 
-       /* Some ASICs does not support  FP16 scaling, so we reject modes require this*/
-       if (scl_data->viewport.width  != scl_data->h_active &&
-               scl_data->viewport.height != scl_data->v_active &&
-               dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT &&
-               scl_data->format == PIXEL_FORMAT_FP16)
-               return false;
-
        if (scl_data->viewport.width > scl_data->h_active &&
                dpp->ctx->dc->debug.max_downscale_src_width != 0 &&
                scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
@@ -1440,15 +1433,6 @@ bool dpp3_construct(
        dpp->tf_shift = tf_shift;
        dpp->tf_mask = tf_mask;
 
-       dpp->lb_pixel_depth_supported =
-               LB_PIXEL_DEPTH_18BPP |
-               LB_PIXEL_DEPTH_24BPP |
-               LB_PIXEL_DEPTH_30BPP |
-               LB_PIXEL_DEPTH_36BPP;
-
-       dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY;
-       dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/
-
        return true;
 }
 
index 3fa86cd..ac644ae 100644 (file)
        SRI(COLOR_KEYER_BLUE, CNVC_CFG, id), \
        SRI(CURSOR_CONTROL, CURSOR0_, id),\
        SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\
+       SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \
        SRI(DSCL_MEM_PWR_CTRL, DSCL, id)
 
 #define DPP_REG_LIST_DCN30(id)\
        SRI(CM_SHAPER_LUT_DATA, CM, id),\
        SRI(CM_MEM_PWR_CTRL2, CM, id), \
        SRI(CM_MEM_PWR_STATUS2, CM, id), \
-       SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \
-       SRI(DSCL_MEM_PWR_CTRL, DSCL, id), \
        SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_B, CM, id),\
        SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_G, CM, id),\
        SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_R, CM, id),\
index 16a75ba..7d3ff5d 100644 (file)
@@ -1398,11 +1398,18 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
                        dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
                        dcn3_02_soc.clock_limits[i].dppclk_mhz  = max_dppclk_mhz;
                        dcn3_02_soc.clock_limits[i].phyclk_mhz  = max_phyclk_mhz;
-                       dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz;
+                       /* Populate from bw_params for DTBCLK, SOCCLK */
+                       if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+                               dcn3_02_soc.clock_limits[i].dtbclk_mhz  = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz;
+                       else
+                               dcn3_02_soc.clock_limits[i].dtbclk_mhz  = bw_params->clk_table.entries[i].dtbclk_mhz;
+                       if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+                               dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz;
+                       else
+                               dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
                        /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */
-                       /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+                       /* FCLK, PHYCLK_D18, DSCCLK */
                        dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz;
-                       dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[0].socclk_mhz;
                        dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz;
                }
                /* re-init DML with updated bb */
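
Both this hunk and the dcn303 one below use the same fill-forward rule: a
zero (unpopulated) DTBCLK or SOCCLK entry inherits the previous level's
value. A minimal sketch of that pattern:

    #include <stdio.h>

    int main(void)
    {
            /* clock table with unpopulated (zero) middle levels */
            unsigned int entries[4] = { 600, 0, 0, 1200 };
            unsigned int clk[4];

            for (int i = 0; i < 4; i++)
                    clk[i] = (!entries[i] && i > 0) ? clk[i - 1] : entries[i];

            for (int i = 0; i < 4; i++)
                    printf("%u ", clk[i]);  /* prints: 600 600 600 1200 */
            printf("\n");
            return 0;
    }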
index 34b8946..833ab13 100644 (file)
@@ -1326,11 +1326,18 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
                        dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
                        dcn3_03_soc.clock_limits[i].dppclk_mhz  = max_dppclk_mhz;
                        dcn3_03_soc.clock_limits[i].phyclk_mhz  = max_phyclk_mhz;
-                       dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz;
+                       /* Populate from bw_params for DTBCLK, SOCCLK */
+                       if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+                               dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz;
+                       else
+                               dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+                       if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+                               dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz;
+                       else
+                               dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
                        /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */
-                       /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+                       /* FCLK, PHYCLK_D18, DSCCLK */
                        dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz;
-                       dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[0].socclk_mhz;
                        dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz;
                }
                /* re-init DML with updated bb */
index 836864a..6ac6faf 100644 (file)
@@ -47,6 +47,7 @@
 #include "dce/dmub_outbox.h"
 #include "dc_link_dp.h"
 #include "inc/link_dpcd.h"
+#include "dcn10/dcn10_hw_sequencer.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -594,3 +595,20 @@ bool dcn31_is_abm_supported(struct dc *dc,
        }
        return false;
 }
+
+static void apply_riommu_invalidation_wa(struct dc *dc)
+{
+       struct dce_hwseq *hws = dc->hwseq;
+
+       if (!hws->wa.early_riommu_invalidation)
+               return;
+
+       REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, 0);
+}
+
+void dcn31_init_pipes(struct dc *dc, struct dc_state *context)
+{
+       dcn10_init_pipes(dc, context);
+       apply_riommu_invalidation_wa(dc);
+
+}
index ff72f0f..40dfebe 100644 (file)
@@ -52,5 +52,6 @@ void dcn31_reset_hw_ctx_wrap(
                struct dc_state *context);
 bool dcn31_is_abm_supported(struct dc *dc,
                struct dc_state *context, struct dc_stream_state *stream);
+void dcn31_init_pipes(struct dc *dc, struct dc_state *context);
 
 #endif /* __DC_HWSS_DCN31_H__ */
index e3048f8..aaf2dbd 100644 (file)
@@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
        .set_flip_control_gsl = dcn20_set_flip_control_gsl,
        .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
        .calc_vupdate_position = dcn10_calc_vupdate_position,
-       .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations,
        .set_backlight_level = dcn21_set_backlight_level,
        .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
        .set_pipe = dcn21_set_pipe,
@@ -104,7 +103,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
 };
 
 static const struct hwseq_private_funcs dcn31_private_funcs = {
-       .init_pipes = dcn10_init_pipes,
+       .init_pipes = dcn31_init_pipes,
        .update_plane_addr = dcn20_update_plane_addr,
        .plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
        .update_mpcc = dcn20_update_mpcc,
index c67bc95..38c010a 100644 (file)
@@ -220,6 +220,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
        .sr_exit_z8_time_us = 402.0,
        .sr_enter_plus_exit_z8_time_us = 520.0,
        .writeback_latency_us = 12.0,
+       .dram_channel_width_bytes = 4,
        .round_trip_ping_latency_dcfclk_cycles = 106,
        .urgent_latency_pixel_data_only_us = 4.0,
        .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
@@ -741,6 +742,7 @@ static const struct dccg_mask dccg_mask = {
 
 #define HWSEQ_DCN31_REG_LIST()\
        SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+       SR(DCHUBBUB_ARB_HOSTVM_CNTL), \
        SR(DIO_MEM_PWR_CTRL), \
        SR(ODM_MEM_PWR_CTRL3), \
        SR(DMU_MEM_PWR_CNTL), \
@@ -801,6 +803,7 @@ static const struct dce_hwseq_registers hwseq_reg = {
 #define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\
        HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
        HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+       HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \
        HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
        HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
        HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
@@ -1299,6 +1302,7 @@ static struct dce_hwseq *dcn31_hwseq_create(
                hws->regs = &hwseq_reg;
                hws->shifts = &hwseq_shift;
                hws->masks = &hwseq_mask;
+               hws->wa.early_riommu_invalidation = true;
        }
        return hws;
 }
index c26e742..6655bb9 100644 (file)
@@ -841,6 +841,9 @@ static bool CalculatePrefetchSchedule(
        else
                *DestinationLinesForPrefetch = dst_y_prefetch_equ;
 
+       // Limit to prevent overflow in DST_Y_PREFETCH register
+       *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75);
+
        dml_print("DML: VStartup: %d\n", VStartup);
        dml_print("DML: TCalc: %f\n", TCalc);
        dml_print("DML: TWait: %f\n", TWait);
@@ -4889,7 +4892,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                }
                        } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
                                        && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
-                                               || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode));
+                                               || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode));
 
                        if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
                                mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
index 2a0db2b..9ac9d5e 100644 (file)
@@ -289,6 +289,9 @@ struct dpp_caps {
        /* DSCL processing pixel data in fixed or float format */
        enum dscl_data_processing_format dscl_data_proc_format;
 
+       /* max LB partitions */
+       unsigned int max_lb_partitions;
+
        /* Calculates the number of partitions in the line buffer.
         * The implementation of this function is overloaded for
         * different versions of DSCL LB.
index f7f7e4f..082549f 100644 (file)
@@ -41,6 +41,7 @@ struct dce_hwseq_wa {
        bool DEGVIDCN10_254;
        bool DEGVIDCN21;
        bool disallow_self_refresh_during_multi_plane_transition;
+       bool early_riommu_invalidation;
 };
 
 struct hwseq_wa_state {
index 6102660..35fa0d8 100644 (file)
 #define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow  0x41
 
 #define PPSMC_MSG_GfxDriverResetRecovery       0x42
-#define PPSMC_Message_Count                    0x43
+#define PPSMC_MSG_BoardPowerCalibration        0x43
+#define PPSMC_Message_Count                    0x44
 
 //PPSMC Reset Types
 #define PPSMC_RESET_TYPE_WARM_RESET              0x00
index 89a16dc..1d3765b 100644 (file)
        __SMU_DUMMY_MAP(DisableDeterminism),            \
        __SMU_DUMMY_MAP(SetUclkDpmMode),                \
        __SMU_DUMMY_MAP(LightSBR),                      \
-       __SMU_DUMMY_MAP(GfxDriverResetRecovery),
+       __SMU_DUMMY_MAP(GfxDriverResetRecovery),        \
+       __SMU_DUMMY_MAP(BoardPowerCalibration),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
index 1962a58..f61b5c9 100644 (file)
@@ -34,7 +34,7 @@
 #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE
 #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03
 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF
-#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0x9
+#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD
 
 /* MP Apertures */
 #define MP0_Public                     0x03800000
index 9316a72..cb5485c 100644 (file)
@@ -134,6 +134,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT
        MSG_MAP(DisableDeterminism,                  PPSMC_MSG_DisableDeterminism,              0),
        MSG_MAP(SetUclkDpmMode,                      PPSMC_MSG_SetUclkDpmMode,                  0),
        MSG_MAP(GfxDriverResetRecovery,              PPSMC_MSG_GfxDriverResetRecovery,          0),
+       MSG_MAP(BoardPowerCalibration,               PPSMC_MSG_BoardPowerCalibration,           0),
 };
 
 static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = {
@@ -440,6 +441,39 @@ static int aldebaran_setup_pptable(struct smu_context *smu)
        return ret;
 }
 
+static bool aldebaran_is_primary(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+
+       if (adev->smuio.funcs && adev->smuio.funcs->get_die_id)
+               return adev->smuio.funcs->get_die_id(adev) == 0;
+
+       return true;
+}
+
+static int aldebaran_run_board_btc(struct smu_context *smu)
+{
+       u32 smu_version;
+       int ret;
+
+       if (!aldebaran_is_primary(smu))
+               return 0;
+
+       ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
+       if (ret) {
+               dev_err(smu->adev->dev, "Failed to get smu version!\n");
+               return ret;
+       }
+       if (smu_version <= 0x00441d00)
+               return 0;
+
+       ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BoardPowerCalibration, NULL);
+       if (ret)
+               dev_err(smu->adev->dev, "Board power calibration failed!\n");
+
+       return ret;
+}
+
 static int aldebaran_run_btc(struct smu_context *smu)
 {
        int ret;
@@ -447,6 +481,8 @@ static int aldebaran_run_btc(struct smu_context *smu)
        ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL);
        if (ret)
                dev_err(smu->adev->dev, "RunDcBtc failed!\n");
+       else
+               ret = aldebaran_run_board_btc(smu);
 
        return ret;
 }
@@ -524,16 +560,6 @@ static int aldebaran_freqs_in_same_level(int32_t frequency1,
        return (abs(frequency1 - frequency2) <= EPSILON);
 }
 
-static bool aldebaran_is_primary(struct smu_context *smu)
-{
-       struct amdgpu_device *adev = smu->adev;
-
-       if (adev->smuio.funcs && adev->smuio.funcs->get_die_id)
-               return adev->smuio.funcs->get_die_id(adev) == 0;
-
-       return true;
-}
-
 static int aldebaran_get_smu_metrics_data(struct smu_context *smu,
                                          MetricsMember_t member,
                                          uint32_t *value)
index 98ae006..f454e04 100644 (file)
@@ -834,6 +834,9 @@ long drm_ioctl(struct file *filp,
        if (drm_dev_is_unplugged(dev))
                return -ENODEV;
 
+       if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE)
+               return -ENOTTY;
+
        is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END;
 
        if (is_driver_ioctl) {
index 5b6922e..aa667fa 100644 (file)
@@ -2166,7 +2166,8 @@ static void
 init_vbt_missing_defaults(struct drm_i915_private *i915)
 {
        enum port port;
-       int ports = PORT_A | PORT_B | PORT_C | PORT_D | PORT_E | PORT_F;
+       int ports = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) |
+                   BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F);
 
        if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915))
                return;
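
The fix matters because PORT_A..PORT_F are small consecutive enum values, not
bit flags: OR-ing the raw values collapses the set. Assuming the ports
enumerate 0..5 (the usual i915 convention), a quick check:

    #include <stdio.h>

    #define BIT(n) (1u << (n))

    enum { PORT_A, PORT_B, PORT_C, PORT_D, PORT_E, PORT_F };

    int main(void)
    {
            /* old, buggy: 0|1|2|3|4|5 == 0x7, most ports are lost */
            printf("0x%x\n", PORT_A | PORT_B | PORT_C |
                             PORT_D | PORT_E | PORT_F);
            /* new: one bit per port, yields 0x3f */
            printf("0x%x\n", BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) |
                             BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F));
            return 0;
    }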
index 3bad4e0..2d5d217 100644 (file)
@@ -11361,13 +11361,19 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
                intel_ddi_init(dev_priv, PORT_B);
                intel_ddi_init(dev_priv, PORT_C);
                vlv_dsi_init(dev_priv);
-       } else if (DISPLAY_VER(dev_priv) >= 9) {
+       } else if (DISPLAY_VER(dev_priv) == 10) {
                intel_ddi_init(dev_priv, PORT_A);
                intel_ddi_init(dev_priv, PORT_B);
                intel_ddi_init(dev_priv, PORT_C);
                intel_ddi_init(dev_priv, PORT_D);
                intel_ddi_init(dev_priv, PORT_E);
                intel_ddi_init(dev_priv, PORT_F);
+       } else if (DISPLAY_VER(dev_priv) >= 9) {
+               intel_ddi_init(dev_priv, PORT_A);
+               intel_ddi_init(dev_priv, PORT_B);
+               intel_ddi_init(dev_priv, PORT_C);
+               intel_ddi_init(dev_priv, PORT_D);
+               intel_ddi_init(dev_priv, PORT_E);
        } else if (HAS_DDI(dev_priv)) {
                u32 found;
 
index a8abc9a..4a6419d 100644 (file)
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
        struct i915_vma *vma;
@@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
                int err;
                struct intel_engine_cs *engine = eb->engine;
 
+               /* If we need to copy for the cmdparser, we will stall anyway */
+               if (eb_use_cmdparser(eb))
+                       return ERR_PTR(-EWOULDBLOCK);
+
                if (!reloc_can_use_engine(engine)) {
                        engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
                        if (!engine)
@@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
        return vma;
 }
 
-struct eb_parse_work {
-       struct dma_fence_work base;
-       struct intel_engine_cs *engine;
-       struct i915_vma *batch;
-       struct i915_vma *shadow;
-       struct i915_vma *trampoline;
-       unsigned long batch_offset;
-       unsigned long batch_length;
-       unsigned long *jump_whitelist;
-       const void *batch_map;
-       void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
-       struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-       int ret;
-       bool cookie;
-
-       cookie = dma_fence_begin_signalling();
-       ret = intel_engine_cmd_parser(pw->engine,
-                                     pw->batch,
-                                     pw->batch_offset,
-                                     pw->batch_length,
-                                     pw->shadow,
-                                     pw->jump_whitelist,
-                                     pw->shadow_map,
-                                     pw->batch_map);
-       dma_fence_end_signalling(cookie);
-
-       return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
-       struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
-       if (!IS_ERR_OR_NULL(pw->jump_whitelist))
-               kfree(pw->jump_whitelist);
-
-       if (pw->batch_map)
-               i915_gem_object_unpin_map(pw->batch->obj);
-       else
-               i915_gem_object_unpin_pages(pw->batch->obj);
-
-       i915_gem_object_unpin_map(pw->shadow->obj);
-
-       if (pw->trampoline)
-               i915_active_release(&pw->trampoline->active);
-       i915_active_release(&pw->shadow->active);
-       i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
-       .name = "eb_parse",
-       .work = __eb_parse,
-       .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
-                    struct intel_timeline *tl,
-                    struct dma_fence *fence)
-{
-       struct intel_gt_buffer_pool_node *node = vma->private;
-
-       return i915_active_ref(&node->active, tl->fence_context, fence);
-}
-
-static int
-parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
-{
-       int err;
-
-       mutex_lock(&tl->mutex);
-
-       err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
-       if (err)
-               goto unlock;
-
-       if (pw->trampoline) {
-               err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
-               if (err)
-                       goto unlock;
-       }
-
-unlock:
-       mutex_unlock(&tl->mutex);
-       return err;
-}
-
-static int eb_parse_pipeline(struct i915_execbuffer *eb,
-                            struct i915_vma *shadow,
-                            struct i915_vma *trampoline)
-{
-       struct eb_parse_work *pw;
-       struct drm_i915_gem_object *batch = eb->batch->vma->obj;
-       bool needs_clflush;
-       int err;
-
-       GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
-       GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
-
-       pw = kzalloc(sizeof(*pw), GFP_KERNEL);
-       if (!pw)
-               return -ENOMEM;
-
-       err = i915_active_acquire(&eb->batch->vma->active);
-       if (err)
-               goto err_free;
-
-       err = i915_active_acquire(&shadow->active);
-       if (err)
-               goto err_batch;
-
-       if (trampoline) {
-               err = i915_active_acquire(&trampoline->active);
-               if (err)
-                       goto err_shadow;
-       }
-
-       pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB);
-       if (IS_ERR(pw->shadow_map)) {
-               err = PTR_ERR(pw->shadow_map);
-               goto err_trampoline;
-       }
-
-       needs_clflush =
-               !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
-       pw->batch_map = ERR_PTR(-ENODEV);
-       if (needs_clflush && i915_has_memcpy_from_wc())
-               pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
-
-       if (IS_ERR(pw->batch_map)) {
-               err = i915_gem_object_pin_pages(batch);
-               if (err)
-                       goto err_unmap_shadow;
-               pw->batch_map = NULL;
-       }
-
-       pw->jump_whitelist =
-               intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len,
-                                                            trampoline);
-       if (IS_ERR(pw->jump_whitelist)) {
-               err = PTR_ERR(pw->jump_whitelist);
-               goto err_unmap_batch;
-       }
-
-       dma_fence_work_init(&pw->base, &eb_parse_ops);
-
-       pw->engine = eb->engine;
-       pw->batch = eb->batch->vma;
-       pw->batch_offset = eb->batch_start_offset;
-       pw->batch_length = eb->batch_len;
-       pw->shadow = shadow;
-       pw->trampoline = trampoline;
-
-       /* Mark active refs early for this worker, in case we get interrupted */
-       err = parser_mark_active(pw, eb->context->timeline);
-       if (err)
-               goto err_commit;
-
-       err = dma_resv_reserve_shared(pw->batch->resv, 1);
-       if (err)
-               goto err_commit;
-
-       err = dma_resv_reserve_shared(shadow->resv, 1);
-       if (err)
-               goto err_commit;
-
-       /* Wait for all writes (and relocs) into the batch to complete */
-       err = i915_sw_fence_await_reservation(&pw->base.chain,
-                                             pw->batch->resv, NULL, false,
-                                             0, I915_FENCE_GFP);
-       if (err < 0)
-               goto err_commit;
-
-       /* Keep the batch alive and unwritten as we parse */
-       dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
-
-       /* Force execution to wait for completion of the parser */
-       dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
-
-       dma_fence_work_commit_imm(&pw->base);
-       return 0;
-
-err_commit:
-       i915_sw_fence_set_error_once(&pw->base.chain, err);
-       dma_fence_work_commit_imm(&pw->base);
-       return err;
-
-err_unmap_batch:
-       if (pw->batch_map)
-               i915_gem_object_unpin_map(batch);
-       else
-               i915_gem_object_unpin_pages(batch);
-err_unmap_shadow:
-       i915_gem_object_unpin_map(shadow->obj);
-err_trampoline:
-       if (trampoline)
-               i915_active_release(&trampoline->active);
-err_shadow:
-       i915_active_release(&shadow->active);
-err_batch:
-       i915_active_release(&eb->batch->vma->active);
-err_free:
-       kfree(pw);
-       return err;
-}
-
 static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
        /*
@@ -2672,7 +2463,15 @@ static int eb_parse(struct i915_execbuffer *eb)
                goto err_trampoline;
        }
 
-       err = eb_parse_pipeline(eb, shadow, trampoline);
+       err = dma_resv_reserve_shared(shadow->resv, 1);
+       if (err)
+               goto err_trampoline;
+
+       err = intel_engine_cmd_parser(eb->engine,
+                                     eb->batch->vma,
+                                     eb->batch_start_offset,
+                                     eb->batch_len,
+                                     shadow, trampoline);
        if (err)
                goto err_unpin_batch;
 
index 4df505e..16162fc 100644
@@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg)
        intel_gt_pm_get(&eb.i915->gt);
 
        for_each_uabi_engine(eb.engine, eb.i915) {
+               if (intel_engine_requires_cmd_parser(eb.engine) ||
+                   intel_engine_using_cmd_parser(eb.engine))
+                       continue;
+
                reloc_cache_init(&eb.reloc_cache, eb.i915);
                memset(map, POISON_INUSE, 4096);
 
index 98eb48c..06024d3 100644
@@ -1977,6 +1977,21 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
        if (drm_WARN_ON(&i915->drm, !engine))
                return -EINVAL;
 
+       /*
+        * d3_entered indicates that PPGTT invalidation should be skipped on
+        * vGPU reset: it is set on the D0->D3 PCI config write and cleared
+        * after a vGPU reset while resuming.
+        * On S0ix exit the device power state also transitions from D3 to D0,
+        * as in S3 resume, but without a vGPU reset (which is triggered by the
+        * QEMU device model). After S0ix exit all engines continue to work,
+        * but d3_entered remains set, which breaks the next vGPU reset logic
+        * (the expected PPGTT invalidation is missed).
+        * Engines can only work in D0, so the first elsp write gives GVT a
+        * chance to clear d3_entered.
+        */
+       if (vgpu->d3_entered)
+               vgpu->d3_entered = false;
+
        execlist = &vgpu->submission.execlist[engine->id];
 
        execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;
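
For reference, the flag lifecycle described above can be modeled as a tiny userspace state machine; the names and events below are illustrative stand-ins, not GVT's real API:

#include <stdbool.h>
#include <stdio.h>

/* d3_entered is set on D0->D3 and normally consumed by the next vGPU
 * reset; an S0ix exit skips that reset, so the first ELSP write (engines
 * only run in D0) is the point where a stale flag gets dropped. */
struct vgpu_model {
        bool d3_entered;
};

static void pci_d0_to_d3(struct vgpu_model *v)
{
        v->d3_entered = true;
}

static void elsp_write(struct vgpu_model *v)
{
        v->d3_entered = false;  /* device is necessarily in D0 here */
}

static void vgpu_reset(struct vgpu_model *v)
{
        puts(v->d3_entered ? "skip PPGTT invalidation"
                           : "invalidate PPGTT");
        v->d3_entered = false;
}

int main(void)
{
        struct vgpu_model v = { false };

        pci_d0_to_d3(&v);       /* S0ix entry: D0 -> D3 */
        /* S0ix exit: D3 -> D0, but no vGPU reset is triggered */
        elsp_write(&v);         /* stale flag cleared here */
        vgpu_reset(&v);         /* a later reset now invalidates PPGTT */
        return 0;
}
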
index 3992c25..a3b4d99 100644
@@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
 static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
                       struct drm_i915_gem_object *src_obj,
                       unsigned long offset, unsigned long length,
-                      void *dst, const void *src)
+                      bool *needs_clflush_after)
 {
-       bool needs_clflush =
-               !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
-       if (src) {
-               GEM_BUG_ON(!needs_clflush);
-               i915_unaligned_memcpy_from_wc(dst, src + offset, length);
-       } else {
-               struct scatterlist *sg;
+       unsigned int src_needs_clflush;
+       unsigned int dst_needs_clflush;
+       void *dst, *src;
+       int ret;
+
+       ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
+       if (ret)
+               return ERR_PTR(ret);
+
+       dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
+       i915_gem_object_finish_access(dst_obj);
+       if (IS_ERR(dst))
+               return dst;
+
+       ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+       if (ret) {
+               i915_gem_object_unpin_map(dst_obj);
+               return ERR_PTR(ret);
+       }
+
+       src = ERR_PTR(-ENODEV);
+       if (src_needs_clflush && i915_has_memcpy_from_wc()) {
+               src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
+               if (!IS_ERR(src)) {
+                       i915_unaligned_memcpy_from_wc(dst,
+                                                     src + offset,
+                                                     length);
+                       i915_gem_object_unpin_map(src_obj);
+               }
+       }
+       if (IS_ERR(src)) {
+               unsigned long x, n, remain;
                void *ptr;
-               unsigned int x, sg_ofs;
-               unsigned long remain;
 
                /*
                 * We can avoid clflushing partial cachelines before the write
@@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
                 * validate up to the end of the batch.
                 */
                remain = length;
-               if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+               if (dst_needs_clflush & CLFLUSH_BEFORE)
                        remain = round_up(remain,
                                          boot_cpu_data.x86_clflush_size);
 
                ptr = dst;
                x = offset_in_page(offset);
-               sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false);
-
-               while (remain) {
-                       unsigned long sg_max = sg->length >> PAGE_SHIFT;
-
-                       for (; remain && sg_ofs < sg_max; sg_ofs++) {
-                               unsigned long len = min(remain, PAGE_SIZE - x);
-                               void *map;
-
-                               map = kmap_atomic(nth_page(sg_page(sg), sg_ofs));
-                               if (needs_clflush)
-                                       drm_clflush_virt_range(map + x, len);
-                               memcpy(ptr, map + x, len);
-                               kunmap_atomic(map);
-
-                               ptr += len;
-                               remain -= len;
-                               x = 0;
-                       }
-
-                       sg_ofs = 0;
-                       sg = sg_next(sg);
+               for (n = offset >> PAGE_SHIFT; remain; n++) {
+                       int len = min(remain, PAGE_SIZE - x);
+
+                       src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+                       if (src_needs_clflush)
+                               drm_clflush_virt_range(src + x, len);
+                       memcpy(ptr, src + x, len);
+                       kunmap_atomic(src);
+
+                       ptr += len;
+                       remain -= len;
+                       x = 0;
                }
        }
 
+       i915_gem_object_finish_access(src_obj);
+
        memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
 
        /* dst_obj is returned with vmap pinned */
+       *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
+
        return dst;
 }
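
As an aside, the round_up() a few lines above pads the validated length to whole cachelines so the trailing partial line needs no clflush before the write; a minimal sketch, assuming a typical 64-byte x86 clflush line and a simplified round_up() (the kernel macro requires a power-of-two alignment):

#include <stdio.h>

#define round_up(x, y)  ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        unsigned long remain = 100;     /* bytes left to validate */
        unsigned long clflush = 64;     /* assumed x86_clflush_size */

        /* 100 -> 128: copying whole cachelines means the trailing
         * partial line never needs flushing before the shadow write. */
        printf("%lu -> %lu\n", remain, round_up(remain, clflush));
        return 0;
}
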
 
@@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
        if (target_cmd_index == offset)
                return 0;
 
+       if (IS_ERR(jump_whitelist))
+               return PTR_ERR(jump_whitelist);
+
        if (!test_bit(target_cmd_index, jump_whitelist)) {
                DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
                          jump_target);
@@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
        return 0;
 }
 
-/**
- * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser()
- * @batch_length: length of the commands in batch_obj
- * @trampoline: Whether jump trampolines are used.
- *
- * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser().
- * This has to be preallocated, because the command parser runs in signaling context,
- * and may not allocate any memory.
- *
- * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use
- * IS_ERR() to check for errors. Must bre freed() with kfree().
- *
- * NULL is a valid value, meaning no allocation was required.
- */
-unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
-                                                           bool trampoline)
+static unsigned long *alloc_whitelist(u32 batch_length)
 {
        unsigned long *jmp;
 
-       if (trampoline)
-               return NULL;
-
        /*
         * We expect batch_length to be less than 256KiB for known users,
         * i.e. we need at most an 8KiB bitmap allocation which should be
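
The 256KiB-batch/8KiB-bitmap sizing in the comment above follows from one whitelist bit per u32 command word; a back-of-envelope check with illustrative numbers:

#include <stdio.h>

int main(void)
{
        unsigned long batch_length = 256 * 1024; /* bytes, expected upper bound */
        unsigned long words = batch_length / 4;  /* u32 command words */
        unsigned long bitmap_bytes = words / 8;  /* one bit per word */

        /* Prints "256 KiB batch -> 8 KiB bitmap". */
        printf("%lu KiB batch -> %lu KiB bitmap\n",
               batch_length >> 10, bitmap_bytes >> 10);
        return 0;
}
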
@@ -1415,9 +1416,7 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
  * @batch_offset: byte offset in the batch at which execution starts
  * @batch_length: length of the commands in batch_obj
  * @shadow: validated copy of the batch buffer in question
- * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist()
- * @shadow_map: mapping to @shadow vma
- * @batch_map: mapping to @batch vma
+ * @trampoline: true if we need to trampoline into privileged execution
  *
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
@@ -1425,21 +1424,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
  * if the batch appears legal but should use hardware parsing
  */
+
 int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                            struct i915_vma *batch,
                            unsigned long batch_offset,
                            unsigned long batch_length,
                            struct i915_vma *shadow,
-                           unsigned long *jump_whitelist,
-                           void *shadow_map,
-                           const void *batch_map)
+                           bool trampoline)
 {
        u32 *cmd, *batch_end, offset = 0;
        struct drm_i915_cmd_descriptor default_desc = noop_desc;
        const struct drm_i915_cmd_descriptor *desc = &default_desc;
+       bool needs_clflush_after = false;
+       unsigned long *jump_whitelist;
        u64 batch_addr, shadow_addr;
        int ret = 0;
-       bool trampoline = !jump_whitelist;
 
        GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd)));
        GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd)));
@@ -1447,8 +1446,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                                     batch->size));
        GEM_BUG_ON(!batch_length);
 
-       cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length,
-                        shadow_map, batch_map);
+       cmd = copy_batch(shadow->obj, batch->obj,
+                        batch_offset, batch_length,
+                        &needs_clflush_after);
+       if (IS_ERR(cmd)) {
+               DRM_DEBUG("CMD: Failed to copy batch\n");
+               return PTR_ERR(cmd);
+       }
+
+       jump_whitelist = NULL;
+       if (!trampoline)
+               /* Defer failure until attempted use */
+               jump_whitelist = alloc_whitelist(batch_length);
 
        shadow_addr = gen8_canonical_addr(shadow->node.start);
        batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
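
The "defer failure until attempted use" pattern above keeps a possibly-failed allocation around and only reports the error if a jump actually consults the whitelist (compare the check_bbstart hunk earlier); a minimal userspace sketch with stand-ins for the kernel's error-pointer helpers:

#include <errno.h>
#include <stdio.h>

#define ERR_PTR(err)    ((void *)(long)(err))
#define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-4095)
#define PTR_ERR(ptr)    ((long)(ptr))

static void *alloc_whitelist(void)
{
        return ERR_PTR(-ENOMEM);        /* pretend the allocation failed */
}

static int check_jump(void *whitelist)
{
        /* Only batches containing a jump reach this point, so the
         * allocation failure surfaces here, not at parse start. */
        if (IS_ERR(whitelist))
                return PTR_ERR(whitelist);
        return 0;
}

int main(void)
{
        void *wl = alloc_whitelist();   /* failure tolerated for now */
        printf("check_jump -> %d\n", check_jump(wl));   /* -12 (ENOMEM) */
        return 0;
}
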
@@ -1549,6 +1558,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 
        i915_gem_object_flush_map(shadow->obj);
 
+       if (!IS_ERR_OR_NULL(jump_whitelist))
+               kfree(jump_whitelist);
+       i915_gem_object_unpin_map(shadow->obj);
        return ret;
 }
 
index 38ff2fb..b30397b 100644
@@ -1906,17 +1906,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
 int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
 void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
-                                                           bool trampoline);
-
 int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                            struct i915_vma *batch,
                            unsigned long batch_offset,
                            unsigned long batch_length,
                            struct i915_vma *shadow,
-                           unsigned long *jump_whitelist,
-                           void *shadow_map,
-                           const void *batch_map);
+                           bool trampoline);
 #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8
 
 /* intel_device_info.c */
index 1014c71..37aef13 100644
@@ -1426,10 +1426,8 @@ i915_request_await_execution(struct i915_request *rq,
 
        do {
                fence = *child++;
-               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-                       i915_sw_fence_set_error_once(&rq->submit, fence->error);
+               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                        continue;
-               }
 
                if (fence->context == rq->fence.context)
                        continue;
@@ -1527,10 +1525,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 
        do {
                fence = *child++;
-               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-                       i915_sw_fence_set_error_once(&rq->submit, fence->error);
+               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                        continue;
-               }
 
                /*
                 * Requests on the same timeline are explicitly ordered, along
index 7eaa92f..e0a10f3 100644
@@ -325,7 +325,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
                        info->pipe_mask &= ~BIT(PIPE_C);
                        info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
                }
-       } else if (HAS_DISPLAY(dev_priv) && GRAPHICS_VER(dev_priv) >= 9) {
+       } else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) {
                u32 dfsm = intel_de_read(dev_priv, SKL_DFSM);
 
                if (dfsm & SKL_DFSM_PIPE_A_DISABLE) {
@@ -340,7 +340,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
                        info->pipe_mask &= ~BIT(PIPE_C);
                        info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
                }
-               if (GRAPHICS_VER(dev_priv) >= 12 &&
+
+               if (DISPLAY_VER(dev_priv) >= 12 &&
                    (dfsm & TGL_DFSM_PIPE_D_DISABLE)) {
                        info->pipe_mask &= ~BIT(PIPE_D);
                        info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D);
@@ -352,10 +353,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
                if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE)
                        info->display.has_fbc = 0;
 
-               if (GRAPHICS_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
+               if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
                        info->display.has_dmc = 0;
 
-               if (GRAPHICS_VER(dev_priv) >= 10 &&
+               if (DISPLAY_VER(dev_priv) >= 10 &&
                    (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE))
                        info->display.has_dsc = 0;
        }
index d01c4c9..704dace 100644
@@ -296,7 +296,7 @@ static const struct dpu_mdp_cfg sc7180_mdp[] = {
 static const struct dpu_mdp_cfg sm8250_mdp[] = {
        {
        .name = "top_0", .id = MDP_TOP,
-       .base = 0x0, .len = 0x45C,
+       .base = 0x0, .len = 0x494,
        .features = 0,
        .highest_bank_bit = 0x3, /* TODO: 2 for LP_DDR4 */
        .clk_ctrls[DPU_CLK_CTRL_VIG0] = {
index ca96e35..c0423e7 100644
@@ -771,6 +771,7 @@ int dp_catalog_panel_timing_cfg(struct dp_catalog *dp_catalog)
        dp_write_link(catalog, REG_DP_HSYNC_VSYNC_WIDTH_POLARITY,
                                dp_catalog->width_blanking);
        dp_write_link(catalog, REG_DP_ACTIVE_HOR_VER, dp_catalog->dp_active);
+       dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0);
        return 0;
 }
 
index ee221d8..eaddfd7 100644
@@ -1526,7 +1526,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl)
         * running. Add the global reset just before disabling the
         * link clocks and core clocks.
         */
-       ret = dp_ctrl_off(&ctrl->dp_ctrl);
+       ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl);
        if (ret) {
                DRM_ERROR("failed to disable DP controller\n");
                return ret;
index 051c1be..867388a 100644
@@ -219,6 +219,7 @@ static int dp_display_bind(struct device *dev, struct device *master,
                goto end;
        }
 
+       dp->aux->drm_dev = drm;
        rc = dp_aux_register(dp->aux);
        if (rc) {
                DRM_ERROR("DRM DP AUX register failed\n");
@@ -1311,6 +1312,10 @@ static int dp_pm_resume(struct device *dev)
        else
                dp->dp_display.is_connected = false;
 
+       dp_display_handle_plugged_change(g_dp_display,
+                               dp->dp_display.is_connected);
+
+
        mutex_unlock(&dp->event_mutex);
 
        return 0;
index eed2a76..bcaddbb 100644
@@ -142,6 +142,9 @@ static const struct iommu_flush_ops null_tlb_ops = {
        .tlb_add_page = msm_iommu_tlb_add_page,
 };
 
+static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,
+               unsigned long iova, int flags, void *arg);
+
 struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
 {
        struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev);
@@ -157,6 +160,13 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
        if (!ttbr1_cfg)
                return ERR_PTR(-ENODEV);
 
+       /*
+        * Defer setting the fault handler until we have a valid adreno_smmu
+        * to avoid accidentally installing a GPU-specific fault handler for
+        * the display's iommu.
+        */
+       iommu_set_fault_handler(iommu->domain, msm_fault_handler, iommu);
+
        pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
        if (!pagetable)
                return ERR_PTR(-ENOMEM);
@@ -300,7 +310,6 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 
        iommu->domain = domain;
        msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);
-       iommu_set_fault_handler(domain, msm_fault_handler, iommu);
 
        atomic_set(&iommu->pagetables, 0);
 
index 4f3a535..6d07e65 100644
@@ -149,6 +149,8 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
         */
        if (bo->base.dev)
                drm_gem_object_release(&bo->base);
+       else
+               dma_resv_fini(&bo->base._resv);
 
        kfree(nvbo);
 }
@@ -330,6 +332,10 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
        if (IS_ERR(nvbo))
                return PTR_ERR(nvbo);
 
+       nvbo->bo.base.size = size;
+       dma_resv_init(&nvbo->bo.base._resv);
+       drm_vma_node_reset(&nvbo->bo.base.vma_node);
+
        ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj);
        if (ret)
                return ret;
index 2229f1a..46029c5 100644
@@ -447,7 +447,6 @@ static int rpi_touchscreen_remove(struct i2c_client *i2c)
        drm_panel_remove(&ts->base);
 
        mipi_dsi_device_unregister(ts->dsi);
-       kfree(ts->dsi);
 
        return 0;
 }
index 21939d4..1b80290 100644
@@ -4166,7 +4166,7 @@ static const struct drm_display_mode yes_optoelectronics_ytc700tlag_05_201c_mode
 static const struct panel_desc yes_optoelectronics_ytc700tlag_05_201c = {
        .modes = &yes_optoelectronics_ytc700tlag_05_201c_mode,
        .num_modes = 1,
-       .bpc = 6,
+       .bpc = 8,
        .size = {
                .width = 154,
                .height = 90,
index 1b950b4..8d7fd65 100644
@@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
                return;
        }
 
+       if (!mem)
+               return;
+
        man = ttm_manager_type(bdev, mem->mem_type);
        list_move_tail(&bo->lru, &man->lru[bo->priority]);
 
index 2f57f82..763fa6f 100644
@@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev,
 void ttm_mem_io_free(struct ttm_device *bdev,
                     struct ttm_resource *mem)
 {
+       if (!mem)
+               return;
+
        if (!mem->bus.offset && !mem->bus.addr)
                return;
 
index 5f31ace..74e3b46 100644
@@ -44,6 +44,8 @@ static unsigned ttm_glob_use_count;
 struct ttm_global ttm_glob;
 EXPORT_SYMBOL(ttm_glob);
 
+struct dentry *ttm_debugfs_root;
+
 static void ttm_global_release(void)
 {
        struct ttm_global *glob = &ttm_glob;
@@ -53,6 +55,7 @@ static void ttm_global_release(void)
                goto out;
 
        ttm_pool_mgr_fini();
+       debugfs_remove(ttm_debugfs_root);
 
        __free_page(glob->dummy_read_page);
        memset(glob, 0, sizeof(*glob));
@@ -73,6 +76,13 @@ static int ttm_global_init(void)
 
        si_meminfo(&si);
 
+       ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
+       if (IS_ERR(ttm_debugfs_root)) {
+               ret = PTR_ERR(ttm_debugfs_root);
+               ttm_debugfs_root = NULL;
+               goto out;
+       }
+
        /* Limit the number of pages in the pool to about 50% of the total
         * system memory.
         */
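
For scale, the 50% cap mentioned in that comment works out as below; the RAM size is an assumed example, and the kernel takes the page count from si_meminfo():

#include <stdio.h>

int main(void)
{
        /* Assume 16 GiB of RAM in 4 KiB pages. */
        unsigned long totalram_pages = 16UL * 1024 * 1024 * 1024 / 4096;
        unsigned long pool_limit = totalram_pages * 50 / 100;

        printf("pool limit: %lu pages (~%lu GiB)\n", pool_limit,
               pool_limit * 4096 / (1024UL * 1024 * 1024));
        return 0;
}
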
@@ -100,6 +110,10 @@ static int ttm_global_init(void)
        debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
                                &glob->bo_count);
 out:
+       if (ret && ttm_debugfs_root)
+               debugfs_remove(ttm_debugfs_root);
+       if (ret)
+               --ttm_glob_use_count;
        mutex_unlock(&ttm_global_mutex);
        return ret;
 }
index 997c458..7fcdef2 100644
@@ -72,22 +72,6 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp)
        return tmp;
 }
 
-struct dentry *ttm_debugfs_root;
-
-static int __init ttm_init(void)
-{
-       ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
-       return 0;
-}
-
-static void __exit ttm_exit(void)
-{
-       debugfs_remove(ttm_debugfs_root);
-}
-
-module_init(ttm_init);
-module_exit(ttm_exit);
-
 MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse");
 MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)");
 MODULE_LICENSE("GPL and additional rights");
index aab1b36..c287673 100644
@@ -1857,38 +1857,46 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi)
        vc4_hdmi_cec_update_clk_div(vc4_hdmi);
 
        if (vc4_hdmi->variant->external_irq_controller) {
-               ret = devm_request_threaded_irq(&pdev->dev,
-                                               platform_get_irq_byname(pdev, "cec-rx"),
-                                               vc4_cec_irq_handler_rx_bare,
-                                               vc4_cec_irq_handler_rx_thread, 0,
-                                               "vc4 hdmi cec rx", vc4_hdmi);
+               ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-rx"),
+                                          vc4_cec_irq_handler_rx_bare,
+                                          vc4_cec_irq_handler_rx_thread, 0,
+                                          "vc4 hdmi cec rx", vc4_hdmi);
                if (ret)
                        goto err_delete_cec_adap;
 
-               ret = devm_request_threaded_irq(&pdev->dev,
-                                               platform_get_irq_byname(pdev, "cec-tx"),
-                                               vc4_cec_irq_handler_tx_bare,
-                                               vc4_cec_irq_handler_tx_thread, 0,
-                                               "vc4 hdmi cec tx", vc4_hdmi);
+               ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-tx"),
+                                          vc4_cec_irq_handler_tx_bare,
+                                          vc4_cec_irq_handler_tx_thread, 0,
+                                          "vc4 hdmi cec tx", vc4_hdmi);
                if (ret)
-                       goto err_delete_cec_adap;
+                       goto err_remove_cec_rx_handler;
        } else {
                HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff);
 
-               ret = devm_request_threaded_irq(&pdev->dev, platform_get_irq(pdev, 0),
-                                               vc4_cec_irq_handler,
-                                               vc4_cec_irq_handler_thread, 0,
-                                               "vc4 hdmi cec", vc4_hdmi);
+               ret = request_threaded_irq(platform_get_irq(pdev, 0),
+                                          vc4_cec_irq_handler,
+                                          vc4_cec_irq_handler_thread, 0,
+                                          "vc4 hdmi cec", vc4_hdmi);
                if (ret)
                        goto err_delete_cec_adap;
        }
 
        ret = cec_register_adapter(vc4_hdmi->cec_adap, &pdev->dev);
        if (ret < 0)
-               goto err_delete_cec_adap;
+               goto err_remove_handlers;
 
        return 0;
 
+err_remove_handlers:
+       if (vc4_hdmi->variant->external_irq_controller)
+               free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi);
+       else
+               free_irq(platform_get_irq(pdev, 0), vc4_hdmi);
+
+err_remove_cec_rx_handler:
+       if (vc4_hdmi->variant->external_irq_controller)
+               free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi);
+
 err_delete_cec_adap:
        cec_delete_adapter(vc4_hdmi->cec_adap);
 
@@ -1897,6 +1905,15 @@ err_delete_cec_adap:
 
 static void vc4_hdmi_cec_exit(struct vc4_hdmi *vc4_hdmi)
 {
+       struct platform_device *pdev = vc4_hdmi->pdev;
+
+       if (vc4_hdmi->variant->external_irq_controller) {
+               free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi);
+               free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi);
+       } else {
+               free_irq(platform_get_irq(pdev, 0), vc4_hdmi);
+       }
+
        cec_unregister_adapter(vc4_hdmi->cec_adap);
 }
 #else
index 1605549..76937f7 100644
@@ -576,7 +576,7 @@ config HID_LOGITECH_HIDPP
        depends on HID_LOGITECH
        select POWER_SUPPLY
        help
-       Support for Logitech devices relyingon the HID++ Logitech specification
+       Support for Logitech devices relying on the HID++ Logitech specification
 
        Say Y if you want support for Logitech devices relying on the HID++
        specification. Such devices are the various Logitech Touchpads (T650,
index 96e2577..8d68796 100644
@@ -58,7 +58,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx)
        cmd_base.cmd_v2.sensor_id = sensor_idx;
        cmd_base.cmd_v2.length  = 16;
 
-       writeq(0x0, privdata->mmio + AMD_C2P_MSG2);
+       writeq(0x0, privdata->mmio + AMD_C2P_MSG1);
        writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
 }
 
index 6b8f0d0..dc6bd42 100644
@@ -501,6 +501,8 @@ static const struct hid_device_id apple_devices[] = {
                        APPLE_RDESC_JIS },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
                .driver_data = APPLE_HAS_FN },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+               .driver_data = APPLE_HAS_FN },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
                .driver_data = APPLE_HAS_FN },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
index fca8fc7..fb807c8 100644
@@ -485,9 +485,6 @@ static void asus_kbd_backlight_set(struct led_classdev *led_cdev,
 {
        struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds,
                                                 cdev);
-       if (led->brightness == brightness)
-               return;
-
        led->brightness = brightness;
        schedule_work(&led->work);
 }
index f43a840..4ef1c3b 100644
@@ -742,7 +742,7 @@ static int ft260_is_interface_enabled(struct hid_device *hdev)
        int ret;
 
        ret = ft260_get_system_config(hdev, &cfg);
-       if (ret)
+       if (ret < 0)
                return ret;
 
        ft260_dbg("interface:  0x%02x\n", interface);
@@ -754,23 +754,16 @@ static int ft260_is_interface_enabled(struct hid_device *hdev)
        switch (cfg.chip_mode) {
        case FT260_MODE_ALL:
        case FT260_MODE_BOTH:
-               if (interface == 1) {
+               if (interface == 1)
                        hid_info(hdev, "uart interface is not supported\n");
-                       return 0;
-               }
-               ret = 1;
+               else
+                       ret = 1;
                break;
        case FT260_MODE_UART:
-               if (interface == 0) {
-                       hid_info(hdev, "uart is unsupported on interface 0\n");
-                       ret = 0;
-               }
+               hid_info(hdev, "uart interface is not supported\n");
                break;
        case FT260_MODE_I2C:
-               if (interface == 1) {
-                       hid_info(hdev, "i2c is unsupported on interface 1\n");
-                       ret = 0;
-               }
+               ret = 1;
                break;
        }
        return ret;
@@ -785,7 +778,7 @@ static int ft260_byte_show(struct hid_device *hdev, int id, u8 *cfg, int len,
        if (ret < 0)
                return ret;
 
-       return scnprintf(buf, PAGE_SIZE, "%hi\n", *field);
+       return scnprintf(buf, PAGE_SIZE, "%d\n", *field);
 }
 
 static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
@@ -797,7 +790,7 @@ static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
        if (ret < 0)
                return ret;
 
-       return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field));
+       return scnprintf(buf, PAGE_SIZE, "%d\n", le16_to_cpu(*field));
 }
 
 #define FT260_ATTR_SHOW(name, reptype, id, type, func)                        \
@@ -1004,11 +997,9 @@ err_hid_stop:
 
 static void ft260_remove(struct hid_device *hdev)
 {
-       int ret;
        struct ft260_device *dev = hid_get_drvdata(hdev);
 
-       ret = ft260_is_interface_enabled(hdev);
-       if (ret <= 0)
+       if (!dev)
                return;
 
        sysfs_remove_group(&hdev->dev.kobj, &ft260_attr_group);
index 6b1fa97..91bf4d0 100644
@@ -784,6 +784,17 @@ static void hid_ishtp_cl_reset_handler(struct work_struct *work)
        }
 }
 
+static void hid_ishtp_cl_resume_handler(struct work_struct *work)
+{
+       struct ishtp_cl_data *client_data = container_of(work, struct ishtp_cl_data, resume_work);
+       struct ishtp_cl *hid_ishtp_cl = client_data->hid_ishtp_cl;
+
+       if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) {
+               client_data->suspended = false;
+               wake_up_interruptible(&client_data->ishtp_resume_wait);
+       }
+}
+
 ishtp_print_log ishtp_hid_print_trace;
 
 /**
@@ -822,6 +833,8 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
        init_waitqueue_head(&client_data->ishtp_resume_wait);
 
        INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler);
+       INIT_WORK(&client_data->resume_work, hid_ishtp_cl_resume_handler);
+
 
        ishtp_hid_print_trace = ishtp_trace_callback(cl_device);
 
@@ -921,7 +934,7 @@ static int hid_ishtp_cl_resume(struct device *device)
 
        hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
                        hid_ishtp_cl);
-       client_data->suspended = false;
+       schedule_work(&client_data->resume_work);
        return 0;
 }
 
index f88443a..6a5cc11 100644
@@ -135,6 +135,7 @@ struct ishtp_cl_data {
        int multi_packet_cnt;
 
        struct work_struct work;
+       struct work_struct resume_work;
        struct ishtp_cl_device *cl_device;
 };
 
index f0802b0..aa2c516 100644
@@ -314,13 +314,6 @@ static int ishtp_cl_device_resume(struct device *dev)
        if (!device)
                return 0;
 
-       /*
-        * When ISH needs hard reset, it is done asynchrnously, hence bus
-        * resume will  be called before full ISH resume
-        */
-       if (device->ishtp_dev->resume_flag)
-               return 0;
-
        driver = to_ishtp_cl_driver(dev->driver);
        if (driver && driver->driver.pm) {
                if (driver->driver.pm->resume)
@@ -849,6 +842,28 @@ struct device *ishtp_device(struct ishtp_cl_device *device)
 }
 EXPORT_SYMBOL(ishtp_device);
 
+/**
+ * ishtp_wait_resume() - Wait for IPC resume
+ * @dev: ishtp device
+ *
+ * Wait up to 50ms for the IPC resume response.
+ *
+ * Return: true if resume completed, false otherwise
+ */
+bool ishtp_wait_resume(struct ishtp_device *dev)
+{
+       /* 50ms to get resume response */
+       #define WAIT_FOR_RESUME_ACK_MS          50
+
+       /* Wait for the resume response */
+       if (dev->resume_flag)
+               wait_event_interruptible_timeout(dev->resume_wait,
+                                                !dev->resume_flag,
+                                                msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS));
+
+       return (!dev->resume_flag);
+}
+EXPORT_SYMBOL_GPL(ishtp_wait_resume);
+
 /**
  * ishtp_get_pci_device() - Return PCI device dev pointer
  * This interface is used to return PCI device pointer
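
The bounded wait in ishtp_wait_resume() above has the usual condition-variable-with-timeout shape; a userspace analog (all names illustrative, and the real code uses the kernel's wait_event_interruptible_timeout()):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool resume_flag = true;         /* set while resume is pending */

/* Wait up to 50ms for resume_flag to clear; true means resume completed. */
static bool wait_resume(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_nsec += 50 * 1000 * 1000;
        if (ts.tv_nsec >= 1000000000L) {
                ts.tv_sec++;
                ts.tv_nsec -= 1000000000L;
        }

        pthread_mutex_lock(&lock);
        while (resume_flag &&
               pthread_cond_timedwait(&cond, &lock, &ts) == 0)
                ;       /* woken up: re-check the flag */
        pthread_mutex_unlock(&lock);

        return !resume_flag;
}

int main(void)
{
        resume_flag = false;    /* pretend the response already arrived */
        printf("resumed: %d\n", wait_resume());
        return 0;
}
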
index dcf3a23..7c2032f 100644
@@ -38,7 +38,7 @@ config USB_HIDDEV
        help
          Say Y here if you want to support HID devices (from the USB
          specification standpoint) that aren't strictly user interface
-         devices, like monitor controls and Uninterruptable Power Supplies.
+         devices, like monitor controls and Uninterruptible Power Supplies.
 
          This module supports these devices separately using a separate
          event interface on /dev/usb/hiddevX (char 180:96 to 180:111).
index 81d7d12..81ba642 100644
@@ -2548,6 +2548,9 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
                int slot;
 
                slot = input_mt_get_slot_by_key(input, hid_data->id);
+               if (slot < 0)
+                       return;
+
                input_mt_slot(input, slot);
                input_mt_report_slot_state(input, MT_TOOL_FINGER, prox);
        }
@@ -3831,7 +3834,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev,
                    wacom_wac->shared->touch->product == 0xF6) {
                        input_dev->evbit[0] |= BIT_MASK(EV_SW);
                        __set_bit(SW_MUTE_DEVICE, input_dev->swbit);
-                       wacom_wac->shared->has_mute_touch_switch = true;
+                       wacom_wac->has_mute_touch_switch = true;
                }
                fallthrough;
 
index 6d5014e..a6ea1eb 100644
@@ -635,8 +635,8 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id)
 
        status = readb(i2c->base + MPC_I2C_SR);
        if (status & CSR_MIF) {
-               /* Read again to allow register to stabilise */
-               status = readb(i2c->base + MPC_I2C_SR);
+               /* Wait up to 100us for transfer to properly complete */
+               readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100);
                writeb(0, i2c->base + MPC_I2C_SR);
                mpc_i2c_do_intr(i2c, status);
                return IRQ_HANDLED;
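
The readb_poll_timeout() used above re-reads the status register until a condition holds or the time budget runs out; a rough, simplified shape of such a helper (the kernel version can also sleep between reads and takes the condition as a macro argument):

#include <errno.h>

static int poll_timeout(unsigned char (*read_reg)(void),
                        int (*cond)(unsigned char),
                        unsigned char *status, int budget)
{
        while (budget-- >= 0) {
                *status = read_reg();
                if (cond(*status))
                        return 0;       /* condition met within budget */
        }
        return -ETIMEDOUT;              /* *status holds the last read */
}

/* Illustrative stubs: a status register whose busy-style bit is clear. */
static unsigned char fake_sr(void) { return 0x00; }
static int bit_clear(unsigned char s) { return !(s & 0x02); }

int main(void)
{
        unsigned char status;

        return poll_timeout(fake_sr, bit_clear, &status, 100);
}
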
index d567402..a8688a9 100644
@@ -120,6 +120,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
        if (!chip_ctx)
                return -ENOMEM;
        chip_ctx->chip_num = bp->chip_num;
+       chip_ctx->hw_stats_size = bp->hw_ring_stats_size;
 
        rdev->chip_ctx = chip_ctx;
        /* rest members to follow eventually */
@@ -550,6 +551,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
                                       dma_addr_t dma_map,
                                       u32 *fw_stats_ctx_id)
 {
+       struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
        struct hwrm_stat_ctx_alloc_output resp = {0};
        struct hwrm_stat_ctx_alloc_input req = {0};
        struct bnxt_en_dev *en_dev = rdev->en_dev;
@@ -566,7 +568,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
        bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
        req.update_period_ms = cpu_to_le32(1000);
        req.stats_dma_addr = cpu_to_le64(dma_map);
-       req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext));
+       req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
        req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
index 17f0701..44282a8 100644
@@ -56,6 +56,7 @@
 static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
                                      struct bnxt_qplib_stats *stats);
 static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+                                     struct bnxt_qplib_chip_ctx *cctx,
                                      struct bnxt_qplib_stats *stats);
 
 /* PBL */
@@ -559,7 +560,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
                goto fail;
 stats_alloc:
        /* Stats */
-       rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats);
+       rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats);
        if (rc)
                goto fail;
 
@@ -889,15 +890,12 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
 }
 
 static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+                                     struct bnxt_qplib_chip_ctx *cctx,
                                      struct bnxt_qplib_stats *stats)
 {
        memset(stats, 0, sizeof(*stats));
        stats->fw_id = -1;
-       /* 128 byte aligned context memory is required only for 57500.
-        * However making this unconditional, it does not harm previous
-        * generation.
-        */
-       stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128);
+       stats->size = cctx->hw_stats_size;
        stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
                                        &stats->dma_map, GFP_KERNEL);
        if (!stats->dma) {
index c291f49..9103150 100644
@@ -54,6 +54,7 @@ struct bnxt_qplib_chip_ctx {
        u16     chip_num;
        u8      chip_rev;
        u8      chip_metal;
+       u16     hw_stats_size;
        struct bnxt_qplib_drv_modes modes;
 };
 
index b1023a7..f1e5515 100644
@@ -2845,7 +2845,7 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf,
  * parses fpm commit info and copy base value
  * of hmc objects in hmc_info
  */
-static enum irdma_status_code
+static void
 irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
                              struct irdma_hmc_obj_info *info, u32 *sd)
 {
@@ -2915,7 +2915,6 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
        else
                *sd = (u32)(size >> 21);
 
-       return 0;
 }
 
 /**
@@ -4187,11 +4186,9 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
  * @dev: sc device struct
  * @count: allocate count
  */
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
 {
        writel(count, dev->hw_regs[IRDMA_AEQALLOC]);
-
-       return 0;
 }
 
 /**
@@ -4434,9 +4431,9 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev,
        ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
                                           &commit_fpm_mem, true, wait_type);
        if (!ret_code)
-               ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
-                                                        hmc_info->hmc_obj,
-                                                        &hmc_info->sd_table.sd_cnt);
+               irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
+                                             hmc_info->hmc_obj,
+                                             &hmc_info->sd_table.sd_cnt);
        print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
                             8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
                             false);
index 7afb8a6..00de5ee 100644
@@ -1920,7 +1920,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf)
  * irdma_set_hw_rsrc - set hw memory resources.
  * @rf: RDMA PCI function
  */
-static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf)
+static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
 {
        rf->allocated_qps = (void *)(rf->mem_rsrc +
                   (sizeof(struct irdma_arp_entry) * rf->arp_table_size));
@@ -1937,8 +1937,6 @@ static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf)
        spin_lock_init(&rf->arp_lock);
        spin_lock_init(&rf->qptable_lock);
        spin_lock_init(&rf->qh_list_lock);
-
-       return 0;
 }
 
 /**
@@ -2000,9 +1998,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
 
        rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc;
 
-       ret = irdma_set_hw_rsrc(rf);
-       if (ret)
-               goto set_hw_rsrc_fail;
+       irdma_set_hw_rsrc(rf);
 
        set_bit(0, rf->allocated_mrs);
        set_bit(0, rf->allocated_qps);
@@ -2025,9 +2021,6 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
 
        return 0;
 
-set_hw_rsrc_fail:
-       kfree(rf->mem_rsrc);
-       rf->mem_rsrc = NULL;
 mem_rsrc_kzalloc_fail:
        kfree(rf->allocated_ws_nodes);
        rf->allocated_ws_nodes = NULL;
index ea59432..51a4135 100644
@@ -215,10 +215,10 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
        pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
 }
 
-static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf)
+static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf,
+                                  struct ice_vsi *vsi)
 {
        struct irdma_pci_f *rf = iwdev->rf;
-       struct ice_vsi *vsi = ice_get_main_vsi(pf);
 
        rf->cdev = pf;
        rf->gen_ops.register_qset = irdma_lan_register_qset;
@@ -253,12 +253,15 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
                                                            struct iidc_auxiliary_dev,
                                                            adev);
        struct ice_pf *pf = iidc_adev->pf;
+       struct ice_vsi *vsi = ice_get_main_vsi(pf);
        struct iidc_qos_params qos_info = {};
        struct irdma_device *iwdev;
        struct irdma_pci_f *rf;
        struct irdma_l2params l2params = {};
        int err;
 
+       if (!vsi)
+               return -EIO;
        iwdev = ib_alloc_device(irdma_device, ibdev);
        if (!iwdev)
                return -ENOMEM;
@@ -268,7 +271,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
                return -ENOMEM;
        }
 
-       irdma_fill_device_info(iwdev, pf);
+       irdma_fill_device_info(iwdev, pf, vsi);
        rf = iwdev->rf;
 
        if (irdma_ctrl_init_hw(rf)) {
index 7387b83..874bc25 100644
@@ -1222,8 +1222,7 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
                                         struct irdma_aeq_init_info *info);
 enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
                                              struct irdma_aeqe_info *info);
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev,
-                                                  u32 count);
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count);
 
 void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
                      int abi_ver);
index a6d52c2..5fb92de 100644
@@ -931,7 +931,7 @@ enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp,
 enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
                                             struct irdma_post_rq_info *info)
 {
-       u32 total_size = 0, wqe_idx, i, byte_off;
+       u32 wqe_idx, i, byte_off;
        u32 addl_frag_cnt;
        __le64 *wqe;
        u64 hdr;
@@ -939,9 +939,6 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
        if (qp->max_rq_frag_cnt < info->num_sges)
                return IRDMA_ERR_INVALID_FRAG_COUNT;
 
-       for (i = 0; i < info->num_sges; i++)
-               total_size += info->sg_list[i].len;
-
        wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
        if (!wqe)
                return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
index 9712f69..717147e 100644
@@ -557,7 +557,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
  * @iwqp: qp ptr
  * @init_info: initialize info to return
  */
-static int irdma_setup_virt_qp(struct irdma_device *iwdev,
+static void irdma_setup_virt_qp(struct irdma_device *iwdev,
                               struct irdma_qp *iwqp,
                               struct irdma_qp_init_info *init_info)
 {
@@ -574,8 +574,6 @@ static int irdma_setup_virt_qp(struct irdma_device *iwdev,
                init_info->sq_pa = qpmr->sq_pbl.addr;
                init_info->rq_pa = qpmr->rq_pbl.addr;
        }
-
-       return 0;
 }
 
 /**
@@ -914,7 +912,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
                        }
                }
                init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
-               err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info);
+               irdma_setup_virt_qp(iwdev, iwqp, &init_info);
        } else {
                init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
                err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
index 7abeb57..18b55d2 100644
@@ -997,7 +997,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                                  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
        MLX5_SET(cqc, cqc, uar_page, index);
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
        if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
                MLX5_SET(cqc, cqc, oi, 1);
index eb9b0a2..e994aef 100644
@@ -1437,11 +1437,10 @@ out:
        rcu_read_unlock();
 }
 
-static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in)
+static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in)
 {
        if (!MLX5_CAP_GEN(dev->mdev, apu) ||
-           !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
-                     apu_thread_cq))
+           !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq))
                return false;
 
        return true;
@@ -1501,7 +1500,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
                err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
                                           cmd_in_len, cmd_out, cmd_out_len);
        } else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
-                  !is_apu_thread_cq(dev, cmd_in)) {
+                  !is_apu_cq(dev, cmd_in)) {
                obj->flags |= DEVX_OBJ_FLAGS_CQ;
                obj->core_cq.comp = devx_cq_comp;
                err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
index b25e0b3..5282148 100644
@@ -8,13 +8,15 @@
 #include "srq.h"
 
 static int
-mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+                     struct mlx5_eswitch_rep *rep,
+                     int vport_index)
 {
        struct mlx5_ib_dev *ibdev;
-       int vport_index;
 
        ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
-       vport_index = rep->vport_index;
+       if (!ibdev)
+               return -EINVAL;
 
        ibdev->port[vport_index].rep = rep;
        rep->rep_data[REP_IB].priv = ibdev;
@@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        return 0;
 }
 
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
        u32 num_ports = mlx5_eswitch_get_total_vports(dev);
        const struct mlx5_ib_profile *profile;
+       struct mlx5_core_dev *peer_dev;
        struct mlx5_ib_dev *ibdev;
+       u32 peer_num_ports;
        int vport_index;
        int ret;
 
+       vport_index = rep->vport_index;
+
+       if (mlx5_lag_is_shared_fdb(dev)) {
+               peer_dev = mlx5_lag_get_peer_mdev(dev);
+               peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+               if (mlx5_lag_is_master(dev)) {
+                       /* Only 1 ib port is the representor for both uplinks */
+                       num_ports += peer_num_ports - 1;
+               } else {
+                       if (rep->vport == MLX5_VPORT_UPLINK)
+                               return 0;
+                       vport_index += peer_num_ports;
+                       dev = peer_dev;
+               }
+       }
+
        if (rep->vport == MLX5_VPORT_UPLINK)
                profile = &raw_eth_profile;
        else
-               return mlx5_ib_set_vport_rep(dev, rep);
+               return mlx5_ib_set_vport_rep(dev, rep, vport_index);
 
        ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
        if (!ibdev)
@@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
                goto fail_add;
 
        rep->rep_data[REP_IB].priv = ibdev;
+       if (mlx5_lag_is_shared_fdb(dev))
+               mlx5_ib_register_peer_vport_reps(dev);
 
        return 0;
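
The port arithmetic above can be sanity-checked with assumed counts: on the master, one IB port stands in for both uplinks, while on the non-master device a rep's index is shifted past the peer's ports (illustrative values only):

#include <stdio.h>

int main(void)
{
        unsigned int num_ports = 4;             /* local eswitch vports */
        unsigned int peer_num_ports = 4;        /* peer eswitch vports */
        unsigned int rep_vport_index = 2;       /* some VF representor */

        /* Master: both uplinks share a single IB port. */
        unsigned int master_total = num_ports + peer_num_ports - 1;    /* 7 */

        /* Non-master: local reps land after the peer's ports. */
        unsigned int shifted = rep_vport_index + peer_num_ports;       /* 6 */

        printf("master ports %u, shifted rep index %u\n",
               master_total, shifted);
        return 0;
}
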
 
@@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
 static void
 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 {
+       struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
        struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+       int vport_index = rep->vport_index;
        struct mlx5_ib_port *port;
 
-       port = &dev->port[rep->vport_index];
+       if (WARN_ON(!mdev))
+               return;
+
+       if (mlx5_lag_is_shared_fdb(mdev) &&
+           !mlx5_lag_is_master(mdev)) {
+               struct mlx5_core_dev *peer_mdev;
+
+               if (rep->vport == MLX5_VPORT_UPLINK)
+                       return;
+               peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+               vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+       }
+
+       if (!dev)
+               return;
+
+       port = &dev->port[vport_index];
        write_lock(&port->roce.netdev_lock);
        port->roce.netdev = NULL;
        write_unlock(&port->roce.netdev_lock);
        rep->rep_data[REP_IB].priv = NULL;
        port->rep = NULL;
 
-       if (rep->vport == MLX5_VPORT_UPLINK)
+       if (rep->vport == MLX5_VPORT_UPLINK) {
+               struct mlx5_core_dev *peer_mdev;
+               struct mlx5_eswitch *esw;
+
+               if (mlx5_lag_is_shared_fdb(mdev)) {
+                       peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+                       esw = peer_mdev->priv.eswitch;
+                       mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+               }
                __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+       }
 }
 
 static const struct mlx5_eswitch_rep_ops rep_ops = {
@@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
        .get_proto_dev = mlx5_ib_rep_to_dev,
 };
 
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+       struct mlx5_eswitch *esw;
+
+       if (!peer_mdev)
+               return;
+
+       esw = peer_mdev->priv.eswitch;
+       mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+}
+
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
                                          u16 vport_num)
 {
@@ -123,7 +186,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
 
        rep = dev->port[port - 1].rep;
 
-       return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
+       return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
 }
 
 static int mlx5r_rep_probe(struct auxiliary_device *adev,
index 094c976..ae05e14 100644 (file)
@@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev,
 
 static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
                                           struct net_device *ndev,
+                                          struct net_device *upper,
                                           u32 *port_num)
 {
        struct net_device *rep_ndev;
@@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
                if (!port->rep)
                        continue;
 
+               if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
+                       *port_num = i + 1;
+                       return &port->roce;
+               }
+
+               if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
+                       continue;
+
                read_lock(&port->roce.netdev_lock);
                rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
                                                  port->rep->vport);
@@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this,
                }
 
                if (ibdev->is_rep)
-                       roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+                       roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
                if (!roce)
                        return NOTIFY_DONE;
-               if ((upper == ndev || (!upper && ndev == roce->netdev))
-                   && ibdev->ib_active) {
+               if ((upper == ndev ||
+                    ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
+                   ibdev->ib_active) {
                        struct ib_event ibev = { };
                        enum ib_port_state port_state;
 
@@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
        struct mlx5_flow_table *ft;
        int err;
 
-       if (!ns || !mlx5_lag_is_roce(mdev))
+       if (!ns || !mlx5_lag_is_active(mdev))
                return 0;
 
        err = mlx5_cmd_create_vport_lag(mdev);
@@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
 {
        int err;
 
-       err = mlx5_nic_vport_enable_roce(dev->mdev);
-       if (err)
-               return err;
+       if (!dev->is_rep && dev->profile != &raw_eth_profile) {
+               err = mlx5_nic_vport_enable_roce(dev->mdev);
+               if (err)
+                       return err;
+       }
 
        err = mlx5_eth_lag_init(dev);
        if (err)
@@ -3085,7 +3097,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
        return 0;
 
 err_disable_roce:
-       mlx5_nic_vport_disable_roce(dev->mdev);
+       if (!dev->is_rep && dev->profile != &raw_eth_profile)
+               mlx5_nic_vport_disable_roce(dev->mdev);
 
        return err;
 }
@@ -3093,7 +3106,8 @@ err_disable_roce:
 static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 {
        mlx5_eth_lag_cleanup(dev);
-       mlx5_nic_vport_disable_roce(dev->mdev);
+       if (!dev->is_rep && dev->profile != &raw_eth_profile)
+               mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
 static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
@@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
 
                /* Register only for native ports */
                err = mlx5_add_netdev_notifier(dev, port_num);
-               if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev))
-                       /*
-                        * We don't enable ETH interface for
-                        * 1. IB representors
-                        * 2. User disabled ROCE through devlink interface
-                        */
+               if (err)
                        return err;
 
                err = mlx5_enable_eth(dev);
@@ -3980,8 +3989,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
-               if (!dev->is_rep)
-                       mlx5_disable_eth(dev);
+               mlx5_disable_eth(dev);
 
                port_num = mlx5_core_native_port_num(dev->mdev) - 1;
                mlx5_remove_netdev_notifier(dev, port_num);
@@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
        const char *name;
 
-       if (!mlx5_lag_is_roce(dev->mdev))
+       if (!mlx5_lag_is_active(dev->mdev))
                name = "mlx5_%d";
        else
                name = "mlx5_bond_%d";
index c0ddf7b..bbfcce3 100644 (file)
@@ -114,14 +114,18 @@ out:
 static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
                               struct mlx5_ib_uapi_query_port *info)
 {
-       struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_eswitch_rep *rep;
+       struct mlx5_core_dev *mdev;
        int err;
 
        rep = dev->port[port_num - 1].rep;
        if (!rep)
                return -EOPNOTSUPP;
 
+       mdev = mlx5_eswitch_get_core_dev(rep->esw);
+       if (!mdev)
+               return -EINVAL;
+
        info->vport = rep->vport;
        info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT;
 
@@ -138,9 +142,9 @@ static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
        if (err)
                return err;
 
-       if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) {
+       if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) {
                info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(
-                       mdev->priv.eswitch, rep->vport);
+                       rep->esw, rep->vport);
                info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
                info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0;
        }
index 6aabcb4..be4bcb4 100644 (file)
@@ -113,13 +113,14 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
        int                     num_buf;
        void                    *vaddr;
        int err;
+       int i;
 
        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
-               pr_warn("err %d from rxe_umem_get\n",
-                       (int)PTR_ERR(umem));
+               pr_warn("%s: Unable to pin memory region err = %d\n",
+                       __func__, (int)PTR_ERR(umem));
                err = PTR_ERR(umem);
-               goto err1;
+               goto err_out;
        }
 
        mr->umem = umem;
@@ -129,9 +130,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
        err = rxe_mr_alloc(mr, num_buf);
        if (err) {
-               pr_warn("err %d from rxe_mr_alloc\n", err);
-               ib_umem_release(umem);
-               goto err1;
+               pr_warn("%s: Unable to allocate memory for map\n",
+                               __func__);
+               goto err_release_umem;
        }
 
        mr->page_shift = PAGE_SHIFT;
@@ -151,10 +152,10 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
-                               pr_warn("null vaddr\n");
-                               ib_umem_release(umem);
+                               pr_warn("%s: Unable to get virtual address\n",
+                                               __func__);
                                err = -ENOMEM;
-                               goto err1;
+                               goto err_cleanup_map;
                        }
 
                        buf->addr = (uintptr_t)vaddr;
@@ -177,7 +178,13 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
        return 0;
 
-err1:
+err_cleanup_map:
+       for (i = 0; i < mr->num_map; i++)
+               kfree(mr->map[i]);
+       kfree(mr->map);
+err_release_umem:
+       ib_umem_release(umem);
+err_out:
        return err;
 }
 
index 02281d1..508ac29 100644 (file)
@@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
                  struct media_request *req)
 {
        struct vb2_buffer *vb;
+       enum vb2_buffer_state orig_state;
        int ret;
 
        if (q->error) {
@@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
         * Add to the queued buffers list; a buffer will stay on it until
         * dequeued in dqbuf.
         */
+       orig_state = vb->state;
        list_add_tail(&vb->queued_entry, &q->queued_list);
        q->queued_count++;
        q->waiting_for_buffers = false;
@@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
        if (q->streaming && !q->start_streaming_called &&
            q->queued_count >= q->min_buffers_needed) {
                ret = vb2_start_streaming(q);
-               if (ret)
+               if (ret) {
+                       /*
+                        * Since vb2_core_qbuf will return with an error,
+                        * return the buffer to the DEQUEUED state, as the
+                        * error indicates that it was never queued.
+                        */
+                       list_del(&vb->queued_entry);
+                       q->queued_count--;
+                       vb->state = orig_state;
                        return ret;
+               }
        }
 
        dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index);
index 4657e99..59a36f9 100644 (file)
@@ -173,10 +173,8 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
        int ret;
 
        for_each_acpi_dev_match(adev, cfg->hid, NULL, -1) {
-               if (!adev->status.enabled) {
-                       acpi_dev_put(adev);
+               if (!adev->status.enabled)
                        continue;
-               }
 
                if (bridge->n_sensors >= CIO2_NUM_PORTS) {
                        acpi_dev_put(adev);
@@ -185,7 +183,6 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
                }
 
                sensor = &bridge->sensors[bridge->n_sensors];
-               sensor->adev = adev;
                strscpy(sensor->name, cfg->hid, sizeof(sensor->name));
 
                ret = cio2_bridge_read_acpi_buffer(adev, "SSDB",
@@ -215,6 +212,7 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
                        goto err_free_swnodes;
                }
 
+               sensor->adev = acpi_dev_get(adev);
                adev->fwnode.secondary = fwnode;
 
                dev_info(&cio2->dev, "Found supported sensor %s\n",
index 07f342d..7481f55 100644 (file)
@@ -385,7 +385,7 @@ static int ngene_command_config_free_buf(struct ngene *dev, u8 *config)
 
        com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER;
        com.cmd.hdr.Length = 6;
-       memcpy(&com.cmd.ConfigureBuffers.config, config, 6);
+       memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6);
        com.in_len = 6;
        com.out_len = 0;
 
index 84f04e0..3d296f1 100644 (file)
@@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS {
 
 struct FW_CONFIGURE_FREE_BUFFERS {
        struct FW_HEADER hdr;
-       u8   UVI1_BufferLength;
-       u8   UVI2_BufferLength;
-       u8   TVO_BufferLength;
-       u8   AUD1_BufferLength;
-       u8   AUD2_BufferLength;
-       u8   TVA_BufferLength;
+       struct {
+               u8   UVI1_BufferLength;
+               u8   UVI2_BufferLength;
+               u8   TVO_BufferLength;
+               u8   AUD1_BufferLength;
+               u8   AUD2_BufferLength;
+               u8   TVA_BufferLength;
+       } __packed config;
 } __attribute__ ((__packed__));
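
One way to see what the nested struct buys: the 6-byte memcpy() in
ngene_command_config_free_buf() now has a destination whose size the
compiler can check. A hypothetical compile-time assertion (not part of
this patch) could read:

	/* Hypothetical: the nested config block must stay exactly 6 bytes. */
	BUILD_BUG_ON(sizeof(((struct FW_CONFIGURE_FREE_BUFFERS *)0)->config) != 6);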
 
 struct FW_CONFIGURE_UART {
index 99b5121..dda2f27 100644 (file)
@@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC
        select VIDEOBUF2_DMA_CONTIG
        select REGMAP_MMIO
        select V4L2_FWNODE
+       select VIDEO_ATMEL_ISC_BASE
        help
           This module makes the ATMEL Image Sensor Controller available
           as a v4l2 device.
@@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC
        select VIDEOBUF2_DMA_CONTIG
        select REGMAP_MMIO
        select V4L2_FWNODE
+       select VIDEO_ATMEL_ISC_BASE
        help
           This module makes the ATMEL eXtended Image Sensor Controller
           available as a v4l2 device.
 
+config VIDEO_ATMEL_ISC_BASE
+       tristate
+       default n
+       help
+         ATMEL ISC and XISC common code base.
+
 config VIDEO_ATMEL_ISI
        tristate "ATMEL Image Sensor Interface (ISI) support"
        depends on VIDEO_V4L2 && OF
index c5c0155..46d264a 100644 (file)
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
-atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o
-atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o
+atmel-isc-objs = atmel-sama5d2-isc.o
+atmel-xisc-objs = atmel-sama7g5-isc.o
 
 obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o
+obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o
 obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o
 obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o
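
Both front ends now select VIDEO_ATMEL_ISC_BASE and resolve the exported
isc_* symbols at load time. A rough module-build load sequence, assuming
the object names above with dashes turned into underscores (modprobe would
also resolve the dependency on its own):

	modprobe atmel_isc_base   # common base, exports isc_clk_init() and friends
	modprobe atmel_isc        # sama5d2 front end, links against the base module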
index 19daa49..136ab7c 100644 (file)
@@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(isc_clk_init);
 
 void isc_clk_cleanup(struct isc_device *isc)
 {
@@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc)
                        clk_unregister(isc_clk->clk);
        }
 }
+EXPORT_SYMBOL_GPL(isc_clk_cleanup);
 
 static int isc_queue_setup(struct vb2_queue *vq,
                            unsigned int *nbuffers, unsigned int *nplanes,
@@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(isc_interrupt);
 
 static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max)
 {
@@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = {
        .unbind = isc_async_unbind,
        .complete = isc_async_complete,
 };
+EXPORT_SYMBOL_GPL(isc_async_ops);
 
 void isc_subdev_cleanup(struct isc_device *isc)
 {
@@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc)
 
        INIT_LIST_HEAD(&isc->subdev_entities);
 }
+EXPORT_SYMBOL_GPL(isc_subdev_cleanup);
 
 int isc_pipeline_init(struct isc_device *isc)
 {
@@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(isc_pipeline_init);
 
 /* regmap configuration */
 #define ATMEL_ISC_REG_MAX    0xd5c
@@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = {
        .val_bits       = 32,
        .max_register   = ATMEL_ISC_REG_MAX,
 };
+EXPORT_SYMBOL_GPL(isc_regmap_config);
 
+MODULE_AUTHOR("Songjun Wu");
+MODULE_AUTHOR("Eugen Hristev");
+MODULE_DESCRIPTION("Atmel ISC common code base");
+MODULE_LICENSE("GPL v2");
index 8370573..795a012 100644 (file)
@@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req)
        } else {
                /* read */
                requesttype = (USB_TYPE_VENDOR | USB_DIR_IN);
-               pipe = usb_rcvctrlpipe(d->udev, 0);
+
+               /*
+                * Zero-length transfers must use usb_sndctrlpipe(), and
+                * rtl28xxu_identify_state() uses a zero-length i2c read
+                * command to determine the chip type.
+                */
+               if (req->size)
+                       pipe = usb_rcvctrlpipe(d->udev, 0);
+               else
+                       pipe = usb_sndctrlpipe(d->udev, 0);
        }
 
        ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value,
@@ -612,9 +621,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d)
 static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name)
 {
        struct rtl28xxu_dev *dev = d_to_priv(d);
-       u8 buf[1];
        int ret;
-       struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf};
+       struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL};
 
        dev_dbg(&d->intf->dev, "\n");
 
index 7a6f01a..305ffad 100644 (file)
@@ -714,23 +714,20 @@ static int at24_probe(struct i2c_client *client)
        }
 
        /*
-        * If the 'label' property is not present for the AT24 EEPROM,
-        * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO,
-        * and this will append the 'devid' to the name of the NVMEM
-        * device. This is purely legacy and the AT24 driver has always
-        * defaulted to this. However, if the 'label' property is
-        * present then this means that the name is specified by the
-        * firmware and this name should be used verbatim and so it is
-        * not necessary to append the 'devid'.
+        * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the
+        * label property is set, as some platforms can have multiple EEPROMs
+        * with the same label and we cannot register each of those with the
+        * same name. Failing to register those EEPROMs triggers a cascade
+        * failure on such platforms.
         */
+       nvmem_config.id = NVMEM_DEVID_AUTO;
+
        if (device_property_present(dev, "label")) {
-               nvmem_config.id = NVMEM_DEVID_NONE;
                err = device_property_read_string(dev, "label",
                                                  &nvmem_config.name);
                if (err)
                        return err;
        } else {
-               nvmem_config.id = NVMEM_DEVID_AUTO;
                nvmem_config.name = dev_name(dev);
        }
 
index 56213a8..995c613 100644 (file)
@@ -431,10 +431,10 @@ config VSOCKMON
 config MHI_NET
        tristate "MHI network driver"
        depends on MHI_BUS
-       select WWAN
        help
          This is the network driver for MHI bus.  It can be used with
-         QCOM based WWAN modems (like SDX55).  Say Y or M.
+         QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55).
+         Say Y or M.
 
 endif # NET_CORE
 
@@ -606,4 +606,11 @@ config NET_FAILOVER
          a VM with direct attached VF by failing over to the paravirtual
          datapath when the VF is unplugged.
 
+config NETDEV_LEGACY_INIT
+       bool
+       depends on ISA
+       help
+         Drivers that call netdev_boot_setup_check() should select this
+         symbol; everything else no longer needs it.
+
 endif # NETDEVICES
index a48a664..7398386 100644 (file)
@@ -18,7 +18,8 @@ obj-$(CONFIG_MACVLAN) += macvlan.o
 obj-$(CONFIG_MACVTAP) += macvtap.o
 obj-$(CONFIG_MII) += mii.o
 obj-$(CONFIG_MDIO) += mdio.o
-obj-$(CONFIG_NET) += Space.o loopback.o
+obj-$(CONFIG_NET) += loopback.o
+obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 obj-y += phy/
 obj-y += mdio/
@@ -36,7 +37,7 @@ obj-$(CONFIG_GTP) += gtp.o
 obj-$(CONFIG_NLMON) += nlmon.o
 obj-$(CONFIG_NET_VRF) += vrf.o
 obj-$(CONFIG_VSOCKMON) += vsockmon.o
-obj-$(CONFIG_MHI_NET) += mhi/
+obj-$(CONFIG_MHI_NET) += mhi_net.o
 
 #
 # Networking Drivers
index df79e73..49e67c9 100644 (file)
 #include <linux/netlink.h>
 #include <net/Space.h>
 
+/*
+ * This structure holds boot-time configured netdevice settings. They
+ * are then used during device probing.
+ */
+struct netdev_boot_setup {
+       char name[IFNAMSIZ];
+       struct ifmap map;
+};
+#define NETDEV_BOOT_SETUP_MAX 8
+
+
+/******************************************************************************
+ *
+ *                   Device Boot-time Settings Routines
+ *
+ ******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ *     netdev_boot_setup_add   - add new setup entry
+ *     @name: name of the device
+ *     @map: configured settings for the device
+ *
+ *     Adds a new setup entry to the dev_boot_setup list.  The function
+ *     returns 0 on error and 1 on success.  This is a generic routine for
+ *     all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+       struct netdev_boot_setup *s;
+       int i;
+
+       s = dev_boot_setup;
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+               if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+                       memset(s[i].name, 0, sizeof(s[i].name));
+                       strlcpy(s[i].name, name, IFNAMSIZ);
+                       memcpy(&s[i].map, map, sizeof(s[i].map));
+                       break;
+               }
+       }
+
+       return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ * netdev_boot_setup_check     - check boot time settings
+ * @dev: the netdevice
+ *
+ * Check boot time settings for the device.
+ * Any settings found are applied to the device for use
+ * later in device probing.
+ * Returns 0 if no settings are found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+       struct netdev_boot_setup *s = dev_boot_setup;
+       int i;
+
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+               if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+                   !strcmp(dev->name, s[i].name)) {
+                       dev->irq = s[i].map.irq;
+                       dev->base_addr = s[i].map.base_addr;
+                       dev->mem_start = s[i].map.mem_start;
+                       dev->mem_end = s[i].map.mem_end;
+                       return 1;
+               }
+       }
+       return 0;
+}
+EXPORT_SYMBOL(netdev_boot_setup_check);
+
+/**
+ * netdev_boot_base    - get address from boot time settings
+ * @prefix: prefix for network device
+ * @unit: id for network device
+ *
+ * Check boot time settings for the base address of the device,
+ * for use later in device probing.
+ * Returns 0 if no settings are found.
+ */
+static unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+       const struct netdev_boot_setup *s = dev_boot_setup;
+       char name[IFNAMSIZ];
+       int i;
+
+       sprintf(name, "%s%d", prefix, unit);
+
+       /*
+        * If the device is already registered, return a base of 1
+        * to indicate that this interface should not be probed
+        */
+       if (__dev_get_by_name(&init_net, name))
+               return 1;
+
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+               if (!strcmp(name, s[i].name))
+                       return s[i].map.base_addr;
+       return 0;
+}
+
+/*
+ * Saves the settings configured at boot time for any netdevice.
+ */
+static int __init netdev_boot_setup(char *str)
+{
+       int ints[5];
+       struct ifmap map;
+
+       str = get_options(str, ARRAY_SIZE(ints), ints);
+       if (!str || !*str)
+               return 0;
+
+       /* Save settings */
+       memset(&map, 0, sizeof(map));
+       if (ints[0] > 0)
+               map.irq = ints[1];
+       if (ints[0] > 1)
+               map.base_addr = ints[2];
+       if (ints[0] > 2)
+               map.mem_start = ints[3];
+       if (ints[0] > 3)
+               map.mem_end = ints[4];
+
+       /* Add new entry to the list */
+       return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+static int __init ether_boot_setup(char *str)
+{
+       return netdev_boot_setup(str);
+}
+__setup("ether=", ether_boot_setup);
+
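
For reference, the boot-time syntax these handlers accept follows the
ints[] unpacking in netdev_boot_setup() above; the values below are purely
illustrative:

	# up to four integers (irq, base_addr, mem_start, mem_end), then the name
	netdev=5,0x340,0,0,eth0
	ether=10,0x300,eth1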
+
 /* A unified ethernet device probe.  This is the easiest way to have every
  * ethernet adaptor have the name "eth[0123...]".
  */
@@ -77,39 +219,15 @@ static struct devprobe2 isa_probes[] __initdata = {
 #ifdef CONFIG_SMC9194
        {smc_init, 0},
 #endif
-#ifdef CONFIG_CS89x0
-#ifndef CONFIG_CS89x0_PLATFORM
+#ifdef CONFIG_CS89x0_ISA
        {cs89x0_probe, 0},
 #endif
-#endif
-#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET)        /* Intel */
-       {i82596_probe, 0},                                      /* I82596 */
-#endif
 #ifdef CONFIG_NI65
        {ni65_probe, 0},
 #endif
        {NULL, 0},
 };
 
-static struct devprobe2 m68k_probes[] __initdata = {
-#ifdef CONFIG_ATARILANCE       /* Lance-based Atari ethernet boards */
-       {atarilance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3LANCE         /* sun3 onboard Lance chip */
-       {sun3lance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3_82586        /* sun3 onboard Intel 82586 chip */
-       {sun3_82586_probe, 0},
-#endif
-#ifdef CONFIG_APNE             /* A1200 PCMCIA NE2000 */
-       {apne_probe, 0},
-#endif
-#ifdef CONFIG_MVME147_NET      /* MVME147 internal Ethernet */
-       {mvme147lance_probe, 0},
-#endif
-       {NULL, 0},
-};
-
 /* Unified ethernet device probe, segmented per architecture and
  * per bus interface. This drives the legacy devices only for now.
  */
@@ -121,8 +239,7 @@ static void __init ethif_probe2(int unit)
        if (base_addr == 1)
                return;
 
-       (void)(probe_list2(unit, m68k_probes, base_addr == 0) &&
-               probe_list2(unit, isa_probes, base_addr == 0));
+       probe_list2(unit, isa_probes, base_addr == 0);
 }
 
 /*  Statically configured drivers -- order matters here. */
@@ -130,10 +247,6 @@ static int __init net_olddevs_init(void)
 {
        int num;
 
-#ifdef CONFIG_SBNI
-       for (num = 0; num < 8; ++num)
-               sbni_probe(num);
-#endif
        for (num = 0; num < 8; ++num)
                ethif_probe2(num);
 
@@ -142,9 +255,6 @@ static int __init net_olddevs_init(void)
        cops_probe(1);
        cops_probe(2);
 #endif
-#ifdef CONFIG_LTPC
-       ltpc_probe();
-#endif
 
        return 0;
 }
index 4391839..90b9f1d 100644 (file)
@@ -52,7 +52,9 @@ config LTPC
 
 config COPS
        tristate "COPS LocalTalk PC support"
-       depends on DEV_APPLETALK && (ISA || EISA)
+       depends on DEV_APPLETALK && ISA
+       depends on NETDEVICES
+       select NETDEV_LEGACY_INIT
        help
          This allows you to use COPS AppleTalk cards to connect to LocalTalk
          networks. You also need version 1.3.3 or later of the netatalk
index 69c2708..1f8925e 100644 (file)
@@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = {
        .ndo_set_rx_mode        = set_multicast_list,
 };
 
-struct net_device * __init ltpc_probe(void)
+static struct net_device * __init ltpc_probe(void)
 {
        struct net_device *dev;
        int err = -ENOMEM;
@@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str)
 }
 
 __setup("ltpc=", ltpc_setup);
-#endif /* MODULE */
+#endif
 
 static struct net_device *dev_ltpc;
 
-#ifdef MODULE
-
 MODULE_LICENSE("GPL");
 module_param(debug, int, 0);
 module_param_hw(io, int, ioport, 0);
@@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void)
        return PTR_ERR_OR_ZERO(dev_ltpc);
 }
 module_init(ltpc_module_init);
-#endif
 
 static void __exit ltpc_cleanup(void)
 {
index 6908822..a4a202b 100644 (file)
@@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker);
 static void ad_mux_machine(struct port *port, bool *update_slave_arr);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
-static void ad_periodic_machine(struct port *port);
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params);
 static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
 static void ad_agg_selection_logic(struct aggregator *aggregator,
                                   bool *update_slave_arr);
@@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port)
 /**
  * ad_periodic_machine - handle a port's periodic state machine
  * @port: the port we're looking at
+ * @bond_params: bond parameters we will use
  *
 * Turn the ntt flag on periodically to perform periodic transmission of LACPDUs.
  */
-static void ad_periodic_machine(struct port *port)
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params)
 {
        periodic_states_t last_state;
 
@@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port)
 
        /* check if port was reinitialized */
        if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
-           (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))
-          ) {
+           (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
+           !bond_params.lacp_active) {
                port->sm_periodic_state = AD_NO_PERIODIC;
        }
        /* check if state machine should change state */
@@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
                }
 
                ad_rx_machine(NULL, port);
-               ad_periodic_machine(port);
+               ad_periodic_machine(port, bond->params);
                ad_port_selection_logic(port, &update_slave_arr);
                ad_mux_machine(port, &update_slave_arr);
                ad_tx_machine(port);
index 22e5632..7d3752c 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/if_bonding.h>
 #include <linux/if_vlan.h>
 #include <linux/in.h>
-#include <net/ipx.h>
 #include <net/arp.h>
 #include <net/ipv6.h>
 #include <asm/byteorder.h>
@@ -1351,8 +1350,6 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
        if (!is_multicast_ether_addr(eth_data->h_dest)) {
                switch (skb->protocol) {
                case htons(ETH_P_IP):
-               case htons(ETH_P_IPX):
-                   /* In case of IPX, it will falback to L2 hash */
                case htons(ETH_P_IPV6):
                        hash_index = bond_xmit_hash(bond, skb);
                        if (bond->params.tlb_dynamic_lb) {
@@ -1454,35 +1451,6 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
                hash_size = sizeof(ip6hdr->daddr);
                break;
        }
-       case ETH_P_IPX: {
-               const struct ipxhdr *ipxhdr;
-
-               if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) {
-                       do_tx_balance = false;
-                       break;
-               }
-               ipxhdr = (struct ipxhdr *)skb_network_header(skb);
-
-               if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) {
-                       /* something is wrong with this packet */
-                       do_tx_balance = false;
-                       break;
-               }
-
-               if (ipxhdr->ipx_type != IPX_TYPE_NCP) {
-                       /* The only protocol worth balancing in
-                        * this family since it has an "ARP" like
-                        * mechanism
-                        */
-                       do_tx_balance = false;
-                       break;
-               }
-
-               eth_data = eth_hdr(skb);
-               hash_start = (char *)eth_data->h_dest;
-               hash_size = ETH_ALEN;
-               break;
-       }
        case ETH_P_ARP:
                do_tx_balance = false;
                if (bond_info->rlb_enabled)
index bec8cea..365953e 100644 (file)
@@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond)
        }
 }
 
+static bool bond_xdp_check(struct bonding *bond)
+{
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+       case BOND_MODE_ACTIVEBACKUP:
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               return true;
+       default:
+               return false;
+       }
+}
+
 /*---------------------------------- VLAN -----------------------------------*/
 
 /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                bond_update_slave_arr(bond, NULL);
 
 
+       if (!slave_dev->netdev_ops->ndo_bpf ||
+           !slave_dev->netdev_ops->ndo_xdp_xmit) {
+               if (bond->xdp_prog) {
+                       NL_SET_ERR_MSG(extack, "Slave does not support XDP");
+                       slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+       } else {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = bond->xdp_prog,
+                       .extack  = extack,
+               };
+
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Slave has XDP program loaded, please unload before enslaving");
+                       slave_err(bond_dev, slave_dev,
+                                 "Slave has XDP program loaded, please unload before enslaving\n");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+
+               res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (res < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+                       goto err_sysfs_del;
+               }
+               if (bond->xdp_prog)
+                       bpf_prog_inc(bond->xdp_prog);
+       }
+
        slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
                   bond_is_active_slave(new_slave) ? "an active" : "a backup",
                   new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@@ -2252,7 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev,
        /* recompute stats just before removing the slave */
        bond_get_stats(bond->dev, &bond->bond_stats);
 
-       bond_upper_dev_unlink(bond, slave);
+       if (bond->xdp_prog) {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = NULL,
+                       .extack  = NULL,
+               };
+               if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+                       slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+       }
+
        /* unregister rx_handler early so bond_handle_frame wouldn't be called
         * for this slave anymore.
         */
@@ -2261,6 +2319,8 @@ static int __bond_release_one(struct net_device *bond_dev,
        if (BOND_MODE(bond) == BOND_MODE_8023AD)
                bond_3ad_unbind_slave(slave);
 
+       bond_upper_dev_unlink(bond, slave);
+
        if (bond_mode_can_use_xmit_hash(bond))
                bond_update_slave_arr(bond, slave);
 
@@ -3613,55 +3673,80 @@ static struct notifier_block bond_netdev_notifier = {
 
 /*---------------------------- Hashing Policies -----------------------------*/
 
+/* Helper to access data in a packet, with or without a backing skb.
+ * If an skb is given, the data is linearized if necessary via pskb_may_pull().
+ */
+static inline const void *bond_pull_data(struct sk_buff *skb,
+                                        const void *data, int hlen, int n)
+{
+       if (likely(n <= hlen))
+               return data;
+       else if (skb && likely(pskb_may_pull(skb, n)))
+               return skb->head;
+
+       return NULL;
+}
+
 /* L2 hash helper */
-static inline u32 bond_eth_hash(struct sk_buff *skb)
+static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
 {
-       struct ethhdr *ep, hdr_tmp;
+       struct ethhdr *ep;
 
-       ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
-       if (ep)
-               return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
-       return 0;
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+
+       ep = (struct ethhdr *)(data + mhoff);
+       return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
 }
 
-static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
-                        int *noff, int *proto, bool l34)
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
+                        int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
 {
        const struct ipv6hdr *iph6;
        const struct iphdr *iph;
 
-       if (skb->protocol == htons(ETH_P_IP)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+       if (l2_proto == htons(ETH_P_IP)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+               if (!data)
                        return false;
-               iph = (const struct iphdr *)(skb->data + *noff);
+
+               iph = (const struct iphdr *)(data + *nhoff);
                iph_to_flow_copy_v4addrs(fk, iph);
-               *noff += iph->ihl << 2;
+               *nhoff += iph->ihl << 2;
                if (!ip_is_fragment(iph))
-                       *proto = iph->protocol;
-       } else if (skb->protocol == htons(ETH_P_IPV6)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+                       *ip_proto = iph->protocol;
+       } else if (l2_proto == htons(ETH_P_IPV6)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+               if (!data)
                        return false;
-               iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+
+               iph6 = (const struct ipv6hdr *)(data + *nhoff);
                iph_to_flow_copy_v6addrs(fk, iph6);
-               *noff += sizeof(*iph6);
-               *proto = iph6->nexthdr;
+               *nhoff += sizeof(*iph6);
+               *ip_proto = iph6->nexthdr;
        } else {
                return false;
        }
 
-       if (l34 && *proto >= 0)
-               fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+       if (l34 && *ip_proto >= 0)
+               fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
 
        return true;
 }
 
-static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
+static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
 {
-       struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+       struct ethhdr *mac_hdr;
        u32 srcmac_vendor = 0, srcmac_dev = 0;
        u16 vlan;
        int i;
 
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+       mac_hdr = (struct ethhdr *)(data + mhoff);
+
        for (i = 0; i < 3; i++)
                srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
 
@@ -3677,26 +3762,25 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
 }
 
 /* Extract the appropriate headers based on bond's xmit policy */
-static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
-                             struct flow_keys *fk)
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
+                             __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
 {
        bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
-       int noff, proto = -1;
+       int ip_proto = -1;
 
        switch (bond->params.xmit_policy) {
        case BOND_XMIT_POLICY_ENCAP23:
        case BOND_XMIT_POLICY_ENCAP34:
                memset(fk, 0, sizeof(*fk));
                return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
-                                         fk, NULL, 0, 0, 0, 0);
+                                         fk, data, l2_proto, nhoff, hlen, 0);
        default:
                break;
        }
 
        fk->ports.ports = 0;
        memset(&fk->icmp, 0, sizeof(fk->icmp));
-       noff = skb_network_offset(skb);
-       if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+       if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
                return false;
 
        /* ICMP error packets contains at least 8 bytes of the header
@@ -3704,22 +3788,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
         * to correlate ICMP error packets within the same flow which
         * generated the error.
         */
-       if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
-               skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
-                                     skb_transport_offset(skb),
-                                     skb_headlen(skb));
-               if (proto == IPPROTO_ICMP) {
+       if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+               skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+               if (ip_proto == IPPROTO_ICMP) {
                        if (!icmp_is_err(fk->icmp.type))
                                return true;
 
-                       noff += sizeof(struct icmphdr);
-               } else if (proto == IPPROTO_ICMPV6) {
+                       nhoff += sizeof(struct icmphdr);
+               } else if (ip_proto == IPPROTO_ICMPV6) {
                        if (!icmpv6_is_err(fk->icmp.type))
                                return true;
 
-                       noff += sizeof(struct icmp6hdr);
+                       nhoff += sizeof(struct icmp6hdr);
                }
-               return bond_flow_ip(skb, fk, &noff, &proto, l34);
+               return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
        }
 
        return true;
@@ -3735,33 +3817,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
        return hash >> 1;
 }
 
-/**
- * bond_xmit_hash - generate a hash value based on the xmit policy
- * @bond: bonding device
- * @skb: buffer to use for headers
- *
- * This function will extract the necessary headers from the skb buffer and use
- * them to generate a hash based on the xmit_policy set in the bonding device
+/* Generate hash based on xmit policy. If @skb is given, it is used to linearize
+ * the data as required, but this function can be used without it if the data is
+ * known to be linear (e.g. with xdp_buff).
  */
-u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
+                           __be16 l2_proto, int mhoff, int nhoff, int hlen)
 {
        struct flow_keys flow;
        u32 hash;
 
-       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
-           skb->l4_hash)
-               return skb->hash;
-
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
-               return bond_vlan_srcmac_hash(skb);
+               return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
 
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
-           !bond_flow_dissect(bond, skb, &flow))
-               return bond_eth_hash(skb);
+           !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+               return bond_eth_hash(skb, data, mhoff, hlen);
 
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
            bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
-               hash = bond_eth_hash(skb);
+               hash = bond_eth_hash(skb, data, mhoff, hlen);
        } else {
                if (flow.icmp.id)
                        memcpy(&hash, &flow.icmp, sizeof(hash));
@@ -3772,6 +3847,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
        return bond_ip_hash(hash, &flow);
 }
 
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ */
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+{
+       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+           skb->l4_hash)
+               return skb->hash;
+
+       return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+                               skb->mac_header, skb->network_header,
+                               skb_headlen(skb));
+}
+
+/**
+ * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @xdp: buffer to use for headers
+ *
+ * The XDP variant of bond_xmit_hash.
+ */
+static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+{
+       struct ethhdr *eth;
+
+       if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+               return 0;
+
+       eth = (struct ethhdr *)xdp->data;
+
+       return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+                               sizeof(struct ethhdr), xdp->data_end - xdp->data);
+}
+
 /*-------------------------- Device entry points ----------------------------*/
 
 void bond_work_init_all(struct bonding *bond)
@@ -4420,6 +4534,47 @@ non_igmp:
        return NULL;
 }
 
+static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+                                                       struct xdp_buff *xdp)
+{
+       struct slave *slave;
+       int slave_cnt;
+       u32 slave_id;
+       const struct ethhdr *eth;
+       void *data = xdp->data;
+
+       if (data + sizeof(struct ethhdr) > xdp->data_end)
+               goto non_igmp;
+
+       eth = (struct ethhdr *)data;
+       data += sizeof(struct ethhdr);
+
+       /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               const struct iphdr *iph;
+
+               if (data + sizeof(struct iphdr) > xdp->data_end)
+                       goto non_igmp;
+
+               iph = (struct iphdr *)data;
+
+               if (iph->protocol == IPPROTO_IGMP) {
+                       slave = rcu_dereference(bond->curr_active_slave);
+                       if (slave)
+                               return slave;
+                       return bond_get_slave_by_id(bond, 0);
+               }
+       }
+
+non_igmp:
+       slave_cnt = READ_ONCE(bond->slave_cnt);
+       if (likely(slave_cnt)) {
+               slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+               return bond_get_slave_by_id(bond, slave_id);
+       }
+       return NULL;
+}
+
 static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
                                        struct net_device *bond_dev)
 {
@@ -4433,8 +4588,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
        return bond_tx_drop(bond_dev, skb);
 }
 
-static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
-                                                     struct sk_buff *skb)
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
 {
        return rcu_dereference(bond->curr_active_slave);
 }
@@ -4448,7 +4602,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave;
 
-       slave = bond_xmit_activebackup_slave_get(bond, skb);
+       slave = bond_xmit_activebackup_slave_get(bond);
        if (slave)
                return bond_dev_queue_xmit(bond, skb, slave->dev);
 
@@ -4636,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
        return slave;
 }
 
+static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+                                                    struct xdp_buff *xdp)
+{
+       struct bond_up_slave *slaves;
+       unsigned int count;
+       u32 hash;
+
+       hash = bond_xmit_hash_xdp(bond, xdp);
+       slaves = rcu_dereference(bond->usable_slaves);
+       count = slaves ? READ_ONCE(slaves->count) : 0;
+       if (unlikely(!count))
+               return NULL;
+
+       return slaves->arr[hash % count];
+}
+
 /* Use this Xmit function for 3AD as well as XOR modes. The current
  * usable slave array is formed in the control path. The xmit function
  * just calculates hash and sends the packet out.
@@ -4746,7 +4916,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
                slave = bond_xmit_roundrobin_slave_get(bond, skb);
                break;
        case BOND_MODE_ACTIVEBACKUP:
-               slave = bond_xmit_activebackup_slave_get(bond, skb);
+               slave = bond_xmit_activebackup_slave_get(bond);
                break;
        case BOND_MODE_8023AD:
        case BOND_MODE_XOR:
@@ -4920,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
        return ret;
 }
 
+static struct net_device *
+bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+{
+       struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+
+       /* Caller needs to hold rcu_read_lock() */
+
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+               slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+               break;
+
+       case BOND_MODE_ACTIVEBACKUP:
+               slave = bond_xmit_activebackup_slave_get(bond);
+               break;
+
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+               break;
+
+       default:
+               /* Should never happen. Mode guarded by bond_xdp_check() */
+               netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+               WARN_ON_ONCE(1);
+               return NULL;
+       }
+
+       if (slave)
+               return slave->dev;
+
+       return NULL;
+}
+
+static int bond_xdp_xmit(struct net_device *bond_dev,
+                        int n, struct xdp_frame **frames, u32 flags)
+{
+       int nxmit, err = -ENXIO;
+
+       rcu_read_lock();
+
+       for (nxmit = 0; nxmit < n; nxmit++) {
+               struct xdp_frame *frame = frames[nxmit];
+               struct xdp_frame *frames1[] = {frame};
+               struct net_device *slave_dev;
+               struct xdp_buff xdp;
+
+               xdp_convert_frame_to_buff(frame, &xdp);
+
+               slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+               if (!slave_dev) {
+                       err = -ENXIO;
+                       break;
+               }
+
+               err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+               if (err < 1)
+                       break;
+       }
+
+       rcu_read_unlock();
+
+       /* If an error happened on the first frame, pass the error up;
+        * otherwise report the number of frames that were transmitted.
+        */
+       if (err < 0)
+               return (nxmit == 0 ? err : nxmit);
+
+       return nxmit;
+}
+
+static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+                       struct netlink_ext_ack *extack)
+{
+       struct bonding *bond = netdev_priv(dev);
+       struct list_head *iter;
+       struct slave *slave, *rollback_slave;
+       struct bpf_prog *old_prog;
+       struct netdev_bpf xdp = {
+               .command = XDP_SETUP_PROG,
+               .flags   = 0,
+               .prog    = prog,
+               .extack  = extack,
+       };
+       int err;
+
+       ASSERT_RTNL();
+
+       if (!bond_xdp_check(bond))
+               return -EOPNOTSUPP;
+
+       old_prog = bond->xdp_prog;
+       bond->xdp_prog = prog;
+
+       bond_for_each_slave(bond, slave, iter) {
+               struct net_device *slave_dev = slave->dev;
+
+               if (!slave_dev->netdev_ops->ndo_bpf ||
+                   !slave_dev->netdev_ops->ndo_xdp_xmit) {
+                       NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
+                       slave_err(dev, slave_dev, "Slave does not support XDP\n");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Slave has XDP program loaded, please unload before enslaving");
+                       slave_err(dev, slave_dev,
+                                 "Slave has XDP program loaded, please unload before enslaving\n");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+
+               err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+                       goto err;
+               }
+               if (prog)
+                       bpf_prog_inc(prog);
+       }
+
+       if (old_prog)
+               bpf_prog_put(old_prog);
+
+       if (prog)
+               static_branch_inc(&bpf_master_redirect_enabled_key);
+       else
+               static_branch_dec(&bpf_master_redirect_enabled_key);
+
+       return 0;
+
+err:
+       /* unwind the program changes */
+       bond->xdp_prog = old_prog;
+       xdp.prog = old_prog;
+       xdp.extack = NULL; /* do not overwrite original error */
+
+       bond_for_each_slave(bond, rollback_slave, iter) {
+               struct net_device *slave_dev = rollback_slave->dev;
+               int err_unwind;
+
+               if (slave == rollback_slave)
+                       break;
+
+               err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err_unwind < 0)
+                       slave_err(dev, slave_dev,
+                                 "Error %d when unwinding XDP program change\n", err_unwind);
+               else if (xdp.prog)
+                       bpf_prog_inc(xdp.prog);
+       }
+       return err;
+}
+
+static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return bond_xdp_set(dev, xdp->prog, xdp->extack);
+       default:
+               return -EINVAL;
+       }
+}
+
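
A plausible userspace sequence for exercising these hooks, assuming an XDP
object compiled into prog.o with a program in section "xdp" (file and
section names are placeholders):

	ip link add bond0 type bond mode 802.3ad
	ip link set eth0 master bond0
	ip link set eth1 master bond0
	# attaching to the bond propagates the program to every slave via ndo_bpf()
	ip link set bond0 xdp obj prog.o sec xdp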
 static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
 {
        if (speed == 0 || speed == SPEED_UNKNOWN)
@@ -5008,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_features_check     = passthru_features_check,
        .ndo_get_xmit_slave     = bond_xmit_get_slave,
        .ndo_sk_get_lower_dev   = bond_sk_get_lower_dev,
+       .ndo_bpf                = bond_xdp,
+       .ndo_xdp_xmit           = bond_xdp_xmit,
+       .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
 };
 
 static const struct device_type bond_type = {
@@ -5477,6 +5818,7 @@ static int bond_check_params(struct bond_params *params)
        params->downdelay = downdelay;
        params->peer_notif_delay = 0;
        params->use_carrier = use_carrier;
+       params->lacp_active = 1;
        params->lacp_fast = lacp_fast;
        params->primary[0] = 0;
        params->primary_reselect = primary_reselect_value;
index 0561ece..5d54e11 100644 (file)
@@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
        [IFLA_BOND_MIN_LINKS]           = { .type = NLA_U32 },
        [IFLA_BOND_LP_INTERVAL]         = { .type = NLA_U32 },
        [IFLA_BOND_PACKETS_PER_SLAVE]   = { .type = NLA_U32 },
+       [IFLA_BOND_AD_LACP_ACTIVE]      = { .type = NLA_U8 },
        [IFLA_BOND_AD_LACP_RATE]        = { .type = NLA_U8 },
        [IFLA_BOND_AD_SELECT]           = { .type = NLA_U8 },
        [IFLA_BOND_AD_INFO]             = { .type = NLA_NESTED },
@@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
                if (err)
                        return err;
        }
+
+       if (data[IFLA_BOND_AD_LACP_ACTIVE]) {
+               int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]);
+
+               bond_opt_initval(&newval, lacp_active);
+               err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval);
+               if (err)
+                       return err;
+       }
+
        if (data[IFLA_BOND_AD_LACP_RATE]) {
                int lacp_rate =
                        nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]);
@@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
                nla_total_size(sizeof(u32)) +   /* IFLA_BOND_MIN_LINKS */
                nla_total_size(sizeof(u32)) +   /* IFLA_BOND_LP_INTERVAL */
                nla_total_size(sizeof(u32)) +  /* IFLA_BOND_PACKETS_PER_SLAVE */
+               nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_LACP_ACTIVE */
                nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_LACP_RATE */
                nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_SELECT */
                nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */
@@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb,
                        packets_per_slave))
                goto nla_put_failure;
 
+       if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE,
+                      bond->params.lacp_active))
+               goto nla_put_failure;
+
        if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE,
                       bond->params.lacp_fast))
                goto nla_put_failure;
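For reference, a minimal sketch of driving the new attribute over rtnetlink, mirroring the nesting bond_changelink() parses (IFLA_LINKINFO -> IFLA_INFO_DATA -> IFLA_BOND_AD_LACP_ACTIVE). It assumes kernel headers that already carry the new attribute; ACK processing and bounds checks are elided, and since BOND_OPT_LACP_ACTIVE is flagged BOND_OPTFLAG_IFDOWN the bond must be down when this runs:

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_link.h>
#include <net/if.h>

/* Append one attribute to the message (no bounds checking). */
static struct rtattr *nla_put(struct nlmsghdr *n, unsigned short type,
			      const void *data, int len)
{
	struct rtattr *rta = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	if (len)
		memcpy(RTA_DATA(rta), data, len);
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(rta->rta_len);
	return rta;
}

int bond_set_lacp_active(const char *ifname, __u8 active)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg i;
		char buf[128];
	} req = {
		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
		.n.nlmsg_type = RTM_NEWLINK,
		.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
		.i.ifi_family = AF_UNSPEC,
		.i.ifi_index = (int)if_nametoindex(ifname),
	};
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	struct rtattr *linkinfo, *data;
	int fd, rc;

	linkinfo = nla_put(&req.n, IFLA_LINKINFO, NULL, 0);
	nla_put(&req.n, IFLA_INFO_KIND, "bond", 5);
	data = nla_put(&req.n, IFLA_INFO_DATA, NULL, 0);
	nla_put(&req.n, IFLA_BOND_AD_LACP_ACTIVE, &active, sizeof(active));
	/* Close the nests now that their payload sizes are known */
	data->rta_len = (char *)&req.n + req.n.nlmsg_len - (char *)data;
	linkinfo->rta_len = (char *)&req.n + req.n.nlmsg_len - (char *)linkinfo;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	rc = sendto(fd, &req, req.n.nlmsg_len, 0,
		    (struct sockaddr *)&sa, sizeof(sa));
	close(fd);	/* ACK handling elided for brevity */
	return rc < 0 ? -1 : 0;
}

iproute2 gained a matching "lacp_active" keyword for ip link around the same time, which wraps exactly this attribute.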
index 0cf25de..a8fde3b 100644 (file)
@@ -58,6 +58,8 @@ static int bond_option_lp_interval_set(struct bonding *bond,
                                       const struct bond_opt_value *newval);
 static int bond_option_pps_set(struct bonding *bond,
                               const struct bond_opt_value *newval);
+static int bond_option_lacp_active_set(struct bonding *bond,
+                                      const struct bond_opt_value *newval);
 static int bond_option_lacp_rate_set(struct bonding *bond,
                                     const struct bond_opt_value *newval);
 static int bond_option_ad_select_set(struct bonding *bond,
@@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = {
        { NULL,      -1,      0}
 };
 
+static const struct bond_opt_value bond_lacp_active[] = {
+       { "off", 0,  0},
+       { "on",  1,  BOND_VALFLAG_DEFAULT},
+       { NULL,  -1, 0}
+};
+
 static const struct bond_opt_value bond_lacp_rate_tbl[] = {
        { "slow", AD_LACP_SLOW, 0},
        { "fast", AD_LACP_FAST, 0},
@@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
                .values = bond_intmax_tbl,
                .set = bond_option_updelay_set
        },
+       [BOND_OPT_LACP_ACTIVE] = {
+               .id = BOND_OPT_LACP_ACTIVE,
+               .name = "lacp_active",
+               .desc = "Send LACPDU frames with configured lacp rate, or act as 'speak when spoken to'",
+               .flags = BOND_OPTFLAG_IFDOWN,
+               .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
+               .values = bond_lacp_active,
+               .set = bond_option_lacp_active_set
+       },
        [BOND_OPT_LACP_RATE] = {
                .id = BOND_OPT_LACP_RATE,
                .name = "lacp_rate",
@@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond,
        return 0;
 }
 
+static int bond_option_lacp_active_set(struct bonding *bond,
+                                      const struct bond_opt_value *newval)
+{
+       netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n",
+                  newval->string, newval->value);
+       bond->params.lacp_active = newval->value;
+
+       return 0;
+}
+
 static int bond_option_lacp_rate_set(struct bonding *bond,
                                     const struct bond_opt_value *newval)
 {
index 0fb1da3..f3e3bfd 100644 (file)
@@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq)
                struct ad_info ad_info;
 
                seq_puts(seq, "\n802.3ad info\n");
+               seq_printf(seq, "LACP active: %s\n",
+                          (bond->params.lacp_active) ? "on" : "off");
                seq_printf(seq, "LACP rate: %s\n",
                           (bond->params.lacp_fast) ? "fast" : "slow");
                seq_printf(seq, "Min links: %d\n", bond->params.min_links);
index 5f9e9a2..b9e9842 100644 (file)
@@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d,
 static DEVICE_ATTR(peer_notif_delay, 0644,
                   bonding_show_peer_notif_delay, bonding_sysfs_store_option);
 
-/* Show the LACP interval. */
-static ssize_t bonding_show_lacp(struct device *d,
-                                struct device_attribute *attr,
-                                char *buf)
+/* Show the LACP activity and interval. */
+static ssize_t bonding_show_lacp_active(struct device *d,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct bonding *bond = to_bond(d);
+       const struct bond_opt_value *val;
+
+       val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active);
+
+       return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active);
+}
+static DEVICE_ATTR(lacp_active, 0644,
+                  bonding_show_lacp_active, bonding_sysfs_store_option);
+
+static ssize_t bonding_show_lacp_rate(struct device *d,
+                                     struct device_attribute *attr,
+                                     char *buf)
 {
        struct bonding *bond = to_bond(d);
        const struct bond_opt_value *val;
@@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d,
        return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
 }
 static DEVICE_ATTR(lacp_rate, 0644,
-                  bonding_show_lacp, bonding_sysfs_store_option);
+                  bonding_show_lacp_rate, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_min_links(struct device *d,
                                      struct device_attribute *attr,
@@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = {
        &dev_attr_downdelay.attr,
        &dev_attr_updelay.attr,
        &dev_attr_peer_notif_delay.attr,
+       &dev_attr_lacp_active.attr,
        &dev_attr_lacp_rate.attr,
        &dev_attr_ad_select.attr,
        &dev_attr_xmit_hash_policy.attr,
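With the attribute wired into per_bond_attrs, the option is also reachable at /sys/class/net/<bond>/bonding/lacp_active; reads yield the "<name> <value>" pair produced by the sprintf() above (e.g. "on 1"), and writes funnel through bonding_sysfs_store_option() into the same __bond_opt_set() path as netlink.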
index 54ffb79..7734229 100644 (file)
@@ -649,7 +649,7 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv)
 
 static int flexcan_clks_enable(const struct flexcan_priv *priv)
 {
-       int err;
+       int err = 0;
 
        if (priv->clk_ipg) {
                err = clk_prepare_enable(priv->clk_ipg);
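Both clocks are optional on this hardware; if neither is present, the function previously returned whatever happened to be in the uninitialized err. Starting it at 0 makes the no-clock case a successful no-op.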
index dd17b8c..89d9c98 100644 (file)
@@ -218,7 +218,7 @@ static int hi3110_spi_trans(struct spi_device *spi, int len)
        return ret;
 }
 
-static u8 hi3110_cmd(struct spi_device *spi, u8 command)
+static int hi3110_cmd(struct spi_device *spi, u8 command)
 {
        struct hi3110_priv *priv = spi_get_drvdata(spi);
 
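The widened return type matters because hi3110_cmd() hands back the result of the SPI transfer: as a u8, negative error codes were truncated into meaningless small positive values that callers could not detect.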
index 2b1e575..6c369a3 100644 (file)
@@ -2304,6 +2304,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
                   err, priv->regs_status.intf);
        mcp251xfd_dump(priv);
        mcp251xfd_chip_interrupts_disable(priv);
+       mcp251xfd_timestamp_stop(priv);
 
        return handled;
 }
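Stopping the timestamping worker on this fatal path keeps it from polling the chip after interrupts have been disabled.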
index 0a37af4..2b5302e 100644 (file)
@@ -255,6 +255,8 @@ struct ems_usb {
        unsigned int free_slots; /* remember number of available slots */
 
        struct ems_cpc_msg active_params; /* active controller parameters */
+       void *rxbuf[MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 static void ems_usb_read_interrupt_callback(struct urb *urb)
@@ -587,6 +589,7 @@ static int ems_usb_start(struct ems_usb *dev)
        for (i = 0; i < MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf = NULL;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -596,7 +599,7 @@ static int ems_usb_start(struct ems_usb *dev)
                }
 
                buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-                                        &urb->transfer_dma);
+                                        &buf_dma);
                if (!buf) {
                        netdev_err(netdev, "No memory left for USB buffer\n");
                        usb_free_urb(urb);
@@ -604,6 +607,8 @@ static int ems_usb_start(struct ems_usb *dev)
                        break;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
                                  buf, RX_BUFFER_SIZE,
                                  ems_usb_read_bulk_callback, dev);
@@ -619,6 +624,9 @@ static int ems_usb_start(struct ems_usb *dev)
                        break;
                }
 
+               dev->rxbuf[i] = buf;
+               dev->rxbuf_dma[i] = buf_dma;
+
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
        }
@@ -684,6 +692,10 @@ static void unlink_all_urbs(struct ems_usb *dev)
 
        usb_kill_anchored_urbs(&dev->rx_submitted);
 
+       for (i = 0; i < MAX_RX_URBS; ++i)
+               usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+                                 dev->rxbuf[i], dev->rxbuf_dma[i]);
+
        usb_kill_anchored_urbs(&dev->tx_submitted);
        atomic_set(&dev->active_tx_urbs, 0);
 
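The same leak is fixed identically in esd_usb2, mcba_usb and usb_8dev below: usb_kill_anchored_urbs() only cancels the URBs, so each coherent RX buffer and its DMA handle must be remembered at allocation time (the new rxbuf[]/rxbuf_dma[] arrays) and handed back to usb_free_coherent() at teardown, since the URBs that once carried transfer_dma were already freed right after submission.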
index 60f3e0c..7370981 100644 (file)
@@ -195,6 +195,8 @@ struct esd_usb2 {
        int net_count;
        u32 version;
        int rxinitdone;
+       void *rxbuf[MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 struct esd_usb2_net_priv {
@@ -545,6 +547,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
        for (i = 0; i < MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf = NULL;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -554,7 +557,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
                }
 
                buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-                                        &urb->transfer_dma);
+                                        &buf_dma);
                if (!buf) {
                        dev_warn(dev->udev->dev.parent,
                                 "No memory left for USB buffer\n");
@@ -562,6 +565,8 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
                        goto freeurb;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, dev->udev,
                                  usb_rcvbulkpipe(dev->udev, 1),
                                  buf, RX_BUFFER_SIZE,
@@ -574,8 +579,12 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
                        usb_unanchor_urb(urb);
                        usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf,
                                          urb->transfer_dma);
+                       goto freeurb;
                }
 
+               dev->rxbuf[i] = buf;
+               dev->rxbuf_dma[i] = buf_dma;
+
 freeurb:
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
@@ -663,6 +672,11 @@ static void unlink_all_urbs(struct esd_usb2 *dev)
        int i, j;
 
        usb_kill_anchored_urbs(&dev->rx_submitted);
+
+       for (i = 0; i < MAX_RX_URBS; ++i)
+               usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+                                 dev->rxbuf[i], dev->rxbuf_dma[i]);
+
        for (i = 0; i < dev->net_count; i++) {
                priv = dev->nets[i];
                if (priv) {
index a45865b..a1a154c 100644 (file)
@@ -653,6 +653,8 @@ static int mcba_usb_start(struct mcba_priv *priv)
                        break;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, priv->udev,
                                  usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN),
                                  buf, MCBA_USB_RX_BUFF_SIZE,
index e36e60c..837b3fe 100644 (file)
 #define PCAN_USB_BERR_MASK     (PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR)
 
 /* identify bus event packets with rx/tx error counters */
-#define PCAN_USB_ERR_CNT               0x80
+#define PCAN_USB_ERR_CNT_DEC           0x00    /* counters are decreasing */
+#define PCAN_USB_ERR_CNT_INC           0x80    /* counters are increasing */
 
 /* private to PCAN-USB adapter */
 struct pcan_usb {
@@ -535,11 +536,12 @@ static int pcan_usb_handle_bus_evt(struct pcan_usb_msg_context *mc, u8 ir)
 
        /* according to the content of the packet */
        switch (ir) {
-       case PCAN_USB_ERR_CNT:
+       case PCAN_USB_ERR_CNT_DEC:
+       case PCAN_USB_ERR_CNT_INC:
 
                /* save rx/tx error counters in the device context */
-               pdev->bec.rxerr = mc->ptr[0];
-               pdev->bec.txerr = mc->ptr[1];
+               pdev->bec.rxerr = mc->ptr[1];
+               pdev->bec.txerr = mc->ptr[2];
                break;
 
        default:
index b6e7ef0..d1b83bd 100644 (file)
@@ -137,7 +137,8 @@ struct usb_8dev_priv {
        u8 *cmd_msg_buffer;
 
        struct mutex usb_8dev_cmd_lock;
-
+       void *rxbuf[MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 /* tx frame */
@@ -733,6 +734,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
        for (i = 0; i < MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -742,7 +744,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
                }
 
                buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-                                        &urb->transfer_dma);
+                                        &buf_dma);
                if (!buf) {
                        netdev_err(netdev, "No memory left for USB buffer\n");
                        usb_free_urb(urb);
@@ -750,6 +752,8 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
                        break;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, priv->udev,
                                  usb_rcvbulkpipe(priv->udev,
                                                  USB_8DEV_ENDP_DATA_RX),
@@ -767,6 +771,9 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
                        break;
                }
 
+               priv->rxbuf[i] = buf;
+               priv->rxbuf_dma[i] = buf_dma;
+
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
        }
@@ -836,6 +843,10 @@ static void unlink_all_urbs(struct usb_8dev_priv *priv)
 
        usb_kill_anchored_urbs(&priv->rx_submitted);
 
+       for (i = 0; i < MAX_RX_URBS; ++i)
+               usb_free_coherent(priv->udev, RX_BUFFER_SIZE,
+                                 priv->rxbuf[i], priv->rxbuf_dma[i]);
+
        usb_kill_anchored_urbs(&priv->tx_submitted);
        atomic_set(&priv->active_tx_urbs, 0);
 
index b23e348..bd1417a 100644 (file)
@@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port,
 }
 EXPORT_SYMBOL(b53_br_flags);
 
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                   struct netlink_ext_ack *extack)
-{
-       b53_port_set_mcast_flood(ds->priv, port, mrouter);
-
-       return 0;
-}
-EXPORT_SYMBOL(b53_set_mrouter);
-
 static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
 {
        /* Broadcom switches will accept enabling Broadcom tags on the
@@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
        .port_bridge_leave      = b53_br_leave,
        .port_pre_bridge_flags  = b53_br_flags_pre,
        .port_bridge_flags      = b53_br_flags,
-       .port_set_mrouter       = b53_set_mrouter,
        .port_stp_state_set     = b53_br_set_stp_state,
        .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
index 82700a5..9bf8319 100644 (file)
@@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port,
 int b53_br_flags(struct dsa_switch *ds, int port,
                 struct switchdev_brport_flags flags,
                 struct netlink_ext_ack *extack);
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                   struct netlink_ext_ack *extack);
 int b53_setup_devlink_resources(struct dsa_switch *ds);
 void b53_port_event(struct dsa_switch *ds, int port);
 void b53_phylink_validate(struct dsa_switch *ds, int port,
index 3b018fc..6ce9ec1 100644 (file)
@@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
        .port_pre_bridge_flags  = b53_br_flags_pre,
        .port_bridge_flags      = b53_br_flags,
        .port_stp_state_set     = b53_br_set_stp_state,
-       .port_set_mrouter       = b53_set_mrouter,
        .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
        .port_vlan_add          = b53_vlan_add,
index 69f21b7..53e6150 100644 (file)
@@ -366,8 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
        int i;
 
        reg[1] |= vid & CVID_MASK;
-       if (vid > 1)
-               reg[1] |= ATA2_IVL;
+       reg[1] |= ATA2_IVL;
+       reg[1] |= ATA2_FID(FID_BRIDGED);
        reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER;
        reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP;
        /* STATIC_ENT indicates that the entry is static and wouldn't
@@ -1021,6 +1021,10 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
        mt7530_write(priv, MT7530_PCR_P(port),
                     PCR_MATRIX(dsa_user_ports(priv->ds)));
 
+       /* Set to fallback mode for independent VLAN learning */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_FALLBACK_MODE);
+
        return 0;
 }
 
@@ -1143,7 +1147,8 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state)
                break;
        }
 
-       mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state);
+       mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED),
+                  FID_PST(FID_BRIDGED, stp_state));
 }
 
 static int
@@ -1184,18 +1189,6 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port,
        return 0;
 }
 
-static int
-mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                       struct netlink_ext_ack *extack)
-{
-       struct mt7530_priv *priv = ds->priv;
-
-       mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
-                  mrouter ? UNM_FFP(BIT(port)) : 0);
-
-       return 0;
-}
-
 static int
 mt7530_port_bridge_join(struct dsa_switch *ds, int port,
                        struct net_device *bridge)
@@ -1229,6 +1222,10 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
                           PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap));
        priv->ports[port].pm |= PCR_MATRIX(port_bitmap);
 
+       /* Set to fallback mode for independent VLAN learning */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_FALLBACK_MODE);
+
        mutex_unlock(&priv->reg_mutex);
 
        return 0;
@@ -1241,15 +1238,22 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
        bool all_user_ports_removed = true;
        int i;
 
-       /* When a port is removed from the bridge, the port would be set up
-        * back to the default as is at initial boot which is a VLAN-unaware
-        * port.
+       /* This is called after .port_bridge_leave when leaving a VLAN-aware
+        * bridge. Don't set standalone ports to fallback mode.
         */
-       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
-                  MT7530_PORT_MATRIX_MODE);
-       mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
+       if (dsa_to_port(ds, port)->bridge_dev)
+               mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                          MT7530_PORT_FALLBACK_MODE);
+
+       mt7530_rmw(priv, MT7530_PVC_P(port),
+                  VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK,
                   VLAN_ATTR(MT7530_VLAN_TRANSPARENT) |
-                  PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
+                  PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) |
+                  MT7530_VLAN_ACC_ALL);
+
+       /* Set PVID to 0 */
+       mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                  G0_PORT_VID_DEF);
 
        for (i = 0; i < MT7530_NUM_PORTS; i++) {
                if (dsa_is_user_port(ds, i) &&
@@ -1276,15 +1280,19 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
        struct mt7530_priv *priv = ds->priv;
 
        /* Trapped into security mode allows packet forwarding through VLAN
-        * table lookup. CPU port is set to fallback mode to let untagged
-        * frames pass through.
+        * table lookup.
         */
-       if (dsa_is_cpu_port(ds, port))
-               mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
-                          MT7530_PORT_FALLBACK_MODE);
-       else
+       if (dsa_is_user_port(ds, port)) {
                mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
                           MT7530_PORT_SECURITY_MODE);
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID(priv->ports[port].pvid));
+
+               /* Only accept tagged frames if PVID is not set */
+               if (!priv->ports[port].pvid)
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+       }
 
        /* Set the port as a user port which is to be able to recognize VID
         * from incoming packets before fetching entry within the VLAN table.
@@ -1329,6 +1337,13 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
                           PCR_MATRIX(BIT(MT7530_CPU_PORT)));
        priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
 
+       /* When a port is removed from the bridge, the port is set back
+        * to the default state it had at initial boot, which is a
+        * VLAN-unaware port.
+        */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_MATRIX_MODE);
+
        mutex_unlock(&priv->reg_mutex);
 }
 
@@ -1511,7 +1526,8 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv,
        /* Validate the entry with independent learning, create egress tag per
         * VLAN and joining the port as one of the port members.
         */
-       val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID;
+       val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) |
+             VLAN_VALID;
        mt7530_write(priv, MT7530_VAWD1, val);
 
        /* Decide whether adding tag or not for those outgoing packets from the
@@ -1601,9 +1617,28 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port,
        mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add);
 
        if (pvid) {
-               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
-                          G0_PORT_VID(vlan->vid));
                priv->ports[port].pvid = vlan->vid;
+
+               /* Accept all frames if PVID is set */
+               mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                          MT7530_VLAN_ACC_ALL);
+
+               /* Only configure PVID if VLAN filtering is enabled */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PPBV1_P(port),
+                                  G0_PORT_VID_MASK,
+                                  G0_PORT_VID(vlan->vid));
+       } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) {
+               /* This VLAN is overwritten without PVID, so unset it */
+               priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+               /* Only accept tagged frames if the port is VLAN-aware */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID_DEF);
        }
 
        mutex_unlock(&priv->reg_mutex);
@@ -1617,11 +1652,9 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
 {
        struct mt7530_hw_vlan_entry target_entry;
        struct mt7530_priv *priv = ds->priv;
-       u16 pvid;
 
        mutex_lock(&priv->reg_mutex);
 
-       pvid = priv->ports[port].pvid;
        mt7530_hw_vlan_entry_init(&target_entry, port, 0);
        mt7530_hw_vlan_update(priv, vlan->vid, &target_entry,
                              mt7530_hw_vlan_del);
@@ -1629,11 +1662,18 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
        /* PVID is being restored to the default whenever the PVID port
         * is being removed from the VLAN.
         */
-       if (pvid == vlan->vid)
-               pvid = G0_PORT_VID_DEF;
+       if (priv->ports[port].pvid == vlan->vid) {
+               priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+               /* Only accept tagged frames if the port is VLAN-aware */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID_DEF);
+       }
 
-       mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid);
-       priv->ports[port].pvid = pvid;
 
        mutex_unlock(&priv->reg_mutex);
 
@@ -1717,15 +1757,7 @@ static enum dsa_tag_protocol
 mtk_get_tag_protocol(struct dsa_switch *ds, int port,
                     enum dsa_tag_protocol mp)
 {
-       struct mt7530_priv *priv = ds->priv;
-
-       if (port != MT7530_CPU_PORT) {
-               dev_warn(priv->dev,
-                        "port not matched with tagging CPU port\n");
-               return DSA_TAG_PROTO_NONE;
-       } else {
-               return DSA_TAG_PROTO_MTK;
-       }
+       return DSA_TAG_PROTO_MTK;
 }
 
 #ifdef CONFIG_GPIOLIB
@@ -2054,6 +2086,7 @@ mt7530_setup(struct dsa_switch *ds)
         * as two netdev instances.
         */
        dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
+       ds->assisted_learning_on_cpu_port = true;
        ds->mtu_enforcement_ingress = true;
 
        if (priv->id == ID_MT7530) {
@@ -2124,6 +2157,9 @@ mt7530_setup(struct dsa_switch *ds)
                mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
                           PCR_MATRIX_CLR);
 
+               /* Disable learning by default on all ports */
+               mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
                if (dsa_is_cpu_port(ds, i)) {
                        ret = mt753x_cpu_port_enable(ds, i);
                        if (ret)
@@ -2131,8 +2167,9 @@ mt7530_setup(struct dsa_switch *ds)
                } else {
                        mt7530_port_disable(ds, i);
 
-                       /* Disable learning by default on all user ports */
-                       mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+                       /* Set default PVID to 0 on all user ports */
+                       mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+                                  G0_PORT_VID_DEF);
                }
                /* Enable consistent egress tag */
                mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
@@ -2289,6 +2326,9 @@ mt7531_setup(struct dsa_switch *ds)
                mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
                           PCR_MATRIX_CLR);
 
+               /* Disable learning by default on all ports */
+               mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
                mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
 
                if (dsa_is_cpu_port(ds, i)) {
@@ -2298,8 +2338,9 @@ mt7531_setup(struct dsa_switch *ds)
                } else {
                        mt7530_port_disable(ds, i);
 
-                       /* Disable learning by default on all user ports */
-                       mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+                       /* Set default PVID to 0 on all user ports */
+                       mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+                                  G0_PORT_VID_DEF);
                }
 
                /* Enable consistent egress tag */
@@ -2307,6 +2348,7 @@ mt7531_setup(struct dsa_switch *ds)
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       ds->assisted_learning_on_cpu_port = true;
        ds->mtu_enforcement_ingress = true;
 
        /* Flush the FDB table */
@@ -3060,7 +3102,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
        .port_stp_state_set     = mt7530_stp_state_set,
        .port_pre_bridge_flags  = mt7530_port_pre_bridge_flags,
        .port_bridge_flags      = mt7530_port_bridge_flags,
-       .port_set_mrouter       = mt7530_port_set_mrouter,
        .port_bridge_join       = mt7530_port_bridge_join,
        .port_bridge_leave      = mt7530_port_bridge_leave,
        .port_fdb_add           = mt7530_port_fdb_add,
index b19b389..fe4cd2a 100644 (file)
@@ -80,6 +80,7 @@ enum mt753x_bpdu_port_fw {
 #define  STATIC_ENT                    3
 #define MT7530_ATA2                    0x78
 #define  ATA2_IVL                      BIT(15)
+#define  ATA2_FID(x)                   (((x) & 0x7) << 12)
 
 /* Register for address table write data */
 #define MT7530_ATWD                    0x7c
@@ -148,11 +149,18 @@ enum mt7530_vlan_cmd {
 #define  VTAG_EN                       BIT(28)
 /* VLAN Member Control */
 #define  PORT_MEM(x)                   (((x) & 0xff) << 16)
+/* Filter ID */
+#define  FID(x)                                (((x) & 0x7) << 1)
 /* VLAN Entry Valid */
 #define  VLAN_VALID                    BIT(0)
 #define  PORT_MEM_SHFT                 16
 #define  PORT_MEM_MASK                 0xff
 
+enum mt7530_fid {
+       FID_STANDALONE = 0,
+       FID_BRIDGED = 1,
+};
+
 #define MT7530_VAWD2                   0x98
 /* Egress Tag Control */
 #define  ETAG_CTRL_P(p, x)             (((x) & 0x3) << ((p) << 1))
@@ -179,8 +187,8 @@ enum mt7530_vlan_egress_attr {
 
 /* Register for port STP state control */
 #define MT7530_SSP_P(x)                        (0x2000 + ((x) * 0x100))
-#define  FID_PST(x)                    ((x) & 0x3)
-#define  FID_PST_MASK                  FID_PST(0x3)
+#define  FID_PST(fid, state)           (((state) & 0x3) << ((fid) * 2))
+#define  FID_PST_MASK(fid)             FID_PST(fid, 0x3)
 
 enum mt7530_stp_state {
        MT7530_STP_DISABLED = 0,
@@ -230,6 +238,7 @@ enum mt7530_port_mode {
 #define  PVC_EG_TAG_MASK               PVC_EG_TAG(7)
 #define  VLAN_ATTR(x)                  (((x) & 0x3) << 6)
 #define  VLAN_ATTR_MASK                        VLAN_ATTR(3)
+#define  ACC_FRM_MASK                  GENMASK(1, 0)
 
 enum mt7530_vlan_port_eg_tag {
        MT7530_VLAN_EG_DISABLED = 0,
@@ -241,13 +250,19 @@ enum mt7530_vlan_port_attr {
        MT7530_VLAN_TRANSPARENT = 3,
 };
 
+enum mt7530_vlan_port_acc_frm {
+       MT7530_VLAN_ACC_ALL = 0,
+       MT7530_VLAN_ACC_TAGGED = 1,
+       MT7530_VLAN_ACC_UNTAGGED = 2,
+};
+
 #define  STAG_VPID                     (((x) & 0xffff) << 16)
 
 /* Register for port port-and-protocol based vlan 1 control */
 #define MT7530_PPBV1_P(x)              (0x2014 + ((x) * 0x100))
 #define  G0_PORT_VID(x)                        (((x) & 0xfff) << 0)
 #define  G0_PORT_VID_MASK              G0_PORT_VID(0xfff)
-#define  G0_PORT_VID_DEF               G0_PORT_VID(1)
+#define  G0_PORT_VID_DEF               G0_PORT_VID(0)
 
 /* Register for port MAC control register */
 #define MT7530_PMCR_P(x)               (0x3000 + ((x) * 0x100))
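The reworked FID_PST()/FID_PST_MASK() pack one 2-bit STP state per filter ID into the SSP register, so the bridged FID can hold a different state than the standalone one. A self-contained illustration of the packing, using the definitions above (the state value 3 is just an example):

#include <stdio.h>

#define FID_PST(fid, state)	(((state) & 0x3) << ((fid) * 2))
#define FID_PST_MASK(fid)	FID_PST(fid, 0x3)

enum { FID_STANDALONE = 0, FID_BRIDGED = 1 };

int main(void)
{
	unsigned int val = FID_PST(FID_BRIDGED, 3);
	unsigned int mask = FID_PST_MASK(FID_BRIDGED);

	/* Prints val=0xc mask=0xc: bits 3:2 carry the bridged FID's
	 * state, while bits 1:0 (the standalone FID) are untouched by
	 * the read-modify-write.
	 */
	printf("val=%#x mask=%#x\n", val, mask);
	return 0;
}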
index af764b8..c45ca24 100644 (file)
@@ -2176,7 +2176,7 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip,
        int i, err;
 
        if (!vid)
-               return -EOPNOTSUPP;
+               return 0;
 
        err = mv88e6xxx_vtu_get(chip, vid, &vlan);
        if (err)
@@ -5797,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
                                       struct netlink_ext_ack *extack)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       bool do_fast_age = false;
        int err = -EOPNOTSUPP;
 
        mv88e6xxx_reg_lock(chip);
@@ -5809,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
                err = mv88e6xxx_port_set_assoc_vector(chip, port, pav);
                if (err)
                        goto out;
-
-               if (!learning)
-                       do_fast_age = true;
        }
 
        if (flags.mask & BR_FLOOD) {
@@ -5843,26 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 out:
        mv88e6xxx_reg_unlock(chip);
 
-       if (do_fast_age)
-               mv88e6xxx_port_fast_age(ds, port);
-
-       return err;
-}
-
-static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port,
-                                     bool mrouter,
-                                     struct netlink_ext_ack *extack)
-{
-       struct mv88e6xxx_chip *chip = ds->priv;
-       int err;
-
-       if (!chip->info->ops->port_set_mcast_flood)
-               return -EOPNOTSUPP;
-
-       mv88e6xxx_reg_lock(chip);
-       err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter);
-       mv88e6xxx_reg_unlock(chip);
-
        return err;
 }
 
@@ -6167,7 +6143,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_bridge_leave      = mv88e6xxx_port_bridge_leave,
        .port_pre_bridge_flags  = mv88e6xxx_port_pre_bridge_flags,
        .port_bridge_flags      = mv88e6xxx_port_bridge_flags,
-       .port_set_mrouter       = mv88e6xxx_port_set_mrouter,
        .port_stp_state_set     = mv88e6xxx_port_stp_state_set,
        .port_fast_age          = mv88e6xxx_port_fast_age,
        .port_vlan_filtering    = mv88e6xxx_port_vlan_filtering,
index ca2ad77..6686192 100644 (file)
@@ -837,16 +837,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val)
                return 0;
        }
 
-       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
+       /* In case of this switch we work with 32bit registers on top of 16bit
+        * bus. Some registers (for example access to forwarding database) have
+        * trigger bit on the first 16bit half of request, the result and
+        * configuration of request in the second half.
+        * To make it work properly, we should do the second part of transfer
+        * before the first one is done.
+        */
+       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
+                                 val >> 16);
        if (ret < 0)
                goto error;
 
-       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
-                                 val >> 16);
+       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
        if (ret < 0)
                goto error;
 
        return 0;
+
 error:
        dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n");
        return ret;
index 9cd7dbd..2e899c9 100644 (file)
@@ -233,7 +233,6 @@ struct sja1105_private {
        phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS];
        bool fixed_link[SJA1105_MAX_NUM_PORTS];
        bool vlan_aware;
-       unsigned long learn_ena;
        unsigned long ucast_egress_floods;
        unsigned long bcast_egress_floods;
        const struct sja1105_info *info;
index bd3ad18..f2049f5 100644 (file)
@@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
                        hostcmd = SJA1105_HOSTCMD_INVALIDATE;
        }
        sja1105_packing(p, &hostcmd, 25, 23, size, op);
+}
+
+static void
+sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+                                 enum packing_op op)
+{
+       int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
+
+       sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
 
        /* Hack - The hardware takes the 'index' field within
         * struct sja1105_l2_lookup_entry as the index on which this command
@@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
         * such that our API doesn't need to ask for a full-blown entry
         * structure when e.g. a delete is requested.
         */
-       sja1105_packing(buf, &cmd->index, 15, 6,
-                       SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op);
-}
-
-static void
-sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
-                                 enum packing_op op)
-{
-       int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
-
-       return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+       sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op);
 }
 
 static void
 sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
                              enum packing_op op)
 {
-       int size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+       int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+
+       sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
 
-       return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+       sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op);
 }
 
 /* The switch is so quirky that it makes our command/entry abstraction
index 5ab1676..6a52db1 100644 (file)
@@ -176,7 +176,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
        struct sja1105_mac_config_entry *mac;
        struct dsa_switch *ds = priv->ds;
        struct sja1105_table *table;
-       int i;
+       struct dsa_port *dp;
 
        table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG];
 
@@ -195,14 +195,21 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 
        mac = table->entries;
 
-       for (i = 0; i < ds->num_ports; i++) {
-               mac[i] = default_mac;
+       list_for_each_entry(dp, &ds->dst->ports, list) {
+               if (dp->ds != ds)
+                       continue;
+
+               mac[dp->index] = default_mac;
 
                /* Let sja1105_bridge_stp_state_set() keep address learning
-                * enabled for the CPU port.
+                * enabled for the DSA ports. CPU ports use software-assisted
+                * learning to ensure that only FDB entries belonging to the
+                * bridge are learned, and that they are learned towards all
+                * CPU ports in a cross-chip topology if multiple CPU ports
+                * exist.
                 */
-               if (dsa_is_cpu_port(ds, i))
-                       priv->learn_ena |= BIT(i);
+               if (dsa_port_is_dsa(dp))
+                       dp->learning = true;
        }
 
        return 0;
@@ -460,7 +467,7 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
                pvid.vlan_bc |= BIT(port);
                pvid.tag_port &= ~BIT(port);
 
-               if (dsa_is_cpu_port(ds, port)) {
+               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
                        priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN;
                        priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN;
                }
@@ -474,8 +481,11 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 {
        struct sja1105_l2_forwarding_entry *l2fwd;
        struct dsa_switch *ds = priv->ds;
+       struct dsa_switch_tree *dst;
        struct sja1105_table *table;
-       int i, j;
+       struct dsa_link *dl;
+       int port, tc;
+       int from, to;
 
        table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING];
 
@@ -493,47 +503,109 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 
        l2fwd = table->entries;
 
-       /* First 5 entries define the forwarding rules */
-       for (i = 0; i < ds->num_ports; i++) {
-               unsigned int upstream = dsa_upstream_port(priv->ds, i);
+       /* First 5 entries in the L2 Forwarding Table define the forwarding
+        * rules and the VLAN PCP to ingress queue mapping.
+        * Set up the ingress queue mapping first.
+        */
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_is_unused_port(ds, port))
+                       continue;
+
+               for (tc = 0; tc < SJA1105_NUM_TC; tc++)
+                       l2fwd[port].vlan_pmap[tc] = tc;
+       }
 
-               if (dsa_is_unused_port(ds, i))
+       /* Then manage the forwarding domain for user ports. These can forward
+        * only to the always-on domain (CPU port and DSA links)
+        */
+       for (from = 0; from < ds->num_ports; from++) {
+               if (!dsa_is_user_port(ds, from))
                        continue;
 
-               for (j = 0; j < SJA1105_NUM_TC; j++)
-                       l2fwd[i].vlan_pmap[j] = j;
+               for (to = 0; to < ds->num_ports; to++) {
+                       if (!dsa_is_cpu_port(ds, to) &&
+                           !dsa_is_dsa_port(ds, to))
+                               continue;
 
-               /* All ports start up with egress flooding enabled,
-                * including the CPU port.
-                */
-               priv->ucast_egress_floods |= BIT(i);
-               priv->bcast_egress_floods |= BIT(i);
+                       l2fwd[from].bc_domain |= BIT(to);
+                       l2fwd[from].fl_domain |= BIT(to);
 
-               if (i == upstream)
+                       sja1105_port_allow_traffic(l2fwd, from, to, true);
+               }
+       }
+
+       /* Then manage the forwarding domain for DSA links and CPU ports (the
+        * always-on domain). These can send packets to any enabled port except
+        * themselves.
+        */
+       for (from = 0; from < ds->num_ports; from++) {
+               if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from))
                        continue;
 
-               sja1105_port_allow_traffic(l2fwd, i, upstream, true);
-               sja1105_port_allow_traffic(l2fwd, upstream, i, true);
+               for (to = 0; to < ds->num_ports; to++) {
+                       if (dsa_is_unused_port(ds, to))
+                               continue;
 
-               l2fwd[i].bc_domain = BIT(upstream);
-               l2fwd[i].fl_domain = BIT(upstream);
+                       if (from == to)
+                               continue;
+
+                       l2fwd[from].bc_domain |= BIT(to);
+                       l2fwd[from].fl_domain |= BIT(to);
+
+                       sja1105_port_allow_traffic(l2fwd, from, to, true);
+               }
+       }
+
+       /* In odd topologies ("H" connections where there is a DSA link to
+        * another switch which also has its own CPU port), TX packets can loop
+        * back into the system (they are flooded from CPU port 1 to the DSA
+        * link, and from there to CPU port 2). Prevent this from happening by
+        * cutting RX from DSA links towards our CPU port, if the remote switch
+        * has its own CPU port and therefore doesn't need ours for network
+        * stack termination.
+        */
+       dst = ds->dst;
+
+       list_for_each_entry(dl, &dst->rtable, list) {
+               if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp)
+                       continue;
+
+               from = dl->dp->index;
+               to = dsa_upstream_port(ds, from);
+
+               dev_warn(ds->dev,
+                        "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n",
+                        from, to);
+
+               sja1105_port_allow_traffic(l2fwd, from, to, false);
 
-               l2fwd[upstream].bc_domain |= BIT(i);
-               l2fwd[upstream].fl_domain |= BIT(i);
+               l2fwd[from].bc_domain &= ~BIT(to);
+               l2fwd[from].fl_domain &= ~BIT(to);
+       }
+
+       /* Finally, manage the egress flooding domain. All ports start up with
+        * flooding enabled, including the CPU port and DSA links.
+        */
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_is_unused_port(ds, port))
+                       continue;
+
+               priv->ucast_egress_floods |= BIT(port);
+               priv->bcast_egress_floods |= BIT(port);
        }
 
        /* Next 8 entries define VLAN PCP mapping from ingress to egress.
         * Create a one-to-one mapping.
         */
-       for (i = 0; i < SJA1105_NUM_TC; i++) {
-               for (j = 0; j < ds->num_ports; j++) {
-                       if (dsa_is_unused_port(ds, j))
+       for (tc = 0; tc < SJA1105_NUM_TC; tc++) {
+               for (port = 0; port < ds->num_ports; port++) {
+                       if (dsa_is_unused_port(ds, port))
                                continue;
 
-                       l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
+                       l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc;
                }
 
-               l2fwd[ds->num_ports + i].type_egrpcp2outputq = true;
+               l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true;
        }
 
        return 0;
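A toy, self-contained rendering of the first two loops for the simplest case, a single switch with no DSA links, assuming five ports with port 4 as the CPU port (hypothetical numbers):

#include <stdio.h>

#define NUM_PORTS	5
#define CPU_PORT	4

int main(void)
{
	unsigned int bc_domain[NUM_PORTS] = { 0 };
	int from, to;

	/* User ports may only flood towards the always-on domain (here
	 * just the CPU port); the CPU port may flood to everyone but
	 * itself.
	 */
	for (from = 0; from < NUM_PORTS; from++)
		for (to = 0; to < NUM_PORTS; to++) {
			if (from == to)
				continue;
			if (from == CPU_PORT || to == CPU_PORT)
				bc_domain[from] |= 1u << to;
		}

	/* Prints 0x10 for ports 0-3 and 0xf for port 4 */
	for (from = 0; from < NUM_PORTS; from++)
		printf("port %d: bc_domain %#x\n", from, bc_domain[from]);

	return 0;
}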
@@ -688,6 +760,72 @@ static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv)
        general_params->tdmaconfigidx = tdmaconfigidx;
 }
 
+static int sja1105_init_topology(struct sja1105_private *priv,
+                                struct sja1105_general_params_entry *general_params)
+{
+       struct dsa_switch *ds = priv->ds;
+       int port;
+
+       /* The host port is the destination for traffic matching mac_fltres1
+        * and mac_fltres0 on all ports except itself. Default to an invalid
+        * value.
+        */
+       general_params->host_port = ds->num_ports;
+
+       /* Link-local traffic received on casc_port will be forwarded
+        * to host_port without embedding the source port and device ID
+        * info in the destination MAC address, and no RX timestamps will be
+        * taken either (presumably because it is a cascaded port and a
+        * downstream SJA switch already did that).
+        * To disable the feature, we need to do different things depending on
+        * switch generation. On SJA1105 we need to set an invalid port, while
+        * on SJA1110 which supports multiple cascaded ports, this field is a
+        * bitmask so it must be left zero.
+        */
+       if (!priv->info->multiple_cascade_ports)
+               general_params->casc_port = ds->num_ports;
+
+       for (port = 0; port < ds->num_ports; port++) {
+               bool is_upstream = dsa_is_upstream_port(ds, port);
+               bool is_dsa_link = dsa_is_dsa_port(ds, port);
+
+               /* Upstream ports can be dedicated CPU ports or
+                * upstream-facing DSA links
+                */
+               if (is_upstream) {
+                       if (general_params->host_port == ds->num_ports) {
+                               general_params->host_port = port;
+                       } else {
+                               dev_err(ds->dev,
+                                       "Port %llu is already a host port, configuring %d as one too is not supported\n",
+                                       general_params->host_port, port);
+                               return -EINVAL;
+                       }
+               }
+
+               /* Cascade ports are downstream-facing DSA links */
+               if (is_dsa_link && !is_upstream) {
+                       if (priv->info->multiple_cascade_ports) {
+                               general_params->casc_port |= BIT(port);
+                       } else if (general_params->casc_port == ds->num_ports) {
+                               general_params->casc_port = port;
+                       } else {
+                               dev_err(ds->dev,
+                                       "Port %llu is already a cascade port, configuring %d as one too is not supported\n",
+                                       general_params->casc_port, port);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       if (general_params->host_port == ds->num_ports) {
+               dev_err(ds->dev, "No host port configured\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
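Concretely, in a two-switch daisy chain where only sw0 has a wired CPU port: on sw0 the CPU port is the lone upstream port and becomes host_port, while the DSA link towards sw1 faces downstream and becomes (or, on SJA1110, joins) casc_port; on sw1 the DSA link towards sw0 is the upstream port and therefore its host_port, with no cascade port at all.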
 static int sja1105_init_general_params(struct sja1105_private *priv)
 {
        struct sja1105_general_params_entry default_general_params = {
@@ -706,12 +844,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
                .mac_flt0    = SJA1105_LINKLOCAL_FILTER_B_MASK,
                .incl_srcpt0 = false,
                .send_meta0  = false,
-               /* The destination for traffic matching mac_fltres1 and
-                * mac_fltres0 on all ports except host_port. Such traffic
-                * receieved on host_port itself would be dropped, except
-                * by installing a temporary 'management route'
-                */
-               .host_port = priv->ds->num_ports,
                /* Default to an invalid value */
                .mirr_port = priv->ds->num_ports,
                /* No TTEthernet */
@@ -731,16 +863,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
                .header_type = ETH_P_SJA1110,
        };
        struct sja1105_general_params_entry *general_params;
-       struct dsa_switch *ds = priv->ds;
        struct sja1105_table *table;
-       int port;
+       int rc;
 
-       for (port = 0; port < ds->num_ports; port++) {
-               if (dsa_is_cpu_port(ds, port)) {
-                       default_general_params.host_port = port;
-                       break;
-               }
-       }
+       rc = sja1105_init_topology(priv, &default_general_params);
+       if (rc)
+               return rc;
 
        table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
 
@@ -763,19 +891,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 
        sja1110_select_tdmaconfigidx(priv);
 
-       /* Link-local traffic received on casc_port will be forwarded
-        * to host_port without embedding the source port and device ID
-        * info in the destination MAC address, and no RX timestamps will be
-        * taken either (presumably because it is a cascaded port and a
-        * downstream SJA switch already did that).
-        * To disable the feature, we need to do different things depending on
-        * switch generation. On SJA1105 we need to set an invalid port, while
-        * on SJA1110 which support multiple cascaded ports, this field is a
-        * bitmask so it must be left zero.
-        */
-       if (!priv->info->multiple_cascade_ports)
-               general_params->casc_port = ds->num_ports;
-
        return 0;
 }
 
@@ -903,7 +1018,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
        for (port = 0; port < ds->num_ports; port++) {
                int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
 
-               if (dsa_is_cpu_port(priv->ds, port))
+               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                        mtu += VLAN_HLEN;
 
                policing[port].smax = 65535; /* Burst size in bytes */
@@ -1372,10 +1487,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin,
 int sja1105et_fdb_add(struct dsa_switch *ds, int port,
                      const unsigned char *addr, u16 vid)
 {
-       struct sja1105_l2_lookup_entry l2_lookup = {0};
+       struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
        struct sja1105_private *priv = ds->priv;
        struct device *dev = ds->dev;
        int last_unused = -1;
+       int start, end, i;
        int bin, way, rc;
 
        bin = sja1105et_fdb_hash(priv, addr, vid);
@@ -1387,7 +1503,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
                 * mask? If yes, we need to do nothing. If not, we need
                 * to rewrite the entry by adding this port to it.
                 */
-               if (l2_lookup.destports & BIT(port))
+               if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds)
                        return 0;
                l2_lookup.destports |= BIT(port);
        } else {
@@ -1418,6 +1534,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
                                                     index, NULL, false);
                }
        }
+       l2_lookup.lockeds = true;
        l2_lookup.index = sja1105et_fdb_index(bin, way);
 
        rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
@@ -1426,6 +1543,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
        if (rc < 0)
                return rc;
 
+       /* Invalidate a dynamically learned entry if that exists */
+       start = sja1105et_fdb_index(bin, 0);
+       end = sja1105et_fdb_index(bin, way);
+
+       for (i = start; i < end; i++) {
+               rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+                                                i, &tmp);
+               if (rc == -ENOENT)
+                       continue;
+               if (rc)
+                       return rc;
+
+               if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid)
+                       continue;
+
+               rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+                                                 i, NULL, false);
+               if (rc)
+                       return rc;
+
+               break;
+       }
+
        return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
 }
 
@@ -1467,32 +1607,30 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port,
 int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
                        const unsigned char *addr, u16 vid)
 {
-       struct sja1105_l2_lookup_entry l2_lookup = {0};
+       struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
        struct sja1105_private *priv = ds->priv;
        int rc, i;
 
        /* Search for an existing entry in the FDB table */
        l2_lookup.macaddr = ether_addr_to_u64(addr);
        l2_lookup.vlanid = vid;
-       l2_lookup.iotag = SJA1105_S_TAG;
        l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-       if (priv->vlan_aware) {
-               l2_lookup.mask_vlanid = VLAN_VID_MASK;
-               l2_lookup.mask_iotag = BIT(0);
-       } else {
-               l2_lookup.mask_vlanid = 0;
-               l2_lookup.mask_iotag = 0;
-       }
+       l2_lookup.mask_vlanid = VLAN_VID_MASK;
        l2_lookup.destports = BIT(port);
 
+       tmp = l2_lookup;
+
        rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
-                                        SJA1105_SEARCH, &l2_lookup);
-       if (rc == 0) {
-               /* Found and this port is already in the entry's
+                                        SJA1105_SEARCH, &tmp);
+       if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) {
+               /* Found a static entry and this port is already in the entry's
                 * port mask => job done
                 */
-               if (l2_lookup.destports & BIT(port))
+               if ((tmp.destports & BIT(port)) && tmp.lockeds)
                        return 0;
+
+               l2_lookup = tmp;
+
                /* l2_lookup.index is populated by the switch in case it
                 * found something.
                 */
@@ -1514,16 +1652,46 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
                dev_err(ds->dev, "FDB is full, cannot add entry.\n");
                return -EINVAL;
        }
-       l2_lookup.lockeds = true;
        l2_lookup.index = i;
 
 skip_finding_an_index:
+       l2_lookup.lockeds = true;
+
        rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
                                          l2_lookup.index, &l2_lookup,
                                          true);
        if (rc < 0)
                return rc;
 
+       /* The switch learns dynamic entries and looks up the FDB left to
+        * right. It is possible that our addition was concurrent with the
+        * dynamic learning of the same address, so now that the static entry
+        * has been installed, we are certain that address learning for this
+        * particular address has been turned off, so the dynamic entry either
+        * is in the FDB at an index smaller than the static one, or isn't (it
+        * can also be at a larger index, but in that case it is inactive
+        * because the static FDB entry will match first, and the dynamic one
+        * will eventually age out). Search for a dynamically learned address
+        * prior to our static one and invalidate it.
+        */
+       tmp = l2_lookup;
+
+       rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+                                        SJA1105_SEARCH, &tmp);
+       if (rc < 0) {
+               dev_err(ds->dev,
+                       "port %d failed to read back entry for %pM vid %d: %pe\n",
+                       port, addr, vid, ERR_PTR(rc));
+               return rc;
+       }
+
+       if (tmp.index < l2_lookup.index) {
+               rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+                                                 tmp.index, NULL, false);
+               if (rc < 0)
+                       return rc;
+       }
+
        return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
 }
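
A minimal model of the lookup semantics that the comment in the hunk above depends on may help; fdb_entry and fdb_lookup below are illustrative names, not driver structures. The switch scans the table from index 0 and the first match wins, so a dynamic entry that landed at a lower index keeps shadowing the freshly written static entry until it is invalidated:

    struct fdb_entry {
            bool valid;
            bool lockeds;           /* true for static (locked) entries */
            u64  macaddr;
    };

    /* First hit wins, mirroring the left-to-right hardware search. */
    static const struct fdb_entry *fdb_lookup(const struct fdb_entry *tbl,
                                              int n, u64 mac)
    {
            int i;

            for (i = 0; i < n; i++)
                    if (tbl[i].valid && tbl[i].macaddr == mac)
                            return &tbl[i];
            return NULL;
    }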
 
@@ -1537,15 +1705,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
 
        l2_lookup.macaddr = ether_addr_to_u64(addr);
        l2_lookup.vlanid = vid;
-       l2_lookup.iotag = SJA1105_S_TAG;
        l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-       if (priv->vlan_aware) {
-               l2_lookup.mask_vlanid = VLAN_VID_MASK;
-               l2_lookup.mask_iotag = BIT(0);
-       } else {
-               l2_lookup.mask_vlanid = 0;
-               l2_lookup.mask_iotag = 0;
-       }
+       l2_lookup.mask_vlanid = VLAN_VID_MASK;
        l2_lookup.destports = BIT(port);
 
        rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
@@ -1633,6 +1794,46 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
        return 0;
 }
 
+static void sja1105_fast_age(struct dsa_switch *ds, int port)
+{
+       struct sja1105_private *priv = ds->priv;
+       int i;
+
+       for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
+               struct sja1105_l2_lookup_entry l2_lookup = {0};
+               u8 macaddr[ETH_ALEN];
+               int rc;
+
+               rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+                                                i, &l2_lookup);
+               /* No FDB entry at i, not an issue */
+               if (rc == -ENOENT)
+                       continue;
+               if (rc) {
+                       dev_err(ds->dev, "Failed to read FDB: %pe\n",
+                               ERR_PTR(rc));
+                       return;
+               }
+
+               if (!(l2_lookup.destports & BIT(port)))
+                       continue;
+
+               /* Don't delete static FDB entries */
+               if (l2_lookup.lockeds)
+                       continue;
+
+               u64_to_ether_addr(l2_lookup.macaddr, macaddr);
+
+               rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid);
+               if (rc) {
+                       dev_err(ds->dev,
+                               "Failed to delete FDB entry %pM vid %lld: %pe\n",
+                               macaddr, l2_lookup.vlanid, ERR_PTR(rc));
+                       return;
+               }
+       }
+}
+
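+
+/* For context, a hedged sketch of the DSA-core side that invokes the new
+ * hook (modeled on net/dsa/port.c; exact call sites differ between kernel
+ * versions). The core forwards bridge-triggered flushes, e.g. on STP
+ * transitions or when learning is switched off, straight to the driver:
+ *
+ *     static void dsa_port_fast_age(const struct dsa_port *dp)
+ *     {
+ *             struct dsa_switch *ds = dp->ds;
+ *
+ *             if (!ds->ops->port_fast_age)
+ *                     return;
+ *
+ *             // sja1105_fast_age() ends up here, with dp->index as port
+ *             ds->ops->port_fast_age(ds, dp->index);
+ *     }
+ */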
 static int sja1105_mdb_add(struct dsa_switch *ds, int port,
                           const struct switchdev_obj_port_mdb *mdb)
 {
@@ -1741,6 +1942,7 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port,
 static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
                                         u8 state)
 {
+       struct dsa_port *dp = dsa_to_port(ds, port);
        struct sja1105_private *priv = ds->priv;
        struct sja1105_mac_config_entry *mac;
 
@@ -1766,12 +1968,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
        case BR_STATE_LEARNING:
                mac[port].ingress   = true;
                mac[port].egress    = false;
-               mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+               mac[port].dyn_learn = dp->learning;
                break;
        case BR_STATE_FORWARDING:
                mac[port].ingress   = true;
                mac[port].egress    = true;
-               mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+               mac[port].dyn_learn = dp->learning;
                break;
        default:
                dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -2231,8 +2433,8 @@ static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port,
                return -EBUSY;
        }
 
-       /* Always install bridge VLANs as egress-tagged on the CPU port. */
-       if (dsa_is_cpu_port(ds, port))
+       /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                flags = 0;
 
        rc = sja1105_vlan_add(priv, port, vlan->vid, flags);
@@ -2401,6 +2603,7 @@ static int sja1105_setup(struct dsa_switch *ds)
        ds->num_tx_queues = SJA1105_NUM_TC;
 
        ds->mtu_enforcement_ingress = true;
+       ds->assisted_learning_on_cpu_port = true;
 
        rc = sja1105_devlink_setup(ds);
        if (rc < 0)
@@ -2585,7 +2788,7 @@ static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 
        new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN;
 
-       if (dsa_is_cpu_port(ds, port))
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                new_mtu += VLAN_HLEN;
 
        policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
@@ -2732,23 +2935,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port,
                                     bool enabled)
 {
        struct sja1105_mac_config_entry *mac;
-       int rc;
 
        mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
 
        mac[port].dyn_learn = enabled;
 
-       rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
-                                         &mac[port], true);
-       if (rc)
-               return rc;
-
-       if (enabled)
-               priv->learn_ena |= BIT(port);
-       else
-               priv->learn_ena &= ~BIT(port);
-
-       return 0;
+       return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+                                           &mac[port], true);
 }
 
 static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
@@ -2883,6 +3076,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
        .port_fdb_dump          = sja1105_fdb_dump,
        .port_fdb_add           = sja1105_fdb_add,
        .port_fdb_del           = sja1105_fdb_del,
+       .port_fast_age          = sja1105_fast_age,
        .port_bridge_join       = sja1105_bridge_join,
        .port_bridge_leave      = sja1105_bridge_leave,
        .port_pre_bridge_flags  = sja1105_port_pre_bridge_flags,
index 96cc5fc..87c906e 100644 (file)
@@ -302,7 +302,6 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev)
                return -ENOMEM;
 
        SET_NETDEV_DEV(dev, pdev);
-       netdev_boot_setup_check(dev);
 
        if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) {
                free_netdev(dev);
@@ -421,7 +420,6 @@ static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id)
                return -ENOMEM;
        }
        SET_NETDEV_DEV(dev, &pdev->dev);
-       netdev_boot_setup_check(dev);
 
        el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP);
        pnp_set_drvdata(pdev, dev);
@@ -514,7 +512,9 @@ static int el3_common_init(struct net_device *dev)
 {
        struct el3_private *lp = netdev_priv(dev);
        int err;
-       const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"};
+       static const char * const if_names[] = {
+               "10baseT", "AUI", "undefined", "BNC"
+       };
 
        spin_lock_init(&lp->lock);
 
@@ -588,7 +588,6 @@ static int el3_eisa_probe(struct device *device)
        }
 
        SET_NETDEV_DEV(dev, device);
-       netdev_boot_setup_check(dev);
 
        el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA);
        eisa_set_drvdata (edev, dev);
index 47b4215..8d90fed 100644 (file)
@@ -407,7 +407,7 @@ MODULE_PARM_DESC(max_interrupt_work, "3c515 maximum events handled per interrupt
 /* we will need locking (and refcounting) if we ever use it for more */
 static LIST_HEAD(root_corkscrew_dev);
 
-int init_module(void)
+static int corkscrew_init_module(void)
 {
        int found = 0;
        if (debug >= 0)
@@ -416,6 +416,7 @@ int init_module(void)
                found++;
        return found ? 0 : -ENODEV;
 }
+module_init(corkscrew_init_module);
 
 #else
 struct net_device *tc515_probe(int unit)
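
The 3c515 hunk above is one instance of a conversion repeated across this series: the unnamed init_module()/cleanup_module() pair becomes a pair of static, driver-prefixed functions registered through module_init()/module_exit(). A generic sketch, with foo_* as placeholder names:

    static int __init foo_init_module(void)
    {
            /* probe and register devices here */
            return 0;
    }
    module_init(foo_init_module);

    static void __exit foo_cleanup_module(void)
    {
            /* unregister and free devices here */
    }
    module_exit(foo_cleanup_module);

Besides the namespacing, the named initcalls work for both built-in and modular builds, which is what lets later hunks in this series drop their #ifdef MODULE fencing.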
index a52a374..706bd59 100644 (file)
@@ -34,6 +34,7 @@ config EL3
 config 3C515
        tristate "3c515 ISA \"Fast EtherLink\""
        depends on ISA && ISA_DMA_API && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet
          network card, say Y here.
index 9f4b302..a4130e6 100644 (file)
@@ -102,6 +102,7 @@ config MCF8390
 config NE2000
        tristate "NE2000/NE1000 support"
        depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC)
+       select NETDEV_LEGACY_INIT if ISA
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
@@ -169,6 +170,7 @@ config STNIC
 config ULTRA
        tristate "SMC Ultra support"
        depends on ISA
+       select NETDEV_LEGACY_INIT
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
@@ -186,6 +188,7 @@ config ULTRA
 config WD80x3
        tristate "WD80*3 support"
        depends on ISA
+       select NETDEV_LEGACY_INIT
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
index fe6c834..da1ae37 100644 (file)
@@ -75,7 +75,6 @@
 #define NESM_STOP_PG   0x80    /* Last page +1 of RX ring */
 
 
-struct net_device * __init apne_probe(int unit);
 static int apne_probe1(struct net_device *dev, int ioaddr);
 
 static void apne_reset_8390(struct net_device *dev);
@@ -120,7 +119,7 @@ static u32 apne_msg_enable;
 module_param_named(msg_enable, apne_msg_enable, uint, 0444);
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
 
-struct net_device * __init apne_probe(int unit)
+static struct net_device * __init apne_probe(void)
 {
        struct net_device *dev;
        struct ei_device *ei_local;
@@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit)
        dev = alloc_ei_netdev();
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
        ei_local = netdev_priv(dev);
        ei_local->msg_enable = apne_msg_enable;
 
@@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id)
     return IRQ_HANDLED;
 }
 
-#ifdef MODULE
 static struct net_device *apne_dev;
 
 static int __init apne_module_init(void)
 {
-       apne_dev = apne_probe(-1);
+       apne_dev = apne_probe();
        return PTR_ERR_OR_ZERO(apne_dev);
 }
 
@@ -579,7 +573,6 @@ static void __exit apne_module_exit(void)
 }
 module_init(apne_module_init);
 module_exit(apne_module_exit);
-#endif
 
 static int init_pcmcia(void)
 {
index 9595dd1..6c6bdd5 100644 (file)
@@ -101,6 +101,12 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev)
        return (struct ax_device *)(ei_local + 1);
 }
 
+void ax_NS8390_reinit(struct net_device *dev)
+{
+       ax_NS8390_init(dev, 1);
+}
+EXPORT_SYMBOL_GPL(ax_NS8390_reinit);
+
 /*
  * ax_initial_check
  *
index e9756d0..53660bc 100644 (file)
@@ -923,7 +923,7 @@ static void __init ne_add_devices(void)
 }
 
 #ifdef MODULE
-int __init init_module(void)
+static int __init ne_init(void)
 {
        int retval;
        ne_add_devices();
@@ -940,6 +940,7 @@ int __init init_module(void)
        ne_loop_rm_unreg(0);
        return retval;
 }
+module_init(ne_init);
 #else /* MODULE */
 static int __init ne_init(void)
 {
@@ -951,6 +952,7 @@ static int __init ne_init(void)
 }
 module_init(ne_init);
 
+#ifdef CONFIG_NETDEV_LEGACY_INIT
 struct net_device * __init ne_probe(int unit)
 {
        int this_dev;
@@ -991,6 +993,7 @@ struct net_device * __init ne_probe(int unit)
 
        return ERR_PTR(-ENODEV);
 }
+#endif
 #endif /* MODULE */
 
 static void __exit ne_exit(void)
index 1d8ed73..0890fa4 100644 (file)
@@ -522,7 +522,6 @@ static void ultra_pio_input(struct net_device *dev, int count,
        /* We know skbuffs are padded to at least word alignment. */
        insw(ioaddr + IOPD, buf, (count+1)>>1);
 }
-
 static void ultra_pio_output(struct net_device *dev, int count,
                                                        const unsigned char *buf, const int start_page)
 {
@@ -572,8 +571,7 @@ MODULE_LICENSE("GPL");
 
 /* This is set up so that only a single autoprobe takes place per call.
 ISA device autoprobes on a running machine are not recommended. */
-int __init
-init_module(void)
+static int __init ultra_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -600,6 +598,7 @@ init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(ultra_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -613,8 +612,7 @@ static void cleanup_card(struct net_device *dev)
        iounmap(ei_status.mem);
 }
 
-void __exit
-cleanup_module(void)
+static void __exit ultra_cleanup_module(void)
 {
        int this_dev;
 
@@ -627,4 +625,5 @@ cleanup_module(void)
                }
        }
 }
+module_exit(ultra_cleanup_module);
 #endif /* MODULE */
index c834123..263a942 100644 (file)
@@ -519,7 +519,7 @@ MODULE_LICENSE("GPL");
 /* This is set up so that only a single autoprobe takes place per call.
 ISA device autoprobes on a running machine are not recommended. */
 
-int __init init_module(void)
+static int __init wd_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -548,6 +548,7 @@ int __init init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(wd_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -556,8 +557,7 @@ static void cleanup_card(struct net_device *dev)
        iounmap(ei_status.mem);
 }
 
-void __exit
-cleanup_module(void)
+static void __exit wd_cleanup_module(void)
 {
        int this_dev;
 
@@ -570,4 +570,5 @@ cleanup_module(void)
                }
        }
 }
+module_exit(wd_cleanup_module);
 #endif /* MODULE */
index e2c9638..fe7a747 100644 (file)
@@ -22,8 +22,6 @@
 #define XS100_8390_DATA_WRITE32_BASE 0x0C80
 #define XS100_8390_DATA_AREA_SIZE 0x80
 
-#define __NS8390_init ax_NS8390_init
-
 /* force unsigned long back to 'void __iomem *' */
 #define ax_convert_addr(_a) ((void __force __iomem *)(_a))
 
 /* Ensure we have our RCR base value */
 #define AX88796_PLATFORM
 
-static unsigned char version[] =
-               "ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
-
-#include "lib8390.c"
+#include "8390.h"
 
 /* from ne.c */
 #define NE_CMD         EI_SHIFT(0x00)
@@ -232,7 +227,7 @@ static void xs100_block_output(struct net_device *dev, int count,
                if (jiffies - dma_start > 2 * HZ / 100) {       /* 20ms */
                        netdev_warn(dev, "timeout waiting for Tx RDC.\n");
                        ei_local->reset_8390(dev);
-                       ax_NS8390_init(dev, 1);
+                       ax_NS8390_reinit(dev);
                        break;
                }
        }
index d0b0609..c6a3abe 100644 (file)
@@ -46,6 +46,7 @@ config AMD8111_ETH
 config LANCE
        tristate "AMD LANCE and PCnet (AT1500 and NE2100) support"
        depends on ISA && ISA_DMA_API && !ARM && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a network (Ethernet) card of this type, say Y here.
          Some LinkSys cards are of this type.
@@ -132,6 +133,7 @@ config PCMCIA_NMCLAN
 config NI65
        tristate "NI6510 support"
        depends on ISA && ISA_DMA_API && !ARM && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a network (Ethernet) card of this type, say Y here.
 
index 36f54d1..9d2f49f 100644 (file)
@@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len )
 }
 
 
-struct net_device * __init atarilance_probe(int unit)
+struct net_device * __init atarilance_probe(void)
 {
        int i;
        static int found;
@@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit)
        dev = alloc_etherdev(sizeof(struct lance_private));
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
 
        for( i = 0; i < N_LANCE_ADDR; ++i ) {
                if (lance_probe1( dev, &lance_addr_list[i] )) {
@@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr )
        return 0;
 }
 
-
-#ifdef MODULE
 static struct net_device *atarilance_dev;
 
 static int __init atarilance_module_init(void)
 {
-       atarilance_dev = atarilance_probe(-1);
+       atarilance_dev = atarilance_probe();
        return PTR_ERR_OR_ZERO(atarilance_dev);
 }
 
@@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void)
 }
 module_init(atarilance_module_init);
 module_exit(atarilance_module_exit);
-#endif /* MODULE */
index 2178e6b..945bf1d 100644 (file)
@@ -327,7 +327,7 @@ MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)");
 MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)");
 MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)");
 
-int __init init_module(void)
+static int __init lance_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -356,6 +356,7 @@ int __init init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(lance_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -368,7 +369,7 @@ static void cleanup_card(struct net_device *dev)
        kfree(lp);
 }
 
-void __exit cleanup_module(void)
+static void __exit lance_cleanup_module(void)
 {
        int this_dev;
 
@@ -381,6 +382,7 @@ void __exit cleanup_module(void)
                }
        }
 }
+module_exit(lance_cleanup_module);
 #endif /* MODULE */
 MODULE_LICENSE("GPL");
 
index 3f2e4cd..da97fcc 100644 (file)
@@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = {
 };
 
 /* Initialise the one and only on-board 7990 */
-struct net_device * __init mvme147lance_probe(int unit)
+static struct net_device * __init mvme147lance_probe(void)
 {
        struct net_device *dev;
        static int called;
@@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit)
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       if (unit >= 0)
-               sprintf(dev->name, "eth%d", unit);
-
        /* Fill the dev fields */
        dev->base_addr = (unsigned long)MVME147_LANCE_BASE;
        dev->netdev_ops = &lance_netdev_ops;
@@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev)
        return 0;
 }
 
-#ifdef MODULE
 MODULE_LICENSE("GPL");
 
 static struct net_device *dev_mvme147_lance;
-int __init init_module(void)
+static int __init m147lance_init(void)
 {
-       dev_mvme147_lance = mvme147lance_probe(-1);
+       dev_mvme147_lance = mvme147lance_probe();
        return PTR_ERR_OR_ZERO(dev_mvme147_lance);
 }
+module_init(m147lance_init);
 
-void __exit cleanup_module(void)
+static void __exit m147lance_exit(void)
 {
        struct m147lance_private *lp = netdev_priv(dev_mvme147_lance);
        unregister_netdev(dev_mvme147_lance);
        free_pages(lp->ram, 3);
        free_netdev(dev_mvme147_lance);
 }
-
-#endif /* MODULE */
+module_exit(m147lance_exit);
index 5c1cfb0..b5df7ad 100644 (file)
@@ -1230,18 +1230,20 @@ MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)");
 MODULE_PARM_DESC(io, "ni6510 I/O base address");
 MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
 
-int __init init_module(void)
+static int __init ni65_init_module(void)
 {
        dev_ni65 = ni65_probe(-1);
        return PTR_ERR_OR_ZERO(dev_ni65);
 }
+module_init(ni65_init_module);
 
-void __exit cleanup_module(void)
+static void __exit ni65_cleanup_module(void)
 {
        unregister_netdev(dev_ni65);
        cleanup_card(dev_ni65);
        free_netdev(dev_ni65);
 }
+module_exit(ni65_cleanup_module);
 #endif /* MODULE */
 
 MODULE_LICENSE("GPL");
index f8d7a93..4a845bc 100644 (file)
@@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev );
 
 /************************* End of Prototypes **************************/
 
-struct net_device * __init sun3lance_probe(int unit)
+static struct net_device * __init sun3lance_probe(void)
 {
        struct net_device *dev;
        static int found;
@@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit)
        dev = alloc_etherdev(sizeof(struct lance_private));
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
 
        if (!lance_probe(dev))
                goto out;
@@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev )
 }
 
 
-#ifdef MODULE
-
 static struct net_device *sun3lance_dev;
 
-int __init init_module(void)
+static int __init sun3lance_init(void)
 {
-       sun3lance_dev = sun3lance_probe(-1);
+       sun3lance_dev = sun3lance_probe();
        return PTR_ERR_OR_ZERO(sun3lance_dev);
 }
+module_init(sun3lance_init);
 
-void __exit cleanup_module(void)
+static void __exit sun3lance_cleanup(void)
 {
        unregister_netdev(sun3lance_dev);
 #ifdef CONFIG_SUN3
@@ -942,6 +937,4 @@ void __exit cleanup_module(void)
 #endif
        free_netdev(sun3lance_dev);
 }
-
-#endif /* MODULE */
-
+module_exit(sun3lance_cleanup);
index 1a6ec1a..b5d954c 100644 (file)
@@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        }
 
        /* Allocated memory for FW statistics  */
-       if (bnx2x_alloc_fw_stats_mem(bp))
+       rc = bnx2x_alloc_fw_stats_mem(bp);
+       if (rc)
                LOAD_ERROR_EXIT(bp, load_error0);
 
        /* request pf to initialize status blocks */
index 4a92ea7..865fcb8 100644 (file)
@@ -3163,6 +3163,58 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
        return 0;
 }
 
+static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr)
+{
+       kfree(cpr->cp_desc_ring);
+       cpr->cp_desc_ring = NULL;
+       kfree(cpr->cp_desc_mapping);
+       cpr->cp_desc_mapping = NULL;
+}
+
+static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n)
+{
+       cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL);
+       if (!cpr->cp_desc_ring)
+               return -ENOMEM;
+       cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping),
+                                      GFP_KERNEL);
+       if (!cpr->cp_desc_mapping)
+               return -ENOMEM;
+       return 0;
+}
+
+static void bnxt_free_all_cp_arrays(struct bnxt *bp)
+{
+       int i;
+
+       if (!bp->bnapi)
+               return;
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_napi *bnapi = bp->bnapi[i];
+
+               if (!bnapi)
+                       continue;
+               bnxt_free_cp_arrays(&bnapi->cp_ring);
+       }
+}
+
+static int bnxt_alloc_all_cp_arrays(struct bnxt *bp)
+{
+       int i, n = bp->cp_nr_pages;
+
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_napi *bnapi = bp->bnapi[i];
+               int rc;
+
+               if (!bnapi)
+                       continue;
+               rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n);
+               if (rc)
+                       return rc;
+       }
+       return 0;
+}
+
 static void bnxt_free_cp_rings(struct bnxt *bp)
 {
        int i;
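
Note how the two new helpers are meant to be paired: bnxt_alloc_cp_arrays() intentionally returns -ENOMEM with any partial allocation left in place, and the caller is responsible for unwinding with bnxt_free_cp_arrays(), which frees whichever arrays were allocated and NULLs the pointers. Condensed from the sub-ring caller below:

    rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
    if (rc) {
            bnxt_free_cp_arrays(cpr);       /* safe after partial failure */
            kfree(cpr);
            return NULL;
    }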
@@ -3190,6 +3242,7 @@ static void bnxt_free_cp_rings(struct bnxt *bp)
                        if (cpr2) {
                                ring = &cpr2->cp_ring_struct;
                                bnxt_free_ring(bp, &ring->ring_mem);
+                               bnxt_free_cp_arrays(cpr2);
                                kfree(cpr2);
                                cpr->cp_ring_arr[j] = NULL;
                        }
@@ -3208,6 +3261,12 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
        if (!cpr)
                return NULL;
 
+       rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
+       if (rc) {
+               bnxt_free_cp_arrays(cpr);
+               kfree(cpr);
+               return NULL;
+       }
        ring = &cpr->cp_ring_struct;
        rmem = &ring->ring_mem;
        rmem->nr_pages = bp->cp_nr_pages;
@@ -3218,6 +3277,7 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
        rc = bnxt_alloc_ring(bp, rmem);
        if (rc) {
                bnxt_free_ring(bp, rmem);
+               bnxt_free_cp_arrays(cpr);
                kfree(cpr);
                cpr = NULL;
        }
@@ -3650,9 +3710,15 @@ void bnxt_set_ring_params(struct bnxt *bp)
                if (jumbo_factor > agg_factor)
                        agg_factor = jumbo_factor;
        }
-       agg_ring_size = ring_size * agg_factor;
+       if (agg_factor) {
+               if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) {
+                       ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+                       netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n",
+                                   bp->rx_ring_size, ring_size);
+                       bp->rx_ring_size = ring_size;
+               }
+               agg_ring_size = ring_size * agg_factor;
 
-       if (agg_ring_size) {
                bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size,
                                                        RX_DESC_CNT);
                if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) {
@@ -4253,6 +4319,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
        bnxt_free_tx_rings(bp);
        bnxt_free_rx_rings(bp);
        bnxt_free_cp_rings(bp);
+       bnxt_free_all_cp_arrays(bp);
        bnxt_free_ntp_fltrs(bp, irq_re_init);
        if (irq_re_init) {
                bnxt_free_ring_stats(bp);
@@ -4373,6 +4440,10 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
                        goto alloc_mem_err;
        }
 
+       rc = bnxt_alloc_all_cp_arrays(bp);
+       if (rc)
+               goto alloc_mem_err;
+
        bnxt_init_ring_struct(bp);
 
        rc = bnxt_alloc_rx_rings(bp);
@@ -12168,9 +12239,8 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                /* Make sure fw_reset_state is 0 before clearing the flag */
                smp_mb__before_atomic();
                clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
-               bnxt_ulp_start(bp, rc);
-               if (!rc)
-                       bnxt_reenable_sriov(bp);
+               bnxt_ulp_start(bp, 0);
+               bnxt_reenable_sriov(bp);
                bnxt_vf_reps_alloc(bp);
                bnxt_vf_reps_open(bp);
                bnxt_ptp_reapply_pps(bp);
index e379c48..9c3324e 100644 (file)
@@ -596,15 +596,17 @@ struct nqe_cn {
 #define MAX_TPA_SEGS_P5        0x3f
 
 #if (BNXT_PAGE_SHIFT == 16)
-#define MAX_RX_PAGES   1
+#define MAX_RX_PAGES_AGG_ENA   1
+#define MAX_RX_PAGES   4
 #define MAX_RX_AGG_PAGES       4
 #define MAX_TX_PAGES   1
-#define MAX_CP_PAGES   8
+#define MAX_CP_PAGES   16
 #else
-#define MAX_RX_PAGES   8
+#define MAX_RX_PAGES_AGG_ENA   8
+#define MAX_RX_PAGES   32
 #define MAX_RX_AGG_PAGES       32
 #define MAX_TX_PAGES   8
-#define MAX_CP_PAGES   64
+#define MAX_CP_PAGES   128
 #endif
 
 #define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd))
@@ -622,6 +624,7 @@ struct nqe_cn {
 #define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT)
 
 #define BNXT_MAX_RX_DESC_CNT           (RX_DESC_CNT * MAX_RX_PAGES - 1)
+#define BNXT_MAX_RX_DESC_CNT_JUM_ENA   (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1)
 #define BNXT_MAX_RX_JUM_DESC_CNT       (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1)
 #define BNXT_MAX_TX_DESC_CNT           (TX_DESC_CNT * MAX_TX_PAGES - 1)
 
@@ -972,11 +975,11 @@ struct bnxt_cp_ring_info {
        struct dim              dim;
 
        union {
-               struct tx_cmp   *cp_desc_ring[MAX_CP_PAGES];
-               struct nqe_cn   *nq_desc_ring[MAX_CP_PAGES];
+               struct tx_cmp   **cp_desc_ring;
+               struct nqe_cn   **nq_desc_ring;
        };
 
-       dma_addr_t              cp_desc_mapping[MAX_CP_PAGES];
+       dma_addr_t              *cp_desc_mapping;
 
        struct bnxt_stats_mem   stats;
        u32                     hw_stats_ctx_id;
index 64381be..2cd8bb3 100644 (file)
@@ -743,14 +743,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp)
 
 int bnxt_dl_register(struct bnxt *bp)
 {
+       const struct devlink_ops *devlink_ops;
        struct devlink_port_attrs attrs = {};
        struct devlink *dl;
        int rc;
 
        if (BNXT_PF(bp))
-               dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+               devlink_ops = &bnxt_dl_ops;
        else
-               dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
+               devlink_ops = &bnxt_vf_dl_ops;
+
+       dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev);
        if (!dl) {
                netdev_warn(bp->dev, "devlink_alloc failed\n");
                return -ENOMEM;
@@ -763,7 +766,7 @@ int bnxt_dl_register(struct bnxt *bp)
            bp->hwrm_spec_code > 0x10803)
                bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
 
-       rc = devlink_register(dl, &bp->pdev->dev);
+       rc = devlink_register(dl);
        if (rc) {
                netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc);
                goto err_dl_free;
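
This hunk reflects the devlink API change that runs through the whole tree here: the struct device moves from devlink_register() to devlink_alloc(). A sketch of the new calling sequence, with foo_* as placeholder names:

    dl = devlink_alloc(&foo_devlink_ops, sizeof(struct foo_priv),
                       &pdev->dev);
    if (!dl)
            return -ENOMEM;

    rc = devlink_register(dl);      /* no device argument anymore */
    if (rc)
            devlink_free(dl);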
index 786ca51..485252d 100644 (file)
@@ -768,8 +768,13 @@ static void bnxt_get_ringparam(struct net_device *dev,
 {
        struct bnxt *bp = netdev_priv(dev);
 
-       ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
-       ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+       if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+               ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+               ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+       } else {
+               ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
+               ering->rx_jumbo_max_pending = 0;
+       }
        ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT;
 
        ering->rx_pending = bp->rx_ring_size;
index e33e311..7f55ebb 100644 (file)
@@ -560,6 +560,12 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
 
        bnxt_ptp_get_current_time(bp);
        ptp->next_period = now + HZ;
+       if (time_after_eq(now, ptp->next_overflow_check)) {
+               spin_lock_bh(&ptp->ptp_lock);
+               timecounter_read(&ptp->tc);
+               spin_unlock_bh(&ptp->ptp_lock);
+               ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD;
+       }
        return HZ;
 }
 
@@ -713,6 +719,7 @@ int bnxt_ptp_init(struct bnxt *bp)
        ptp->cc.shift = 0;
        ptp->cc.mult = 1;
 
+       ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
        timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
 
        ptp->ptp_info = bnxt_ptp_caps;
index 8892334..cc3cdba 100644 (file)
@@ -83,6 +83,10 @@ struct bnxt_ptp_cfg {
        u64                     current_time;
        u64                     old_time;
        unsigned long           next_period;
+       unsigned long           next_overflow_check;
+       /* 48-bit PHC overflows in 78 hours.  Check overflow every 19 hours. */
+       #define BNXT_PHC_OVERFLOW_PERIOD        (19 * 3600 * HZ)
+
        u16                     tx_seqid;
        struct bnxt             *bp;
        atomic_t                tx_avail;
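
The 19-hour period follows from the counter width. With the 1 ns cyclecounter set up in bnxt_ptp_init() (cc.shift = 0, cc.mult = 1), roughly:

    /*
     * 2^48 ns ~= 281,475,000,000,000 ns ~= 281,475 s ~= 78.2 h to wrap.
     * timecounter_read() has to run well before the counter wraps
     * relative to the previous read (conservatively once per half
     * wrap, ~39 h); a 19 h period leaves about a 2x safety margin.
     */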
index 63e2237..8507198 100644 (file)
@@ -3972,8 +3972,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
         */
        dev->needed_headroom += 64;
 
-       netdev_boot_setup_check(dev);
-
        priv->dev = dev;
        priv->pdev = pdev;
 
index a4a5209..2907e13 100644 (file)
@@ -1457,7 +1457,7 @@ static void free_netsgbuf(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -1500,7 +1500,7 @@ static void free_netsgbuf_with_resp(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
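
These hunks apply the generic DMA API conversion: the legacy pci_* wrappers took a struct pci_dev *, while the dma_* functions take the underlying struct device *. (The removed lines here already passed DMA_TO_DEVICE, which the compat wrappers accepted.) Schematically:

    /* before: PCI compat wrapper */
    pci_unmap_page(pdev, addr, size, PCI_DMA_TODEVICE);

    /* after: same unmap through the generic DMA API */
    dma_unmap_page(&pdev->dev, addr, size, DMA_TO_DEVICE);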
@@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        }
 
        devlink = devlink_alloc(&liquidio_devlink_ops,
-                               sizeof(struct lio_devlink_priv));
+                               sizeof(struct lio_devlink_priv),
+                               &octeon_dev->pci_dev->dev);
        if (!devlink) {
                dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n");
                goto setup_nic_dev_free;
@@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        lio_devlink = devlink_priv(devlink);
        lio_devlink->oct = octeon_dev;
 
-       if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) {
+       if (devlink_register(devlink)) {
                devlink_free(devlink);
                dev_err(&octeon_dev->pci_dev->dev,
                        "devlink registration failed\n");
index 3085dd4..c6fe0f2 100644 (file)
@@ -843,7 +843,7 @@ static void free_netsgbuf(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -887,7 +887,7 @@ static void free_netsgbuf_with_resp(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
index 9361f96..691e147 100644 (file)
@@ -1322,18 +1322,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_disable_device;
        }
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (err) {
                dev_err(dev, "Unable to get usable DMA configuration\n");
                goto err_release_regions;
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
-       if (err) {
-               dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
-               goto err_release_regions;
-       }
-
        /* MAP PF's configuration registers */
        nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
        if (!nic->reg_base) {
index efaaa57..d1667b7 100644 (file)
@@ -2130,18 +2130,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_disable_device;
        }
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (err) {
                dev_err(dev, "Unable to get usable DMA configuration\n");
                goto err_release_regions;
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
-       if (err) {
-               dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
-               goto err_release_regions;
-       }
-
        qcount = netif_get_num_default_rss_queues();
 
        /* Restrict multiqset support only for host bound VFs */
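
Same conversion theme for the DMA masks: a single dma_set_mask_and_coherent() call replaces the pci_set_dma_mask()/pci_set_consistent_dma_mask() pair, so a failure can no longer leave the streaming mask configured while the coherent one is not. The resulting probe pattern:

    err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
    if (err) {
            dev_err(&pdev->dev, "Unable to get usable DMA configuration\n");
            goto err_release_regions;
    }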
index 6260b3b..786ceae 100644 (file)
@@ -1441,7 +1441,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
        } else if (iconf & USE_ENC_IDX_F) {
                if (f->fs.val.encap_vld) {
                        struct port_info *pi = netdev_priv(f->dev);
-                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+                       static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
 
                        /* allocate MPS TCAM entry */
                        ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
@@ -1688,7 +1688,7 @@ int __cxgb4_set_filter(struct net_device *dev, int ftid,
        } else if (iconf & USE_ENC_IDX_F) {
                if (f->fs.val.encap_vld) {
                        struct port_info *pi = netdev_priv(f->dev);
-                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+                       static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
 
                        /* allocate MPS TCAM entry */
                        ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
index d8af9e6..dac1764 100644 (file)
@@ -6,7 +6,7 @@
 config NET_VENDOR_CIRRUS
        bool "Cirrus devices"
        default y
-       depends on ISA || EISA || ARM || MAC
+       depends on ISA || EISA || ARM || MAC || COMPILE_TEST
        help
          If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -18,9 +18,16 @@ config NET_VENDOR_CIRRUS
 if NET_VENDOR_CIRRUS
 
 config CS89x0
-       tristate "CS89x0 support"
-       depends on ISA || EISA || ARM
+       tristate
+
+config CS89x0_ISA
+       tristate "CS89x0 ISA driver support"
+       depends on HAS_IOPORT_MAP
+       depends on ISA
        depends on !PPC32
+       depends on CS89x0_PLATFORM=n
+       select NETDEV_LEGACY_INIT
+       select CS89x0
        help
          Support for CS89x0 chipset based Ethernet cards. If you have a
          network (Ethernet) card of this type, say Y and read the file
@@ -30,15 +37,15 @@ config CS89x0
          will be called cs89x0.
 
 config CS89x0_PLATFORM
-       bool "CS89x0 platform driver support" if HAS_IOPORT_MAP
-       default !HAS_IOPORT_MAP
-       depends on CS89x0
+       tristate "CS89x0 platform driver support"
+       depends on ARM || COMPILE_TEST
+       select CS89x0
        help
-         Say Y to compile the cs89x0 driver as a platform driver. This
-         makes this driver suitable for use on certain evaluation boards
-         such as the iMX21ADS.
+         Say Y to compile the cs89x0 platform driver. This makes this driver
+         suitable for use on certain evaluation boards such as the iMX21ADS.
 
-         If you are unsure, say N.
+         To compile this driver as a module, choose M here. The module
+         will be called cs89x0.
 
 config EP93XX_ETH
        tristate "EP93xx Ethernet support"
index 33ace33..d0c4c8b 100644 (file)
@@ -104,7 +104,7 @@ static char version[] __initdata =
  * them to system IRQ numbers. This mapping is card specific and is set to
  * the configuration of the Cirrus Eval board for this chip.
  */
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
 static unsigned int netcard_portlist[] __used __initdata = {
        0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240,
        0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0
@@ -292,7 +292,7 @@ write_irq(struct net_device *dev, int chip_type, int irq)
        int i;
 
        if (chip_type == CS8900) {
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                /* Search the mapping table for the corresponding IRQ pin. */
                for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++)
                        if (cs8900_irq_map[i] == irq)
@@ -859,7 +859,7 @@ net_open(struct net_device *dev)
                        goto bad_out;
                }
        } else {
-#if !defined(CONFIG_CS89x0_PLATFORM)
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                if (((1 << dev->irq) & lp->irq_map) == 0) {
                        pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n",
                               dev->name, dev->irq, lp->irq_map);
@@ -1523,7 +1523,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular)
                        dev->irq = i;
        } else {
                i = lp->isa_config & INT_NO_MASK;
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                if (lp->chip_type == CS8900) {
                        /* Translate the IRQ using the IRQ mapping table. */
                        if (i >= ARRAY_SIZE(cs8900_irq_map))
@@ -1576,7 +1576,7 @@ out1:
        return retval;
 }
 
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
 /*
  * This function converts the I/O port address used by the cs89x0_probe() and
  * init_module() functions to the I/O memory address used by the
@@ -1682,11 +1682,7 @@ out:
        pr_warn("no cs8900 or cs8920 detected.  Be sure to disable PnP with SETUP\n");
        return ERR_PTR(err);
 }
-#endif
-#endif
-
-#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM)
-
+#else
 static struct net_device *dev_cs89x0;
 
 /* Support the 'debug' module parm even if we're compiled for non-debug to
@@ -1757,9 +1753,9 @@ MODULE_LICENSE("GPL");
  * (hw or software util)
  */
 
-int __init init_module(void)
+static int __init cs89x0_isa_init_module(void)
 {
-       struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
+       struct net_device *dev;
        struct net_local *lp;
        int ret = 0;
 
@@ -1768,6 +1764,7 @@ int __init init_module(void)
 #else
        debug = 0;
 #endif
+       dev = alloc_etherdev(sizeof(struct net_local));
        if (!dev)
                return -ENOMEM;
 
@@ -1826,9 +1823,9 @@ out:
        free_netdev(dev);
        return ret;
 }
+module_init(cs89x0_isa_init_module);
 
-void __exit
-cleanup_module(void)
+static void __exit cs89x0_isa_cleanup_module(void)
 {
        struct net_local *lp = netdev_priv(dev_cs89x0);
 
@@ -1838,9 +1835,11 @@ cleanup_module(void)
        release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT);
        free_netdev(dev_cs89x0);
 }
-#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */
+module_exit(cs89x0_isa_cleanup_module);
+#endif /* MODULE */
+#endif /* CONFIG_CS89x0_ISA */
 
-#ifdef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_PLATFORM)
 static int __init cs89x0_platform_probe(struct platform_device *pdev)
 {
        struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
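
The #ifndef CONFIG_CS89x0_PLATFORM tests become IS_ENABLED(CONFIG_CS89x0_ISA) because the ISA variant can now be modular: IS_ENABLED() evaluates true for both =y and =m, whereas a bare #ifdef only sees the built-in symbol. A minimal illustration:

    #include <linux/kconfig.h>

    #if IS_ENABLED(CONFIG_CS89x0_ISA)       /* true for =y and for =m */
    static void cs89x0_isa_only_setup(void)
    {
            /* ISA-specific code lives here */
    }
    #endif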
index 0116047..55d6fc9 100644 (file)
@@ -362,7 +362,7 @@ void tulip_select_media(struct net_device *dev, int startup)
                        iowrite32(0x33, ioaddr + CSR12);
                        new_csr6 = 0x01860000;
                        /* Trigger autonegotiation. */
-                       iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8);
+                       iowrite32(0x0001F868, ioaddr + 0xB8);
                } else {
                        iowrite32(0x32, ioaddr + CSR12);
                        new_csr6 = 0x00420000;
index 07a48f6..85b9909 100644 (file)
@@ -357,7 +357,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
        int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0;
        void __iomem *ioaddr;
 
-       i = pci_enable_device(pdev);
+       i = pcim_enable_device(pdev);
        if (i) return i;
 
        pci_set_master(pdev);
@@ -379,7 +379,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size);
        if (!ioaddr)
-               goto err_out_free_res;
+               goto err_out_netdev;
 
        for (i = 0; i < 3; i++)
                ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i));
@@ -458,8 +458,6 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 err_out_cleardev:
        pci_iounmap(pdev, ioaddr);
-err_out_free_res:
-       pci_release_regions(pdev);
 err_out_netdev:
        free_netdev (dev);
        return -ENODEV;
@@ -1526,7 +1524,6 @@ static void w840_remove1(struct pci_dev *pdev)
        if (dev) {
                struct netdev_private *np = netdev_priv(dev);
                unregister_netdev(dev);
-               pci_release_regions(pdev);
                pci_iounmap(pdev, np->base_addr);
                free_netdev(dev);
        }
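
The winbond-840 hunks switch the probe to managed PCI: once pcim_enable_device() has run, the device enable and any regions requested afterwards are torn down automatically by devres, which is why the explicit pci_release_regions() calls disappear from both the error path and w840_remove1(). Sketch of the resulting shape:

    i = pcim_enable_device(pdev);   /* device is now devres-managed */
    if (i)
            return i;

    pci_set_master(pdev);
    /* on any later failure, free_netdev() and return; devres undoes
     * the enable and region requests when probe fails */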
index c2ef740..3d9842a 100644 (file)
@@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs    := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa
 fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs     := dpaa2-ptp.o dprtc.o
-fsl-dpaa2-switch-objs  := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o
+fsl-dpaa2-switch-objs  := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o
 
 # Needed by the tracing framework
 CFLAGS_dpaa2-eth.o := -I$(src)
index 8336962..605a39f 100644 (file)
@@ -68,7 +68,7 @@ dpaa2_eth_dl_trap_item_lookup(struct dpaa2_eth_priv *priv, u16 trap_id)
 struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv,
                                                  struct dpaa2_fapr *fapr)
 {
-       struct dpaa2_faf_error_bit {
+       static const struct dpaa2_faf_error_bit {
                int position;
                enum devlink_trap_generic_id trap_id;
        } faf_bits[] = {
@@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
        struct dpaa2_eth_devlink_priv *dl_priv;
        int err;
 
-       priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv));
+       priv->devlink =
+               devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev);
        if (!priv->devlink) {
                dev_err(dev, "devlink_alloc failed\n");
                return -ENOMEM;
@@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
        dl_priv = devlink_priv(priv->devlink);
        dl_priv->dpaa2_priv = priv;
 
-       err = devlink_register(priv->devlink, dev);
+       err = devlink_register(priv->devlink);
        if (err) {
                dev_err(dev, "devlink_register() = %d\n", err);
                goto devlink_free;
index f664021..7065c71 100644 (file)
@@ -4138,7 +4138,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
        int err;
 
        dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
-       dpmac_dev = fsl_mc_get_endpoint(dpni_dev);
+       dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0);
 
        if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
                return PTR_ERR(dpmac_dev);
index 70e0432..720c923 100644 (file)
@@ -15,18 +15,18 @@ static struct {
        enum dpsw_counter id;
        char name[ETH_GSTRING_LEN];
 } dpaa2_switch_ethtool_counters[] =  {
-       {DPSW_CNT_ING_FRAME,            "rx frames"},
-       {DPSW_CNT_ING_BYTE,             "rx bytes"},
-       {DPSW_CNT_ING_FLTR_FRAME,       "rx filtered frames"},
-       {DPSW_CNT_ING_FRAME_DISCARD,    "rx discarded frames"},
-       {DPSW_CNT_ING_BCAST_FRAME,      "rx b-cast frames"},
-       {DPSW_CNT_ING_BCAST_BYTES,      "rx b-cast bytes"},
-       {DPSW_CNT_ING_MCAST_FRAME,      "rx m-cast frames"},
-       {DPSW_CNT_ING_MCAST_BYTE,       "rx m-cast bytes"},
-       {DPSW_CNT_EGR_FRAME,            "tx frames"},
-       {DPSW_CNT_EGR_BYTE,             "tx bytes"},
-       {DPSW_CNT_EGR_FRAME_DISCARD,    "tx discarded frames"},
-       {DPSW_CNT_ING_NO_BUFF_DISCARD,  "rx discarded no buffer frames"},
+       {DPSW_CNT_ING_FRAME,            "[hw] rx frames"},
+       {DPSW_CNT_ING_BYTE,             "[hw] rx bytes"},
+       {DPSW_CNT_ING_FLTR_FRAME,       "[hw] rx filtered frames"},
+       {DPSW_CNT_ING_FRAME_DISCARD,    "[hw] rx discarded frames"},
+       {DPSW_CNT_ING_BCAST_FRAME,      "[hw] rx bcast frames"},
+       {DPSW_CNT_ING_BCAST_BYTES,      "[hw] rx bcast bytes"},
+       {DPSW_CNT_ING_MCAST_FRAME,      "[hw] rx mcast frames"},
+       {DPSW_CNT_ING_MCAST_BYTE,       "[hw] rx mcast bytes"},
+       {DPSW_CNT_EGR_FRAME,            "[hw] tx frames"},
+       {DPSW_CNT_EGR_BYTE,             "[hw] tx bytes"},
+       {DPSW_CNT_EGR_FRAME_DISCARD,    "[hw] tx discarded frames"},
+       {DPSW_CNT_ING_NO_BUFF_DISCARD,  "[hw] rx nobuffer discards"},
 };
 
 #define DPAA2_SWITCH_NUM_COUNTERS      ARRAY_SIZE(dpaa2_switch_ethtool_counters)
@@ -62,6 +62,10 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev,
        struct dpsw_link_state state = {0};
        int err = 0;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return phylink_ethtool_ksettings_get(port_priv->mac->phylink,
+                                                    link_ksettings);
+
        err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
                                     port_priv->ethsw_data->dpsw_handle,
                                     port_priv->idx,
@@ -95,6 +99,10 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
        bool if_running;
        int err = 0, ret;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return phylink_ethtool_ksettings_set(port_priv->mac->phylink,
+                                                    link_ksettings);
+
        /* Interface needs to be down to change link settings */
        if_running = netif_running(netdev);
        if (if_running) {
@@ -134,11 +142,17 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
        return err;
 }
 
-static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
+static int
+dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset)
 {
+       struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+       int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS;
+
        switch (sset) {
        case ETH_SS_STATS:
-               return DPAA2_SWITCH_NUM_COUNTERS;
+               if (port_priv->mac)
+                       num_ss_stats += dpaa2_mac_get_sset_count();
+               return num_ss_stats;
        default:
                return -EOPNOTSUPP;
        }
@@ -147,14 +161,19 @@ static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
 static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev,
                                             u32 stringset, u8 *data)
 {
+       struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+       u8 *p = data;
        int i;
 
        switch (stringset) {
        case ETH_SS_STATS:
-               for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++)
-                       memcpy(data + i * ETH_GSTRING_LEN,
-                              dpaa2_switch_ethtool_counters[i].name,
+               for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) {
+                       memcpy(p, dpaa2_switch_ethtool_counters[i].name,
                               ETH_GSTRING_LEN);
+                       p += ETH_GSTRING_LEN;
+               }
+               if (port_priv->mac)
+                       dpaa2_mac_get_strings(p);
                break;
        }
 }
@@ -176,6 +195,9 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev,
                        netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
                                   dpaa2_switch_ethtool_counters[i].name, err);
        }
+
+       if (port_priv->mac)
+               dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i);
 }
 
 const struct ethtool_ops dpaa2_switch_port_ethtool_ops = {
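
The ethtool hunks above keep the usual three-way contract intact: get_sset_count(), get_strings() and get_ethtool_stats() must agree on the number of entries, so each side appends the MAC block only when port_priv->mac is set. In condensed form:

    n = DPAA2_SWITCH_NUM_COUNTERS;
    if (port_priv->mac)                     /* port connected to a DPMAC */
            n += dpaa2_mac_get_sset_count();
    /* get_strings() appends dpaa2_mac_get_strings(p) and
     * get_ethtool_stats() appends dpaa2_mac_get_ethtool_stats()
     * under exactly the same condition */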
index 7112972..d260993 100644 (file)
@@ -594,12 +594,18 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
        return 0;
 }
 
-static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
+static int dpaa2_switch_port_link_state_update(struct net_device *netdev)
 {
        struct ethsw_port_priv *port_priv = netdev_priv(netdev);
        struct dpsw_link_state state;
        int err;
 
+       /* When the MAC/PHY is managed through phylink there is no need
+        * to update the netif carrier state manually.
+        */
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return 0;
+
        /* Interrupts are received even though no one issued an 'ifconfig up'
         * on the switch interface. Ignore these link state update interrupts
         */
@@ -677,12 +683,14 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        int err;
 
-       /* Explicitly set carrier off, otherwise
-        * netif_carrier_ok() will return true and cause 'ip link show'
-        * to report the LOWER_UP flag, even though the link
-        * notification wasn't even received.
-        */
-       netif_carrier_off(netdev);
+       if (!dpaa2_switch_port_is_type_phy(port_priv)) {
+               /* Explicitly set carrier off, otherwise
+                * netif_carrier_ok() will return true and cause 'ip link show'
+                * to report the LOWER_UP flag, even though the link
+                * notification wasn't even received.
+                */
+               netif_carrier_off(netdev);
+       }
 
        err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
                             port_priv->ethsw_data->dpsw_handle,
@@ -692,23 +700,12 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
                return err;
        }
 
-       /* sync carrier state */
-       err = dpaa2_switch_port_carrier_state_sync(netdev);
-       if (err) {
-               netdev_err(netdev,
-                          "dpaa2_switch_port_carrier_state_sync err %d\n", err);
-               goto err_carrier_sync;
-       }
-
        dpaa2_switch_enable_ctrl_if_napi(ethsw);
 
-       return 0;
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               phylink_start(port_priv->mac->phylink);
 
-err_carrier_sync:
-       dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
-                       port_priv->ethsw_data->dpsw_handle,
-                       port_priv->idx);
-       return err;
+       return 0;
 }
 
 static int dpaa2_switch_port_stop(struct net_device *netdev)
@@ -717,6 +714,13 @@ static int dpaa2_switch_port_stop(struct net_device *netdev)
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        int err;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv)) {
+               phylink_stop(port_priv->mac->phylink);
+       } else {
+               netif_tx_stop_all_queues(netdev);
+               netif_carrier_off(netdev);
+       }
+
        err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
                              port_priv->ethsw_data->dpsw_handle,
                              port_priv->idx);
@@ -1419,41 +1423,103 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
        return netdev->netdev_ops == &dpaa2_switch_port_ops;
 }
 
-static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
+static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
 {
-       int i;
+       struct fsl_mc_device *dpsw_port_dev, *dpmac_dev;
+       struct dpaa2_mac *mac;
+       int err;
 
-       for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-               dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
-               dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
+       dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent);
+       dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx);
+
+       if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
+               return PTR_ERR(dpmac_dev);
+
+       if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+               return 0;
+
+       mac = kzalloc(sizeof(*mac), GFP_KERNEL);
+       if (!mac)
+               return -ENOMEM;
+
+       mac->mc_dev = dpmac_dev;
+       mac->mc_io = port_priv->ethsw_data->mc_io;
+       mac->net_dev = port_priv->netdev;
+
+       err = dpaa2_mac_open(mac);
+       if (err)
+               goto err_free_mac;
+       port_priv->mac = mac;
+
+       if (dpaa2_switch_port_is_type_phy(port_priv)) {
+               err = dpaa2_mac_connect(mac);
+               if (err) {
+                       netdev_err(port_priv->netdev,
+                                  "Error connecting to the MAC endpoint %pe\n",
+                                  ERR_PTR(err));
+                       goto err_close_mac;
+               }
        }
+
+       return 0;
+
+err_close_mac:
+       dpaa2_mac_close(mac);
+       port_priv->mac = NULL;
+err_free_mac:
+       kfree(mac);
+       return err;
+}
+
+static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv)
+{
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               dpaa2_mac_disconnect(port_priv->mac);
+
+       if (!dpaa2_switch_port_has_mac(port_priv))
+               return;
+
+       dpaa2_mac_close(port_priv->mac);
+       kfree(port_priv->mac);
+       port_priv->mac = NULL;
 }
 
 static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
 {
        struct device *dev = (struct device *)arg;
        struct ethsw_core *ethsw = dev_get_drvdata(dev);
-
-       /* Mask the events and the if_id reserved bits to be cleared on read */
-       u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
-       int err;
+       struct ethsw_port_priv *port_priv;
+       u32 status = ~0;
+       int err, if_id;
 
        err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
                                  DPSW_IRQ_INDEX_IF, &status);
        if (err) {
                dev_err(dev, "Can't get irq status (err %d)\n", err);
-
-               err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
-                                           DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
-               if (err)
-                       dev_err(dev, "Can't clear irq status (err %d)\n", err);
                goto out;
        }
 
-       if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
-               dpaa2_switch_links_state_update(ethsw);
+       if_id = (status & 0xFFFF0000) >> 16;
+       port_priv = ethsw->ports[if_id];
+
+       if (status & DPSW_IRQ_EVENT_LINK_CHANGED) {
+               dpaa2_switch_port_link_state_update(port_priv->netdev);
+               dpaa2_switch_port_set_mac_addr(port_priv);
+       }
+
+       if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) {
+               if (dpaa2_switch_port_has_mac(port_priv))
+                       dpaa2_switch_port_disconnect_mac(port_priv);
+               else
+                       dpaa2_switch_port_connect_mac(port_priv);
+       }
 
 out:
+       err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                   DPSW_IRQ_INDEX_IF, status);
+       if (err)
+               dev_err(dev, "Can't clear irq status (err %d)\n", err);
+
        return IRQ_HANDLED;
 }
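
The reworked handler no longer masks the status word up front; it reads all bits and recovers the interface index from status[31:16], so a single IRQ read identifies both the event and the affected port. A small user-space sketch of the same unpacking, with the status value assumed purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define DPSW_IRQ_EVENT_LINK_CHANGED     0x0001
#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED 0x0002

int main(void)
{
	/* Hypothetical status: link-changed event reported on interface 3 */
	uint32_t status = (3u << 16) | DPSW_IRQ_EVENT_LINK_CHANGED;
	int if_id = (status & 0xFFFF0000) >> 16;

	printf("if_id=%d link=%d endpoint=%d\n", if_id,
	       !!(status & DPSW_IRQ_EVENT_LINK_CHANGED),
	       !!(status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED));
	return 0;
}
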
 
@@ -3133,6 +3199,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
        for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
                port_priv = ethsw->ports[i];
                unregister_netdev(port_priv->netdev);
+               dpaa2_switch_port_disconnect_mac(port_priv);
                free_netdev(port_priv->netdev);
        }
 
@@ -3212,6 +3279,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
                goto err_port_probe;
        port_priv->learn_ena = false;
 
+       err = dpaa2_switch_port_connect_mac(port_priv);
+       if (err)
+               goto err_port_probe;
+
        return 0;
 
 err_port_probe:
@@ -3288,12 +3359,6 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
                               &ethsw->fq[i].napi, dpaa2_switch_poll,
                               NAPI_POLL_WEIGHT);
 
-       err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-       if (err) {
-               dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
-               goto err_free_netdev;
-       }
-
        /* Setup IRQs */
        err = dpaa2_switch_setup_irqs(sw_dev);
        if (err)
index f69d940..0002dca 100644 (file)
@@ -21,6 +21,7 @@
 #include <net/pkt_cls.h>
 #include <soc/fsl/dpaa2-io.h>
 
+#include "dpaa2-mac.h"
 #include "dpsw.h"
 
 /* Number of IRQs supported */
@@ -159,6 +160,7 @@ struct ethsw_port_priv {
        bool                    learn_ena;
 
        struct dpaa2_switch_filter_block *filter_block;
+       struct dpaa2_mac        *mac;
 };
 
 /* Switch data */
@@ -225,6 +227,22 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
        return true;
 }
 
+static inline bool
+dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv)
+{
+       if (port_priv->mac &&
+           (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY ||
+            port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE))
+               return true;
+
+       return false;
+}
+
+static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv)
+{
+       return port_priv->mac ? true : false;
+}
+
 bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
 
 int dpaa2_switch_port_vlans_add(struct net_device *netdev,
index 892df90..b90bd36 100644 (file)
@@ -98,6 +98,11 @@ int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
  */
 #define DPSW_IRQ_EVENT_LINK_CHANGED    0x0001
 
+/**
+ * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint
+ */
+#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED        0x0002
+
 /**
  * struct dpsw_irq_cfg - IRQ configuration
  * @addr:      Address that must be written to signal a message-based interrupt
index ae32591..d2e9a6c 100644 (file)
 #define FEC_RXIC0              0xfff
 #define FEC_RXIC1              0xfff
 #define FEC_RXIC2              0xfff
+#define FEC_LPI_SLEEP          0xfff
+#define FEC_LPI_WAKE           0xfff
 #endif /* CONFIG_M5272 */
 
 
index 40ea318..fdff37b 100644 (file)
@@ -2042,6 +2042,34 @@ failed_clk_ptp:
        return ret;
 }
 
+static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep,
+                                     struct device_node *np)
+{
+       u32 rgmii_tx_delay, rgmii_rx_delay;
+
+       /* For rgmii tx internal delay, valid values are 0ps and 2000ps */
+       if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) {
+               if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) {
+                       dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps");
+                       return -EINVAL;
+               } else if (rgmii_tx_delay == 2000) {
+                       fep->rgmii_txc_dly = true;
+               }
+       }
+
+       /* For rgmii rx internal delay, valid values are 0ps and 2000ps */
+       if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) {
+               if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) {
+                       dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps");
+                       return -EINVAL;
+               } else if (rgmii_rx_delay == 2000) {
+                       fep->rgmii_rxc_dly = true;
+               }
+       }
+
+       return 0;
+}
+
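
The old probe-time checks set a delay flag whenever of_property_read_u32() returned non-zero, i.e. when the property was absent, inverting the intent; the new helper sets a flag only for an explicit 2000 ps value. A minimal kernel-style sketch of the corrected pattern, with a hypothetical helper name and the error handling for unsupported values omitted:

#include <linux/of.h>

/* Illustrative only: the flag is set solely for an explicit 2000 ps value */
static bool rgmii_delay_is_2000ps(struct device_node *np, const char *prop)
{
	u32 ps;

	/* of_property_read_u32() returns 0 only when the property exists,
	 * so a missing property leaves the delay disabled -- the opposite
	 * of what the old probe-time check did.
	 */
	if (of_property_read_u32(np, prop, &ps))
		return false;

	return ps == 2000;
}
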
 static int fec_enet_mii_probe(struct net_device *ndev)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
@@ -3719,7 +3747,6 @@ fec_probe(struct platform_device *pdev)
        char irq_name[8];
        int irq_cnt;
        struct fec_devinfo *dev_info;
-       u32 rgmii_delay;
 
        fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs);
 
@@ -3777,12 +3804,6 @@ fec_probe(struct platform_device *pdev)
        if (ret)
                goto failed_stop_mode;
 
-       /* For rgmii internal delay, valid values are 0ps and 2000ps */
-       if (of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_delay))
-               fep->rgmii_txc_dly = true;
-       if (of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_delay))
-               fep->rgmii_rxc_dly = true;
-
        phy_node = of_parse_phandle(np, "phy-handle", 0);
        if (!phy_node && of_phy_is_fixed_link(np)) {
                ret = of_phy_register_fixed_link(np);
@@ -3806,6 +3827,10 @@ fec_probe(struct platform_device *pdev)
                fep->phy_interface = interface;
        }
 
+       ret = fec_enet_parse_rgmii_delay(fep, np);
+       if (ret)
+               goto failed_rgmii_delay;
+
        fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
        if (IS_ERR(fep->clk_ipg)) {
                ret = PTR_ERR(fep->clk_ipg);
@@ -3835,9 +3860,11 @@ fec_probe(struct platform_device *pdev)
        fep->clk_ref_rate = clk_get_rate(fep->clk_ref);
 
        /* clk_2x_txclk is optional, depends on board */
-       fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
-       if (IS_ERR(fep->clk_2x_txclk))
-               fep->clk_2x_txclk = NULL;
+       if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) {
+               fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
+               if (IS_ERR(fep->clk_2x_txclk))
+                       fep->clk_2x_txclk = NULL;
+       }
 
        fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX;
        fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp");
@@ -3955,6 +3982,7 @@ failed_clk_ahb:
 failed_clk_ipg:
        fec_enet_clk_enable(ndev, false);
 failed_clk:
+failed_rgmii_delay:
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
        of_node_put(phy_node);
@@ -3989,13 +4017,13 @@ fec_drv_remove(struct platform_device *pdev)
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
        of_node_put(fep->phy_node);
-       free_netdev(ndev);
 
        clk_disable_unprepare(fep->clk_ahb);
        clk_disable_unprepare(fep->clk_ipg);
        pm_runtime_put_noidle(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 
+       free_netdev(ndev);
        return 0;
 }
 
index 094e4a3..2ba0e7b 100644 (file)
@@ -91,6 +91,7 @@ config HNS3
        tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
        depends on PCI
        select NET_DEVLINK
+       select PAGE_POOL
        help
          This selects the framework support for Hisilicon Network Subsystem 3.
          This layer facilitates clients like ENET, RoCE and user-space ethernet
index cb8d5da..fcbeb1f 100644 (file)
@@ -3205,6 +3205,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
        unsigned int order = hns3_page_order(ring);
        struct page *p;
 
+       if (ring->page_pool) {
+               p = page_pool_dev_alloc_frag(ring->page_pool,
+                                            &cb->page_offset,
+                                            hns3_buf_size(ring));
+               if (unlikely(!p))
+                       return -ENOMEM;
+
+               cb->priv = p;
+               cb->buf = page_address(p);
+               cb->dma = page_pool_get_dma_addr(p);
+               cb->type = DESC_TYPE_PP_FRAG;
+               cb->reuse_flag = 0;
+               return 0;
+       }
+
        p = dev_alloc_pages(order);
        if (!p)
                return -ENOMEM;
@@ -3227,8 +3242,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
        if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
                        DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
                napi_consume_skb(cb->priv, budget);
-       else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
-               __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+       else if (!HNAE3_IS_TX_RING(ring)) {
+               if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias)
+                       __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+               else if (cb->type & DESC_TYPE_PP_FRAG)
+                       page_pool_put_full_page(ring->page_pool, cb->priv,
+                                               false);
+       }
        memset(cb, 0, sizeof(*cb));
 }
 
@@ -3315,7 +3335,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring,
        int ret;
 
        ret = hns3_alloc_buffer(ring, cb);
-       if (ret)
+       if (ret || ring->page_pool)
                goto out;
 
        ret = hns3_map_buffer(ring, cb);
@@ -3337,7 +3357,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i)
        if (ret)
                return ret;
 
-       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+                                        ring->desc_cb[i].page_offset);
 
        return 0;
 }
@@ -3367,7 +3388,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
 {
        hns3_unmap_buffer(ring, &ring->desc_cb[i]);
        ring->desc_cb[i] = *res_cb;
-       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+                                        ring->desc_cb[i].page_offset);
        ring->desc[i].rx.bd_base_info = 0;
 }
 
@@ -3539,6 +3561,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
        u32 frag_size = size - pull_len;
        bool reused;
 
+       if (ring->page_pool) {
+               skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+                               frag_size, truesize);
+               return;
+       }
+
        /* Avoid re-using remote or pfmem page */
        if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
                goto out;
@@ -3856,6 +3884,9 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
                /* We can reuse buffer as-is, just make sure it is reusable */
                if (dev_page_is_reusable(desc_cb->priv))
                        desc_cb->reuse_flag = 1;
+               else if (desc_cb->type & DESC_TYPE_PP_FRAG)
+                       page_pool_put_full_page(ring->page_pool, desc_cb->priv,
+                                               false);
                else /* This page cannot be reused so discard it */
                        __page_frag_cache_drain(desc_cb->priv,
                                                desc_cb->pagecnt_bias);
@@ -3863,6 +3894,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
                hns3_rx_ring_move_fw(ring);
                return 0;
        }
+
+       if (ring->page_pool)
+               skb_mark_for_recycle(skb);
+
        u64_stats_update_begin(&ring->syncp);
        ring->stats.seg_pkt_cnt++;
        u64_stats_update_end(&ring->syncp);
@@ -3901,6 +3936,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring)
                                            "alloc rx fraglist skb fail\n");
                                return -ENXIO;
                        }
+
+                       if (ring->page_pool)
+                               skb_mark_for_recycle(new_skb);
+
                        ring->frag_num = 0;
 
                        if (ring->tail_skb) {
@@ -4705,6 +4744,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv)
        priv->ring = NULL;
 }
 
+static void hns3_alloc_page_pool(struct hns3_enet_ring *ring)
+{
+       struct page_pool_params pp_params = {
+               .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG |
+                               PP_FLAG_DMA_SYNC_DEV,
+               .order = hns3_page_order(ring),
+               .pool_size = ring->desc_num * hns3_buf_size(ring) /
+                               (PAGE_SIZE << hns3_page_order(ring)),
+               .nid = dev_to_node(ring_to_dev(ring)),
+               .dev = ring_to_dev(ring),
+               .dma_dir = DMA_FROM_DEVICE,
+               .offset = 0,
+               .max_len = PAGE_SIZE << hns3_page_order(ring),
+       };
+
+       ring->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(ring->page_pool)) {
+               dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n",
+                        PTR_ERR(ring->page_pool));
+               ring->page_pool = NULL;
+       }
+}
+
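
pool_size here is the number of pages needed to back every descriptor at the ring's buffer size. A quick user-space check of the arithmetic, with the ring parameters assumed for illustration:

#include <stdio.h>

int main(void)
{
	/* Hypothetical ring: 1024 descriptors, 2 KB buffers, 4 KB pages, order 0 */
	unsigned int desc_num = 1024, buf_size = 2048;
	unsigned int page_size = 4096, order = 0;
	unsigned int pool_size = desc_num * buf_size / (page_size << order);

	printf("pool_size = %u pages\n", pool_size);	/* 512 */
	return 0;
}
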
 static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
 {
        int ret;
@@ -4724,6 +4786,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
                goto out_with_desc_cb;
 
        if (!HNAE3_IS_TX_RING(ring)) {
+               hns3_alloc_page_pool(ring);
+
                ret = hns3_alloc_ring_buffers(ring);
                if (ret)
                        goto out_with_desc;
@@ -4764,6 +4828,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
                devm_kfree(ring_to_dev(ring), tx_spare);
                ring->tx_spare = NULL;
        }
+
+       if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) {
+               page_pool_destroy(ring->page_pool);
+               ring->page_pool = NULL;
+       }
 }
 
 static int hns3_buf_size2type(u32 buf_size)
index 15af3d9..27809d6 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/dim.h>
 #include <linux/if_vlan.h>
+#include <net/page_pool.h>
 
 #include "hnae3.h"
 
@@ -307,6 +308,7 @@ enum hns3_desc_type {
        DESC_TYPE_BOUNCE_ALL            = 1 << 3,
        DESC_TYPE_BOUNCE_HEAD           = 1 << 4,
        DESC_TYPE_SGL_SKB               = 1 << 5,
+       DESC_TYPE_PP_FRAG               = 1 << 6,
 };
 
 struct hns3_desc_cb {
@@ -451,6 +453,7 @@ struct hns3_enet_ring {
        struct hnae3_queue *tqp;
        int queue_index;
        struct device *dev; /* will be used for DMA mapping of descriptors */
+       struct page_pool *page_pool;
 
        /* statistic */
        struct ring_stats stats;
index 06d2994..448f29a 100644 (file)
@@ -112,14 +112,14 @@ int hclge_devlink_init(struct hclge_dev *hdev)
        int ret;
 
        devlink = devlink_alloc(&hclge_devlink_ops,
-                               sizeof(struct hclge_devlink_priv));
+                               sizeof(struct hclge_devlink_priv), &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
        priv = devlink_priv(devlink);
        priv->hdev = hdev;
 
-       ret = devlink_register(devlink, &pdev->dev);
+       ret = devlink_register(devlink);
        if (ret) {
                dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
                        ret);
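
These hunks track the devlink core change in which the struct device moves from devlink_register() into devlink_alloc(), so registration takes only the devlink instance. A minimal sketch of the updated pattern, assuming the API as it appears in this series; my_devlink_ops and struct my_priv are placeholders, not a real driver's symbols:

#include <linux/pci.h>
#include <net/devlink.h>

struct my_priv { int dummy; };			/* placeholder priv */
static const struct devlink_ops my_devlink_ops;	/* placeholder ops */

static int my_devlink_init(struct pci_dev *pdev)
{
	struct devlink *devlink;
	int err;

	/* The device is now bound at allocation time... */
	devlink = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv),
				&pdev->dev);
	if (!devlink)
		return -ENOMEM;

	/* ...so registration no longer takes a struct device */
	err = devlink_register(devlink);
	if (err)
		devlink_free(devlink);
	return err;
}
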
index 3b1f845..befa9bc 100644 (file)
@@ -5,9 +5,27 @@
 #include "hclge_main.h"
 #include "hnae3.h"
 
+static int hclge_ptp_get_cycle(struct hclge_dev *hdev)
+{
+       struct hclge_ptp *ptp = hdev->ptp;
+
+       ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) &
+                        HCLGE_PTP_CYCLE_QUO_MASK;
+       ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
+       ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+
+       if (ptp->cycle.den == 0) {
+               dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 {
        struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+       struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle;
        u64 adj_val, adj_base, diff;
        unsigned long flags;
        bool is_neg = false;
@@ -18,7 +36,7 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
                is_neg = true;
        }
 
-       adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT;
+       adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer;
        adj_val = adj_base * ppb;
        diff = div_u64(adj_val, 1000000000ULL);
 
@@ -29,16 +47,16 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 
        /* This clock cycle is defined by three parts: quotient, numerator
         * and denominator. For example, for a 2.5 ns cycle the quotient is 2,
-        * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator
-        * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT.
+        * denominator is fixed to ptp->cycle.den, and numerator
+        * is 0.5 * ptp->cycle.den.
         */
-       quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator);
+       quo = div_u64_rem(adj_val, cycle->den, &numerator);
 
        spin_lock_irqsave(&hdev->ptp->lock, flags);
-       writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
+       writel(quo & HCLGE_PTP_CYCLE_QUO_MASK,
+              hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
        writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
-       writel(HCLGE_PTP_CYCLE_ADJ_UNIT,
-              hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+       writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
        writel(HCLGE_PTP_CYCLE_ADJ_EN,
               hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG);
        spin_unlock_irqrestore(&hdev->ptp->lock, flags);
@@ -475,6 +493,10 @@ int hclge_ptp_init(struct hclge_dev *hdev)
                ret = hclge_ptp_create_clock(hdev);
                if (ret)
                        return ret;
+
+               ret = hclge_ptp_get_cycle(hdev);
+               if (ret)
+                       return ret;
        }
 
        ret = hclge_ptp_int_en(hdev, true);
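
With the cycle now read from the hardware, the adjustment base becomes quo * den + numer, i.e. the nominal cycle expressed in denominator units, against which the ppb offset is scaled. A runnable check of the arithmetic, with register values assumed for a hypothetical 2.5 ns cycle:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical 2.5 ns cycle: quo = 2, numer = 0.5 * den */
	uint64_t quo = 2, den = 100000000, numer = 50000000;
	int32_t ppb = 100;	/* example frequency adjustment */

	uint64_t adj_base = quo * den + numer;	/* 250000000 */
	uint64_t diff = adj_base * (uint64_t)ppb / 1000000000ULL;

	printf("adj_base=%llu diff=%llu\n",
	       (unsigned long long)adj_base, (unsigned long long)diff);
	return 0;
}
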
index 5a202b7..dbf5f4c 100644 (file)
@@ -29,6 +29,7 @@
 #define HCLGE_PTP_TIME_ADJ_REG         0x60
 #define HCLGE_PTP_TIME_ADJ_EN          BIT(0)
 #define HCLGE_PTP_CYCLE_QUO_REG                0x64
+#define HCLGE_PTP_CYCLE_QUO_MASK       GENMASK(7, 0)
 #define HCLGE_PTP_CYCLE_DEN_REG                0x68
 #define HCLGE_PTP_CYCLE_NUM_REG                0x6C
 #define HCLGE_PTP_CYCLE_CFG_REG                0x70
@@ -37,9 +38,7 @@
 #define HCLGE_PTP_CUR_TIME_SEC_L_REG   0x78
 #define HCLGE_PTP_CUR_TIME_NSEC_REG    0x7C
 
-#define HCLGE_PTP_CYCLE_ADJ_BASE       2
 #define HCLGE_PTP_CYCLE_ADJ_MAX                500000000
-#define HCLGE_PTP_CYCLE_ADJ_UNIT       100000000
 #define HCLGE_PTP_SEC_H_OFFSET         32u
 #define HCLGE_PTP_SEC_L_MASK           GENMASK(31, 0)
 
 #define HCLGE_PTP_FLAG_TX_EN           1
 #define HCLGE_PTP_FLAG_RX_EN           2
 
+struct hclge_ptp_cycle {
+       u32 quo;
+       u32 numer;
+       u32 den;
+};
+
 struct hclge_ptp {
        struct hclge_dev *hdev;
        struct ptp_clock *clock;
@@ -58,6 +63,7 @@ struct hclge_ptp {
        spinlock_t lock;        /* protects ptp registers */
        u32 ptp_cfg;
        u32 last_tx_seqid;
+       struct hclge_ptp_cycle cycle;
        unsigned long tx_start;
        unsigned long tx_cnt;
        unsigned long tx_skipped;
index 21a4527..1e6061f 100644 (file)
@@ -112,15 +112,16 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev)
        struct devlink *devlink;
        int ret;
 
-       devlink = devlink_alloc(&hclgevf_devlink_ops,
-                               sizeof(struct hclgevf_devlink_priv));
+       devlink = devlink_alloc(&hclgevf_devlink_ops,
+                               sizeof(struct hclgevf_devlink_priv),
+                               &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
        priv = devlink_priv(devlink);
        priv->hdev = hdev;
 
-       ret = devlink_register(devlink, &pdev->dev);
+       ret = devlink_register(devlink);
        if (ret) {
                dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
                        ret);
index 58d5646..6e11ee3 100644 (file)
@@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = {
        .flash_update = hinic_devlink_flash_update,
 };
 
-struct devlink *hinic_devlink_alloc(void)
+struct devlink *hinic_devlink_alloc(struct device *dev)
 {
-       return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev));
+       return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev);
 }
 
 void hinic_devlink_free(struct devlink *devlink)
@@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink)
        devlink_free(devlink);
 }
 
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev)
+int hinic_devlink_register(struct hinic_devlink_priv *priv)
 {
        struct devlink *devlink = priv_to_devlink(priv);
 
-       return devlink_register(devlink, dev);
+       return devlink_register(devlink);
 }
 
 void hinic_devlink_unregister(struct hinic_devlink_priv *priv)
index a090ebc..9e31501 100644 (file)
@@ -108,9 +108,9 @@ struct host_image_st {
        u32 device_id;
 };
 
-struct devlink *hinic_devlink_alloc(void);
+struct devlink *hinic_devlink_alloc(struct device *dev);
 void hinic_devlink_free(struct devlink *devlink);
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev);
+int hinic_devlink_register(struct hinic_devlink_priv *priv);
 void hinic_devlink_unregister(struct hinic_devlink_priv *priv);
 
 int hinic_health_reporters_create(struct hinic_devlink_priv *priv);
index 428108e..56b6b04 100644 (file)
@@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev)
                return err;
        }
 
-       err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev);
+       err = hinic_devlink_register(hwdev->devlink_dev);
        if (err) {
                dev_err(&hwif->pdev->dev, "Failed to register devlink\n");
                hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
index 405ee4d..881d0b2 100644 (file)
@@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev)
        struct devlink *devlink;
        int err, num_qps;
 
-       devlink = hinic_devlink_alloc();
+       devlink = hinic_devlink_alloc(&pdev->dev);
        if (!devlink) {
                dev_err(&pdev->dev, "Hinic devlink alloc failed\n");
                return -ENOMEM;
index fc8c7cd..b8a4014 100644 (file)
@@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str)
               add, add + 6, add, add[12], add[13], str);
 }
 
-static int io = 0x300;
-static int irq = 10;
-
 static const struct net_device_ops i596_netdev_ops = {
        .ndo_open               = i596_open,
        .ndo_stop               = i596_close,
@@ -1123,7 +1120,7 @@ static const struct net_device_ops i596_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
 };
 
-struct net_device * __init i82596_probe(int unit)
+static struct net_device * __init i82596_probe(void)
 {
        struct net_device *dev;
        int i;
@@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit)
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       } else {
-               dev->base_addr = io;
-               dev->irq = irq;
-       }
-
 #ifdef ENABLE_MVME16x_NET
        if (MACH_IS_MVME16x) {
                if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) {
@@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev)
        }
 }
 
-#ifdef MODULE
 static struct net_device *dev_82596;
 
 static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "i82596 debug mask");
 
-int __init init_module(void)
+static int __init i82596_init(void)
 {
        if (debug >= 0)
                i596_debug = debug;
-       dev_82596 = i82596_probe(-1);
+       dev_82596 = i82596_probe();
        return PTR_ERR_OR_ZERO(dev_82596);
 }
+module_init(i82596_init);
 
-void __exit cleanup_module(void)
+static void __exit i82596_cleanup(void)
 {
        unregister_netdev(dev_82596);
 #ifdef __mc68000__
@@ -1544,5 +1533,4 @@ void __exit cleanup_module(void)
        free_page ((u32)(dev_82596->mem_start));
        free_netdev(dev_82596);
 }
-
-#endif                         /* MODULE */
+module_exit(i82596_cleanup);
index 4564ee0..893e0dd 100644 (file)
@@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */
 static int fifo=0x8;   /* don't change */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
@@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev)
        memset((char *)p->scb,0,sizeof(struct scb_struct));
 }
 
-struct net_device * __init sun3_82586_probe(int unit)
+static int __init sun3_82586_probe(void)
 {
        struct net_device *dev;
        unsigned long ioaddr;
@@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit)
                break;
 
        default:
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
        }
 
        if (found)
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
 
        ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE);
        if (!ioaddr)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        found = 1;
 
        dev = alloc_etherdev(sizeof(struct priv));
        if (!dev)
                goto out;
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
        dev->irq = IE_IRQ;
        dev->base_addr = ioaddr;
        err = sun3_82586_probe1(dev, ioaddr);
@@ -326,8 +322,9 @@ out1:
        free_netdev(dev);
 out:
        iounmap((void __iomem *)ioaddr);
-       return ERR_PTR(err);
+       return err;
 }
+module_init(sun3_82586_probe);
 
 static const struct net_device_ops sun3_82586_netdev_ops = {
        .ndo_open               = sun3_82586_open,
index 3e822ba..2c9e4ee 100644 (file)
@@ -980,7 +980,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
        default:
                /* if we got here and link is up something bad is afoot */
                netdev_info(netdev,
-                           "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+                           "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n",
                            hw_link_info->phy_type);
        }
 
@@ -5294,6 +5294,10 @@ flags_complete:
                                        dev_warn(&pf->pdev->dev,
                                                 "Device configuration forbids SW from starting the LLDP agent.\n");
                                        return -EINVAL;
+                               case I40E_AQ_RC_EAGAIN:
+                                       dev_warn(&pf->pdev->dev,
+                                                "Stop FW LLDP agent command is still being processed, please try again in a second.\n");
+                                       return -EBUSY;
                                default:
                                        dev_warn(&pf->pdev->dev,
                                                 "Starting FW LLDP agent failed: error: %s, %s\n",
index 5b4012a..97c7855 100644 (file)
@@ -4457,11 +4457,10 @@ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
 }
 
 /**
- * i40e_vsi_control_tx - Start or stop a VSI's rings
+ * i40e_vsi_enable_tx - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
 {
        struct i40e_pf *pf = vsi->back;
        int i, pf_q, ret = 0;
@@ -4470,7 +4469,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
        for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
                ret = i40e_control_wait_tx_q(vsi->seid, pf,
                                             pf_q,
-                                            false /*is xdp*/, enable);
+                                            false /*is xdp*/, true);
                if (ret)
                        break;
 
@@ -4479,7 +4478,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
 
                ret = i40e_control_wait_tx_q(vsi->seid, pf,
                                             pf_q + vsi->alloc_queue_pairs,
-                                            true /*is xdp*/, enable);
+                                            true /*is xdp*/, true);
                if (ret)
                        break;
        }
@@ -4577,32 +4576,25 @@ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
 }
 
 /**
- * i40e_vsi_control_rx - Start or stop a VSI's rings
+ * i40e_vsi_enable_rx - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
 {
        struct i40e_pf *pf = vsi->back;
        int i, pf_q, ret = 0;
 
        pf_q = vsi->base_queue;
        for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
-               ret = i40e_control_wait_rx_q(pf, pf_q, enable);
+               ret = i40e_control_wait_rx_q(pf, pf_q, true);
                if (ret) {
                        dev_info(&pf->pdev->dev,
-                                "VSI seid %d Rx ring %d %sable timeout\n",
-                                vsi->seid, pf_q, (enable ? "en" : "dis"));
+                                "VSI seid %d Rx ring %d enable timeout\n",
+                                vsi->seid, pf_q);
                        break;
                }
        }
 
-       /* Due to HW errata, on Rx disable only, the register can indicate done
-        * before it really is. Needs 50ms to be sure
-        */
-       if (!enable)
-               mdelay(50);
-
        return ret;
 }
 
@@ -4615,29 +4607,47 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi)
        int ret = 0;
 
        /* do rx first for enable and last for disable */
-       ret = i40e_vsi_control_rx(vsi, true);
+       ret = i40e_vsi_enable_rx(vsi);
        if (ret)
                return ret;
-       ret = i40e_vsi_control_tx(vsi, true);
+       ret = i40e_vsi_enable_tx(vsi);
 
        return ret;
 }
 
+#define I40E_DISABLE_TX_GAP_MSEC       50
+
 /**
  * i40e_vsi_stop_rings - Stop a VSI's rings
  * @vsi: the VSI being configured
  **/
 void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
 {
+       struct i40e_pf *pf = vsi->back;
+       int pf_q, err, q_end;
+
        /* When port TX is suspended, don't wait */
        if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
                return i40e_vsi_stop_rings_no_wait(vsi);
 
-       /* do rx first for enable and last for disable
-        * Ignore return value, we need to shutdown whatever we can
-        */
-       i40e_vsi_control_tx(vsi, false);
-       i40e_vsi_control_rx(vsi, false);
+       q_end = vsi->base_queue + vsi->num_queue_pairs;
+       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+               i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
+
+       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
+               err = i40e_control_wait_rx_q(pf, pf_q, false);
+               if (err)
+                       dev_info(&pf->pdev->dev,
+                                "VSI seid %d Rx ring %d disable timeout\n",
+                                vsi->seid, pf_q);
+       }
+
+       msleep(I40E_DISABLE_TX_GAP_MSEC);
+       pf_q = vsi->base_queue;
+       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+               wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
+
+       i40e_vsi_wait_queues_disabled(vsi);
 }
 
 /**
@@ -7283,6 +7293,8 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
        }
        if (vsi->num_queue_pairs <
            (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Failed to create traffic channel, insufficient number of queues.\n");
                return -EINVAL;
        }
        if (sum_max_rate > i40e_get_link_speed(vsi)) {
@@ -13264,6 +13276,7 @@ static const struct net_device_ops i40e_netdev_ops = {
        .ndo_poll_controller    = i40e_netpoll,
 #endif
        .ndo_setup_tc           = __i40e_setup_tc,
+       .ndo_select_queue       = i40e_lan_select_queue,
        .ndo_set_features       = i40e_set_features,
        .ndo_set_vf_mac         = i40e_ndo_set_vf_mac,
        .ndo_set_vf_vlan        = i40e_ndo_set_vf_port_vlan,
index 38eb815..3f25bd8 100644 (file)
@@ -3631,6 +3631,56 @@ dma_error:
        return -1;
 }
 
+static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev,
+                                 const struct sk_buff *skb,
+                                 u16 num_tx_queues)
+{
+       u32 jhash_initval_salt = 0xd631614b;
+       u32 hash;
+
+       if (skb->sk && skb->sk->sk_hash)
+               hash = skb->sk->sk_hash;
+       else
+               hash = (__force u16)skb->protocol ^ skb->hash;
+
+       hash = jhash_1word(hash, jhash_initval_salt);
+
+       return (u16)(((u64)hash * num_tx_queues) >> 32);
+}
+
+u16 i40e_lan_select_queue(struct net_device *netdev,
+                         struct sk_buff *skb,
+                         struct net_device __always_unused *sb_dev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_hw *hw;
+       u16 qoffset;
+       u16 qcount;
+       u8 tclass;
+       u16 hash;
+       u8 prio;
+
+       /* is DCB enabled at all? */
+       if (vsi->tc_config.numtc == 1)
+               return i40e_swdcb_skb_tx_hash(netdev, skb,
+                                             netdev->real_num_tx_queues);
+
+       prio = skb->priority;
+       hw = &vsi->back->hw;
+       tclass = hw->local_dcbx_config.etscfg.prioritytable[prio];
+       /* sanity check */
+       if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass))))
+               tclass = 0;
+
+       /* select a queue assigned for the given TC */
+       qcount = vsi->tc_config.tc_info[tclass].qcount;
+       hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount);
+
+       qoffset = vsi->tc_config.tc_info[tclass].qoffset;
+       return qoffset + hash;
+}
+
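
The software-DCB path reduces the salted jhash onto the queue count with a multiply-and-shift, ((u64)hash * n) >> 32, which maps a 32-bit hash uniformly into [0, n) without a modulo. A runnable sketch, with the hash and queue count assumed for illustration:

#include <stdio.h>
#include <stdint.h>

static uint16_t hash_to_queue(uint32_t hash, uint16_t num_tx_queues)
{
	/* Scales hash/2^32 into [0, num_tx_queues) without a division */
	return (uint16_t)(((uint64_t)hash * num_tx_queues) >> 32);
}

int main(void)
{
	/* Arbitrary example hash; result is always in 0..7 for 8 queues */
	printf("%u\n", (unsigned)hash_to_queue(0xd631614bu, 8));
	return 0;
}
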
 /**
  * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
  * @xdpf: data to transmit
index 86fed05..bfc2845 100644 (file)
@@ -451,6 +451,8 @@ static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
 
 bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb,
+                         struct net_device *sb_dev);
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);
index 91b545a..8c863d6 100644 (file)
@@ -475,7 +475,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev)
 {
        struct devlink *devlink;
 
-       devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf));
+       devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
        if (!devlink)
                return NULL;
 
@@ -502,7 +502,7 @@ int ice_devlink_register(struct ice_pf *pf)
        struct device *dev = ice_pf_to_dev(pf);
        int err;
 
-       err = devlink_register(devlink, dev);
+       err = devlink_register(devlink);
        if (err) {
                dev_err(dev, "devlink registration failed: %d\n", err);
                return err;
index ff8db31..5d1007e 100644 (file)
@@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
        if (!skb)
                return ERR_PTR(-ENOMEM);
 
-       skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool);
+       skb_mark_for_recycle(skb);
 
        skb_reserve(skb, xdp->data - xdp->data_hard_start);
        skb_put(skb, xdp->data_end - xdp->data);
@@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
                                skb_frag_page(frag), skb_frag_off(frag),
                                skb_frag_size(frag), PAGE_SIZE);
-               /* We don't need to reset pp_recycle here. It's already set, so
-                * just mark fragments for recycling.
-                */
-               page_pool_store_mem_info(skb_frag_page(frag), pool);
        }
 
        return skb;
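
Both mvneta and mvpp2 adapt here to the page_pool recycling rework: skb_mark_for_recycle() now takes only the skb, and the per-fragment page_pool_store_mem_info() calls disappear because the recycling metadata travels with the page itself. A minimal sketch of the updated rx-path usage, assuming a kernel context where the skb's buffers came from a page pool:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Sketch only: one call per skb is enough; fragments attached with
 * skb_add_rx_frag() are recycled along with the skb.
 */
static void rx_deliver(struct napi_struct *napi, struct sk_buff *skb)
{
	skb_mark_for_recycle(skb);
	napi_gro_receive(napi, skb);
}
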
index 99bd8b8..744f58f 100644 (file)
@@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
                }
 
                if (pp)
-                       skb_mark_for_recycle(skb, page, pp);
+                       skb_mark_for_recycle(skb);
                else
                        dma_unmap_single_attrs(dev->dev.parent, dma_addr,
                                               bm_pool->buf_size, DMA_FROM_DEVICE,
index 9169849..544c96c 100644 (file)
@@ -1504,8 +1504,8 @@ static int cgx_lmac_init(struct cgx *cgx)
 
                /* Add reference */
                cgx->lmac_idmap[lmac->lmac_id] = lmac;
-               cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
                set_bit(lmac->lmac_id, &cgx->lmac_bmap);
+               cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
        }
 
        return cgx_lmac_verify_fwi_version(cgx);
index 47f5ed0..752ba6b 100644 (file)
@@ -146,10 +146,7 @@ enum nix_scheduler {
 #define TXSCH_RR_QTM_MAX               ((1 << 24) - 1)
 #define TXSCH_TL1_DFLT_RR_QTM          TXSCH_RR_QTM_MAX
 #define TXSCH_TL1_DFLT_RR_PRIO         (0x1ull)
-#define MAX_SCHED_WEIGHT               0xFF
-#define DFLT_RR_WEIGHT                 71
-#define DFLT_RR_QTM    ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \
-                        / MAX_SCHED_WEIGHT)
+#define CN10K_MAX_DWRR_WEIGHT          16384 /* Weight is 14bit on CN10K */
 
 /* Min/Max packet sizes, excluding FCS */
 #define        NIC_HW_MIN_FRS                  40
index f5ec39d..4470933 100644 (file)
@@ -1032,8 +1032,12 @@ struct nix_bp_cfg_rsp {
 
 struct nix_hw_info {
        struct mbox_msghdr hdr;
+       u16 rsvs16;
        u16 max_mtu;
        u16 min_mtu;
+       u32 rpm_dwrr_mtu;
+       u32 sdp_dwrr_mtu;
+       u64 rsvd[16]; /* Reserved fields for future expansion */
 };
 
 struct nix_bandprof_alloc_req {
index 19bad9a..243cf80 100644 (file)
@@ -151,7 +151,10 @@ enum npc_kpu_lh_ltype {
  * Software assigns pkind for each incoming port such as CGX
  * Ethernet interfaces, LBK interfaces, etc.
  */
+#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND
+
 enum npc_pkind_type {
+       NPC_RX_LBK_PKIND = 0ULL,
        NPC_RX_VLAN_EXDSA_PKIND = 56ULL,
        NPC_RX_CHLEN24B_PKIND = 57ULL,
        NPC_RX_CPT_HDR_PKIND,
index 017163f..5fe277e 100644 (file)
@@ -391,8 +391,10 @@ void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf)
 
        /* Get numVFs attached to this PF and first HWVF */
        cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
-       *numvfs = (cfg >> 12) & 0xFF;
-       *hwvf = cfg & 0xFFF;
+       if (numvfs)
+               *numvfs = (cfg >> 12) & 0xFF;
+       if (hwvf)
+               *hwvf = cfg & 0xFFF;
 }
 
 static int rvu_get_hwvf(struct rvu *rvu, int pcifunc)
index 91503fb..95591e7 100644 (file)
@@ -329,6 +329,7 @@ struct hw_cap {
        bool    nix_shaping;             /* Is shaping and coloring supported */
        bool    nix_tx_link_bp;          /* Can link backpressure TL queues ? */
        bool    nix_rx_multicast;        /* Rx packet replication support */
+       bool    nix_common_dwrr_mtu;     /* Common DWRR MTU for quantum config */
        bool    per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */
        bool    programmable_chans; /* Channels programmable ? */
        bool    ipolicer;
@@ -706,6 +707,8 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
                        struct nix_cn10k_aq_enq_rsp *aq_rsp,
                        u16 pcifunc, u8 ctype, u32 qidx);
 int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc);
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu);
+u32 convert_bytes_to_dwrr_mtu(u32 bytes);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
index 2688186..a55b46a 100644 (file)
@@ -1364,6 +1364,89 @@ static void rvu_health_reporters_destroy(struct rvu *rvu)
        rvu_nix_health_reporters_destroy(rvu_dl);
 }
 
+/* Devlink Params APIs */
+static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id,
+                                      union devlink_param_value val,
+                                      struct netlink_ext_ack *extack)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       int dwrr_mtu = val.vu32;
+       struct nix_txsch *txsch;
+       struct nix_hw *nix_hw;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Setting DWRR_MTU is not supported on this silicon");
+               return -EOPNOTSUPP;
+       }
+
+       if ((dwrr_mtu > 65536 || !is_power_of_2(dwrr_mtu)) &&
+           (dwrr_mtu != 9728 && dwrr_mtu != 10240)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Invalid, supported MTUs are 0,2,4,8,16,32,64,...,4K,8K,32K,64K and 9728, 10240");
+               return -EINVAL;
+       }
+
+       nix_hw = get_nix_hw(rvu->hw, BLKADDR_NIX0);
+       if (!nix_hw)
+               return -ENODEV;
+
+       txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+       if (rvu_rsrc_free_count(&txsch->schq) != txsch->schq.max) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Changing DWRR MTU is not supported when there are active NIXLFs");
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Make sure none of the PF/VF interfaces are initialized and retry");
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       u64 dwrr_mtu;
+
+       dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32);
+       rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu);
+
+       return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       u64 dwrr_mtu;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu)
+               return -EOPNOTSUPP;
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+       ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+       return 0;
+}
+
+enum rvu_af_dl_param_id {
+       RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+       RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+};
+
+static const struct devlink_param rvu_af_dl_params[] = {
+       DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+                            "dwrr_mtu", DEVLINK_PARAM_TYPE_U32,
+                            BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+                            rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set,
+                            rvu_af_dl_dwrr_mtu_validate),
+};
+
+/* Devlink switch mode */
 static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
        struct rvu_devlink *rvu_dl = devlink_priv(devlink);
@@ -1420,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu)
        struct devlink *dl;
        int err;
 
-       dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink));
+       dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink),
+                          rvu->dev);
        if (!dl) {
                dev_warn(rvu->dev, "devlink_alloc failed\n");
                return -ENOMEM;
        }
 
-       err = devlink_register(dl, rvu->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_err(rvu->dev, "devlink register failed with error %d\n", err);
                devlink_free(dl);
@@ -1438,7 +1522,30 @@ int rvu_register_dl(struct rvu *rvu)
        rvu_dl->rvu = rvu;
        rvu->rvu_dl = rvu_dl;
 
-       return rvu_health_reporters_create(rvu);
+       err = rvu_health_reporters_create(rvu);
+       if (err) {
+               dev_err(rvu->dev,
+                       "devlink health reporter creation failed with error %d\n", err);
+               goto err_dl_health;
+       }
+
+       err = devlink_params_register(dl, rvu_af_dl_params,
+                                     ARRAY_SIZE(rvu_af_dl_params));
+       if (err) {
+               dev_err(rvu->dev,
+                       "devlink params register failed with error %d", err);
+               goto err_dl_health;
+       }
+
+       devlink_params_publish(dl);
+
+       return 0;
+
+err_dl_health:
+       rvu_health_reporters_destroy(rvu);
+       devlink_unregister(dl);
+       devlink_free(dl);
+       return err;
 }
 
 void rvu_unregister_dl(struct rvu *rvu)
@@ -1449,6 +1556,8 @@ void rvu_unregister_dl(struct rvu *rvu)
        if (!dl)
                return;
 
+       devlink_params_unregister(dl, rvu_af_dl_params,
+                                 ARRAY_SIZE(rvu_af_dl_params));
        rvu_health_reporters_destroy(rvu);
        devlink_unregister(dl);
        devlink_free(dl);
index 0933699..53db8eb 100644 (file)
@@ -192,15 +192,67 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr)
        return NULL;
 }
 
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu)
+{
+       dwrr_mtu &= 0x1FULL;
+
+       /* The MTU used for DWRR calculation is a power of 2 up to 64K bytes.
+        * Value of 4 is reserved for MTU value of 9728 bytes.
+        * Value of 5 is reserved for MTU value of 10240 bytes.
+        */
+       switch (dwrr_mtu) {
+       case 4:
+               return 9728;
+       case 5:
+               return 10240;
+       default:
+               return BIT_ULL(dwrr_mtu);
+       }
+
+       return 0;
+}
+
+u32 convert_bytes_to_dwrr_mtu(u32 bytes)
+{
+       /* The MTU used for DWRR calculation is a power of 2 up to 64K bytes.
+        * Value of 4 is reserved for MTU value of 9728 bytes.
+        * Value of 5 is reserved for MTU value of 10240 bytes.
+        */
+       if (bytes > BIT_ULL(16))
+               return 0;
+
+       switch (bytes) {
+       case 9728:
+               return 4;
+       case 10240:
+               return 5;
+       default:
+               return ilog2(bytes);
+       }
+
+       return 0;
+}
+
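
The pair of helpers implements a 5-bit register encoding: most codes are simply log2 of the DWRR MTU, with codes 4 and 5 reserved for the jumbo sizes 9728 and 10240 bytes. A user-space round-trip of that encoding, reimplemented here only for illustration:

#include <stdio.h>
#include <stdint.h>

static uint32_t dwrr_mtu_to_bytes(uint8_t dwrr_mtu)
{
	switch (dwrr_mtu & 0x1F) {
	case 4:  return 9728;
	case 5:  return 10240;
	default: return 1u << (dwrr_mtu & 0x1F);
	}
}

static uint32_t bytes_to_dwrr_mtu(uint32_t bytes)
{
	unsigned int log = 0;

	if (bytes == 9728)
		return 4;
	if (bytes == 10240)
		return 5;
	while ((1u << (log + 1)) <= bytes)	/* ilog2(bytes) */
		log++;
	return log;
}

int main(void)
{
	printf("%u %u\n", dwrr_mtu_to_bytes(bytes_to_dwrr_mtu(8192)),
	       dwrr_mtu_to_bytes(bytes_to_dwrr_mtu(9728)));	/* 8192 9728 */
	return 0;
}
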
 static void nix_rx_sync(struct rvu *rvu, int blkaddr)
 {
        int err;
 
-       /*Sync all in flight RX packets to LLC/DRAM */
+       /* Sync all in flight RX packets to LLC/DRAM */
+       rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
+       err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
+       if (err)
+               dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n");
+
+       /* SW_SYNC ensures all existing transactions are finished and pkts
+        * are written to LLC/DRAM; queues should be torn down only after a
+        * successful SW_SYNC. Due to a HW errata, in some rare scenarios an
+        * existing transaction might still complete after the SW_SYNC
+        * operation. To ensure the operation is fully done, do SW_SYNC twice.
+        */
        rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
        err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
        if (err)
-               dev_err(rvu->dev, "NIX RX software sync failed\n");
+               dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n");
 }
 
 static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
@@ -298,6 +350,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
                                        rvu_nix_chan_lbk(rvu, lbkid, vf + 1);
                pfvf->rx_chan_cnt = 1;
                pfvf->tx_chan_cnt = 1;
+               rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf);
                rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
                                              pfvf->rx_chan_base,
                                              pfvf->rx_chan_cnt);
@@ -1946,8 +1999,17 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw,
                return;
        rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq),
                    (TXSCH_TL1_DFLT_RR_PRIO << 1));
-       rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
-                   TXSCH_TL1_DFLT_RR_QTM);
+
+       /* On OcteonTx2 the config was in bytes; on newer silicons
+        * it is in terms of weight.
+        */
+       if (!rvu->hw->cap.nix_common_dwrr_mtu)
+               rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+                           TXSCH_TL1_DFLT_RR_QTM);
+       else
+               rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+                           CN10K_MAX_DWRR_WEIGHT);
+
        rvu_write64(rvu, blkaddr, NIX_AF_TL1X_CIR(schq), 0x00);
        pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE);
 }
@@ -2655,6 +2717,15 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
                for (schq = 0; schq < txsch->schq.max; schq++)
                        txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
        }
+
+       /* Setup a default value of 8192 as DWRR MTU */
+       if (rvu->hw->cap.nix_common_dwrr_mtu) {
+               rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU,
+                           convert_bytes_to_dwrr_mtu(8192));
+               rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU,
+                           convert_bytes_to_dwrr_mtu(8192));
+       }
+
        return 0;
 }
 
@@ -2731,6 +2802,7 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
                                     struct nix_hw_info *rsp)
 {
        u16 pcifunc = req->hdr.pcifunc;
+       u64 dwrr_mtu;
        int blkaddr;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
@@ -2743,6 +2815,20 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
                rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu);
 
        rsp->min_mtu = NIC_HW_MIN_FRS;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+               /* Return '1' on OTx2 */
+               rsp->rpm_dwrr_mtu = 1;
+               rsp->sdp_dwrr_mtu = 1;
+               return 0;
+       }
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+       rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU);
+       rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
        return 0;
 }
 
@@ -3635,6 +3721,28 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block)
        return 0;
 }
 
+static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+       u64 hw_const;
+
+       hw_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+
+       /* On OcteonTx2 DWRR quantum is directly configured into each of
+        * the transmit scheduler queues, and PF/VF drivers were free to
+        * configure any value up to 2^24.
+        * On CN10K the HW is modified: the quantum configuration at scheduler
+        * queues is in terms of weight, and SW needs to set up a base DWRR MTU
+        * at NIX_AF_DWRR_RPM_MTU / NIX_AF_DWRR_SDP_MTU. HW will do
+        * 'DWRR MTU * weight' to get the quantum.
+        *
+        * Check if HW uses a common MTU for all DWRR quantum configs.
+        * On OcteonTx2 this register field is '0'.
+        */
+       if (((hw_const >> 56) & 0x10) == 0x10)
+               hw->cap.nix_common_dwrr_mtu = true;
+}
+
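
As the comment above notes, on CN10K the hardware derives the scheduler quantum as DWRR MTU times the per-queue weight rather than taking the quantum directly. A quick check of that product, with the base MTU and maximum weight taken from the values used elsewhere in this series:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical CN10K setup: base DWRR MTU 8192, 14-bit max weight */
	uint64_t dwrr_mtu = 8192, weight = 16384;

	printf("quantum = %llu bytes\n",
	       (unsigned long long)(dwrr_mtu * weight));
	return 0;
}
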
 static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
 {
        const struct npc_lt_def_cfg *ltdefs;
@@ -3672,6 +3780,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
        if (err)
                return err;
 
+       /* Setup capabilities of the NIX block */
+       rvu_nix_setup_capabilities(rvu, blkaddr);
+
        /* Initialize admin queue */
        err = nix_aq_init(rvu, block);
        if (err)
@@ -3842,7 +3953,6 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr,
                vlan = &nix_hw->txvlan;
                kfree(vlan->rsrc.bmap);
                mutex_destroy(&vlan->rsrc_lock);
-               devm_kfree(rvu->dev, vlan->entry2pfvf_map);
 
                mcast = &nix_hw->mcast;
                qmem_free(rvu->dev, mcast->mce_ctx);
index 1097291..52b2554 100644 (file)
@@ -1721,7 +1721,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr)
 {
        struct rvu_hwinfo *hw = rvu->hw;
        int num_pkinds, num_kpus, idx;
-       struct npc_pkind *pkind;
 
        /* Disable all KPUs and their entries */
        for (idx = 0; idx < hw->npc_kpus; idx++) {
@@ -1739,9 +1738,8 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr)
         * Check HW max count to avoid configuring junk or
         * writing to unsupported CSR addresses.
         */
-       pkind = &hw->pkind;
        num_pkinds = rvu->kpu.pkinds;
-       num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds);
+       num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds);
 
        for (idx = 0; idx < num_pkinds; idx++)
                npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true);
@@ -1891,7 +1889,8 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr)
        if (npc_const1 & BIT_ULL(63))
                npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2);
 
-       pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL;
+       pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT;
+       hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL;
        hw->npc_kpu_entries = npc_const1 & 0xFFFULL;
        hw->npc_kpus = (npc_const >> 8) & 0x1FULL;
        hw->npc_intfs = npc_const & 0xFULL;
@@ -2002,6 +2001,10 @@ int rvu_npc_init(struct rvu *rvu)
        err = rvu_alloc_bitmap(&pkind->rsrc);
        if (err)
                return err;
+       /* Reserve PKIND#0 for LBKs. The power-on reset value of LBK_CH_PKIND
+        * is '0', so there is no need to configure a PKIND for each LBK
+        * separately.
+        */
+       rvu_alloc_rsrc(&pkind->rsrc);
 
        /* Allocate mem for pkind to PF and channel mapping info */
        pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max,
index 8b01ef6..6efcf3a 100644 (file)
 #define NIX_AF_DEBUG_NPC_RESP_DATAX(a)          (0x680 | (a) << 3)
 #define NIX_AF_SMQX_CFG(a)                      (0x700 | (a) << 16)
 #define NIX_AF_SQM_DBG_CTL_STATUS               (0x750)
+#define NIX_AF_DWRR_SDP_MTU                     (0x790)
+#define NIX_AF_DWRR_RPM_MTU                     (0x7A0)
 #define NIX_AF_PSE_CHANNEL_LEVEL                (0x800)
 #define NIX_AF_PSE_SHAPER_CFG                   (0x810)
 #define NIX_AF_TX_EXPR_CREDIT                  (0x830)
index 2e53797..820adf3 100644 (file)
@@ -71,8 +71,8 @@ static int rvu_switch_install_rules(struct rvu *rvu)
        struct rvu_switch *rswitch = &rvu->rswitch;
        u16 start = rswitch->start_entry;
        struct rvu_hwinfo *hw = rvu->hw;
-       int pf, vf, numvfs, hwvf;
        u16 pcifunc, entry = 0;
+       int pf, vf, numvfs;
        int err;
 
        for (pf = 1; pf < hw->total_pfs; pf++) {
@@ -110,8 +110,8 @@ static int rvu_switch_install_rules(struct rvu *rvu)
 
                rswitch->entry2pcifunc[entry++] = pcifunc;
 
-               rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf);
-               for (vf = 0; vf < numvfs; vf++, hwvf++) {
+               rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+               for (vf = 0; vf < numvfs; vf++) {
                        pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
                        rvu_get_nix_blkaddr(rvu, pcifunc);
 
@@ -198,7 +198,7 @@ void rvu_switch_disable(struct rvu *rvu)
        struct npc_mcam_free_entry_req free_req = { 0 };
        struct rvu_switch *rswitch = &rvu->rswitch;
        struct rvu_hwinfo *hw = rvu->hw;
-       int pf, vf, numvfs, hwvf;
+       int pf, vf, numvfs;
        struct msg_rsp rsp;
        u16 pcifunc;
        int err;
@@ -217,7 +217,8 @@ void rvu_switch_disable(struct rvu *rvu)
                                "Reverting RX rule for PF%d failed(%d)\n",
                                pf, err);
 
-               for (vf = 0; vf < numvfs; vf++, hwvf++) {
+               rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+               for (vf = 0; vf < numvfs; vf++) {
                        pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
                        err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF);
                        if (err)
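
Both loops above pack the target pcifunc the same way; a hedged sketch of that
encoding, assuming (as the code suggests) that the low 10 bits carry 'VF index
+ 1' with zero selecting the PF itself, and the PF number sits above bit 10:

	#define EXAMPLE_FUNC_MASK 0x3FF		/* mirrors the 0x3FF above */

	static inline u16 example_make_pcifunc(int pf, int vf)
	{
		/* vf < 0 addresses the PF itself */
		return (u16)(pf << 10 | (vf < 0 ? 0 : ((vf + 1) & EXAMPLE_FUNC_MASK)));
	}

	static inline int example_pcifunc_to_vf(u16 pcifunc)
	{
		int func = pcifunc & EXAMPLE_FUNC_MASK;

		return func ? func - 1 : -1;	/* -1: the pcifunc names a PF */
	}
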
index 184de94..ccffdda 100644 (file)
@@ -92,8 +92,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
        aq->sq.ena = 1;
        /* Only one SMQ is allocated, map all SQ's to that SMQ  */
        aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-       /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
-       aq->sq.smq_rr_weight = pfvf->netdev->mtu;
+       aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
        aq->sq.default_chan = pfvf->hw.tx_chan_base;
        aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
        aq->sq.sqb_aura = sqb_aura;
index 1a1ae33..e07723d 100644 (file)
@@ -9,6 +9,20 @@
 
 #include "otx2_common.h"
 
+static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu)
+{
+       u32 weight;
+
+       /* On OTx2, since AF returns DWRR_MTU as '1', this logic
+        * will work on those silicons as well.
+        */
+       weight = mtu / pfvf->hw.dwrr_mtu;
+       if (mtu % pfvf->hw.dwrr_mtu)
+               weight += 1;
+
+       return weight;
+}
+
 void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
 void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
 int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
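
A few worked values for mtu_to_dwrr_weight() as defined above (the dwrr_mtu
figures are illustrative):

	/* dwrr_mtu = 1    (OTx2, AF returns '1'):  mtu = 1500 -> weight = 1500,
	 *                                          identical to the old
	 *                                          direct-quantum behaviour.
	 * dwrr_mtu = 1500 (CN10K, example value):  mtu = 1500 -> weight = 1
	 *                                          mtu = 9212 -> weight = 7
	 */
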
index 7cccd80..ce799b7 100644 (file)
@@ -596,6 +596,9 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
        struct otx2_hw *hw = &pfvf->hw;
        struct nix_txschq_config *req;
        u64 schq, parent;
+       u64 dwrr_val;
+
+       dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
 
        req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
        if (!req)
@@ -621,21 +624,21 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
                req->num_regs++;
                /* Set DWRR quantum */
                req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
-               req->regval[2] =  DFLT_RR_QTM;
+               req->regval[2] =  dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL4) {
                parent =  hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
                req->reg[0] = NIX_AF_TL4X_PARENT(schq);
                req->regval[0] = parent << 16;
                req->num_regs++;
                req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
-               req->regval[1] = DFLT_RR_QTM;
+               req->regval[1] = dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL3) {
                parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
                req->reg[0] = NIX_AF_TL3X_PARENT(schq);
                req->regval[0] = parent << 16;
                req->num_regs++;
                req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
-               req->regval[1] = DFLT_RR_QTM;
+               req->regval[1] = dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL2) {
                parent =  hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
                req->reg[0] = NIX_AF_TL2X_PARENT(schq);
@@ -643,7 +646,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 
                req->num_regs++;
                req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
-               req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM;
+               req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
 
                req->num_regs++;
                req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq,
@@ -656,7 +659,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
                 * For VF this is always ignored.
                 */
 
-               /* Set DWRR quantum */
+               /* On CN10K, if RR_WEIGHT is greater than 16384, HW will
+                * clip it to 16384, so configuring the 24-bit max value
+                * will work on both OTx2 and CN10K.
+                */
                req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq);
                req->regval[0] = TXSCH_TL1_DFLT_RR_QTM;
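
A short note on why the same constant is safe on both silicon generations,
assuming the usual definition TXSCH_TL1_DFLT_RR_QTM == ((1 << 24) - 1):

	/* OTx2 consumes 0xFFFFFF directly as the DWRR quantum, while CN10K
	 * treats the field as a weight and clips anything above 16384, so
	 * one 24-bit max constant serves both.
	 */
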
 
@@ -803,7 +809,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
        aq->sq.ena = 1;
        /* Only one SMQ is allocated, map all SQ's to that SMQ  */
        aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-       aq->sq.smq_rr_quantum = DFLT_RR_QTM;
+       aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
        aq->sq.default_chan = pfvf->hw.tx_chan_base;
        aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
        aq->sq.sqb_aura = sqb_aura;
@@ -924,12 +930,14 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
                aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt);
                aq->cq.drop_ena = 1;
 
-               /* Enable receive CQ backpressure */
-               aq->cq.bp_ena = 1;
-               aq->cq.bpid = pfvf->bpid[0];
+               if (!is_otx2_lbkvf(pfvf->pdev)) {
+                       /* Enable receive CQ backpressure */
+                       aq->cq.bp_ena = 1;
+                       aq->cq.bpid = pfvf->bpid[0];
 
-               /* Set backpressure level to be same as cq pass level */
-               aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+                       /* Set backpressure level to be same as cq pass level */
+                       aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+               }
        }
 
        /* Fill AQ info */
@@ -1186,7 +1194,7 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
        aq->aura.fc_hyst_bits = 0; /* Store count on all updates */
 
        /* Enable backpressure for RQ aura */
-       if (aura_id < pfvf->hw.rqpool_cnt) {
+       if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) {
                aq->aura.bp_ena = 0;
                aq->aura.nix0_bpid = pfvf->bpid[0];
                /* Set backpressure level for RQ's Aura */
@@ -1666,6 +1674,11 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf)
                 * SMQ errors
                 */
                max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN;
+
+               /* Also save DWRR MTU, needed for DWRR weight calculation */
+               pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu;
+               if (!pfvf->hw.dwrr_mtu)
+                       pfvf->hw.dwrr_mtu = 1;
        }
 
 out:
index 8fd58cd..2a80cdc 100644 (file)
@@ -181,6 +181,7 @@ struct otx2_hw {
        /* NIX */
        u16             txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
        u16                     matchall_ipolicer;
+       u32                     dwrr_mtu;
 
        /* HW settings, coalescing etc */
        u16                     rx_chan_base;
index 8df748e..b906a0e 100644 (file)
@@ -298,15 +298,14 @@ static int otx2_set_channels(struct net_device *dev,
        err = otx2_set_real_num_queues(dev, channel->tx_count,
                                       channel->rx_count);
        if (err)
-               goto fail;
+               return err;
 
        pfvf->hw.rx_queues = channel->rx_count;
        pfvf->hw.tx_queues = channel->tx_count;
        pfvf->qset.cq_cnt = pfvf->hw.tx_queues +  pfvf->hw.rx_queues;
 
-fail:
        if (if_up)
-               dev->netdev_ops->ndo_open(dev);
+               err = dev->netdev_ops->ndo_open(dev);
 
        netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
                    pfvf->hw.tx_queues, pfvf->hw.rx_queues);
@@ -410,7 +409,7 @@ static int otx2_set_ringparam(struct net_device *netdev,
        qs->rqe_cnt = rx_count;
 
        if (if_up)
-               netdev->netdev_ops->ndo_open(netdev);
+               return netdev->netdev_ops->ndo_open(netdev);
 
        return 0;
 }
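
Both ethtool handlers above follow the same close/modify/reopen shape; a
minimal sketch of the corrected pattern (hypothetical helper, kernel-style),
showing why the ndo_open() return value must now be propagated:

	static int example_set_queues(struct net_device *dev, int txq, int rxq)
	{
		bool if_up = netif_running(dev);
		int err = 0;

		if (if_up)
			dev->netdev_ops->ndo_stop(dev);

		/* ... apply the new Tx/Rx queue counts here ... */

		if (if_up)
			err = dev->netdev_ops->ndo_open(dev);

		/* a reopen failure is now reported instead of silently ignored */
		return err;
	}
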
index 3f03bbd..22b7af0 100644 (file)
@@ -1662,6 +1662,7 @@ int otx2_open(struct net_device *netdev)
 err_tx_stop_queues:
        netif_tx_stop_all_queues(netdev);
        netif_carrier_off(netdev);
+       pf->flags |= OTX2_FLAG_INTF_DOWN;
 err_free_cints:
        otx2_free_cints(pf, qidx);
        vec = pci_irq_vector(pf->pdev,
@@ -1689,6 +1690,10 @@ int otx2_stop(struct net_device *netdev)
        struct otx2_rss_info *rss;
        int qidx, vec, wrk;
 
+       /* If the DOWN flag is set, resources are already freed */
+       if (pf->flags & OTX2_FLAG_INTF_DOWN)
+               return 0;
+
        netif_carrier_off(netdev);
        netif_tx_stop_all_queues(netdev);
 
index d12e21d..68b442e 100644 (file)
@@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = {
        .trap_drop_counter_get = prestera_drop_counter_get,
 };
 
-struct prestera_switch *prestera_devlink_alloc(void)
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev)
 {
        struct devlink *dl;
 
-       dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch));
+       dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch),
+                          dev->dev);
 
        return devlink_priv(dl);
 }
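
These hunks reflect the tree-wide devlink API rework: the struct device binding
moves from devlink_register() to devlink_alloc(). A minimal probe-time sketch
of the new shape; my_probe and my_priv are hypothetical names:

	#include <net/devlink.h>

	struct my_priv { int dummy; };

	static int my_probe(struct device *dev, const struct devlink_ops *ops)
	{
		struct devlink *dl;
		int err;

		/* device is now bound at allocation time */
		dl = devlink_alloc(ops, sizeof(struct my_priv), dev);
		if (!dl)
			return -ENOMEM;

		err = devlink_register(dl);	/* no device argument anymore */
		if (err)
			devlink_free(dl);
		return err;
	}
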
@@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw)
        struct devlink *dl = priv_to_devlink(sw);
        int err;
 
-       err = devlink_register(dl, sw->dev->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err);
                return err;
@@ -530,6 +531,8 @@ err_trap_register:
                prestera_trap = &prestera_trap_items_arr[i];
                devlink_traps_unregister(devlink, &prestera_trap->trap, 1);
        }
+       devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr,
+                                      groups_count);
 err_groups_register:
        kfree(trap_data->trap_items_arr);
 err_trap_items_alloc:
index 5d73aa9..cc34c3d 100644 (file)
@@ -6,7 +6,7 @@
 
 #include "prestera.h"
 
-struct prestera_switch *prestera_devlink_alloc(void);
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev);
 void prestera_devlink_free(struct prestera_switch *sw);
 
 int prestera_devlink_register(struct prestera_switch *sw);
index 7c569c1..44c6708 100644 (file)
@@ -905,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev)
        struct prestera_switch *sw;
        int err;
 
-       sw = prestera_devlink_alloc();
+       sw = prestera_devlink_alloc(dev);
        if (!sw)
                return -ENOMEM;
 
index 743ca96..dc9dd77 100644 (file)
@@ -4884,7 +4884,7 @@ static int sky2_test_msi(struct sky2_hw *hw)
 /* This driver supports yukon2 chipset only */
 static const char *sky2_name(u8 chipid, char *buf, int sz)
 {
-       const char *name[] = {
+       static const char *const name[] = {
                "XL",           /* 0xb3 */
                "EC Ultra",     /* 0xb4 */
                "Extreme",      /* 0xb5 */
index 00c8465..7267c6c 100644 (file)
@@ -3535,6 +3535,7 @@ slave_start:
 
                if (!SRIOV_VALID_STATE(dev->flags)) {
                        mlx4_err(dev, "Invalid SRIOV state\n");
+                       err = -EINVAL;
                        goto err_close;
                }
        }
@@ -4004,7 +4005,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
        printk_once(KERN_INFO "%s", mlx4_version);
 
-       devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
+       devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev);
        if (!devlink)
                return -ENOMEM;
        priv = devlink_priv(devlink);
@@ -4023,7 +4024,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        mutex_init(&dev->persist->interface_state_mutex);
        mutex_init(&dev->persist->pci_status_mutex);
 
-       ret = devlink_register(devlink, &pdev->dev);
+       ret = devlink_register(devlink);
        if (ret)
                goto err_persist_free;
        ret = devlink_params_register(devlink, mlx4_devlink_params,
index 427e7a3..2584bc0 100644 (file)
@@ -917,7 +917,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 {
        int err;
        int i;
-       enum mlx4_qp_state states[] = {
+       static const enum mlx4_qp_state states[] = {
                MLX4_QP_STATE_RST,
                MLX4_QP_STATE_INIT,
                MLX4_QP_STATE_RTR,
index 6378dc8..33e550d 100644 (file)
@@ -15,7 +15,7 @@ mlx5_core-y :=        main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
                transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
                fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
-               lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
+               lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
                diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
                fw_reset.o qos.o
 
@@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
                en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
                en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
                en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \
-               en/rx_res.o
+               en/rx_res.o en/channels.o
 
 #
 # Netdev extra
index df3e493..99ec278 100644 (file)
@@ -89,7 +89,8 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                        u32 *in, int inlen, u32 *out, int outlen)
 {
-       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+                          c_eqn_or_apu_element);
        u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
        struct mlx5_eq_comp *eq;
        int err;
index ceebfc2..def2156 100644 (file)
@@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data)
        return 1;
 }
 
-/* This function is called with two flows:
- * 1. During initialization of mlx5_core_dev and we don't need to lock it.
- * 2. During LAG configure stage and caller holds &mlx5_intf_mutex.
- */
+/* Must be called with intf_mutex held */
 struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
 {
        struct auxiliary_device *adev;
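
The tightened comment is the kind of locking contract usually enforced with
lockdep; a sketch of how that could look here, assuming the mutex symbol is
visible in this translation unit:

	struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
	{
		lockdep_assert_held(&mlx5_intf_mutex);
		/* ... walk the auxiliary devices as before ... */
	}
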
index d791d35..f38553f 100644 (file)
@@ -359,9 +359,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
        return 0;
 }
 
-struct devlink *mlx5_devlink_alloc(void)
+struct devlink *mlx5_devlink_alloc(struct device *dev)
 {
-       return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+       return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev),
+                            dev);
 }
 
 void mlx5_devlink_free(struct devlink *devlink)
@@ -638,11 +639,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink)
                                       ARRAY_SIZE(mlx5_trap_groups_arr));
 }
 
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+int mlx5_devlink_register(struct devlink *devlink)
 {
        int err;
 
-       err = devlink_register(devlink, dev);
+       err = devlink_register(devlink);
        if (err)
                return err;
 
index 7318d44..30bf488 100644 (file)
@@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
 int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
                                  enum devlink_trap_action *action);
 
-struct devlink *mlx5_devlink_alloc(void);
+struct devlink *mlx5_devlink_alloc(struct device *dev);
 void mlx5_devlink_free(struct devlink *devlink);
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+int mlx5_devlink_register(struct devlink *devlink);
 void mlx5_devlink_unregister(struct devlink *devlink);
 
 #endif /* __MLX5_DEVLINK_H__ */
index 3566898..4f6897c 100644 (file)
@@ -66,8 +66,6 @@ struct page_pool;
 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
 #define MLX5E_METADATA_ETHER_LEN 8
 
-#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
-
 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
 
 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
@@ -140,6 +138,7 @@ struct page_pool;
 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW            0x2
 
 #define MLX5E_MIN_NUM_CHANNELS         0x1
+#define MLX5E_MAX_NUM_CHANNELS         (MLX5E_INDIR_RQT_SIZE / 2)
 #define MLX5E_MAX_NUM_SQS              (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
 #define MLX5E_TX_CQ_POLL_BUDGET        128
 #define MLX5E_TX_XSK_POLL_BUDGET       64
@@ -921,8 +920,6 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
                           u16 vid);
 void mlx5e_timestamp_init(struct mlx5e_priv *priv);
 
-int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv);
-
 struct mlx5e_xsk_param;
 
 struct mlx5e_rq_param;
@@ -984,9 +981,6 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
 
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
-                                  int num_channels);
-
 int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
 void mlx5e_activate_rq(struct mlx5e_rq *rq);
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
@@ -1036,16 +1030,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
 int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
 void mlx5e_free_di_list(struct mlx5e_rq *rq);
 
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
-
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
-
 int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
 
@@ -1133,8 +1117,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
 void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
-                           u16 num_channels);
 void mlx5e_rx_dim_work(struct work_struct *work);
 void mlx5e_tx_dim_work(struct work_struct *work);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
new file mode 100644 (file)
index 0000000..e7c14c0
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+       return chs->num;
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+       struct mlx5e_channel *c;
+
+       WARN_ON(ix >= mlx5e_channels_get_num(chs));
+       c = chs->c[ix];
+
+       *rqn = c->rq.rqn;
+}
+
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+       struct mlx5e_channel *c;
+
+       WARN_ON(ix >= mlx5e_channels_get_num(chs));
+       c = chs->c[ix];
+
+       if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+               return false;
+
+       *rqn = c->xskrq.rqn;
+       return true;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+       struct mlx5e_ptp *c = chs->ptp;
+
+       if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+               return false;
+
+       *rqn = c->rq.rqn;
+       return true;
+}
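
A hedged sketch of a consumer of the new getters above (the function is
hypothetical; 'drop_rqn' is whatever RQ the caller parks traffic on): gather
the regular RQ number of every channel and substitute the drop RQ for the PTP
RQ when PTP RX is not active, mirroring how the RX resources code uses this
API later in the series.

	#include "channels.h"

	static void example_collect_rqns(struct mlx5e_channels *chs,
					 u32 *rqns, u32 drop_rqn, u32 *ptp_rqn)
	{
		unsigned int ix, nch = mlx5e_channels_get_num(chs);

		for (ix = 0; ix < nch; ix++)
			mlx5e_channels_get_regular_rqn(chs, ix, &rqns[ix]);

		if (!mlx5e_channels_get_ptp_rqn(chs, ptp_rqn))
			*ptp_rqn = drop_rqn;	/* no active PTP RX queue */
	}
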
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
new file mode 100644 (file)
index 0000000..ca00cbc
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
index 0e053aa..e348c27 100644 (file)
@@ -5,6 +5,7 @@
 #define __MLX5E_FLOW_STEER_H__
 
 #include "mod_hdr.h"
+#include "lib/fs_ttc.h"
 
 enum {
        MLX5E_TC_FT_LEVEL = 0,
@@ -67,21 +68,7 @@ struct mlx5e_l2_table {
        bool                       promisc_enabled;
 };
 
-enum mlx5e_traffic_types {
-       MLX5E_TT_IPV4_TCP,
-       MLX5E_TT_IPV6_TCP,
-       MLX5E_TT_IPV4_UDP,
-       MLX5E_TT_IPV6_UDP,
-       MLX5E_TT_IPV4_IPSEC_AH,
-       MLX5E_TT_IPV6_IPSEC_AH,
-       MLX5E_TT_IPV4_IPSEC_ESP,
-       MLX5E_TT_IPV6_IPSEC_ESP,
-       MLX5E_TT_IPV4,
-       MLX5E_TT_IPV6,
-       MLX5E_TT_ANY,
-       MLX5E_NUM_TT,
-       MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
-};
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
 
 #define MLX5_HASH_IP           (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                                 MLX5_HASH_FIELD_SEL_DST_IP)
@@ -93,30 +80,6 @@ enum mlx5e_traffic_types {
                                 MLX5_HASH_FIELD_SEL_DST_IP   |\
                                 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
 
-enum mlx5e_tunnel_types {
-       MLX5E_TT_IPV4_GRE,
-       MLX5E_TT_IPV6_GRE,
-       MLX5E_TT_IPV4_IPIP,
-       MLX5E_TT_IPV6_IPIP,
-       MLX5E_TT_IPV4_IPV6,
-       MLX5E_TT_IPV6_IPV6,
-       MLX5E_NUM_TUNNEL_TT,
-};
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
-
-struct mlx5e_ttc_rule {
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_destination default_dest;
-};
-
-/* L3/L4 traffic type classifier */
-struct mlx5e_ttc_table {
-       struct mlx5e_flow_table ft;
-       struct mlx5e_ttc_rule rules[MLX5E_NUM_TT];
-       struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
-};
-
 /* NIC prio FTS */
 enum {
        MLX5E_PROMISC_FT_LEVEL,
@@ -138,22 +101,6 @@ enum {
 #endif
 };
 
-#define MLX5E_TTC_NUM_GROUPS   3
-#define MLX5E_TTC_GROUP1_SIZE  (BIT(3) + MLX5E_NUM_TUNNEL_TT)
-#define MLX5E_TTC_GROUP2_SIZE   BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE   BIT(0)
-#define MLX5E_TTC_TABLE_SIZE   (MLX5E_TTC_GROUP1_SIZE +\
-                                MLX5E_TTC_GROUP2_SIZE +\
-                                MLX5E_TTC_GROUP3_SIZE)
-
-#define MLX5E_INNER_TTC_NUM_GROUPS     3
-#define MLX5E_INNER_TTC_GROUP1_SIZE    BIT(3)
-#define MLX5E_INNER_TTC_GROUP2_SIZE    BIT(1)
-#define MLX5E_INNER_TTC_GROUP3_SIZE    BIT(0)
-#define MLX5E_INNER_TTC_TABLE_SIZE     (MLX5E_INNER_TTC_GROUP1_SIZE +\
-                                        MLX5E_INNER_TTC_GROUP2_SIZE +\
-                                        MLX5E_INNER_TTC_GROUP3_SIZE)
-
 struct mlx5e_priv;
 
 #ifdef CONFIG_MLX5_EN_RXNFC
@@ -222,8 +169,8 @@ struct mlx5e_flow_steering {
        struct mlx5e_promisc_table      promisc;
        struct mlx5e_vlan_table         *vlan;
        struct mlx5e_l2_table           l2;
-       struct mlx5e_ttc_table          ttc;
-       struct mlx5e_ttc_table          inner_ttc;
+       struct mlx5_ttc_table           *ttc;
+       struct mlx5_ttc_table           *inner_ttc;
 #ifdef CONFIG_MLX5_EN_ARFS
        struct mlx5e_arfs_tables       *arfs;
 #endif
@@ -235,27 +182,13 @@ struct mlx5e_flow_steering {
        struct mlx5e_ptp_fs            *ptp_fs;
 };
 
-struct ttc_params {
-       struct mlx5_flow_table_attr ft_attr;
-       u32 any_tt_tirn;
-       u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_ttc_table *inner_ttc;
-};
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+                         struct ttc_params *ttc_params, bool tunnel);
 
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                          struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
-                            struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
 
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
-                      struct mlx5_flow_destination *new_dest);
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
 
 void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
 void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
@@ -263,7 +196,6 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt);
 int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int  trap_id, int tir_num);
 void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
 int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int  trap_id, int tir_num);
index 909faa6..7aa25a5 100644 (file)
@@ -33,22 +33,22 @@ static char *fs_udp_type2str(enum fs_udp_type i)
        }
 }
 
-static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i)
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
 {
        switch (i) {
        case FS_IPV4_UDP:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        default: /* FS_IPV6_UDP */
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        }
 }
 
-static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i)
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
 {
        switch (i) {
-       case MLX5E_TT_IPV4_UDP:
+       case MLX5_TT_IPV4_UDP:
                return FS_IPV4_UDP;
-       case MLX5E_TT_IPV6_UDP:
+       case MLX5_TT_IPV6_UDP:
                return FS_IPV6_UDP;
        default:
                return FS_UDP_NUM_TYPES;
@@ -75,7 +75,7 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type
 
 struct mlx5_flow_handle *
 mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
-                                 enum mlx5e_traffic_types ttc_type,
+                                 enum mlx5_traffic_types ttc_type,
                                  u32 tir_num, u16 d_port)
 {
        enum fs_udp_type type = tt2fs_udp(ttc_type);
@@ -124,7 +124,7 @@ static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type typ
        fs_udp = priv->fs.udp;
        fs_udp_t = &fs_udp->tables[type];
 
-       dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type));
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type));
        rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -259,7 +259,7 @@ static int fs_udp_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
                /* Modify ttc rules destination to point back to the indir TIRs */
-               err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -281,7 +281,7 @@ static int fs_udp_enable(struct mlx5e_priv *priv)
                dest.ft = priv->fs.udp->tables[i].t;
 
                /* Modify ttc rules destination to point on the accel_fs FTs */
-               err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
@@ -401,7 +401,7 @@ static int fs_any_add_default_rule(struct mlx5e_priv *priv)
        fs_any = priv->fs.any;
        fs_any_t = &fs_any->table;
 
-       dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY);
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY);
        rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -514,11 +514,11 @@ static int fs_any_disable(struct mlx5e_priv *priv)
        int err;
 
        /* Modify ttc rules destination to point back to the indir TIRs */
-       err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY);
+       err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY);
        if (err) {
                netdev_err(priv->netdev,
                           "%s: modify ttc[%d] default destination failed, err(%d)\n",
-                          __func__, MLX5E_TT_ANY, err);
+                          __func__, MLX5_TT_ANY, err);
                return err;
        }
        return 0;
@@ -533,11 +533,11 @@ static int fs_any_enable(struct mlx5e_priv *priv)
        dest.ft = priv->fs.any->table.t;
 
        /* Modify ttc rules destination to point on the accel_fs FTs */
-       err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest);
+       err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest);
        if (err) {
                netdev_err(priv->netdev,
                           "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
-                          __func__, MLX5E_TT_ANY, err);
+                          __func__, MLX5_TT_ANY, err);
                return err;
        }
        return 0;
index 8385df2..7a70c4f 100644 (file)
@@ -12,7 +12,7 @@ void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
 /* UDP traffic type redirect */
 struct mlx5_flow_handle *
 mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
-                                 enum mlx5e_traffic_types ttc_type,
+                                 enum mlx5_traffic_types ttc_type,
                                  u32 tir_num, u16 d_port);
 void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
 int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
index ea321e5..4e72ca8 100644 (file)
@@ -5,11 +5,15 @@
 #include <linux/slab.h>
 #include <linux/xarray.h>
 #include <linux/hashtable.h>
+#include <linux/refcount.h>
 
 #include "mapping.h"
 
 #define MAPPING_GRACE_PERIOD 2000
 
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
 struct mapping_ctx {
        struct xarray xarray;
        DECLARE_HASHTABLE(ht, 8);
@@ -20,6 +24,10 @@ struct mapping_ctx {
        struct delayed_work dwork;
        struct list_head pending_list;
        spinlock_t pending_list_lock; /* Guards pending list */
+       u64 id;
+       u8 type;
+       struct list_head list;
+       refcount_t refcount;
 };
 
 struct mapping_item {
@@ -205,11 +213,48 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
        mutex_init(&ctx->lock);
        xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
 
+       refcount_set(&ctx->refcount, 1);
+       INIT_LIST_HEAD(&ctx->list);
+
+       return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+       struct mapping_ctx *ctx;
+
+       mutex_lock(&shared_ctx_lock);
+       list_for_each_entry(ctx, &shared_ctx_list, list) {
+               if (ctx->id == id && ctx->type == type) {
+                       if (refcount_inc_not_zero(&ctx->refcount))
+                               goto unlock;
+                       break;
+               }
+       }
+
+       ctx = mapping_create(data_size, max_id, delayed_removal);
+       if (IS_ERR(ctx))
+               goto unlock;
+
+       ctx->id = id;
+       ctx->type = type;
+       list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+       mutex_unlock(&shared_ctx_lock);
        return ctx;
 }
 
 void mapping_destroy(struct mapping_ctx *ctx)
 {
+       if (!refcount_dec_and_test(&ctx->refcount))
+               return;
+
+       mutex_lock(&shared_ctx_lock);
+       list_del(&ctx->list);
+       mutex_unlock(&shared_ctx_lock);
+
        mapping_flush_work(ctx);
        xa_destroy(&ctx->xarray);
        mutex_destroy(&ctx->lock);
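
An illustrative lifecycle of the shared contexts added above; the type value
and sizes are made up:

	static int example_shared_mapping(u64 devid)
	{
		struct mapping_ctx *a, *b;

		a = mapping_create_for_id(devid, 1, sizeof(u32), 128, true);
		if (IS_ERR(a))
			return PTR_ERR(a);

		b = mapping_create_for_id(devid, 1, sizeof(u32), 128, true);
		/* b == a: the second call matched (id, type) and took a reference */

		mapping_destroy(b);	/* refcount 2 -> 1: context stays alive */
		mapping_destroy(a);	/* last put: unlinked from the list and freed */
		return 0;
	}
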
index 285525c..4e2119f 100644 (file)
@@ -24,4 +24,9 @@ struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
                                   bool delayed_removal);
 void mapping_destroy(struct mapping_ctx *ctx);
 
+/* Adds a mapping with the given id, or gets an existing mapping with the
+ * same id.
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
 #endif /* __MLX5_MAPPING_H__ */
index fc602d8..3cbb596 100644 (file)
@@ -483,6 +483,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
        param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 }
 
+static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+       bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+               MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
+       return ro && params->lro_en ?
+               MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+}
+
 int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
                         struct mlx5e_params *params,
                         struct mlx5e_xsk_param *xsk,
@@ -520,7 +529,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
        }
 
        MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
-       MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+       MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params));
        MLX5_SET(wq, wq, log_wq_stride,
                 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
        MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);
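
The new rq_end_pad_mode() helper above encodes a small decision table; spelled
out for clarity:

	/* relaxed ordering usable? | lro_en | end_padding_mode
	 * --------------------------------------------------
	 *          no             |  any   | ALIGN
	 *          yes            |  off   | ALIGN
	 *          yes            |  on    | NONE
	 */
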
index 849ee3e..f479ef3 100644 (file)
@@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
                params->log_sq_size = orig->log_sq_size;
                mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
        }
-       if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+       /* RQ */
+       if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+               params->vlan_strip_disable = orig->vlan_strip_disable;
                mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+       }
 }
 
 static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
@@ -494,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
        int err;
 
        rq->wq_type      = params->rq_wq_type;
-       rq->pdev         = mdev->device;
+       rq->pdev         = c->pdev;
        rq->netdev       = priv->netdev;
        rq->priv         = priv;
        rq->clock        = &mdev->clock;
@@ -602,8 +605,8 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
 
 static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
 {
+       u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
        struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
-       u32 tirn = priv->rx_res->ptp.tir.tirn;
        struct mlx5_flow_handle *rule;
        int err;
 
@@ -614,7 +617,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
        if (err)
                goto out_free;
 
-       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP,
+       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
                                                 tirn, PTP_EV_PORT);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -622,7 +625,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
        }
        ptp_fs->udp_v4_rule = rule;
 
-       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP,
+       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
                                                 tirn, PTP_EV_PORT);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
index 38d0e9d..b915fb2 100644 (file)
@@ -4,6 +4,15 @@
 #include "rqt.h"
 #include <linux/mlx5/transobj.h>
 
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+                                        unsigned int num_channels)
+{
+       unsigned int i;
+
+       for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+               indir->table[i] = i % num_channels;
+}
+
 static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
                          u16 max_size, u32 *init_rqns, u16 init_size)
 {
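
A worked example for mlx5e_rss_params_indir_init_uniform() above, using an
illustrative table size of 8 (the real MLX5E_INDIR_RQT_SIZE is larger) and
num_channels = 3:

	/* table[] = { 0, 1, 2, 0, 1, 2, 0, 1 }
	 * i.e. RSS hash buckets are dealt round-robin across the channels.
	 */
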
index d2c7664..60c985a 100644 (file)
@@ -14,6 +14,9 @@ struct mlx5e_rss_params_indir {
        u32 table[MLX5E_INDIR_RQT_SIZE];
 };
 
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+                                        unsigned int num_channels);
+
 struct mlx5e_rqt {
        struct mlx5_core_dev *mdev;
        u32 rqtn;
index 8fc1dfc..e2a8fe1 100644 (file)
@@ -2,54 +2,56 @@
 /* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
 
 #include "rx_res.h"
+#include "channels.h"
+#include "params.h"
 
 static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
-       [MLX5E_TT_IPV4_TCP] = {
+       [MLX5_TT_IPV4_TCP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV6_TCP] = {
+       [MLX5_TT_IPV6_TCP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV4_UDP] = {
+       [MLX5_TT_IPV4_UDP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV6_UDP] = {
+       [MLX5_TT_IPV6_UDP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV4_IPSEC_AH] = {
+       [MLX5_TT_IPV4_IPSEC_AH] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV6_IPSEC_AH] = {
+       [MLX5_TT_IPV6_IPSEC_AH] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV4_IPSEC_ESP] = {
+       [MLX5_TT_IPV4_IPSEC_ESP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV6_IPSEC_ESP] = {
+       [MLX5_TT_IPV6_IPSEC_ESP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV4] = {
+       [MLX5_TT_IPV4] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP,
        },
-       [MLX5E_TT_IPV6] = {
+       [MLX5_TT_IPV6] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP,
@@ -57,13 +59,556 @@ static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_I
 };
 
 struct mlx5e_rss_params_traffic_type
-mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt)
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
 {
        return rss_default_config[tt];
 }
 
+struct mlx5e_rx_res {
+       struct mlx5_core_dev *mdev;
+       enum mlx5e_rx_res_features features;
+       unsigned int max_nch;
+       u32 drop_rqn;
+
+       struct {
+               struct mlx5e_rss_params_hash hash;
+               struct mlx5e_rss_params_indir indir;
+               u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+       } rss_params;
+
+       struct mlx5e_rqt indir_rqt;
+       struct {
+               struct mlx5e_tir indir_tir;
+               struct mlx5e_tir inner_indir_tir;
+       } rss[MLX5E_NUM_INDIR_TIRS];
+
+       bool rss_active;
+       u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+       unsigned int rss_nch;
+
+       struct {
+               struct mlx5e_rqt direct_rqt;
+               struct mlx5e_tir direct_tir;
+               struct mlx5e_rqt xsk_rqt;
+               struct mlx5e_tir xsk_tir;
+       } *channels;
+
+       struct {
+               struct mlx5e_rqt rqt;
+               struct mlx5e_tir tir;
+       } ptp;
+};
+
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+       return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
+}
+
+static void mlx5e_rx_res_rss_params_init(struct mlx5e_rx_res *res, unsigned int init_nch)
+{
+       enum mlx5_traffic_types tt;
+
+       res->rss_params.hash.hfunc = ETH_RSS_HASH_TOP;
+       netdev_rss_key_fill(res->rss_params.hash.toeplitz_hash_key,
+                           sizeof(res->rss_params.hash.toeplitz_hash_key));
+       mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, init_nch);
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               res->rss_params.rx_hash_fields[tt] =
+                       mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
+}
+
+static int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res,
+                                const struct mlx5e_lro_param *init_lro_param)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       enum mlx5_traffic_types tt, max_tt;
+       struct mlx5e_tir_builder *builder;
+       u32 indir_rqtn;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       err = mlx5e_rqt_init_direct(&res->indir_rqt, res->mdev, true, res->drop_rqn);
+       if (err)
+               goto out;
+
+       indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt);
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               struct mlx5e_rss_params_traffic_type rss_tt;
+
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           indir_rqtn, inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+               mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, false);
+
+               err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an indirect TIR: err = %d, tt = %d\n",
+                                      err, tt);
+                       goto err_destroy_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       if (!inner_ft_support)
+               goto out;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               struct mlx5e_rss_params_traffic_type rss_tt;
+
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           indir_rqtn, inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+               mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, true);
+
+               err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an inner indirect TIR: err = %d, tt = %d\n",
+                                      err, tt);
+                       goto err_destroy_inner_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       goto out;
+
+err_destroy_inner_tirs:
+       max_tt = tt;
+       for (tt = 0; tt < max_tt; tt++)
+               mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
+
+       tt = MLX5E_NUM_INDIR_TIRS;
+err_destroy_tirs:
+       max_tt = tt;
+       for (tt = 0; tt < max_tt; tt++)
+               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
+
+       mlx5e_rqt_destroy(&res->indir_rqt);
+
+out:
+       mlx5e_tir_builder_free(builder);
+
+       return err;
+}
+
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+                                     const struct mlx5e_lro_param *init_lro_param)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+       int err = 0;
+       int ix;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+       if (!res->channels) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+                                           res->mdev, false, res->drop_rqn);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_direct_rqts;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                           inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_direct(builder);
+
+               err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_direct_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+               goto out;
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
+                                           res->mdev, false, res->drop_rqn);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_xsk_rqts;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                           inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_direct(builder);
+
+               err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_xsk_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       goto out;
+
+err_destroy_xsk_tirs:
+       while (--ix >= 0)
+               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+
+       ix = res->max_nch;
+err_destroy_xsk_rqts:
+       while (--ix >= 0)
+               mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+
+       ix = res->max_nch;
+err_destroy_direct_tirs:
+       while (--ix >= 0)
+               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+       ix = res->max_nch;
+err_destroy_direct_rqts:
+       while (--ix >= 0)
+               mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+       kvfree(res->channels);
+
+out:
+       mlx5e_tir_builder_free(builder);
+
+       return err;
+}
+
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+       if (err)
+               goto out;
+
+       mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                   mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                   inner_ft_support);
+       mlx5e_tir_builder_build_direct(builder);
+
+       err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+       if (err)
+               goto err_destroy_ptp_rqt;
+
+       goto out;
+
+err_destroy_ptp_rqt:
+       mlx5e_rqt_destroy(&res->ptp.rqt);
+
+out:
+       mlx5e_tir_builder_free(builder);
+       return err;
+}
+
+static void mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res)
+{
+       enum mlx5_traffic_types tt;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
+
+       if (res->features & MLX5E_RX_RES_FEATURE_INNER_FT)
+               for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+                       mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
+
+       mlx5e_rqt_destroy(&res->indir_rqt);
+}
+
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+       unsigned int ix;
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+               mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+               mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+       }
+
+       kvfree(res->channels);
+}
+
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+       mlx5e_tir_destroy(&res->ptp.tir);
+       mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     unsigned int init_nch)
+{
+       int err;
+
+       res->mdev = mdev;
+       res->features = features;
+       res->max_nch = max_nch;
+       res->drop_rqn = drop_rqn;
+
+       mlx5e_rx_res_rss_params_init(res, init_nch);
+
+       err = mlx5e_rx_res_rss_init(res, init_lro_param);
+       if (err)
+               return err;
+
+       err = mlx5e_rx_res_channels_init(res, init_lro_param);
+       if (err)
+               goto err_rss_destroy;
+
+       err = mlx5e_rx_res_ptp_init(res);
+       if (err)
+               goto err_channels_destroy;
+
+       return 0;
+
+err_channels_destroy:
+       mlx5e_rx_res_channels_destroy(res);
+err_rss_destroy:
+       mlx5e_rx_res_rss_destroy(res);
+       return err;
+}
+
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+       mlx5e_rx_res_ptp_destroy(res);
+       mlx5e_rx_res_channels_destroy(res);
+       mlx5e_rx_res_rss_destroy(res);
+}
+
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+       kvfree(res);
+}
+
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
+
+       return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       return mlx5e_tir_get_tirn(&res->rss[tt].indir_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT));
+       return mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+       return mlx5e_tir_get_tirn(&res->ptp.tir);
+}
+
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+}
+
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+       int err;
+
+       res->rss_active = true;
+
+       err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch,
+                                      res->rss_params.hash.hfunc,
+                                      &res->rss_params.indir);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->indir_rqt), err);
+}
+
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+       int err;
+
+       res->rss_active = false;
+
+       err = mlx5e_rqt_redirect_direct(&res->indir_rqt, res->drop_rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to drop RQ %#x: err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->indir_rqt), res->drop_rqn, err);
+}
+
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+       unsigned int nch, ix;
+       int err;
+
+       nch = mlx5e_channels_get_num(chs);
+
+       for (ix = 0; ix < nch; ix++)
+               mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+       res->rss_nch = nch;
+
+       mlx5e_rx_res_rss_enable(res);
+
+       for (ix = 0; ix < nch; ix++) {
+               u32 rqn;
+
+               mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+                       rqn = res->drop_rqn;
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      rqn, ix, err);
+       }
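+       /* Channels beyond the currently active count keep pointing at the
+        * drop RQ, so no stale RQT entry can reference a torn-down RQ.
+        */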
+       for (ix = nch; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      res->drop_rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      res->drop_rqn, ix, err);
+       }
+
+       if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+               u32 rqn;
+
+               if (mlx5e_channels_get_ptp_rqn(chs, &rqn))
+                       rqn = res->drop_rqn;
+
+               err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                      rqn, err);
+       }
+}
+
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+       unsigned int ix;
+       int err;
+
+       mlx5e_rx_res_rss_disable(res);
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      res->drop_rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      res->drop_rqn, ix, err);
+       }
+
+       if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+               err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                      res->drop_rqn, err);
+       }
+}
+
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+                             unsigned int ix)
+{
+       u32 rqn;
+       int err;
+
+       if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+               return -EINVAL;
+
+       err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                              rqn, ix, err);
+       return err;
+}
+
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       int err;
+
+       err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                              res->drop_rqn, ix, err);
+       return err;
+}
+
 struct mlx5e_rss_params_traffic_type
-mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt)
+mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
 {
        struct mlx5e_rss_params_traffic_type rss_tt;
 
@@ -71,3 +616,216 @@ mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traf
        rss_tt.rx_hash_fields = res->rss_params.rx_hash_fields[tt];
        return rss_tt;
 }
+
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+       mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, nch);
+
+       if (!res->rss_active)
+               return;
+
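+       /* RSS is live: push the new indirection table to the HW RQT now. */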
+       mlx5e_rx_res_rss_enable(res);
+}
+
+void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc)
+{
+       unsigned int i;
+
+       if (indir)
+               for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+                       indir[i] = res->rss_params.indir.table[i];
+
+       if (key)
+               memcpy(key, res->rss_params.hash.toeplitz_hash_key,
+                      sizeof(res->rss_params.hash.toeplitz_hash_key));
+
+       if (hfunc)
+               *hfunc = res->rss_params.hash.hfunc;
+}
+
+static int mlx5e_rx_res_rss_update_tir(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                      bool inner)
+{
+       struct mlx5e_rss_params_traffic_type rss_tt;
+       struct mlx5e_tir_builder *builder;
+       struct mlx5e_tir *tir;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(true);
+       if (!builder)
+               return -ENOMEM;
+
+       rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+
+       mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, inner);
+       tir = inner ? &res->rss[tt].inner_indir_tir : &res->rss[tt].indir_tir;
+       err = mlx5e_tir_modify(tir, builder);
+
+       mlx5e_tir_builder_free(builder);
+       return err;
+}
+
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir,
+                             const u8 *key, const u8 *hfunc)
+{
+       enum mlx5_traffic_types tt;
+       bool changed_indir = false;
+       bool changed_hash = false;
+       int err;
+
+       if (hfunc && *hfunc != res->rss_params.hash.hfunc) {
+               switch (*hfunc) {
+               case ETH_RSS_HASH_XOR:
+               case ETH_RSS_HASH_TOP:
+                       break;
+               default:
+                       return -EINVAL;
+               }
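+               /* A new hash function changes both the TIR hash configuration
+                * and how the RQT spreads traffic, so refresh both below.
+                */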
+               changed_hash = true;
+               changed_indir = true;
+               res->rss_params.hash.hfunc = *hfunc;
+       }
+
+       if (key) {
+               if (res->rss_params.hash.hfunc == ETH_RSS_HASH_TOP)
+                       changed_hash = true;
+               memcpy(res->rss_params.hash.toeplitz_hash_key, key,
+                      sizeof(res->rss_params.hash.toeplitz_hash_key));
+       }
+
+       if (indir) {
+               unsigned int i;
+
+               changed_indir = true;
+
+               for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+                       res->rss_params.indir.table[i] = indir[i];
+       }
+
+       if (changed_indir && res->rss_active) {
+               err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch,
+                                              res->rss_params.hash.hfunc,
+                                              &res->rss_params.indir);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->indir_rqt), err);
+       }
+
+       if (changed_hash)
+               for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+                       err = mlx5e_rx_res_rss_update_tir(res, tt, false);
+                       if (err)
+                               mlx5_core_warn(res->mdev, "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+                                              tt, err);
+
+                       if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+                               continue;
+
+                       err = mlx5e_rx_res_rss_update_tir(res, tt, true);
+                       if (err)
+                               mlx5_core_warn(res->mdev, "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+                                              tt, err);
+               }
+
+       return 0;
+}
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       return res->rss_params.rx_hash_fields[tt];
+}
+
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                    u8 rx_hash_fields)
+{
+       u8 old_rx_hash_fields;
+       int err;
+
+       old_rx_hash_fields = res->rss_params.rx_hash_fields[tt];
+
+       if (old_rx_hash_fields == rx_hash_fields)
+               return 0;
+
+       res->rss_params.rx_hash_fields[tt] = rx_hash_fields;
+
+       err = mlx5e_rx_res_rss_update_tir(res, tt, false);
+       if (err) {
+               res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields;
+               mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+                              tt, err);
+               return err;
+       }
+
+       if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+               return 0;
+
+       err = mlx5e_rx_res_rss_update_tir(res, tt, true);
+       if (err) {
+               /* Partial update happened. Try to revert; it may fail too,
+                * but there is nothing more we can do.
+                */
+               res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields;
+               mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+                              tt, err);
+               if (mlx5e_rx_res_rss_update_tir(res, tt, false))
+                       mlx5_core_warn(res->mdev, "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+                                      tt);
+       }
+
+       return err;
+}
+
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+{
+       struct mlx5e_tir_builder *builder;
+       enum mlx5_traffic_types tt;
+       int err, final_err;
+       unsigned int ix;
+
+       builder = mlx5e_tir_builder_alloc(true);
+       if (!builder)
+               return -ENOMEM;
+
+       mlx5e_tir_builder_build_lro(builder, lro_param);
+
+       final_err = 0;
+
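+       /* Remember the first failure, but keep updating the remaining TIRs. */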
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->rss[tt].indir_tir), tt, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+                       continue;
+
+               err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir), tt, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+       }
+
+       mlx5e_tir_builder_free(builder);
+       return final_err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+       return res->rss_params.hash;
+}
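For reference, mlx5e_rx_res_rss_set_indir_uniform() above delegates to
mlx5e_rss_params_indir_init_uniform() (defined elsewhere in this series) to
spread RQT entries evenly across the active channels. A minimal stand-alone
sketch of that spreading rule; the round-robin modulo below and the table
size are assumptions for illustration, not the driver's literal code:

    #include <stdio.h>

    #define INDIR_TABLE_SIZE 256 /* stand-in for MLX5E_INDIR_RQT_SIZE */

    /* Fill an RSS indirection table round-robin over nch channels. */
    static void indir_init_uniform(unsigned int *table, unsigned int nch)
    {
            unsigned int i;

            for (i = 0; i < INDIR_TABLE_SIZE; i++)
                    table[i] = i % nch;
    }

    int main(void)
    {
            unsigned int table[INDIR_TABLE_SIZE];

            indir_init_uniform(table, 12);
            printf("entry 0 -> channel %u, entry 13 -> channel %u\n",
                   table[0], table[13]);
            return 0;
    }

With 12 channels, entry 0 maps to channel 0 and entry 13 wraps to channel 1,
so hash buckets stay evenly distributed whenever the channel count changes.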
index 068e481..1baeec5 100644 (file)
@@ -9,39 +9,59 @@
 #include "tir.h"
 #include "fs.h"
 
-#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
+struct mlx5e_rx_res;
 
-struct mlx5e_rss_params {
-       struct mlx5e_rss_params_hash hash;
-       struct mlx5e_rss_params_indir indir;
-       u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
-};
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
 
-struct mlx5e_rx_res {
-       struct mlx5e_rss_params rss_params;
-
-       struct mlx5e_rqt indir_rqt;
-       struct {
-               struct mlx5e_tir indir_tir;
-               struct mlx5e_tir inner_indir_tir;
-       } rss[MLX5E_NUM_INDIR_TIRS];
-
-       struct {
-               struct mlx5e_rqt direct_rqt;
-               struct mlx5e_tir direct_tir;
-               struct mlx5e_rqt xsk_rqt;
-               struct mlx5e_tir xsk_tir;
-       } channels[MLX5E_MAX_NUM_CHANNELS];
-
-       struct {
-               struct mlx5e_rqt rqt;
-               struct mlx5e_tir tir;
-       } ptp;
+enum mlx5e_rx_res_features {
+       MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
+       MLX5E_RX_RES_FEATURE_XSK = BIT(1),
+       MLX5E_RX_RES_FEATURE_PTP = BIT(2),
 };
 
 struct mlx5e_rss_params_traffic_type
-mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt);
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* RQTN getters for modules that create their own TIRs */
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+                             unsigned int ix);
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Configuration API */
 struct mlx5e_rss_params_traffic_type
-mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt);
+mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir,
+                             const u8 *key, const u8 *hfunc);
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                    u8 rx_hash_fields);
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
 
 #endif /* __MLX5_EN_RX_RES_H__ */
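The header above now defines the complete lifecycle of the RX resource
object. A hedged sketch of the intended call order, using only the functions
declared above; the example_* wrapper, the zero feature mask and the fields
read from priv are illustrative assumptions, not code from this series:

    /* Sketch only: assumes mlx5e driver context and the declarations above.
     * Walks alloc -> init -> activate -> deactivate -> destroy -> free in
     * one function purely for illustration.
     */
    static int example_rx_res_lifecycle(struct mlx5e_priv *priv,
                                        const struct mlx5e_lro_param *lro_param)
    {
            int err;

            priv->rx_res = mlx5e_rx_res_alloc();
            if (!priv->rx_res)
                    return -ENOMEM;

            err = mlx5e_rx_res_init(priv->rx_res, priv->mdev,
                                    0, /* no INNER_FT/XSK/PTP features */
                                    priv->max_nch, priv->drop_rq.rqn,
                                    lro_param,
                                    priv->channels.params.num_channels);
            if (err)
                    goto err_free; /* init unwinds its own sub-objects */

            /* Once channels are open: */
            mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);

            /* Teardown mirrors setup in reverse: */
            mlx5e_rx_res_channels_deactivate(priv->rx_res);
            mlx5e_rx_res_destroy(priv->rx_res);
    err_free:
            mlx5e_rx_res_free(priv->rx_res);
            return err;
    }

Note the split between mlx5e_rx_res_destroy(), which releases the HW objects,
and mlx5e_rx_res_free(), which frees the allocation, mirroring alloc vs. init.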
index 91e7a01..b1707b8 100644 (file)
@@ -2138,6 +2138,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
        struct mlx5_tc_ct_priv *ct_priv;
        struct mlx5_core_dev *dev;
        const char *msg;
+       u64 mapping_id;
        int err;
 
        dev = priv->mdev;
@@ -2153,13 +2154,17 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
        if (!ct_priv)
                goto err_alloc;
 
-       ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
+       mapping_id = mlx5_query_nic_system_image_guid(dev);
+
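+       /* Key the mappings by the NIC's system image GUID so that functions
+        * on the same NIC can share zone and label mapping IDs.
+        */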
+       ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+                                                     sizeof(u16), 0, true);
        if (IS_ERR(ct_priv->zone_mapping)) {
                err = PTR_ERR(ct_priv->zone_mapping);
                goto err_mapping_zone;
        }
 
-       ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
+       ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+                                                       sizeof(u32) * 4, 0, true);
        if (IS_ERR(ct_priv->labels_mapping)) {
                err = PTR_ERR(ct_priv->labels_mapping);
                goto err_mapping_labels;
index afaf5b4..d54607a 100644 (file)
@@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params
        struct mlx5e_priv *priv = t->priv;
 
        rq->wq_type      = params->rq_wq_type;
-       rq->pdev         = mdev->device;
+       rq->pdev         = t->pdev;
        rq->netdev       = priv->netdev;
        rq->priv         = priv;
        rq->clock        = &mdev->clock;
index 71e8d66..7b562d2 100644 (file)
@@ -122,7 +122,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
         * any Fill Ring entries at the setup stage.
         */
 
-       err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+       err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
        if (unlikely(err))
                goto err_deactivate;
 
@@ -169,7 +169,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
                goto remove_pool;
 
        c = priv->channels.c[ix];
-       mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+       mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
        mlx5e_deactivate_xsk(c);
        mlx5e_close_xsk(c);
 
index ab485d0..c062674 100644 (file)
@@ -183,59 +183,3 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
        mlx5e_deactivate_rq(&c->xskrq);
        /* TX queue is disabled on close. */
 }
-
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
-{
-       return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[c->ix].xsk_rqt, c->xskrq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
-{
-       return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[ix].xsk_rqt, priv->drop_rq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
-       int err, i;
-
-       if (!priv->xsk.refcnt)
-               return 0;
-
-       for (i = 0; i < chs->num; i++) {
-               struct mlx5e_channel *c = chs->c[i];
-
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
-                       continue;
-
-               err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
-               if (unlikely(err))
-                       goto err_stop;
-       }
-
-       return 0;
-
-err_stop:
-       for (i--; i >= 0; i--) {
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
-                       continue;
-
-               mlx5e_xsk_redirect_rqt_to_drop(priv, i);
-       }
-
-       return err;
-}
-
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
-       int i;
-
-       if (!priv->xsk.refcnt)
-               return;
-
-       for (i = 0; i < chs->num; i++) {
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
-                       continue;
-
-               mlx5e_xsk_redirect_rqt_to_drop(priv, i);
-       }
-}
index ca20f1f..50e111b 100644 (file)
@@ -17,9 +17,5 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
 void mlx5e_close_xsk(struct mlx5e_channel *c);
 void mlx5e_activate_xsk(struct mlx5e_channel *c);
 void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
 
 #endif /* __MLX5_EN_XSK_SETUP_H__ */
index e51f60b..4c4ee52 100644 (file)
@@ -16,13 +16,13 @@ struct mlx5e_accel_fs_tcp {
        struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
 };
 
-static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
 {
        switch (i) {
        case ACCEL_FS_IPV4_TCP:
-               return MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        default: /* ACCEL_FS_IPV6_TCP */
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        }
 }
 
@@ -161,7 +161,7 @@ static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
        fs_tcp = priv->fs.accel_tcp;
        accel_fs_t = &fs_tcp->tables[type];
 
-       dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type));
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type));
        rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -307,7 +307,7 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
                /* Modify ttc rules destination to point back to the indir TIRs */
-               err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -329,7 +329,7 @@ static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
                dest.ft = priv->fs.accel_tcp->tables[i].t;
 
                /* Modify ttc rules destination to point on the accel_fs FTs */
-               err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
index 34119ce..17da23d 100644 (file)
@@ -41,11 +41,11 @@ struct mlx5e_ipsec_tx {
 };
 
 /* IPsec RX flow steering */
-static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
 {
        if (i == ACCEL_FS_ESP4)
-               return MLX5E_TT_IPV4_IPSEC_ESP;
-       return MLX5E_TT_IPV6_IPSEC_ESP;
+               return MLX5_TT_IPV4_IPSEC_ESP;
+       return MLX5_TT_IPV6_IPSEC_ESP;
 }
 
 static int rx_err_add_rule(struct mlx5e_priv *priv,
@@ -265,7 +265,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
        accel_esp = priv->ipsec->rx_fs;
        fs_prot = &accel_esp->fs_prot[type];
 
-       fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type));
+       fs_prot->default_dest =
+               mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type));
 
        err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err);
        if (err)
@@ -301,7 +302,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
        /* connect */
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
        dest.ft = fs_prot->ft;
-       mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest);
+       mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest);
 
 out:
        mutex_unlock(&fs_prot->prot_mutex);
@@ -320,7 +321,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
                goto out;
 
        /* disconnect */
-       mlx5e_ttc_fwd_default_dest(priv, fs_esp2tt(type));
+       mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type));
 
        /* remove FT */
        rx_destroy(priv, type);
index bfdbc30..62abce0 100644 (file)
@@ -628,7 +628,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
        priv_rx->sw_stats = &priv->tls->sw_stats;
        mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
 
-       rqtn = mlx5e_rqt_get_rqtn(&priv->rx_res->channels[rxq].direct_rqt);
+       rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
 
        err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
        if (err)
index db6c6a9..fe5d82f 100644 (file)
@@ -98,17 +98,17 @@ struct arfs_rule {
        for (j = 0; j < ARFS_HASH_SIZE; j++) \
                hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
 
-static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
 {
        switch (type) {
        case ARFS_IPV4_TCP:
-               return MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        case ARFS_IPV4_UDP:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        case ARFS_IPV6_TCP:
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        case ARFS_IPV6_UDP:
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        default:
                return -EINVAL;
        }
@@ -120,7 +120,7 @@ static int arfs_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < ARFS_NUM_TYPES; i++) {
                /* Modify ttc rules destination back to their default */
-               err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -149,7 +149,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
        for (i = 0; i < ARFS_NUM_TYPES; i++) {
                dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
                /* Modify ttc rules destination to point on the aRFS FTs */
-               err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
@@ -194,7 +194,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
        struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
        struct mlx5_flow_destination dest = {};
        MLX5_DECLARE_FLOW_ACT(flow_act);
-       enum mlx5e_traffic_types tt;
+       enum mlx5_traffic_types tt;
        int err = 0;
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
@@ -205,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
                return -EINVAL;
        }
 
-       /* FIXME: Must use mlx5e_ttc_get_default_dest(),
+       /* FIXME: Must use mlx5_ttc_get_default_dest(),
         * but can't since TTC default is not setup yet !
         */
-       dest.tir_num = priv->rx_res->rss[tt].indir_tir.tirn;
+       dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
        arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
                                                   &flow_act,
                                                   &dest, 1);
@@ -552,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
                       16);
        }
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dest.tir_num = priv->rx_res->channels[arfs_rule->rxq].direct_tir.tirn;
+       dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq);
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -575,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
        int err = 0;
 
        dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dst.tir_num = priv->rx_res->channels[rxq].direct_tir.tirn;
+       dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq);
        err =  mlx5_modify_rule_destination(rule, &dst, NULL);
        if (err)
                netdev_warn(priv->netdev,
index 9264d18..2cf59bb 100644 (file)
@@ -1172,7 +1172,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
 
 u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
 {
-       return sizeof(priv->rx_res->rss_params.hash.toeplitz_hash_key);
+       return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key);
 }
 
 static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -1198,18 +1198,10 @@ int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
                   u8 *hfunc)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct mlx5e_rss_params *rss;
 
-       rss = &priv->rx_res->rss_params;
-
-       if (indir)
-               memcpy(indir, rss->indir.table, sizeof(rss->indir.table));
-
-       if (key)
-               memcpy(key, rss->hash.toeplitz_hash_key, sizeof(rss->hash.toeplitz_hash_key));
-
-       if (hfunc)
-               *hfunc = rss->hash.hfunc;
+       mutex_lock(&priv->state_lock);
+       mlx5e_rx_res_rss_get_rxfh(priv->rx_res, indir, key, hfunc);
+       mutex_unlock(&priv->state_lock);
 
        return 0;
 }
@@ -1218,58 +1210,13 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
                   const u8 *key, const u8 hfunc)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       struct mlx5e_rss_params *rss;
-       bool refresh_tirs = false;
-       bool refresh_rqt = false;
-
-       if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
-           (hfunc != ETH_RSS_HASH_XOR) &&
-           (hfunc != ETH_RSS_HASH_TOP))
-               return -EINVAL;
+       int err;
 
        mutex_lock(&priv->state_lock);
-
-       rss = &priv->rx_res->rss_params;
-
-       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hash.hfunc) {
-               rss->hash.hfunc = hfunc;
-               refresh_rqt = true;
-               refresh_tirs = true;
-       }
-
-       if (indir) {
-               memcpy(rss->indir.table, indir, sizeof(rss->indir.table));
-               refresh_rqt = true;
-       }
-
-       if (key) {
-               memcpy(rss->hash.toeplitz_hash_key, key, sizeof(rss->hash.toeplitz_hash_key));
-               refresh_tirs = refresh_tirs || rss->hash.hfunc == ETH_RSS_HASH_TOP;
-       }
-
-       if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-               u32 *rqns;
-
-               rqns = kvmalloc_array(priv->channels.num, sizeof(*rqns), GFP_KERNEL);
-               if (rqns) {
-                       unsigned int ix;
-
-                       for (ix = 0; ix < priv->channels.num; ix++)
-                               rqns[ix] = priv->channels.c[ix]->rq.rqn;
-
-                       mlx5e_rqt_redirect_indir(&priv->rx_res->indir_rqt, rqns,
-                                                priv->channels.num,
-                                                rss->hash.hfunc, &rss->indir);
-                       kvfree(rqns);
-               }
-       }
-
-       if (refresh_tirs)
-               mlx5e_modify_tirs_hash(priv);
-
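+       /* ETH_RSS_HASH_NO_CHANGE maps to a NULL hfunc pointer in the new API. */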
+       err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, indir, key,
+                                       hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
        mutex_unlock(&priv->state_lock);
-
-       return 0;
+       return err;
 }
 
 #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC                100
index e798157..5c754e9 100644 (file)
@@ -718,7 +718,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv)
        if (!spec)
                return -ENOMEM;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft = priv->fs.ttc.ft.t;
+       dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
 
        rule_p = &priv->fs.promisc.rule;
        *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
@@ -854,587 +854,59 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
        ft->t = NULL;
 }
 
-static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
-{
-       int i;
-
-       for (i = 0; i < MLX5E_NUM_TT; i++) {
-               if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
-                       mlx5_del_flow_rules(ttc->rules[i].rule);
-                       ttc->rules[i].rule = NULL;
-               }
-       }
-
-       for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
-               if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
-                       mlx5_del_flow_rules(ttc->tunnel_rules[i]);
-                       ttc->tunnel_rules[i] = NULL;
-               }
-       }
-}
-
-struct mlx5e_etype_proto {
-       u16 etype;
-       u8 proto;
-};
-
-static struct mlx5e_etype_proto ttc_rules[] = {
-       [MLX5E_TT_IPV4_TCP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_TCP,
-       },
-       [MLX5E_TT_IPV6_TCP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_TCP,
-       },
-       [MLX5E_TT_IPV4_UDP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_UDP,
-       },
-       [MLX5E_TT_IPV6_UDP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_UDP,
-       },
-       [MLX5E_TT_IPV4_IPSEC_AH] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_AH,
-       },
-       [MLX5E_TT_IPV6_IPSEC_AH] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_AH,
-       },
-       [MLX5E_TT_IPV4_IPSEC_ESP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_ESP,
-       },
-       [MLX5E_TT_IPV6_IPSEC_ESP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_ESP,
-       },
-       [MLX5E_TT_IPV4] = {
-               .etype = ETH_P_IP,
-               .proto = 0,
-       },
-       [MLX5E_TT_IPV6] = {
-               .etype = ETH_P_IPV6,
-               .proto = 0,
-       },
-       [MLX5E_TT_ANY] = {
-               .etype = 0,
-               .proto = 0,
-       },
-};
-
-static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
-       [MLX5E_TT_IPV4_GRE] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_GRE,
-       },
-       [MLX5E_TT_IPV6_GRE] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_GRE,
-       },
-       [MLX5E_TT_IPV4_IPIP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_IPIP,
-       },
-       [MLX5E_TT_IPV6_IPIP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_IPIP,
-       },
-       [MLX5E_TT_IPV4_IPV6] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_IPV6,
-       },
-       [MLX5E_TT_IPV6_IPV6] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_IPV6,
-       },
-
-};
-
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt)
-{
-       return ttc_tunnel_rules[tt].proto;
-}
-
-static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type)
-{
-       switch (proto_type) {
-       case IPPROTO_GRE:
-               return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
-       case IPPROTO_IPIP:
-       case IPPROTO_IPV6:
-               return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
-                       MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
-       default:
-               return false;
-       }
-}
-
-static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
-{
-       int tt;
-
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto))
-                       return true;
-       }
-       return false;
-}
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
-{
-       return (mlx5e_tunnel_any_rx_proto_supported(mdev) &&
-               MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
-static u8 mlx5e_etype_to_ipv(u16 ethertype)
-{
-       if (ethertype == ETH_P_IP)
-               return 4;
-
-       if (ethertype == ETH_P_IPV6)
-               return 6;
-
-       return 0;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
-                       struct mlx5_flow_table *ft,
-                       struct mlx5_flow_destination *dest,
-                       u16 etype,
-                       u8 proto)
-{
-       int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
-       MLX5_DECLARE_FLOW_ACT(flow_act);
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_spec *spec;
-       int err = 0;
-       u8 ipv;
-
-       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-       if (!spec)
-               return ERR_PTR(-ENOMEM);
-
-       if (proto) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
-       }
-
-       ipv = mlx5e_etype_to_ipv(etype);
-       if (match_ipv_outer && ipv) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
-       } else if (etype) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
-       }
-
-       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
-       if (IS_ERR(rule)) {
-               err = PTR_ERR(rule);
-               netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
-       }
-
-       kvfree(spec);
-       return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
-                                         struct ttc_params *params,
-                                         struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5_flow_destination dest = {};
-       struct mlx5_flow_handle **trules;
-       struct mlx5e_ttc_rule *rules;
-       struct mlx5_flow_table *ft;
-       int tt;
-       int err;
-
-       ft = ttc->ft.t;
-       rules = ttc->rules;
-
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
-               struct mlx5e_ttc_rule *rule = &rules[tt];
-
-               if (tt == MLX5E_TT_ANY)
-                       dest.tir_num = params->any_tt_tirn;
-               else
-                       dest.tir_num = params->indir_tirn[tt];
-
-               rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest,
-                                                    ttc_rules[tt].etype,
-                                                    ttc_rules[tt].proto);
-               if (IS_ERR(rule->rule)) {
-                       err = PTR_ERR(rule->rule);
-                       rule->rule = NULL;
-                       goto del_rules;
-               }
-               rule->default_dest = dest;
-       }
-
-       if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               return 0;
-
-       trules    = ttc->tunnel_rules;
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft   = params->inner_ttc->ft.t;
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (!mlx5e_tunnel_proto_supported_rx(priv->mdev,
-                                                    ttc_tunnel_rules[tt].proto))
-                       continue;
-               trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
-                                                    ttc_tunnel_rules[tt].etype,
-                                                    ttc_tunnel_rules[tt].proto);
-               if (IS_ERR(trules[tt])) {
-                       err = PTR_ERR(trules[tt]);
-                       trules[tt] = NULL;
-                       goto del_rules;
-               }
-       }
-
-       return 0;
-
-del_rules:
-       mlx5e_cleanup_ttc_rules(ttc);
-       return err;
-}
-
-static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
-                                        bool use_ipv)
-{
-       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int ix = 0;
-       u32 *in;
-       int err;
-       u8 *mc;
-
-       ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
-                       sizeof(*ft->g), GFP_KERNEL);
-       if (!ft->g)
-               return -ENOMEM;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in) {
-               kfree(ft->g);
-               ft->g = NULL;
-               return -ENOMEM;
-       }
-
-       /* L4 Group */
-       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
-       MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
-       if (use_ipv)
-               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
-       else
-               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
-       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP1_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* L3 Group */
-       MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP2_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* Any Group */
-       memset(in, 0, inlen);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP3_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       kvfree(in);
-       return 0;
-
-err:
-       err = PTR_ERR(ft->g[ft->num_groups]);
-       ft->g[ft->num_groups] = NULL;
-       kvfree(in);
-
-       return err;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
-                             struct mlx5_flow_table *ft,
-                             struct mlx5_flow_destination *dest,
-                             u16 etype, u8 proto)
-{
-       MLX5_DECLARE_FLOW_ACT(flow_act);
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_spec *spec;
-       int err = 0;
-       u8 ipv;
-
-       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-       if (!spec)
-               return ERR_PTR(-ENOMEM);
-
-       ipv = mlx5e_etype_to_ipv(etype);
-       if (etype && ipv) {
-               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
-               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
-       }
-
-       if (proto) {
-               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
-               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
-       }
-
-       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
-       if (IS_ERR(rule)) {
-               err = PTR_ERR(rule);
-               netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
-       }
-
-       kvfree(spec);
-       return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
-                                               struct ttc_params *params,
-                                               struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5_flow_destination dest = {};
-       struct mlx5e_ttc_rule *rules;
-       struct mlx5_flow_table *ft;
-       int err;
-       int tt;
-
-       ft = ttc->ft.t;
-       rules = ttc->rules;
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-
-       for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
-               struct mlx5e_ttc_rule *rule = &rules[tt];
-
-               if (tt == MLX5E_TT_ANY)
-                       dest.tir_num = params->any_tt_tirn;
-               else
-                       dest.tir_num = params->indir_tirn[tt];
-
-               rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
-                                                          ttc_rules[tt].etype,
-                                                          ttc_rules[tt].proto);
-               if (IS_ERR(rule->rule)) {
-                       err = PTR_ERR(rule->rule);
-                       rule->rule = NULL;
-                       goto del_rules;
-               }
-               rule->default_dest = dest;
-       }
-
-       return 0;
-
-del_rules:
-
-       mlx5e_cleanup_ttc_rules(ttc);
-       return err;
-}
-
-static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
-{
-       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int ix = 0;
-       u32 *in;
-       int err;
-       u8 *mc;
-
-       ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
-       if (!ft->g)
-               return -ENOMEM;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in) {
-               kfree(ft->g);
-               ft->g = NULL;
-               return -ENOMEM;
-       }
-
-       /* L4 Group */
-       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
-       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
-       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
-       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP1_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* L3 Group */
-       MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP2_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* Any Group */
-       memset(in, 0, inlen);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP3_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       kvfree(in);
-       return 0;
-
-err:
-       err = PTR_ERR(ft->g[ft->num_groups]);
-       ft->g[ft->num_groups] = NULL;
-       kvfree(in);
-
-       return err;
-}
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
-                               struct ttc_params *ttc_params)
-{
-       ttc_params->any_tt_tirn = priv->rx_res->channels[0].direct_tir.tirn;
-       ttc_params->inner_ttc = &priv->fs.inner_ttc;
-}
-
-static void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
+                                      struct ttc_params *ttc_params)
 {
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+       int tt;
 
-       ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+       memset(ttc_params, 0, sizeof(*ttc_params));
+       ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
        ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_NIC_PRIO;
+
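+       /* MLX5_TT_ANY (no L3/L4 classification) falls back to channel 0's
+        * direct TIR; every other traffic type gets its inner RSS TIR.
+        */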
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+                               mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res,
+                                                               tt);
+       }
 }
 
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+                         struct ttc_params *ttc_params, bool tunnel)
 {
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+       int tt;
 
-       ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+       memset(ttc_params, 0, sizeof(*ttc_params));
+       ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
        ft_attr->level = MLX5E_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_NIC_PRIO;
-}
-
-static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                                       struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int err;
 
-       ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
-       if (IS_ERR(ft->t)) {
-               err = PTR_ERR(ft->t);
-               ft->t = NULL;
-               return err;
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+                               mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
        }
 
-       err = mlx5e_create_inner_ttc_table_groups(ttc);
-       if (err)
-               goto err;
-
-       err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
-       if (err)
-               goto err;
-
-       return 0;
-
-err:
-       mlx5e_destroy_flow_table(ft);
-       return err;
-}
-
-static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
-                                         struct mlx5e_ttc_table *ttc)
-{
-       mlx5e_cleanup_ttc_rules(ttc);
-       mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
-                            struct mlx5e_ttc_table *ttc)
-{
-       mlx5e_cleanup_ttc_rules(ttc);
-       mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                          struct mlx5e_ttc_table *ttc)
-{
-       bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int err;
+       ttc_params->inner_ttc = tunnel;
+       if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return;
 
-       ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
-       if (IS_ERR(ft->t)) {
-               err = PTR_ERR(ft->t);
-               ft->t = NULL;
-               return err;
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               ttc_params->tunnel_dests[tt].type =
+                       MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+               ttc_params->tunnel_dests[tt].ft =
+                       mlx5_get_ttc_flow_table(priv->fs.inner_ttc);
        }
-
-       err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
-       if (err)
-               goto err;
-
-       err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
-       if (err)
-               goto err;
-
-       return 0;
-err:
-       mlx5e_destroy_flow_table(ft);
-       return err;
-}
-
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
-                      struct mlx5_flow_destination *new_dest)
-{
-       return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL);
-}
-
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
-       struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest;
-
-       WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
-                 "TTC[%d] default dest is not setup yet", type);
-
-       return *dest;
-}
-
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
-       struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type);
-
-       return mlx5e_ttc_fwd_dest(priv, type, &dest);
 }
 
 static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
@@ -1467,7 +939,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
                               outer_headers.dmac_47_16);
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft = priv->fs.ttc.ft.t;
+       dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
 
        switch (type) {
        case MLX5E_FULLMATCH:
@@ -1763,10 +1235,46 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
        kvfree(priv->fs.vlan);
 }
 
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+{
+       if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return;
+       mlx5_destroy_ttc_table(priv->fs.inner_ttc);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
+{
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
 {
        struct ttc_params ttc_params = {};
-       int tt, err;
+
+       if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return 0;
+
+       mlx5e_set_inner_ttc_params(priv, &ttc_params);
+       priv->fs.inner_ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.inner_ttc))
+               return PTR_ERR(priv->fs.inner_ttc);
+       return 0;
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
+{
+       struct ttc_params ttc_params = {};
+
+       mlx5e_set_ttc_params(priv, &ttc_params, true);
+       priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.ttc))
+               return PTR_ERR(priv->fs.ttc);
+       return 0;
+}
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+{
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                               MLX5_FLOW_NAMESPACE_KERNEL);
@@ -1781,26 +1289,15 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
                priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
        }
 
-       mlx5e_set_ttc_basic_params(priv, &ttc_params);
-
-       if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) {
-               mlx5e_set_inner_ttc_ft_params(&ttc_params);
-               for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-                       ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].inner_indir_tir.tirn;
-
-               err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
-               if (err) {
-                       netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
-                                  err);
-                       goto err_destroy_arfs_tables;
-               }
+       err = mlx5e_create_inner_ttc_table(priv);
+       if (err) {
+               netdev_err(priv->netdev,
+                          "Failed to create inner ttc table, err=%d\n",
+                          err);
+               goto err_destroy_arfs_tables;
        }
 
-       mlx5e_set_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+       err = mlx5e_create_ttc_table(priv);
        if (err) {
                netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
                           err);
@@ -1834,10 +1331,9 @@ err_destory_vlan_table:
 err_destroy_l2_table:
        mlx5e_destroy_l2_table(priv);
 err_destroy_ttc_table:
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+       mlx5e_destroy_ttc_table(priv);
 err_destroy_inner_ttc_table:
-       if (mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+       mlx5e_destroy_inner_ttc_table(priv);
 err_destroy_arfs_tables:
        mlx5e_arfs_destroy_tables(priv);
 
@@ -1849,9 +1345,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
        mlx5e_ptp_free_rx_fs(priv);
        mlx5e_destroy_vlan_table(priv);
        mlx5e_destroy_l2_table(priv);
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-       if (mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+       mlx5e_destroy_ttc_table(priv);
+       mlx5e_destroy_inner_ttc_table(priv);
        mlx5e_arfs_destroy_tables(priv);
        mlx5e_ethtool_cleanup_steering(priv);
 }
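
The en_fs.c hunks above replace the driver-local TTC table builders with a shared API whose constructor hands back the table through the kernel ERR_PTR convention instead of an int plus out-parameter. A minimal sketch of the resulting create/use/destroy flow, assuming the mlx5 driver context (the function name is hypothetical; all other calls appear in this diff):

    static int example_ttc_lifecycle(struct mlx5e_priv *priv,
                                     struct ttc_params *params)
    {
            struct mlx5_flow_destination dest = {};
            struct mlx5_ttc_table *ttc;

            ttc = mlx5_create_ttc_table(priv->mdev, params);
            if (IS_ERR(ttc))
                    return PTR_ERR(ttc);    /* constructor encodes the errno */

            /* the opaque handle yields the flow table for steering rules */
            dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
            dest.ft = mlx5_get_ttc_flow_table(ttc);
            /* dest would now seed a flow rule pointing at the TTC table */

            mlx5_destroy_ttc_table(ttc);
            return 0;
    }
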
index 494f6f8..3d8918f 100644
@@ -433,9 +433,9 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
 
                dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                if (group == MLX5E_RQ_GROUP_XSK)
-                       dst->tir_num = priv->rx_res->channels[ix].xsk_tir.tirn;
+                       dst->tir_num = mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix);
                else
-                       dst->tir_num = priv->rx_res->channels[ix].direct_tir.tirn;
+                       dst->tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix);
                flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        }
 
@@ -786,43 +786,44 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
        INIT_LIST_HEAD(&priv->fs.ethtool.rules);
 }
 
-static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+static int flow_type_to_traffic_type(u32 flow_type)
 {
        switch (flow_type) {
        case TCP_V4_FLOW:
-               return  MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        case TCP_V6_FLOW:
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        case UDP_V4_FLOW:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        case UDP_V6_FLOW:
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        case AH_V4_FLOW:
-               return MLX5E_TT_IPV4_IPSEC_AH;
+               return MLX5_TT_IPV4_IPSEC_AH;
        case AH_V6_FLOW:
-               return MLX5E_TT_IPV6_IPSEC_AH;
+               return MLX5_TT_IPV6_IPSEC_AH;
        case ESP_V4_FLOW:
-               return MLX5E_TT_IPV4_IPSEC_ESP;
+               return MLX5_TT_IPV4_IPSEC_ESP;
        case ESP_V6_FLOW:
-               return MLX5E_TT_IPV6_IPSEC_ESP;
+               return MLX5_TT_IPV6_IPSEC_ESP;
        case IPV4_FLOW:
-               return MLX5E_TT_IPV4;
+               return MLX5_TT_IPV4;
        case IPV6_FLOW:
-               return MLX5E_TT_IPV6;
+               return MLX5_TT_IPV6;
        default:
-               return MLX5E_NUM_INDIR_TIRS;
+               return -EINVAL;
        }
 }
 
 static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
                                  struct ethtool_rxnfc *nfc)
 {
-       enum mlx5e_traffic_types tt;
        u8 rx_hash_field = 0;
+       int err;
+       int tt;
 
        tt = flow_type_to_traffic_type(nfc->flow_type);
-       if (tt == MLX5E_NUM_INDIR_TIRS)
-               return -EINVAL;
+       if (tt < 0)
+               return tt;
 
        /*  RSS does not support anything other than hashing to queues
         *  on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
@@ -848,29 +849,23 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
                rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
 
        mutex_lock(&priv->state_lock);
-
-       if (rx_hash_field == priv->rx_res->rss_params.rx_hash_fields[tt])
-               goto out;
-
-       priv->rx_res->rss_params.rx_hash_fields[tt] = rx_hash_field;
-       mlx5e_modify_tirs_hash(priv);
-
-out:
+       err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field);
        mutex_unlock(&priv->state_lock);
-       return 0;
+
+       return err;
 }
 
 static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
                                  struct ethtool_rxnfc *nfc)
 {
-       enum mlx5e_traffic_types tt;
        u32 hash_field = 0;
+       int tt;
 
        tt = flow_type_to_traffic_type(nfc->flow_type);
-       if (tt == MLX5E_NUM_INDIR_TIRS)
-               return -EINVAL;
+       if (tt < 0)
+               return tt;
 
-       hash_field = priv->rx_res->rss_params.rx_hash_fields[tt];
+       hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt);
        nfc->data = 0;
 
        if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
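
flow_type_to_traffic_type() now carries either a valid MLX5_TT_* index or a negative errno in a single int, so callers propagate the error directly rather than comparing against a sentinel enum value. A hedged sketch of the resulting caller shape (the function name is hypothetical; the other identifiers are taken from this diff):

    static int example_set_rss_hash(struct mlx5e_priv *priv, u32 flow_type,
                                    u8 rx_hash_field)
    {
            int tt = flow_type_to_traffic_type(flow_type);

            if (tt < 0)
                    return tt;      /* -EINVAL for unsupported flow types */

            return mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt,
                                                    rx_hash_field);
    }
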
index c663811..ccc569c 100644
@@ -1627,7 +1627,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
                                  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
        MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
-       MLX5_SET(cqc,   cqc, c_eqn,         eqn);
+       MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
        MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                                            MLX5_ADAPTER_PAGE_SHIFT);
@@ -2194,202 +2194,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
        chs->num = 0;
 }
 
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
-{
-       int err;
-
-       err = mlx5e_rqt_init_direct(&priv->rx_res->indir_rqt, priv->mdev, true,
-                                   priv->drop_rq.rqn);
-       if (err)
-               mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
-       return err;
-}
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
-{
-       int err;
-       int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].direct_rqt,
-                                           priv->mdev, false, priv->drop_rq.rqn);
-               if (unlikely(err))
-                       goto err_destroy_rqts;
-       }
-
-       return 0;
-
-err_destroy_rqts:
-       mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
-       while (--ix >= 0)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt);
-
-       return err;
-}
-
-static int mlx5e_create_xsk_rqts(struct mlx5e_priv *priv)
-{
-       int err;
-       int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].xsk_rqt,
-                                           priv->mdev, false, priv->drop_rq.rqn);
-               if (unlikely(err))
-                       goto err_destroy_rqts;
-       }
-
-       return 0;
-
-err_destroy_rqts:
-       mlx5_core_warn(priv->mdev, "create xsk rqts failed, %d\n", err);
-       while (--ix >= 0)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt);
-
-       return err;
-}
-
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt);
-}
-
-static void mlx5e_destroy_xsk_rqts(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt);
-}
-
-static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
-                                           struct mlx5e_channels *chs)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       unsigned int ix;
-       u32 *rqns;
-
-       rqns = kvmalloc_array(chs->num, sizeof(*rqns), GFP_KERNEL);
-       if (rqns) {
-               for (ix = 0; ix < chs->num; ix++)
-                       rqns[ix] = chs->c[ix]->rq.rqn;
-
-               mlx5e_rqt_redirect_indir(&res->indir_rqt, rqns, chs->num,
-                                        res->rss_params.hash.hfunc,
-                                        &res->rss_params.indir);
-               kvfree(rqns);
-       }
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               u32 rqn = priv->drop_rq.rqn;
-
-               if (ix < chs->num)
-                       rqn = chs->c[ix]->rq.rqn;
-
-               mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
-       }
-
-       if (priv->profile->rx_ptp_support) {
-               u32 rqn;
-
-               if (mlx5e_ptp_get_rqn(priv->channels.ptp, &rqn))
-                       rqn = priv->drop_rq.rqn;
-
-               mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
-       }
-}
-
-static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       unsigned int ix;
-
-       mlx5e_rqt_redirect_direct(&res->indir_rqt, priv->drop_rq.rqn);
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, priv->drop_rq.rqn);
-
-       if (priv->profile->rx_ptp_support)
-               mlx5e_rqt_redirect_direct(&res->ptp.rqt, priv->drop_rq.rqn);
-}
-
-int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash;
-       struct mlx5e_rss_params_traffic_type rss_tt;
-       struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
-       enum mlx5e_traffic_types tt;
-
-       builder = mlx5e_tir_builder_alloc(true);
-       if (!builder)
-               return -ENOMEM;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
-               mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
-               mlx5e_tir_builder_clear(builder);
-       }
-
-       /* Verify inner tirs resources allocated */
-       if (!res->rss[0].inner_indir_tir.tirn)
-               goto out;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true);
-               mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
-               mlx5e_tir_builder_clear(builder);
-       }
-
-out:
-       mlx5e_tir_builder_free(builder);
-       return 0;
-}
-
 static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 {
        struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
        struct mlx5e_lro_param lro_param;
-       enum mlx5e_traffic_types tt;
-       int err;
-       int ix;
-
-       builder = mlx5e_tir_builder_alloc(true);
-       if (!builder)
-               return -ENOMEM;
 
        lro_param = mlx5e_get_lro_param(&priv->channels.params);
-       mlx5e_tir_builder_build_lro(builder, &lro_param);
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
-               if (err)
-                       goto err_free_builder;
-
-               /* Verify inner tirs resources allocated */
-               if (!res->rss[0].inner_indir_tir.tirn)
-                       continue;
-
-               err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder);
-               if (err)
-                       goto err_free_builder;
-       }
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
-               if (err)
-                       goto err_free_builder;
-       }
 
-err_free_builder:
-       mlx5e_tir_builder_free(builder);
-       return err;
+       return mlx5e_rx_res_lro_set_param(res, &lro_param);
 }
 
 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
@@ -2572,8 +2384,7 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
 
        /* This function may be called on attach, before priv->rx_res is created. */
        if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
-               mlx5e_build_default_indir_rqt(priv->rx_res->rss_params.indir.table,
-                                             MLX5E_INDIR_RQT_SIZE, count);
+               mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
 
        return 0;
 }
@@ -2633,18 +2444,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 
        mlx5e_wait_channels_min_rx_wqes(&priv->channels);
 
-       if (priv->rx_res) {
-               mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
-               mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
-       }
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
 }
 
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
 {
-       if (priv->rx_res) {
-               mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
-               mlx5e_redirect_rqts_to_drop(priv);
-       }
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_deactivate(priv->rx_res);
 
        if (mlx5e_is_vport_rep(priv))
                mlx5e_remove_sqs_fwd_rules(priv);
@@ -3019,194 +2826,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
        mlx5e_destroy_tises(priv);
 }
 
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
-{
-       struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash;
-       bool inner_ft_support = priv->channels.params.tunneled_offload_en;
-       struct mlx5e_rss_params_traffic_type rss_tt;
-       struct mlx5e_rx_res *res = priv->rx_res;
-       enum mlx5e_traffic_types tt, max_tt;
-       struct mlx5e_tir_builder *builder;
-       struct mlx5e_lro_param lro_param;
-       u32 indir_rqtn;
-       int err = 0;
-
-       builder = mlx5e_tir_builder_alloc(false);
-       if (!builder)
-               return -ENOMEM;
-
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
-       indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt);
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
-                                           indir_rqtn, inner_ft_support);
-               mlx5e_tir_builder_build_lro(builder, &lro_param);
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
-
-               err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, priv->mdev, true);
-               if (err) {
-                       mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
-                       goto err_destroy_tirs;
-               }
-
-               mlx5e_tir_builder_clear(builder);
-       }
-
-       if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               goto out;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
-                                           indir_rqtn, inner_ft_support);
-               mlx5e_tir_builder_build_lro(builder, &lro_param);
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true);
-
-               err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, priv->mdev, true);
-               if (err) {
-                       mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
-                       goto err_destroy_inner_tirs;
-               }
-
-               mlx5e_tir_builder_clear(builder);
-       }
-
-       goto out;
-
-err_destroy_inner_tirs:
-       max_tt = tt;
-       for (tt = 0; tt < max_tt; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
-
-       tt = MLX5E_NUM_INDIR_TIRS;
-err_destroy_tirs:
-       max_tt = tt;
-       for (tt = 0; tt < max_tt; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
-
-out:
-       mlx5e_tir_builder_free(builder);
-
-       return err;
-}
-
-static int mlx5e_create_direct_tir(struct mlx5e_priv *priv, struct mlx5e_tir *tir,
-                                  struct mlx5e_tir_builder *builder, struct mlx5e_rqt *rqt)
-{
-       bool inner_ft_support = priv->channels.params.tunneled_offload_en;
-       struct mlx5e_lro_param lro_param;
-       int err = 0;
-
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
-
-       mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
-                                   mlx5e_rqt_get_rqtn(rqt), inner_ft_support);
-       mlx5e_tir_builder_build_lro(builder, &lro_param);
-       mlx5e_tir_builder_build_direct(builder);
-
-       err = mlx5e_tir_init(tir, builder, priv->mdev, true);
-       if (unlikely(err))
-               mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
-
-       mlx5e_tir_builder_clear(builder);
-
-       return err;
-}
-
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
-       int err = 0;
-       int ix;
-
-       builder = mlx5e_tir_builder_alloc(false);
-       if (!builder)
-               return -ENOMEM;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_create_direct_tir(priv, &res->channels[ix].direct_tir,
-                                             builder, &res->channels[ix].direct_rqt);
-               if (err)
-                       goto err_destroy_tirs;
-       }
-
-       goto out;
-
-err_destroy_tirs:
-       while (--ix >= 0)
-               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
-
-out:
-       mlx5e_tir_builder_free(builder);
-
-       return err;
-}
-
-static int mlx5e_create_xsk_tirs(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
-       int err;
-       int ix;
-
-       builder = mlx5e_tir_builder_alloc(false);
-       if (!builder)
-               return -ENOMEM;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_create_direct_tir(priv, &res->channels[ix].xsk_tir,
-                                             builder, &res->channels[ix].xsk_rqt);
-               if (err)
-                       goto err_destroy_tirs;
-       }
-
-       goto out;
-
-err_destroy_tirs:
-       while (--ix >= 0)
-               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
-
-out:
-       mlx5e_tir_builder_free(builder);
-
-       return err;
-}
-
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       enum mlx5e_traffic_types tt;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
-
-       /* Verify inner tirs resources allocated */
-       if (!res->rss[0].inner_indir_tir.tirn)
-               return;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
-}
-
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_tir_destroy(&priv->rx_res->channels[ix].direct_tir);
-}
-
-static void mlx5e_destroy_xsk_tirs(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_tir_destroy(&priv->rx_res->channels[ix].xsk_tir);
-}
-
 static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
 {
        int err = 0;
@@ -3223,7 +2842,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en
 
 static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 {
-       int err = 0;
+       int err;
        int i;
 
        for (i = 0; i < chs->num; i++) {
@@ -3231,6 +2850,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
                if (err)
                        return err;
        }
+       if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+               return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
 
        return 0;
 }
@@ -3668,6 +3289,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
        return 0;
 }
 
+static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
+                                                      netdev_features_t features)
+{
+       features &= ~NETIF_F_HW_TLS_RX;
+       if (netdev->features & NETIF_F_HW_TLS_RX)
+               netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+       features &= ~NETIF_F_HW_TLS_TX;
+       if (netdev->features & NETIF_F_HW_TLS_TX)
+               netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+
+       features &= ~NETIF_F_NTUPLE;
+       if (netdev->features & NETIF_F_NTUPLE)
+               netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+
+       return features;
+}
+
 static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                                            netdev_features_t features)
 {
@@ -3699,15 +3338,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                        netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
        }
 
-       if (mlx5e_is_uplink_rep(priv)) {
-               features &= ~NETIF_F_HW_TLS_RX;
-               if (netdev->features & NETIF_F_HW_TLS_RX)
-                       netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
-
-               features &= ~NETIF_F_HW_TLS_TX;
-               if (netdev->features & NETIF_F_HW_TLS_TX)
-                       netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
-       }
+       if (mlx5e_is_uplink_rep(priv))
+               features = mlx5e_fix_uplink_rep_features(netdev, features);
 
        mutex_unlock(&priv->state_lock);
 
@@ -4446,15 +4078,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
        .ndo_get_devlink_port    = mlx5e_get_devlink_port,
 };
 
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
-                                  int num_channels)
-{
-       int i;
-
-       for (i = 0; i < len; i++)
-               indirection_rqt[i] = i % num_channels;
-}
-
 static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
 {
        int i;
@@ -4467,21 +4090,6 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
        return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
 }
 
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
-                           u16 num_channels)
-{
-       enum mlx5e_traffic_types tt;
-
-       rss_params->hash.hfunc = ETH_RSS_HASH_TOP;
-       netdev_rss_key_fill(rss_params->hash.toeplitz_hash_key,
-                           sizeof(rss_params->hash.toeplitz_hash_key));
-       mlx5e_build_default_indir_rqt(rss_params->indir.table,
-                                     MLX5E_INDIR_RQT_SIZE, num_channels);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               rss_params->rx_hash_fields[tt] =
-                       mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
-}
-
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
 {
        struct mlx5e_params *params = &priv->channels.params;
@@ -4543,7 +4151,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
        /* TX inline */
        mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 
-       params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev);
+       params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
 
        /* AF_XDP */
        params->xsk = xsk;
@@ -4603,8 +4211,8 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
 {
        int tt;
 
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt)))
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
                        return true;
        }
        return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
@@ -4701,6 +4309,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        if (MLX5_CAP_ETH(mdev, scatter_fcs))
                netdev->hw_features |= NETIF_F_RXFCS;
 
+       if (mlx5_qos_is_supported(mdev))
+               netdev->hw_features |= NETIF_F_HW_TC;
+
        netdev->features          = netdev->hw_features;
 
        /* Defaults */
@@ -4721,8 +4332,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
                netdev->hw_features      |= NETIF_F_NTUPLE;
 #endif
        }
-       if (mlx5_qos_is_supported(mdev))
-               netdev->features |= NETIF_F_HW_TC;
 
        netdev->features         |= NETIF_F_HIGHDMA;
        netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
@@ -4805,15 +4414,14 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_tir_builder *tir_builder;
+       enum mlx5e_rx_res_features features;
+       struct mlx5e_lro_param lro_param;
        int err;
 
-       priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+       priv->rx_res = mlx5e_rx_res_alloc();
        if (!priv->rx_res)
                return -ENOMEM;
 
-       mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
        mlx5e_create_q_counters(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -4822,50 +4430,20 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
                goto err_destroy_q_counters;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+       if (priv->channels.params.tunneled_offload_en)
+               features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, true);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
-       err = mlx5e_create_xsk_rqts(priv);
-       if (unlikely(err))
-               goto err_destroy_direct_tirs;
-
-       err = mlx5e_create_xsk_tirs(priv);
-       if (unlikely(err))
-               goto err_destroy_xsk_rqts;
-
-       err = mlx5e_rqt_init_direct(&priv->rx_res->ptp.rqt, priv->mdev, false,
-                                   priv->drop_rq.rqn);
-       if (err)
-               goto err_destroy_xsk_tirs;
-
-       tir_builder = mlx5e_tir_builder_alloc(false);
-       if (!tir_builder) {
-               err = -ENOMEM;
-               goto err_destroy_ptp_rqt;
-       }
-       err = mlx5e_create_direct_tir(priv, &priv->rx_res->ptp.tir, tir_builder,
-                                     &priv->rx_res->ptp.rqt);
-       mlx5e_tir_builder_free(tir_builder);
-       if (err)
-               goto err_destroy_ptp_rqt;
-
        err = mlx5e_create_flow_steering(priv);
        if (err) {
                mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
-               goto err_destroy_ptp_direct_tir;
+               goto err_destroy_rx_res;
        }
 
        err = mlx5e_tc_nic_init(priv);
@@ -4886,27 +4464,13 @@ err_tc_nic_cleanup:
        mlx5e_tc_nic_cleanup(priv);
 err_destroy_flow_steering:
        mlx5e_destroy_flow_steering(priv);
-err_destroy_ptp_direct_tir:
-       mlx5e_tir_destroy(&priv->rx_res->ptp.tir);
-err_destroy_ptp_rqt:
-       mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt);
-err_destroy_xsk_tirs:
-       mlx5e_destroy_xsk_tirs(priv);
-err_destroy_xsk_rqts:
-       mlx5e_destroy_xsk_rqts(priv);
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
 err_destroy_q_counters:
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
        return err;
 }
@@ -4916,17 +4480,10 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
        mlx5e_accel_cleanup_rx(priv);
        mlx5e_tc_nic_cleanup(priv);
        mlx5e_destroy_flow_steering(priv);
-       mlx5e_tir_destroy(&priv->rx_res->ptp.tir);
-       mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt);
-       mlx5e_destroy_xsk_tirs(priv);
-       mlx5e_destroy_xsk_rqts(priv);
-       mlx5e_destroy_direct_tirs(priv);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv);
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
 }
 
index 2c54951..c54aaef 100644
@@ -49,6 +49,7 @@
 #include "en/devlink.h"
 #include "fs_core.h"
 #include "lib/mlx5.h"
+#include "lib/devcom.h"
 #define CREATE_TRACE_POINTS
 #include "diag/en_rep_tracepoint.h"
 #include "en_accel/ipsec.h"
@@ -310,6 +311,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
                mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+               if (rep_sq->send_to_vport_rule_peer)
+                       mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
                list_del(&rep_sq->list);
                kfree(rep_sq);
        }
@@ -319,6 +322,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep,
                                 u32 *sqns_array, int sqns_num)
 {
+       struct mlx5_eswitch *peer_esw = NULL;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
@@ -329,6 +333,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                return 0;
 
        rpriv = mlx5e_rep_to_rep_priv(rep);
+       if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+               peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+                                                    MLX5_DEVCOM_ESW_OFFLOADS);
+
        for (i = 0; i < sqns_num; i++) {
                rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
                if (!rep_sq) {
@@ -337,7 +345,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                }
 
                /* Add re-inject rule to the PF/representor sqs */
-               flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
                                                                sqns_array[i]);
                if (IS_ERR(flow_rule)) {
                        err = PTR_ERR(flow_rule);
@@ -345,12 +353,34 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                        goto out_err;
                }
                rep_sq->send_to_vport_rule = flow_rule;
+               rep_sq->sqn = sqns_array[i];
+
+               if (peer_esw) {
+                       flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+                                                                       rep, sqns_array[i]);
+                       if (IS_ERR(flow_rule)) {
+                               err = PTR_ERR(flow_rule);
+                               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+                               kfree(rep_sq);
+                               goto out_err;
+                       }
+                       rep_sq->send_to_vport_rule_peer = flow_rule;
+               }
+
                list_add(&rep_sq->list, &rpriv->vport_sqs_list);
        }
+
+       if (peer_esw)
+               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
        return 0;
 
 out_err:
        mlx5e_sqs2vport_stop(esw, rep);
+
+       if (peer_esw)
+               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
        return err;
 }
 
@@ -647,27 +677,24 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
 {
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep = rpriv->rep;
-       struct mlx5e_rx_res *res = priv->rx_res;
        struct ttc_params ttc_params = {};
-       int tt, err;
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                              MLX5_FLOW_NAMESPACE_KERNEL);
 
        /* The inner_ttc in the ttc params is intentionally not set */
-       ttc_params.any_tt_tirn = res->channels[0].direct_tir.tirn;
-       mlx5e_set_ttc_ft_params(&ttc_params);
+       mlx5e_set_ttc_params(priv, &ttc_params, false);
 
        if (rep->vport != MLX5_VPORT_UPLINK)
                /* To give uplink rep TTC a lower level for chaining from root ft */
                ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
 
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = res->rss[tt].indir_tir.tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
-       if (err) {
-               netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+       priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.ttc)) {
+               err = PTR_ERR(priv->fs.ttc);
+               netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
+                          err);
                return err;
        }
        return 0;
@@ -685,7 +712,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
                /* non-uplink reps will skip any bypass tables and go directly to
                 * their own ttc
                 */
-               rpriv->root_ft = priv->fs.ttc.ft.t;
+               rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
                return 0;
        }
 
@@ -758,14 +785,13 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_lro_param lro_param;
        int err;
 
-       priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+       priv->rx_res = mlx5e_rx_res_alloc();
        if (!priv->rx_res)
                return -ENOMEM;
 
-       mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
        mlx5e_init_l2_addr(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -774,25 +800,16 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
                return err;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, false);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
        err = mlx5e_create_rep_ttc_table(priv);
        if (err)
-               goto err_destroy_direct_tirs;
+               goto err_destroy_rx_res;
 
        err = mlx5e_create_rep_root_ft(priv);
        if (err)
@@ -809,18 +826,12 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 err_destroy_root_ft:
        mlx5e_destroy_rep_root_ft(priv);
 err_destroy_ttc_table:
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
        return err;
 }
@@ -830,13 +841,10 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
        mlx5e_ethtool_cleanup_steering(priv);
        rep_vport_rx_rule_destroy(priv);
        mlx5e_destroy_rep_root_ft(priv);
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-       mlx5e_destroy_direct_tirs(priv);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv);
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
 }
 
@@ -1269,10 +1277,64 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
        return rpriv->netdev;
 }
 
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_rep_sq *rep_sq;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+       list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+               if (!rep_sq->send_to_vport_rule_peer)
+                       continue;
+               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+               rep_sq->send_to_vport_rule_peer = NULL;
+       }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+                                     struct mlx5_eswitch_rep *rep,
+                                     struct mlx5_eswitch *peer_esw)
+{
+       struct mlx5_flow_handle *flow_rule;
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_rep_sq *rep_sq;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+       list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+               if (rep_sq->send_to_vport_rule_peer)
+                       continue;
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+               if (IS_ERR(flow_rule))
+                       goto err_out;
+               rep_sq->send_to_vport_rule_peer = flow_rule;
+       }
+
+       return 0;
+err_out:
+       mlx5e_vport_rep_event_unpair(rep);
+       return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+                                struct mlx5_eswitch_rep *rep,
+                                enum mlx5_switchdev_event event,
+                                void *data)
+{
+       int err = 0;
+
+       if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+               err = mlx5e_vport_rep_event_pair(esw, rep, data);
+       else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+               mlx5e_vport_rep_event_unpair(rep);
+
+       return err;
+}
+
 static const struct mlx5_eswitch_rep_ops rep_ops = {
        .load = mlx5e_vport_rep_load,
        .unload = mlx5e_vport_rep_unload,
-       .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+       .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+       .event = mlx5e_vport_rep_event,
 };
 
 static int mlx5e_rep_probe(struct auxiliary_device *adev,
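
The representor hunks above follow one devcom discipline: peer eswitch data is only valid between get_peer_data() and release_peer_data(), so the mirror rule is installed inside that window and the reference is dropped on every path. A hedged sketch of that guard pattern (the helper name and out-parameter are illustrative; the devcom and eswitch calls appear in this diff):

    static int example_add_peer_mirror_rule(struct mlx5_eswitch *esw,
                                            struct mlx5_eswitch_rep *rep, u32 sqn,
                                            struct mlx5_flow_handle **out_rule)
    {
            struct mlx5_eswitch *peer_esw;
            struct mlx5_flow_handle *rule;
            int err = 0;

            *out_rule = NULL;
            if (!mlx5_devcom_is_paired(esw->dev->priv.devcom,
                                       MLX5_DEVCOM_ESW_OFFLOADS))
                    return 0;       /* no paired peer: nothing to mirror */

            peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
                                                 MLX5_DEVCOM_ESW_OFFLOADS);
            if (!peer_esw)
                    return 0;

            rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, sqn);
            if (IS_ERR(rule))
                    err = PTR_ERR(rule);
            else
                    *out_rule = rule;   /* caller stores it, as rep_sq does */

            mlx5_devcom_release_peer_data(esw->dev->priv.devcom,
                                          MLX5_DEVCOM_ESW_OFFLOADS);
            return err;
    }
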
index 47a2dfb..8f0c824 100644
@@ -207,6 +207,8 @@ struct mlx5e_encap_entry {
 
 struct mlx5e_rep_sq {
        struct mlx5_flow_handle *send_to_vport_rule;
+       struct mlx5_flow_handle *send_to_vport_rule_peer;
+       u32 sqn;
        struct list_head         list;
 };
 
index 0cee2fa..e5c4344 100644
 #include <net/flow_offload.h>
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_skbedit.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/device.h>
 #include <linux/rhashtable.h>
 #include <linux/refcount.h>
 #include <linux/completion.h>
-#include <net/tc_act/tc_mirred.h>
-#include <net/tc_act/tc_vlan.h>
-#include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_csum.h>
-#include <net/tc_act/tc_mpls.h>
 #include <net/psample.h>
 #include <net/arp.h>
 #include <net/ipv6_stubs.h>
@@ -345,7 +339,7 @@ struct mlx5e_hairpin {
        int num_channels;
        struct mlx5e_rqt indir_rqt;
        struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_ttc_table ttc;
+       struct mlx5_ttc_table *ttc;
 };
 
 struct mlx5e_hairpin_entry {
@@ -452,12 +446,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
 static
 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
 {
+       struct mlx5_core_dev *mdev;
        struct net_device *netdev;
        struct mlx5e_priv *priv;
 
-       netdev = __dev_get_by_index(net, ifindex);
+       netdev = dev_get_by_index(net, ifindex);
+       if (!netdev)
+               return ERR_PTR(-ENODEV);
+
        priv = netdev_priv(netdev);
-       return priv->mdev;
+       mdev = priv->mdev;
+       dev_put(netdev);
+
+       /* Mirred tc action holds a refcount on the ifindex net_device (see
+        * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
+        * after dev_put(netdev), while we're in the context of adding a tc flow.
+        *
+        * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
+        * stored in a hairpin object, which exists until all flows that refer to it are
+        * removed.
+        *
+        * On the other hand, after a hairpin object has been created, the peer net_device may
+        * be removed/unbound while there are still some hairpin flows that are using it. This
+        * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
+        * NETDEV_UNREGISTER event of the peer net_device.
+        */
+       return mdev;
 }
 
 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
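
A hedged sketch tying the comment above to its callers: a stale ifindex now surfaces as ERR_PTR(-ENODEV), so the returned pointer must be IS_ERR-checked before any capability query (the function name is hypothetical; everything else is from this diff):

    static int example_peer_hairpin_capable(struct mlx5e_priv *priv,
                                            int peer_ifindex)
    {
            struct mlx5_core_dev *peer_mdev;

            peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev),
                                               peer_ifindex);
            if (IS_ERR(peer_mdev))
                    return PTR_ERR(peer_mdev);

            return MLX5_CAP_GEN(peer_mdev, hairpin) ? 0 : -EOPNOTSUPP;
    }
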
@@ -505,9 +519,10 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
        if (!indir)
                return -ENOMEM;
 
-       mlx5e_build_default_indir_rqt(indir->table, MLX5E_INDIR_RQT_SIZE, hp->num_channels);
+       mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
        err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
-                                   priv->rx_res->rss_params.hash.hfunc, indir);
+                                  mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
+                                  indir);
 
        kvfree(indir);
        return err;
@@ -516,8 +531,8 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
 {
        struct mlx5e_priv *priv = hp->func_priv;
-       struct mlx5e_rss_params_hash *rss_hash;
-       enum mlx5e_traffic_types tt, max_tt;
+       struct mlx5e_rss_params_hash rss_hash;
+       enum mlx5_traffic_types tt, max_tt;
        struct mlx5e_tir_builder *builder;
        int err = 0;
 
@@ -525,7 +540,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
        if (!builder)
                return -ENOMEM;
 
-       rss_hash = &priv->rx_res->rss_params.hash;
+       rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
 
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_rss_params_traffic_type rss_tt;
@@ -535,7 +550,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
                mlx5e_tir_builder_build_rqt(builder, hp->tdn,
                                            mlx5e_rqt_get_rqtn(&hp->indir_rqt),
                                            false);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
+               mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
 
                err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
                if (err) {
@@ -574,12 +589,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
 
        memset(ttc_params, 0, sizeof(*ttc_params));
 
-       ttc_params->any_tt_tirn = mlx5e_tir_get_tirn(&hp->direct_tir);
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params->indir_tirn[tt] = mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+       ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_tir_get_tirn(&hp->direct_tir) :
+                               mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+       }
 
-       ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
 }
@@ -599,12 +618,15 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
                goto err_create_indirect_tirs;
 
        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
-       if (err)
+       hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(hp->ttc)) {
+               err = PTR_ERR(hp->ttc);
                goto err_create_ttc_table;
+       }
 
        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
-                  hp->num_channels, hp->ttc.ft.t->id);
+                  hp->num_channels,
+                  mlx5_get_ttc_flow_table(hp->ttc)->id);
 
        return 0;
 
@@ -618,9 +640,7 @@ err_create_indirect_tirs:
 
 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
 {
-       struct mlx5e_priv *priv = hp->func_priv;
-
-       mlx5e_destroy_ttc_table(priv, &hp->ttc);
+       mlx5_destroy_ttc_table(hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_rqt_destroy(&hp->indir_rqt);
 }
@@ -640,6 +660,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params
 
        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+       if (IS_ERR(peer_mdev)) {
+               err = PTR_ERR(peer_mdev);
+               goto create_pair_err;
+       }
 
        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
@@ -778,6 +802,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
        int err;
 
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+       if (IS_ERR(peer_mdev)) {
+               NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
+               return PTR_ERR(peer_mdev);
+       }
+
        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
@@ -855,7 +884,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
-               flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+               flow->attr->nic_attr->hairpin_ft =
+                       mlx5_get_ttc_flow_table(hpe->hp->ttc);
        } else {
                flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
        }
@@ -1001,15 +1031,17 @@ err_ft_get:
 
 static int
 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
-                     struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
 {
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_core_dev *dev = priv->mdev;
-       struct mlx5_fc *counter = NULL;
+       struct mlx5_fc *counter;
        int err;
 
+       parse_attr = attr->parse_attr;
+
        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
@@ -1329,9 +1361,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
        bool vf_tun = false, encap_valid = true;
        struct net_device *encap_dev = NULL;
        struct mlx5_esw_flow_attr *esw_attr;
-       struct mlx5_fc *counter = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
+       struct mlx5_fc *counter;
        u32 max_prio, max_chain;
        int err = 0;
        int out_index;
@@ -3297,10 +3329,10 @@ static int validate_goto_chain(struct mlx5e_priv *priv,
 
 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
-                               struct mlx5e_tc_flow_parse_attr *parse_attr,
                                struct mlx5e_tc_flow *flow,
                                struct netlink_ext_ack *extack)
 {
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct pedit_headers_action hdrs[2] = {};
        const struct flow_action_entry *act;
@@ -3316,8 +3348,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
 
        nic_attr = attr->nic_attr;
-
        nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+       parse_attr = attr->parse_attr;
 
        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
@@ -3326,10 +3358,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        break;
                case FLOW_ACTION_DROP:
-                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
-                       if (MLX5_CAP_FLOWTABLE(priv->mdev,
-                                              flow_table_properties_nic_receive.flow_counter))
-                               action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+                                 MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        break;
                case FLOW_ACTION_MANGLE:
                case FLOW_ACTION_ADD:
@@ -3370,7 +3400,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                                   "device is not on same HW, can't offload");
                                netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
                                            peer_dev->name);
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
                        }
                        break;
@@ -3380,7 +3410,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "Bad flow mark - only 16 bit is supported");
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
 
                        nic_attr->flow_tag = mark;
@@ -3677,8 +3707,7 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv,
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
                                struct mlx5e_tc_flow *flow,
-                               struct netlink_ext_ack *extack,
-                               struct net_device *filter_dev)
+                               struct netlink_ext_ack *extack)
 {
        struct pedit_headers_action hdrs[2] = {};
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3743,7 +3772,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                                   "mpls pop supported only as first action");
                                return -EOPNOTSUPP;
                        }
-                       if (!netif_is_bareudp(filter_dev)) {
+                       if (!netif_is_bareudp(parse_attr->filter_dev)) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "mpls pop supported only on bareudp devices");
                                return -EOPNOTSUPP;
@@ -3892,7 +3921,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                            "devices %s %s not on same switch HW, can't offload forwarding\n",
                                            priv->netdev->name,
                                            out_dev->name);
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
                        }
                        break;
@@ -4245,7 +4274,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
-       err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
+       err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
        if (err)
                goto err_free;
 
@@ -4391,11 +4420,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
-       err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
+       err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
        if (err)
                goto err_free;
 
-       err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+       err = mlx5e_tc_add_nic_flow(priv, flow, extack);
        if (err)
                goto err_free;
 
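The hunks above drop parse_attr (and, in the fdb parser, filter_dev) from the action-parser signatures: both are reachable from flow->attr, so the callee now derives them itself instead of trusting the caller to pass a matching pair. A minimal sketch of that refactor, with simplified stand-in types (none of these names are the driver's):

    struct parse_attr { const char *filter_dev; };
    struct flow_attr  { struct parse_attr *parse_attr; };
    struct flow       { struct flow_attr *attr; };

    /* After the refactor: one parameter, one source of truth. */
    static int parse_actions(struct flow *flow)
    {
            struct parse_attr *parse_attr = flow->attr->parse_attr;

            /* ... use parse_attr->filter_dev exactly as before ... */
            return parse_attr->filter_dev ? 0 : -1;
    }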
@@ -4822,6 +4851,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
        struct mlx5_core_dev *dev = priv->mdev;
        struct mapping_ctx *chains_mapping;
        struct mlx5_chains_attr attr = {};
+       u64 mapping_id;
        int err;
 
        mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
@@ -4835,8 +4865,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
        lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
 
-       chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
-                                       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+       mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+       chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+                                              sizeof(struct mlx5_mapped_obj),
+                                              MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
        if (IS_ERR(chains_mapping)) {
                err = PTR_ERR(chains_mapping);
                goto err_mapping;
@@ -4925,6 +4959,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        struct mapping_ctx *mapping;
        struct mlx5_eswitch *esw;
        struct mlx5e_priv *priv;
+       u64 mapping_id;
        int err = 0;
 
        uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
@@ -4941,8 +4976,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
 #endif
 
-       mapping = mapping_create(sizeof(struct tunnel_match_key),
-                                TUNNEL_INFO_BITS_MASK, true);
+       mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+       mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+                                       sizeof(struct tunnel_match_key),
+                                       TUNNEL_INFO_BITS_MASK, true);
+
        if (IS_ERR(mapping)) {
                err = PTR_ERR(mapping);
                goto err_tun_mapping;
@@ -4950,7 +4989,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        uplink_priv->tunnel_mapping = mapping;
 
        /* 0xFFF is reserved for stack devices slow path table mark */
-       mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
+       mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+                                       sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
        if (IS_ERR(mapping)) {
                err = PTR_ERR(mapping);
                goto err_enc_opts_mapping;
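The mapping contexts above move from mapping_create() to mapping_create_for_id(), keyed by the NIC system image GUID plus a MAPPING_TYPE_* constant, so two devices reporting the same GUID (e.g. two PFs that will later share an FDB) resolve to one shared context instead of each building its own. A rough userspace model of id-keyed sharing, assuming a simple refcounted registry (all names here are illustrative, not the driver's):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct mapping_ctx {
            uint64_t id;                /* e.g. the system image GUID */
            int type;                   /* e.g. MAPPING_TYPE_CHAIN */
            int refcount;
            struct mapping_ctx *next;
    };

    static struct mapping_ctx *registry;

    /* Return the existing context for (id, type) or create a new one. */
    static struct mapping_ctx *mapping_get(uint64_t id, int type)
    {
            struct mapping_ctx *c;

            for (c = registry; c; c = c->next) {
                    if (c->id == id && c->type == type) {
                            c->refcount++;      /* shared by a second caller */
                            return c;
                    }
            }
            c = calloc(1, sizeof(*c));
            if (!c)
                    return NULL;
            c->id = id;
            c->type = type;
            c->refcount = 1;
            c->next = registry;
            registry = c;
            return c;
    }

    int main(void)
    {
            /* Two "devices" reporting the same GUID share one context. */
            struct mapping_ctx *a = mapping_get(0xabcdULL, 0);
            struct mapping_ctx *b = mapping_get(0xabcdULL, 0);

            printf("shared: %d, refcount: %d\n", a == b, b->refcount);
            return 0;
    }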
index 505bf81..2e504c7 100644
@@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
        vport->egress.offloads.fwd_rule = NULL;
 }
 
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+       if (!vport->egress.offloads.bounce_rule)
+               return;
+
+       mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+       vport->egress.offloads.bounce_rule = NULL;
+}
+
 static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
                                                struct mlx5_vport *vport,
                                                struct mlx5_flow_destination *fwd_dest)
@@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
 {
        esw_acl_egress_vlan_destroy(vport);
        esw_acl_egress_ofld_fwd2vport_destroy(vport);
+       esw_acl_egress_ofld_bounce_rule_destroy(vport);
 }
 
 static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
                mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
                vport->egress.offloads.fwd_grp = NULL;
        }
+
+       if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+               mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+               vport->egress.offloads.bounce_grp = NULL;
+       }
+
        esw_acl_egress_vlan_grp_destroy(vport);
 }
 
index 97e6cb6..7ffea23 100644
@@ -1458,8 +1458,6 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
 
        esw->mode = mode;
 
-       mlx5_lag_update(esw->dev);
-
        if (mode == MLX5_ESWITCH_LEGACY) {
                err = esw_legacy_enable(esw);
        } else {
@@ -1506,6 +1504,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
        if (!mlx5_esw_allowed(esw))
                return 0;
 
+       mlx5_lag_disable_change(esw->dev);
        down_write(&esw->mode_lock);
        if (esw->mode == MLX5_ESWITCH_NONE) {
                ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1519,6 +1518,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
                        esw->esw_funcs.num_vfs = num_vfs;
        }
        up_write(&esw->mode_lock);
+       mlx5_lag_enable_change(esw->dev);
        return ret;
 }
 
@@ -1550,8 +1550,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
        old_mode = esw->mode;
        esw->mode = MLX5_ESWITCH_NONE;
 
-       mlx5_lag_update(esw->dev);
-
        if (old_mode == MLX5_ESWITCH_OFFLOADS)
                mlx5_rescan_drivers(esw->dev);
 
@@ -1567,10 +1565,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
        if (!mlx5_esw_allowed(esw))
                return;
 
+       mlx5_lag_disable_change(esw->dev);
        down_write(&esw->mode_lock);
        mlx5_eswitch_disable_locked(esw, clear_vf);
        esw->esw_funcs.num_vfs = 0;
        up_write(&esw->mode_lock);
+       mlx5_lag_enable_change(esw->dev);
 }
 
 static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
@@ -1759,7 +1759,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        ida_init(&esw->offloads.vport_metadata_ida);
        xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
        mutex_init(&esw->state_lock);
+       lockdep_register_key(&esw->mode_lock_key);
        init_rwsem(&esw->mode_lock);
+       lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
 
        esw->enabled_vports = 0;
        esw->mode = MLX5_ESWITCH_NONE;
@@ -1793,6 +1795,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
        esw->dev->priv.eswitch = NULL;
        destroy_workqueue(esw->work_queue);
+       lockdep_unregister_key(&esw->mode_lock_key);
        mutex_destroy(&esw->state_lock);
        WARN_ON(!xa_empty(&esw->offloads.vhca_map));
        xa_destroy(&esw->offloads.vhca_map);
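mlx5_eswitch_init() now registers a dynamic lock_class_key per eswitch and classes mode_lock with it; mlx5_eswitch_cleanup() unregisters the key once the lock can no longer be taken. Giving each instance its own lockdep class lets two eswitches' mode locks be held nested without lockdep reporting recursive locking of a single class — presumably the motivation here, given the paired esw locking added by the LAG hunks later in this series. The general shape, as an illustrative kernel-style fragment rather than driver code:

    #include <linux/lockdep.h>
    #include <linux/rwsem.h>

    struct obj {
            struct rw_semaphore lock;
            struct lock_class_key key;      /* one lockdep class per instance */
    };

    static void obj_init(struct obj *o)
    {
            lockdep_register_key(&o->key);
            init_rwsem(&o->lock);
            lockdep_set_class(&o->lock, &o->key);
    }

    static void obj_fini(struct obj *o)
    {
            /* only after the lock's last use */
            lockdep_unregister_key(&o->key);
    }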
@@ -1889,8 +1892,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
               mlx5_esw_is_sf_vport(esw, vport_num);
 }
 
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
                                           u8 *hw_addr, int *hw_addr_len,
                                           struct netlink_ext_ack *extack)
 {
@@ -1899,7 +1901,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
        int err = -EOPNOTSUPP;
        u16 vport_num;
 
-       esw = mlx5_devlink_eswitch_get(devlink);
+       esw = mlx5_devlink_eswitch_get(port->devlink);
        if (IS_ERR(esw))
                return PTR_ERR(esw);
 
@@ -1923,8 +1925,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
        return err;
 }
 
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
                                           const u8 *hw_addr, int hw_addr_len,
                                           struct netlink_ext_ack *extack)
 {
@@ -1933,7 +1934,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
        int err = -EOPNOTSUPP;
        u16 vport_num;
 
-       esw = mlx5_devlink_eswitch_get(devlink);
+       esw = mlx5_devlink_eswitch_get(port->devlink);
        if (IS_ERR(esw)) {
                NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
                return PTR_ERR(esw);
@@ -2366,9 +2367,22 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
  */
 void mlx5_esw_unlock(struct mlx5_eswitch *esw)
 {
+       if (!mlx5_esw_allowed(esw))
+               return;
        up_write(&esw->mode_lock);
 }
 
+/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+       if (!mlx5_esw_allowed(esw))
+               return;
+       down_write(&esw->mode_lock);
+}
+
 /**
  * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
  *
@@ -2384,3 +2398,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
        return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw: eswitch device.
+ *
+ * Return the Mellanox core device which manages the eswitch.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+       return mlx5_esw_allowed(esw) ? esw->dev : NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
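mlx5_eswitch_get_core_dev() is exported for consumers outside the core driver and returns NULL when eswitch functionality is not allowed, so callers must check the result. A hedged usage sketch (the consumer function below is made up):

    #include <linux/mlx5/driver.h>
    #include <linux/mlx5/eswitch.h>

    /* Hypothetical consumer: true if @esw is backed by @dev. */
    static bool esw_is_backed_by(struct mlx5_eswitch *esw,
                                 struct mlx5_core_dev *dev)
    {
            struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(esw);

            return mdev && mdev == dev; /* NULL when eswitch is disallowed */
    }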
index 48cac5b..01e8dfb 100644
@@ -86,6 +86,14 @@ struct mlx5_mapped_obj {
 #define esw_chains(esw) \
        ((esw)->fdb_table.offloads.esw_chains_priv)
 
+enum {
+       MAPPING_TYPE_CHAIN,
+       MAPPING_TYPE_TUNNEL,
+       MAPPING_TYPE_TUNNEL_ENC_OPTS,
+       MAPPING_TYPE_LABELS,
+       MAPPING_TYPE_ZONE,
+};
+
 struct vport_ingress {
        struct mlx5_flow_table *acl;
        struct mlx5_flow_handle *allow_rule;
@@ -124,6 +132,8 @@ struct vport_egress {
                struct {
                        struct mlx5_flow_group *fwd_grp;
                        struct mlx5_flow_handle *fwd_rule;
+                       struct mlx5_flow_handle *bounce_rule;
+                       struct mlx5_flow_group *bounce_grp;
                } offloads;
        };
 };
@@ -315,6 +325,7 @@ struct mlx5_eswitch {
                u32             large_group_num;
        }  params;
        struct blocking_notifier_head n_head;
+       struct lock_class_key mode_lock_key;
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -475,12 +486,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
                                        struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
                                        enum devlink_eswitch_encap_mode *encap);
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
                                           u8 *hw_addr, int *hw_addr_len,
                                           struct netlink_ext_ack *extack);
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
                                           const u8 *hw_addr, int hw_addr_len,
                                           struct netlink_ext_ack *extack);
 
@@ -636,7 +645,7 @@ struct esw_vport_tbl_namespace {
 };
 
 struct mlx5_vport_tbl_attr {
-       u16 chain;
+       u32 chain;
        u16 prio;
        u16 vport;
        const struct esw_vport_tbl_namespace *vport_ns;
@@ -699,11 +708,18 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
 void mlx5_esw_put(struct mlx5_core_dev *dev);
 int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
 void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
 
 void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
 bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
 
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                             struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -719,6 +735,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
        return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
 static inline struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 {
@@ -731,6 +750,23 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
 {
        return vport_num;
 }
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                       struct mlx5_eswitch *slave_esw)
+{
+       return 0;
+}
+
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                        struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+       return 0;
+}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
index 7579f34..0e3645c 100644
@@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
 {
        dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
-       dest[dest_idx].vport.vhca_id =
-               MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
-       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+       if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+               dest[dest_idx].vport.vhca_id =
+                       MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
                dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+       }
        if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
                if (pkt_reformat) {
                        flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
@@ -925,6 +926,7 @@ out:
 
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+                                   struct mlx5_eswitch *from_esw,
                                    struct mlx5_eswitch_rep *rep,
                                    u32 sqn)
 {
@@ -943,10 +945,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
        /* source vport is the esw manager */
-       MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
+       MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
        if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
                MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-                        MLX5_CAP_GEN(rep->esw->dev, vhca_id));
+                        MLX5_CAP_GEN(from_esw->dev, vhca_id));
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -962,6 +964,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
+       if (rep->vport == MLX5_VPORT_UPLINK)
+               spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
        flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
                                        spec, &flow_act, &dest, 1);
        if (IS_ERR(flow_rule))
@@ -1612,7 +1617,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
                goto ns_err;
        }
 
-       table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+       /* To be strictly correct:
+        *      MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+        * should be:
+        *      esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+        *      peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+        * but since the peer device might not be in switchdev mode that is
+        * not possible. Instead we rely on the fact that, by default, FW sets
+        * max VFs and max SFs to the same value on both devices. If this ever
+        * changes, note that the peer miss group should also be sized by the
+        * peer's total vports (currently it also uses esw->total_vports).
+        */
+       table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
                MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
 
        /* create the slow path fdb with encap set, so further table instances
@@ -1669,7 +1685,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
                         source_eswitch_owner_vhca_id_valid, 1);
        }
 
-       ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+       /* See comment above table_size calculation */
+       ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
 
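With illustrative numbers the doubled sizing is easy to check. MAX_SQ_NVPORTS (32), MAX_PF_SQ (256), MLX5_ESW_MISS_FLOWS (2) and MLX5_MAX_PORTS (2) match the driver at the time of this series; total_vports and num_vfs below are invented:

    #include <stdio.h>

    int main(void)
    {
            const int MLX5_MAX_PORTS = 2, MAX_SQ_NVPORTS = 32, MAX_PF_SQ = 256;
            const int MLX5_ESW_MISS_FLOWS = 2;
            const int total_vports = 10, num_vfs = 8;       /* made up */

            /* Same formulas as the two hunks above. */
            int ix = MLX5_MAX_PORTS * (total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
            int table_size = ix + MLX5_ESW_MISS_FLOWS + total_vports + num_vfs;

            /* prints: send-to-vport group [0..1151], table_size 1172 */
            printf("send-to-vport group [0..%d], table_size %d\n",
                   ix - 1, table_size);
            return 0;
    }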
@@ -2309,14 +2326,293 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
                mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 }
 
+static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master,
+                                            struct mlx5_core_dev *slave)
+{
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       struct mlx5_eswitch *esw;
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+       struct mlx5_vport *vport;
+       int err;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL);
+       MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+       MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK);
+
+       if (master) {
+               esw = master->priv.eswitch;
+               vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+               MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1);
+               MLX5_SET(set_flow_table_root_in, in, table_vport_number,
+                        MLX5_VPORT_UPLINK);
+
+               ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                      MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                      vport->index);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       } else {
+               esw = slave->priv.eswitch;
+               vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+               ns = mlx5_get_flow_vport_acl_namespace(slave,
+                                                      MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                      vport->index);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id);
+       }
+
+       err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+       mutex_unlock(&root->chain_lock);
+
+       return err;
+}
+
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+                                 struct mlx5_core_dev *slave)
+{
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+       int err;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type,
+                FS_FT_FDB);
+
+       if (master) {
+               ns = mlx5_get_flow_namespace(master,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       } else {
+               ns = mlx5_get_flow_namespace(slave,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       }
+
+       err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+       mutex_unlock(&root->chain_lock);
+
+       return err;
+}
+
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                       struct mlx5_core_dev *slave,
+                                       struct mlx5_vport *vport,
+                                       struct mlx5_flow_table *acl)
+{
+       struct mlx5_flow_handle *flow_rule = NULL;
+       struct mlx5_flow_destination dest = {};
+       struct mlx5_flow_act flow_act = {};
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       void *misc;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return -ENOMEM;
+
+       spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+       misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                           misc_parameters);
+       MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+                MLX5_CAP_GEN(slave, vhca_id));
+
+       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+                        source_eswitch_owner_vhca_id);
+
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+       dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+       dest.vport.num = slave->priv.eswitch->manager_vport;
+       dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+       dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+       flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+                                       &dest, 1);
+       if (IS_ERR(flow_rule))
+               err = PTR_ERR(flow_rule);
+       else
+               vport->egress.offloads.bounce_rule = flow_rule;
+
+       kvfree(spec);
+       return err;
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                     struct mlx5_core_dev *slave)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       struct mlx5_eswitch *esw = master->priv.eswitch;
+       struct mlx5_flow_table_attr ft_attr = {
+               .max_fte = 1, .prio = 0, .level = 0,
+       };
+       struct mlx5_flow_namespace *egress_ns;
+       struct mlx5_flow_table *acl;
+       struct mlx5_flow_group *g;
+       struct mlx5_vport *vport;
+       void *match_criteria;
+       u32 *flow_group_in;
+       int err;
+
+       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+
+       egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                     MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+                                                     vport->index);
+       if (!egress_ns)
+               return -EINVAL;
+
+       if (vport->egress.acl)
+               return -EINVAL;
+
+       flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!flow_group_in)
+               return -ENOMEM;
+
+       acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+       if (IS_ERR(acl)) {
+               err = PTR_ERR(acl);
+               goto out;
+       }
+
+       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+                                     match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                        misc_parameters.source_port);
+       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                        misc_parameters.source_eswitch_owner_vhca_id);
+       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+                MLX5_MATCH_MISC_PARAMETERS);
+
+       MLX5_SET(create_flow_group_in, flow_group_in,
+                source_eswitch_owner_vhca_id_valid, 1);
+       MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+       g = mlx5_create_flow_group(acl, flow_group_in);
+       if (IS_ERR(g)) {
+               err = PTR_ERR(g);
+               goto err_group;
+       }
+
+       err = __esw_set_master_egress_rule(master, slave, vport, acl);
+       if (err)
+               goto err_rule;
+
+       vport->egress.acl = acl;
+       vport->egress.offloads.bounce_grp = g;
+
+       kvfree(flow_group_in);
+
+       return 0;
+
+err_rule:
+       mlx5_destroy_flow_group(g);
+err_group:
+       mlx5_destroy_flow_table(acl);
+out:
+       kvfree(flow_group_in);
+       return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+{
+       struct mlx5_vport *vport;
+
+       vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+                                      dev->priv.eswitch->manager_vport);
+
+       esw_acl_egress_ofld_cleanup(vport);
+}
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw)
+{
+       int err;
+
+       err = esw_set_uplink_slave_ingress_root(master_esw->dev,
+                                               slave_esw->dev);
+       if (err)
+               return -EINVAL;
+
+       err = esw_set_slave_root_fdb(master_esw->dev,
+                                    slave_esw->dev);
+       if (err)
+               goto err_fdb;
+
+       err = esw_set_master_egress_rule(master_esw->dev,
+                                        slave_esw->dev);
+       if (err)
+               goto err_acl;
+
+       return err;
+
+err_acl:
+       esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
+err_fdb:
+       esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+
+       return err;
+}
+
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                             struct mlx5_eswitch *slave_esw)
+{
+       esw_unset_master_egress_rule(master_esw->dev);
+       esw_set_slave_root_fdb(NULL, slave_esw->dev);
+       esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+}
+
 #define ESW_OFFLOADS_DEVCOM_PAIR       (0)
 #define ESW_OFFLOADS_DEVCOM_UNPAIR     (1)
 
-static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
-                                 struct mlx5_eswitch *peer_esw)
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
 {
+       const struct mlx5_eswitch_rep_ops *ops;
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       u8 rep_type;
 
-       return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               rep_type = NUM_REP_TYPES;
+               while (rep_type--) {
+                       ops = esw->offloads.rep_ops[rep_type];
+                       if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+                           ops->event)
+                               ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+               }
+       }
 }
 
 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
@@ -2324,9 +2620,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
        mlx5e_tc_clean_fdb_peer_flows(esw);
 #endif
+       mlx5_esw_offloads_rep_event_unpair(esw);
        esw_del_fdb_peer_miss_rules(esw);
 }
 
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+                                 struct mlx5_eswitch *peer_esw)
+{
+       const struct mlx5_eswitch_rep_ops *ops;
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       u8 rep_type;
+       int err;
+
+       err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+       if (err)
+               return err;
+
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+                       ops = esw->offloads.rep_ops[rep_type];
+                       if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+                           ops->event) {
+                               err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+                               if (err)
+                                       goto err_out;
+                       }
+               }
+       }
+
+       return 0;
+
+err_out:
+       mlx5_esw_offloads_unpair(esw);
+       return err;
+}
+
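The pair path now walks every loaded rep and invokes ops->event() with MLX5_SWITCHDEV_EVENT_PAIR, passing the peer eswitch as data and unwinding through the unpair path on the first error; the unpair path sends MLX5_SWITCHDEV_EVENT_UNPAIR with NULL data. A sketch of a handler implementing that contract — the body is illustrative, and the enum/callback signature is assumed from how the loops above call it:

    static int my_rep_event(struct mlx5_eswitch *esw,
                            struct mlx5_eswitch_rep *rep,
                            enum mlx5_switchdev_event event,
                            void *data)
    {
            switch (event) {
            case MLX5_SWITCHDEV_EVENT_PAIR:
                    /* 'data' is the peer struct mlx5_eswitch *; set up any
                     * cross-device state. A nonzero return aborts pairing.
                     */
                    return 0;
            case MLX5_SWITCHDEV_EVENT_UNPAIR:
                    /* 'data' is NULL; tear down whatever PAIR created. */
                    return 0;
            }
            return 0;
    }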
 static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
                                         struct mlx5_eswitch *peer_esw,
                                         bool pair)
@@ -2367,6 +2696,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
 
        switch (event) {
        case ESW_OFFLOADS_DEVCOM_PAIR:
+               if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev)
+                       break;
+
                if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
                    mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
                        break;
@@ -2614,6 +2946,31 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
        esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
 
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       int ret;
+
+       if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
+               return 0;
+
+       rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+       if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+               return 0;
+
+       ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+       if (ret)
+               return ret;
+
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
+                       mlx5_esw_offloads_rep_load(esw, rep->vport);
+       }
+
+       return 0;
+}
+
 static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
 {
        struct mlx5_esw_indir_table *indir;
@@ -2783,6 +3140,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
        struct mapping_ctx *reg_c0_obj_pool;
        struct mlx5_vport *vport;
        unsigned long i;
+       u64 mapping_id;
        int err;
 
        if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
@@ -2806,9 +3164,13 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
        if (err)
                goto err_vport_metadata;
 
-       reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
-                                        ESW_REG_C0_USER_DATA_METADATA_MASK,
-                                        true);
+       mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+       reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+                                               sizeof(struct mlx5_mapped_obj),
+                                               ESW_REG_C0_USER_DATA_METADATA_MASK,
+                                               true);
+
        if (IS_ERR(reg_c0_obj_pool)) {
                err = PTR_ERR(reg_c0_obj_pool);
                goto err_pool;
@@ -2986,10 +3348,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
        if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
+       mlx5_lag_disable_change(esw->dev);
        err = mlx5_esw_try_lock(esw);
        if (err < 0) {
                NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
-               return err;
+               goto enable_lag;
        }
        cur_mlx5_mode = err;
        err = 0;
@@ -3006,6 +3369,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 
 unlock:
        mlx5_esw_unlock(esw);
+enable_lag:
+       mlx5_lag_enable_change(esw->dev);
        return err;
 }
 
@@ -3079,8 +3444,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 
        switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
        case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
-               if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+               if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
+                       err = 0;
                        goto out;
+               }
+
                fallthrough;
        case MLX5_CAP_INLINE_MODE_L2:
                NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
index bd66ab2..9b2cca6 100644
@@ -454,7 +454,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
        MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
                           MLX5_ADAPTER_PAGE_SHIFT);
index 896a6c3..7db8df6 100644
@@ -152,17 +152,56 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
        return 0;
 }
 
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+                                      struct mlx5_core_dev *slave,
+                                      bool ft_id_valid,
+                                      u32 ft_id)
+{
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type,
+                FS_FT_FDB);
+       if (ft_id_valid) {
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        ft_id);
+       } else {
+               ns = mlx5_get_flow_namespace(slave,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       }
+
+       return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
 static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
                                   struct mlx5_flow_table *ft, u32 underlay_qpn,
                                   bool disconnect)
 {
        u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
        struct mlx5_core_dev *dev = ns->dev;
+       int err;
 
        if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
            underlay_qpn == 0)
                return 0;
 
+       if (ft->type == FS_FT_FDB &&
+           mlx5_lag_is_shared_fdb(dev) &&
+           !mlx5_lag_is_master(dev))
+               return 0;
+
        MLX5_SET(set_flow_table_root_in, in, opcode,
                 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
        MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
@@ -177,7 +216,24 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
        MLX5_SET(set_flow_table_root_in, in, other_vport,
                 !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
 
-       return mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+       err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+       if (!err &&
+           ft->type == FS_FT_FDB &&
+           mlx5_lag_is_shared_fdb(dev) &&
+           mlx5_lag_is_master(dev)) {
+               err = mlx5_cmd_set_slave_root_fdb(dev,
+                                                 mlx5_lag_get_peer_mdev(dev),
+                                                 !disconnect, (!disconnect) ?
+                                                 ft->id : 0);
+               if (err && !disconnect) {
+                       MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+                       MLX5_SET(set_flow_table_root_in, in, table_id,
+                                ns->root_ft->id);
+                       mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+               }
+       }
+
+       return err;
 }
 
 static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
index d7bf0a3..8481027 100644
@@ -413,7 +413,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec)
        return true;
 }
 
-static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
 {
        struct fs_node *root;
        struct mlx5_flow_namespace *ns;
@@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
 static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
                              struct fs_prio *prio)
 {
-       struct mlx5_flow_table *next_ft;
+       struct mlx5_flow_table *next_ft, *first_ft;
        int err = 0;
 
        /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
 
-       if (list_empty(&prio->node.children)) {
+       first_ft = list_first_entry_or_null(&prio->node.children,
+                                           struct mlx5_flow_table, node.list);
+       if (!first_ft || first_ft->level > ft->level) {
                err = connect_prev_fts(dev, ft, prio);
                if (err)
                        return err;
 
-               next_ft = find_next_chained_ft(prio);
+               next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
                err = connect_fwd_rules(dev, ft, next_ft);
                if (err)
                        return err;
@@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft)
                                node.list) == ft))
                return 0;
 
-       next_ft = find_next_chained_ft(prio);
+       next_ft = find_next_ft(ft);
        err = connect_fwd_rules(dev, next_ft, ft);
        if (err)
                return err;
index 7317cde..98240ba 100644
@@ -294,6 +294,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
 int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
 void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
 
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
+
 #define fs_get_obj(v, _node)  {v = container_of((_node), typeof(*v), node); }
 
 #define fs_list_for_each_entry(pos, root)              \
index 9ff163c..9abeb80 100644
@@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
        }
        fw_reporter_ctx.err_synd = health->synd;
        fw_reporter_ctx.miss_counter = health->miss_counter;
-       devlink_health_report(health->fw_fatal_reporter,
-                             "FW fatal error reported", &fw_reporter_ctx);
+       if (devlink_health_report(health->fw_fatal_reporter,
+                                 "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
+               /* If recovery wasn't performed because of the grace
+                * period, unload the driver. This ensures the driver
+                * closes all its resources and is not subjected to
+                * further requests from the kernel.
+                */
+               mlx5_core_err(dev, "Driver is in error state. Unloading\n");
+               mlx5_unload_one(dev);
+       }
 }
 
 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
index a126cbc..67571e5 100644
@@ -314,8 +314,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
 
 static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 {
-       struct ttc_params ttc_params = {};
-       int tt, err;
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                               MLX5_FLOW_NAMESPACE_KERNEL);
@@ -330,12 +329,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
                priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
        }
 
-       mlx5e_set_ttc_basic_params(priv, &ttc_params);
-       mlx5e_set_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+       err = mlx5e_create_ttc_table(priv);
        if (err) {
                netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
                           err);
@@ -352,21 +346,20 @@ err_destroy_arfs_tables:
 
 static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 {
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+       mlx5e_destroy_ttc_table(priv);
        mlx5e_arfs_destroy_tables(priv);
 }
 
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_lro_param lro_param;
        int err;
 
-       priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+       priv->rx_res = mlx5e_rx_res_alloc();
        if (!priv->rx_res)
                return -ENOMEM;
 
-       mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
        mlx5e_create_q_counters(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -375,41 +368,26 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
                goto err_destroy_q_counters;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, false);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
        err = mlx5i_create_flow_steering(priv);
        if (err)
-               goto err_destroy_direct_tirs;
+               goto err_destroy_rx_res;
 
        return 0;
 
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
 err_destroy_q_counters:
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
        return err;
 }
@@ -417,13 +395,10 @@ err_destroy_q_counters:
 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
 {
        mlx5i_destroy_flow_steering(priv);
-       mlx5e_destroy_direct_tirs(priv);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv);
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
 }
 
index 5c043c5..f4dfa55 100644
@@ -32,7 +32,9 @@
 
 #include <linux/netdevice.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 #include "lag.h"
@@ -45,7 +47,7 @@
 static DEFINE_SPINLOCK(lag_lock);
 
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
-                              u8 remap_port2)
+                              u8 remap_port2, bool shared_fdb)
 {
        u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+       MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
 
        return mlx5_cmd_exec_in(dev, create_lag, in);
 }
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 }
 
 static int mlx5_create_lag(struct mlx5_lag *ldev,
-                          struct lag_tracker *tracker)
+                          struct lag_tracker *tracker,
+                          bool shared_fdb)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        int err;
 
        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
                                       &ldev->v2p_map[MLX5_LAG_P2]);
 
-       mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
-                      ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+       mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
+                      ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+                      shared_fdb);
 
        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
-                                 ldev->v2p_map[MLX5_LAG_P2]);
-       if (err)
+                                 ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+       if (err) {
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
+               return err;
+       }
+
+       if (shared_fdb) {
+               err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+                                                             dev1->priv.eswitch);
+               if (err)
+                       mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+               else
+                       mlx5_core_info(dev0, "Operation mode is single FDB\n");
+       }
+
+       if (err) {
+               MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+               if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+                       mlx5_core_err(dev0,
+                                     "Failed to deactivate RoCE LAG; driver restart required\n");
+       }
+
        return err;
 }
 
 int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
-                     u8 flags)
+                     u8 flags,
+                     bool shared_fdb)
 {
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;
 
-       err = mlx5_create_lag(ldev, tracker);
+       err = mlx5_create_lag(ldev, tracker, shared_fdb);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
        }
 
        ldev->flags |= flags;
+       ldev->shared_fdb = shared_fdb;
        return 0;
 }
 
@@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 
        ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
+       if (ldev->shared_fdb) {
+               mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+                                                        ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+               ldev->shared_fdb = false;
+       }
+
        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
        if (err) {
@@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
                if (!ldev->pf[i].dev)
                        continue;
 
+               if (ldev->pf[i].dev->priv.flags &
+                   MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+                       continue;
+
                ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
@@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       bool shared_fdb = ldev->shared_fdb;
        bool roce_lag;
        int err;
 
        roce_lag = __mlx5_lag_is_roce(ldev);
 
-       if (roce_lag) {
+       if (shared_fdb) {
+               mlx5_lag_remove_devices(ldev);
+       } else if (roce_lag) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
                        dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
@@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
        if (err)
                return;
 
-       if (roce_lag)
+       if (shared_fdb || roce_lag)
                mlx5_lag_add_devices(ldev);
+
+       if (shared_fdb) {
+               if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+                       mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+               if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+                       mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+       }
+}
+
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+       if (is_mdev_switchdev_mode(dev0) &&
+           is_mdev_switchdev_mode(dev1) &&
+           mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+           mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+           mlx5_devcom_is_paired(dev0->priv.devcom,
+                                 MLX5_DEVCOM_ESW_OFFLOADS) &&
+           MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+           MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+           MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+               return true;
+
+       return false;
 }
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -371,14 +438,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
        bool do_bond, roce_lag;
        int err;
 
-       if (!mlx5_lag_is_ready(ldev))
-               return;
-
-       tracker = ldev->tracker;
+       if (!mlx5_lag_is_ready(ldev)) {
+               do_bond = false;
+       } else {
+               tracker = ldev->tracker;
 
-       do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+               do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+       }
 
        if (do_bond && !__mlx5_lag_is_active(ldev)) {
+               bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);
 
@@ -388,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
                           dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
 #endif
 
-               if (roce_lag)
+               if (shared_fdb || roce_lag)
                        mlx5_lag_remove_devices(ldev);
 
                err = mlx5_activate_lag(ldev, &tracker,
                                        roce_lag ? MLX5_LAG_FLAG_ROCE :
-                                       MLX5_LAG_FLAG_SRIOV);
+                                                  MLX5_LAG_FLAG_SRIOV,
+                                       shared_fdb);
                if (err) {
-                       if (roce_lag)
+                       if (shared_fdb || roce_lag)
                                mlx5_lag_add_devices(ldev);
 
                        return;
-               }
-
-               if (roce_lag) {
+               } else if (roce_lag) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                        mlx5_nic_vport_enable_roce(dev1);
+               } else if (shared_fdb) {
+                       dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+                       mlx5_rescan_drivers_locked(dev0);
+
+                       err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+                       if (!err)
+                               err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+                       if (err) {
+                               dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+                               mlx5_rescan_drivers_locked(dev0);
+                               mlx5_deactivate_lag(ldev);
+                               mlx5_lag_add_devices(ldev);
+                               mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+                               mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+                               mlx5_core_err(dev0, "Failed to enable lag\n");
+                               return;
+                       }
                }
        } else if (do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_modify_lag(ldev, &tracker);
@@ -418,21 +505,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
        queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
 }
 
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+                                   struct mlx5_core_dev *dev1)
+{
+       if (dev0)
+               mlx5_esw_lock(dev0->priv.eswitch);
+       if (dev1)
+               mlx5_esw_lock(dev1->priv.eswitch);
+}
+
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+                                     struct mlx5_core_dev *dev1)
+{
+       if (dev1)
+               mlx5_esw_unlock(dev1->priv.eswitch);
+       if (dev0)
+               mlx5_esw_unlock(dev0->priv.eswitch);
+}
+
 static void mlx5_do_bond_work(struct work_struct *work)
 {
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        int status;
 
        status = mlx5_dev_list_trylock();
        if (!status) {
-               /* 1 sec delay. */
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }
 
+       if (ldev->mode_changes_in_progress) {
+               mlx5_dev_list_unlock();
+               mlx5_queue_bond_work(ldev, HZ);
+               return;
+       }
+
+       mlx5_lag_lock_eswitches(dev0, dev1);
        mlx5_do_bond(ldev);
+       mlx5_lag_unlock_eswitches(dev0, dev1);
        mlx5_dev_list_unlock();
 }
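
The reworked worker above settles on a strict lock hierarchy. Condensed to its ordering (the worker itself uses the trylock variant and requeues with a one second delay on contention or while a mode change is in flight), a sketch using the variables from the function above:

    mlx5_dev_list_lock();                   /* outermost: global device list  */
    mlx5_lag_lock_eswitches(dev0, dev1);    /* then both eswitches, dev0 first */
    mlx5_do_bond(ldev);
    mlx5_lag_unlock_eswitches(dev0, dev1);  /* release in exact reverse order  */
    mlx5_dev_list_unlock();

Taking the locks in the same fixed order on every path is what keeps the two-device pair deadlock-free.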
 
@@ -630,7 +744,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 }
 
 /* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
 {
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;
@@ -638,7 +752,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
        if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
            !MLX5_CAP_GEN(dev, lag_master) ||
            MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
-               return;
+               return 0;
 
        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
@@ -648,15 +762,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
                ldev = mlx5_lag_dev_alloc(dev);
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
-                       return;
+                       return 0;
                }
        } else {
+               if (ldev->mode_changes_in_progress)
+                       return -EAGAIN;
                mlx5_ldev_get(ldev);
        }
 
        mlx5_ldev_add_mdev(ldev, dev);
 
-       return;
+       return 0;
 }
 
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
@@ -667,7 +783,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
        if (!ldev)
                return;
 
+recheck:
        mlx5_dev_list_lock();
+       if (ldev->mode_changes_in_progress) {
+               mlx5_dev_list_unlock();
+               msleep(100);
+               goto recheck;
+       }
        mlx5_ldev_remove_mdev(ldev, dev);
        mlx5_dev_list_unlock();
        mlx5_ldev_put(ldev);
@@ -675,8 +797,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
 
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
 {
+       int err;
+
+recheck:
        mlx5_dev_list_lock();
-       __mlx5_lag_dev_add_mdev(dev);
+       err = __mlx5_lag_dev_add_mdev(dev);
+       if (err) {
+               mlx5_dev_list_unlock();
+               msleep(100);
+               goto recheck;
+       }
        mlx5_dev_list_unlock();
 }
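
Both mlx5_lag_add_mdev() and mlx5_lag_remove_mdev() now poll mode_changes_in_progress with a 100 ms back-off. The flag is protected by the device-list lock itself, so a waiter has to drop that lock before sleeping; the goto-based code above is equivalent in shape to this loop (sketch):

    for (;;) {
            mlx5_dev_list_lock();
            if (!ldev->mode_changes_in_progress)
                    break;                  /* proceed with the lock held */
            mlx5_dev_list_unlock();
            msleep(100);                    /* a mode change is in flight */
    }
    /* ... add or remove the mdev ... */
    mlx5_dev_list_unlock();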
 
@@ -690,11 +820,11 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
        if (!ldev)
                return;
 
-       if (__mlx5_lag_is_active(ldev))
-               mlx5_disable_lag(ldev);
-
        mlx5_ldev_remove_netdev(ldev, netdev);
        ldev->flags &= ~MLX5_LAG_FLAG_READY;
+
+       if (__mlx5_lag_is_active(ldev))
+               mlx5_queue_bond_work(ldev, 0);
 }
 
 /* Must be called with intf_mutex held */
@@ -716,6 +846,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
 
        if (i >= MLX5_MAX_PORTS)
                ldev->flags |= MLX5_LAG_FLAG_READY;
+       mlx5_queue_bond_work(ldev, 0);
 }
 
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -746,6 +877,21 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_active);
 
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       bool res;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       res = ldev && __mlx5_lag_is_active(ldev) &&
+               dev == ldev->pf[MLX5_LAG_P1].dev;
+       spin_unlock(&lag_lock);
+
+       return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 {
        struct mlx5_lag *ldev;
@@ -760,19 +906,50 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_sriov);
 
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       bool res;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
+       spin_unlock(&lag_lock);
+
+       return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
 {
+       struct mlx5_core_dev *dev0;
+       struct mlx5_core_dev *dev1;
        struct mlx5_lag *ldev;
 
        mlx5_dev_list_lock();
+
        ldev = mlx5_lag_dev(dev);
-       if (!ldev)
-               goto unlock;
+       dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       dev1 = ldev->pf[MLX5_LAG_P2].dev;
 
-       mlx5_do_bond(ldev);
+       ldev->mode_changes_in_progress++;
+       if (__mlx5_lag_is_active(ldev)) {
+               mlx5_lag_lock_eswitches(dev0, dev1);
+               mlx5_disable_lag(ldev);
+               mlx5_lag_unlock_eswitches(dev0, dev1);
+       }
+       mlx5_dev_list_unlock();
+}
 
-unlock:
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+
+       mlx5_dev_list_lock();
+       ldev = mlx5_lag_dev(dev);
+       ldev->mode_changes_in_progress--;
        mlx5_dev_list_unlock();
+       mlx5_queue_bond_work(ldev, 0);
 }
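
mlx5_lag_disable_change() and mlx5_lag_enable_change() fence LAG activity around a reconfiguration: disabling bumps mode_changes_in_progress (which parks the bond worker and the mdev add/remove paths) and tears down an active LAG under the eswitch locks; enabling lifts the fence and queues the worker to re-evaluate bonding. Expected usage, sketched with a hypothetical change_eswitch_mode() standing in for whatever operation must not race with bonding:

    mlx5_lag_disable_change(dev);               /* LAG torn down, re-bonding blocked */
    err = change_eswitch_mode(dev, new_mode);   /* hypothetical operation */
    mlx5_lag_enable_change(dev);                /* fence lifted, bond re-evaluated */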
 
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -827,6 +1004,26 @@ unlock:
 }
 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
 
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_dev *peer_dev = NULL;
+       struct mlx5_lag *ldev;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       if (!ldev)
+               goto unlock;
+
+       peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
+                          ldev->pf[MLX5_LAG_P2].dev :
+                          ldev->pf[MLX5_LAG_P1].dev;
+
+unlock:
+       spin_unlock(&lag_lock);
+       return peer_dev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
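
Together with mlx5_lag_is_master() and mlx5_lag_is_shared_fdb() above, mlx5_lag_get_peer_mdev() lets an upper layer discover the shared-FDB topology: one FDB serves both ports, only the bond master programs steering, and the peer PF is reached through the new accessor. A hedged consumer sketch; setup_peer_offloads() is illustrative, not part of this patch:

    if (mlx5_lag_is_shared_fdb(dev) && mlx5_lag_is_master(dev)) {
            struct mlx5_core_dev *peer = mlx5_lag_get_peer_mdev(dev);

            if (peer)
                    setup_peer_offloads(dev, peer);     /* hypothetical */
    }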
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
index 191392c..d4bae52 100644
@@ -39,6 +39,8 @@ struct lag_tracker {
  */
 struct mlx5_lag {
        u8                        flags;
+       int                       mode_changes_in_progress;
+       bool                      shared_fdb;
        u8                        v2p_map[MLX5_MAX_PORTS];
        struct kref               ref;
        struct lag_func           pf[MLX5_MAX_PORTS];
@@ -71,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker);
 int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
-                     u8 flags);
+                     u8 flags,
+                     bool shared_fdb);
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev);
 
index c4bf8b6..011b639 100644
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
                struct lag_tracker tracker;
 
                tracker = ldev->tracker;
-               mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+               mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
        }
 
        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
new file mode 100644
index 0000000..749d17c
--- /dev/null
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS    3
+#define MLX5_TTC_GROUP1_SIZE   (BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE    BIT(1)
+#define MLX5_TTC_GROUP3_SIZE    BIT(0)
+#define MLX5_TTC_TABLE_SIZE    (MLX5_TTC_GROUP1_SIZE +\
+                                MLX5_TTC_GROUP2_SIZE +\
+                                MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS      3
+#define MLX5_INNER_TTC_GROUP1_SIZE     BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE     BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE     BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE      (MLX5_INNER_TTC_GROUP1_SIZE +\
+                                        MLX5_INNER_TTC_GROUP2_SIZE +\
+                                        MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+       int num_groups;
+       struct mlx5_flow_table *t;
+       struct mlx5_flow_group **g;
+       struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+       struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+       return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+       int i;
+
+       for (i = 0; i < MLX5_NUM_TT; i++) {
+               if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+                       mlx5_del_flow_rules(ttc->rules[i].rule);
+                       ttc->rules[i].rule = NULL;
+               }
+       }
+
+       for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+               if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+                       mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+                       ttc->tunnel_rules[i] = NULL;
+               }
+       }
+}
+
+struct mlx5_etype_proto {
+       u16 etype;
+       u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+       [MLX5_TT_IPV4_TCP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_TCP,
+       },
+       [MLX5_TT_IPV6_TCP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_TCP,
+       },
+       [MLX5_TT_IPV4_UDP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_UDP,
+       },
+       [MLX5_TT_IPV6_UDP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_UDP,
+       },
+       [MLX5_TT_IPV4_IPSEC_AH] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_AH,
+       },
+       [MLX5_TT_IPV6_IPSEC_AH] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_AH,
+       },
+       [MLX5_TT_IPV4_IPSEC_ESP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_ESP,
+       },
+       [MLX5_TT_IPV6_IPSEC_ESP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_ESP,
+       },
+       [MLX5_TT_IPV4] = {
+               .etype = ETH_P_IP,
+               .proto = 0,
+       },
+       [MLX5_TT_IPV6] = {
+               .etype = ETH_P_IPV6,
+               .proto = 0,
+       },
+       [MLX5_TT_ANY] = {
+               .etype = 0,
+               .proto = 0,
+       },
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+       [MLX5_TT_IPV4_GRE] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_GRE,
+       },
+       [MLX5_TT_IPV6_GRE] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_GRE,
+       },
+       [MLX5_TT_IPV4_IPIP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_IPIP,
+       },
+       [MLX5_TT_IPV6_IPIP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_IPIP,
+       },
+       [MLX5_TT_IPV4_IPV6] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_IPV6,
+       },
+       [MLX5_TT_IPV6_IPV6] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_IPV6,
+       },
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+       return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+                                          u8 proto_type)
+{
+       switch (proto_type) {
+       case IPPROTO_GRE:
+               return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+       case IPPROTO_IPIP:
+       case IPPROTO_IPV6:
+               return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+                       MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+       default:
+               return false;
+       }
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+       int tt;
+
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (mlx5_tunnel_proto_supported_rx(mdev,
+                                                  ttc_tunnel_rules[tt].proto))
+                       return true;
+       }
+       return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+       return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+               MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                         ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+       if (ethertype == ETH_P_IP)
+               return 4;
+
+       if (ethertype == ETH_P_IPV6)
+               return 6;
+
+       return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+                      struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+       int match_ipv_outer =
+               MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+                                         ft_field_support.outer_ip_version);
+       MLX5_DECLARE_FLOW_ACT(flow_act);
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       u8 ipv;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return ERR_PTR(-ENOMEM);
+
+       if (proto) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+       }
+
+       ipv = mlx5_etype_to_ipv(etype);
+       if (match_ipv_outer && ipv) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+       } else if (etype) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+       }
+
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+       if (IS_ERR(rule)) {
+               err = PTR_ERR(rule);
+               mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+       }
+
+       kvfree(spec);
+       return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+                                        struct ttc_params *params,
+                                        struct mlx5_ttc_table *ttc)
+{
+       struct mlx5_flow_handle **trules;
+       struct mlx5_ttc_rule *rules;
+       struct mlx5_flow_table *ft;
+       int tt;
+       int err;
+
+       ft = ttc->t;
+       rules = ttc->rules;
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               struct mlx5_ttc_rule *rule = &rules[tt];
+
+               rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt],
+                                                   ttc_rules[tt].etype,
+                                                   ttc_rules[tt].proto);
+               if (IS_ERR(rule->rule)) {
+                       err = PTR_ERR(rule->rule);
+                       rule->rule = NULL;
+                       goto del_rules;
+               }
+               rule->default_dest = params->dests[tt];
+       }
+
+       if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+               return 0;
+
+       trules    = ttc->tunnel_rules;
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (!mlx5_tunnel_proto_supported_rx(dev,
+                                                   ttc_tunnel_rules[tt].proto))
+                       continue;
+               trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+                                                   &params->tunnel_dests[tt],
+                                                   ttc_tunnel_rules[tt].etype,
+                                                   ttc_tunnel_rules[tt].proto);
+               if (IS_ERR(trules[tt])) {
+                       err = PTR_ERR(trules[tt]);
+                       trules[tt] = NULL;
+                       goto del_rules;
+               }
+       }
+
+       return 0;
+
+del_rules:
+       mlx5_cleanup_ttc_rules(ttc);
+       return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+                                       bool use_ipv)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       int ix = 0;
+       u32 *in;
+       int err;
+       u8 *mc;
+
+       ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+       if (!ttc->g)
+               return -ENOMEM;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               kfree(ttc->g);
+               ttc->g = NULL;
+               return -ENOMEM;
+       }
+
+       /* L4 Group */
+       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+       if (use_ipv)
+               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+       else
+               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP1_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* L3 Group */
+       MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP2_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* Any Group */
+       memset(in, 0, inlen);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP3_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       kvfree(in);
+       return 0;
+
+err:
+       err = PTR_ERR(ttc->g[ttc->num_groups]);
+       ttc->g[ttc->num_groups] = NULL;
+       kvfree(in);
+
+       return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+                            struct mlx5_flow_table *ft,
+                            struct mlx5_flow_destination *dest,
+                            u16 etype, u8 proto)
+{
+       MLX5_DECLARE_FLOW_ACT(flow_act);
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       u8 ipv;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return ERR_PTR(-ENOMEM);
+
+       ipv = mlx5_etype_to_ipv(etype);
+       if (etype && ipv) {
+               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+       }
+
+       if (proto) {
+               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+       }
+
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+       if (IS_ERR(rule)) {
+               err = PTR_ERR(rule);
+               mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+       }
+
+       kvfree(spec);
+       return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+                                              struct ttc_params *params,
+                                              struct mlx5_ttc_table *ttc)
+{
+       struct mlx5_ttc_rule *rules;
+       struct mlx5_flow_table *ft;
+       int err;
+       int tt;
+
+       ft = ttc->t;
+       rules = ttc->rules;
+
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               struct mlx5_ttc_rule *rule = &rules[tt];
+
+               rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+                                                         &params->dests[tt],
+                                                         ttc_rules[tt].etype,
+                                                         ttc_rules[tt].proto);
+               if (IS_ERR(rule->rule)) {
+                       err = PTR_ERR(rule->rule);
+                       rule->rule = NULL;
+                       goto del_rules;
+               }
+               rule->default_dest = params->dests[tt];
+       }
+
+       return 0;
+
+del_rules:
+       mlx5_cleanup_ttc_rules(ttc);
+       return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       int ix = 0;
+       u32 *in;
+       int err;
+       u8 *mc;
+
+       ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+                        GFP_KERNEL);
+       if (!ttc->g)
+               return -ENOMEM;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               kfree(ttc->g);
+               ttc->g = NULL;
+               return -ENOMEM;
+       }
+
+       /* L4 Group */
+       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP1_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* L3 Group */
+       MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP2_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* Any Group */
+       memset(in, 0, inlen);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP3_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       kvfree(in);
+       return 0;
+
+err:
+       err = PTR_ERR(ttc->g[ttc->num_groups]);
+       ttc->g[ttc->num_groups] = NULL;
+       kvfree(in);
+
+       return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+                                                  struct ttc_params *params)
+{
+       struct mlx5_ttc_table *ttc;
+       int err;
+
+       ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+       if (!ttc)
+               return ERR_PTR(-ENOMEM);
+
+       WARN_ON_ONCE(params->ft_attr.max_fte);
+       params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+       ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+       if (IS_ERR(ttc->t)) {
+               err = PTR_ERR(ttc->t);
+               kvfree(ttc);
+               return ERR_PTR(err);
+       }
+
+       err = mlx5_create_inner_ttc_table_groups(ttc);
+       if (err)
+               goto destroy_ft;
+
+       err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+       if (err)
+               goto destroy_ft;
+
+       return ttc;
+
+destroy_ft:
+       mlx5_destroy_ttc_table(ttc);
+       return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+       int i;
+
+       mlx5_cleanup_ttc_rules(ttc);
+       for (i = ttc->num_groups - 1; i >= 0; i--) {
+               if (!IS_ERR_OR_NULL(ttc->g[i]))
+                       mlx5_destroy_flow_group(ttc->g[i]);
+               ttc->g[i] = NULL;
+       }
+
+       kfree(ttc->g);
+       mlx5_destroy_flow_table(ttc->t);
+       kvfree(ttc);
+}
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+                                            struct ttc_params *params)
+{
+       bool match_ipv_outer =
+               MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+                                         ft_field_support.outer_ip_version);
+       struct mlx5_ttc_table *ttc;
+       int err;
+
+       ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+       if (!ttc)
+               return ERR_PTR(-ENOMEM);
+
+       WARN_ON_ONCE(params->ft_attr.max_fte);
+       params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+       ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+       if (IS_ERR(ttc->t)) {
+               err = PTR_ERR(ttc->t);
+               kvfree(ttc);
+               return ERR_PTR(err);
+       }
+
+       err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+       if (err)
+               goto destroy_ft;
+
+       err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+       if (err)
+               goto destroy_ft;
+
+       return ttc;
+
+destroy_ft:
+       mlx5_destroy_ttc_table(ttc);
+       return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+                     struct mlx5_flow_destination *new_dest)
+{
+       return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+                                           NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+                         enum mlx5_traffic_types type)
+{
+       struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+       WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+                 "TTC[%d] default dest is not set up yet", type);
+
+       return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+                             enum mlx5_traffic_types type)
+{
+       struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+       return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
new file mode 100644
index 0000000..ce95be8
--- /dev/null
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_FS_TTC_H__
+#define __MLX5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+       MLX5_TT_IPV4_TCP,
+       MLX5_TT_IPV6_TCP,
+       MLX5_TT_IPV4_UDP,
+       MLX5_TT_IPV6_UDP,
+       MLX5_TT_IPV4_IPSEC_AH,
+       MLX5_TT_IPV6_IPSEC_AH,
+       MLX5_TT_IPV4_IPSEC_ESP,
+       MLX5_TT_IPV6_IPSEC_ESP,
+       MLX5_TT_IPV4,
+       MLX5_TT_IPV6,
+       MLX5_TT_ANY,
+       MLX5_NUM_TT,
+       MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+       MLX5_TT_IPV4_GRE,
+       MLX5_TT_IPV6_GRE,
+       MLX5_TT_IPV4_IPIP,
+       MLX5_TT_IPV6_IPIP,
+       MLX5_TT_IPV4_IPV6,
+       MLX5_TT_IPV6_IPV6,
+       MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+       struct mlx5_flow_namespace *ns;
+       struct mlx5_flow_table_attr ft_attr;
+       struct mlx5_flow_destination dests[MLX5_NUM_TT];
+       bool   inner_ttc;
+       struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+                                            struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+                                                  struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+                     struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+                         enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+                             enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
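
The header above is the entire public surface of the new TTC (traffic type classifier) library. A minimal usage sketch, assuming the kernel NIC-RX namespace and one TIR per traffic type; mdev and tir_num_for() are placeholders:

    struct ttc_params params = {};
    struct mlx5_ttc_table *ttc;
    int tt;

    params.ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_KERNEL);
    for (tt = 0; tt < MLX5_NUM_TT; tt++) {
            params.dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
            params.dests[tt].tir_num = tir_num_for(tt);     /* hypothetical */
    }

    ttc = mlx5_create_ttc_table(mdev, &params);
    if (IS_ERR(ttc))
            return PTR_ERR(ttc);

    /* ... mlx5_ttc_fwd_dest() retargets a single traffic type ... */
    mlx5_destroy_ttc_table(ttc);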
index eb1b316..6fe5603 100644
@@ -1179,6 +1179,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
                goto err_ec;
        }
 
+       mlx5_lag_add_mdev(dev);
        err = mlx5_sriov_attach(dev);
        if (err) {
                mlx5_core_err(dev, "sriov init failed %d\n", err);
@@ -1186,11 +1187,11 @@ static int mlx5_load(struct mlx5_core_dev *dev)
        }
 
        mlx5_sf_dev_table_create(dev);
-       mlx5_lag_add_mdev(dev);
 
        return 0;
 
 err_sriov:
+       mlx5_lag_remove_mdev(dev);
        mlx5_ec_cleanup(dev);
 err_ec:
        mlx5_sf_hw_table_destroy(dev);
@@ -1222,9 +1223,9 @@ err_irq_table:
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
-       mlx5_lag_remove_mdev(dev);
        mlx5_sf_dev_table_destroy(dev);
        mlx5_sriov_detach(dev);
+       mlx5_lag_remove_mdev(dev);
        mlx5_ec_cleanup(dev);
        mlx5_sf_hw_table_destroy(dev);
        mlx5_vhca_event_stop(dev);
@@ -1271,7 +1272,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
 
        set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
-       err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+       err = mlx5_devlink_register(priv_to_devlink(dev));
        if (err)
                goto err_devlink_reg;
 
@@ -1452,7 +1453,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
        struct devlink *devlink;
        int err;
 
-       devlink = mlx5_devlink_alloc();
+       devlink = mlx5_devlink_alloc(&pdev->dev);
        if (!devlink) {
                dev_err(&pdev->dev, "devlink alloc failed\n");
                return -ENOMEM;
index 343807a..14ffd74 100644
@@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
 
 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
index 42c8ee0..052f480 100644
@@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
        struct devlink *devlink;
        int err;
 
-       devlink = mlx5_devlink_alloc();
+       devlink = mlx5_devlink_alloc(&adev->dev);
        if (!devlink)
                return -ENOMEM;
 
index 1be0487..720195c 100644
@@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf)
        return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE;
 }
 
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state *state,
                                      enum devlink_port_fn_opstate *opstate,
                                      struct netlink_ext_ack *extack)
 {
-       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
        struct mlx5_sf_table *table;
        struct mlx5_sf *sf;
        int err = 0;
@@ -248,11 +248,11 @@ out:
        return err;
 }
 
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state state,
                                      struct netlink_ext_ack *extack)
 {
-       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
        struct mlx5_sf_table *table;
        struct mlx5_sf *sf;
        int err;
index 81ce13b..3a480e0 100644
@@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink,
                             unsigned int *new_port_index);
 int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
                             struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state *state,
                                      enum devlink_port_fn_opstate *opstate,
                                      struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state state,
                                      struct netlink_ext_ack *extack);
 #else
index 12cf323..754f892 100644
@@ -790,7 +790,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc, cqc, uar_page, uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                 MLX5_ADAPTER_PAGE_SHIFT);
index e775f08..f080fab 100644
@@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 
        if (!reload) {
                alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size;
-               devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size);
+               devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size,
+                                       mlxsw_bus_info->dev);
                if (!devlink) {
                        err = -ENOMEM;
                        goto err_devlink_alloc;
@@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                goto err_emad_init;
 
        if (!reload) {
-               err = devlink_register(devlink, mlxsw_bus_info->dev);
+               err = devlink_register(devlink);
                if (err)
                        goto err_devlink_register;
        }
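
The mlxsw hunk above, like the mlx5, ocelot, nfp and ionic hunks elsewhere in this diff, reflects a devlink core API change in this cycle: devlink_alloc() now takes the backing struct device and devlink_register() no longer does, so the device is bound at allocation time. Conversion shape (sketch; ops and priv names illustrative):

    devlink = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv), dev);
    if (!devlink)
            return -ENOMEM;
    /* ... driver setup ... */
    err = devlink_register(devlink);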
index d39ae2a..7bdbb2d 100644
@@ -1,6 +1,5 @@
 config SPARX5_SWITCH
        tristate "Sparx5 switch driver"
-       depends on BRIDGE || BRIDGE=n
        depends on NET_SWITCHDEV
        depends on HAS_IOMEM
        depends on OF
index 9d485a9..cb68eaa 100644
  */
 #define VSTAX 73
 
-static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
+#define ifh_encode_bitfield(ifh, value, pos, _width)                   \
+       ({                                                              \
+               u32 width = (_width);                                   \
+                                                                       \
+               /* Max width is 5 bytes - 40 bits. In worst case this will
+                * spread over 6 bytes - 48 bits
+                */                                                     \
+               compiletime_assert(width <= 40,                         \
+                                  "Unsupported width, must be <= 40"); \
+               __ifh_encode_bitfield((ifh), (value), (pos), width);    \
+       })
+
+static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
 {
        u8 *ifh_hdr = ifh;
        /* Calculate the Start IFH byte position of this IFH bit position */
        u32 byte = (35 - (pos / 8));
        /* Calculate the Start bit position in the Start IFH byte */
        u32 bit  = (pos % 8);
-       u64 encode = GENMASK(bit + width - 1, bit) & (value << bit);
-
-       /* Max width is 5 bytes - 40 bits. In worst case this will
-        * spread over 6 bytes - 48 bits
-        */
-       compiletime_assert(width <= 40, "Unsupported width, must be <= 40");
+       u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit);
 
        /* The b0-b7 goes into the start IFH byte */
        if (encode & 0xFF)
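
This sparx5 hunk fixes a 32-bit truncation: GENMASK() builds an unsigned long mask, only 32 bits wide on 32-bit targets, so a field whose top bit (bit + width - 1) crosses bit 31 was silently cut short; GENMASK_ULL() always builds a 64-bit mask. Hoisting the assertion into a wrapper macro also keeps _width a compile-time constant where compiletime_assert() evaluates it. Illustration (values chosen for the example, not from the patch):

    /* bit = 4, width = 40: the field occupies bits 43..4 */
    u64 mask = GENMASK_ULL(43, 4);  /* 0x00000ffffffffff0 on all targets */
    /* GENMASK(43, 4) on a 32-bit unsigned long shifts past the type
     * width, truncating the mask (and invoking undefined behaviour). */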
index 4bd7e9d..aa41c9c 100644
@@ -1103,7 +1103,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
        if (!np && !pdev->dev.platform_data)
                return -ENODEV;
 
-       devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot));
+       devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot),
+                               &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
@@ -1187,7 +1188,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
        if (err)
                goto out_put_ports;
 
-       err = devlink_register(devlink, ocelot->dev);
+       err = devlink_register(devlink);
        if (err)
                goto out_ocelot_deinit;
 
index ce3eca5..d74a80f 100644
@@ -193,8 +193,6 @@ static int jazz_sonic_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
        platform_set_drvdata(pdev, dev);
 
-       netdev_boot_setup_check(dev);
-
        dev->base_addr = res->start;
        dev->irq = platform_get_irq(pdev, 0);
        err = sonic_probe1(dev);
index bd9d026..3f98203 100644
@@ -819,7 +819,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
                printk(version);
 #endif
 
-       i = pci_enable_device(pdev);
+       i = pcim_enable_device(pdev);
        if (i) return i;
 
        /* natsemi has a non-standard PM control register
@@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
        ioaddr = ioremap(iostart, iosize);
        if (!ioaddr) {
                i = -ENOMEM;
-               goto err_ioremap;
+               goto err_pci_request_regions;
        }
 
        /* Work around the dropped serial bit. */
@@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
  err_register_netdev:
        iounmap(ioaddr);
 
- err_ioremap:
-       pci_release_regions(pdev);
-
  err_pci_request_regions:
        free_netdev(dev);
        return i;
@@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev)
 
        NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround);
        unregister_netdev (dev);
-       pci_release_regions (pdev);
        iounmap(ioaddr);
        free_netdev (dev);
 }
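
The natsemi conversion to pcim_enable_device() moves the driver onto managed (devres) PCI resources: once the device is marked managed, the later pci_request_regions() call is released automatically on probe failure and on driver detach, which is why both explicit pci_release_regions() calls and the err_ioremap unwind label drop out. Resulting probe shape, sketched (DRV_NAME illustrative):

    err = pcim_enable_device(pdev);             /* managed enable */
    if (err)
            return err;
    err = pci_request_regions(pdev, DRV_NAME);  /* now auto-released */
    if (err)
            return err;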
index 28d9e98..ca46860 100644
@@ -215,7 +215,6 @@ int xtsonic_probe(struct platform_device *pdev)
        lp->device = &pdev->dev;
        platform_set_drvdata(pdev, dev);
        SET_NETDEV_DEV(dev, &pdev->dev);
-       netdev_boot_setup_check(dev);
 
        dev->base_addr = resmem->start;
        dev->irq = resirq->start;
index 20fb4ad..df4a3f3 100644
@@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev)
 
        kfree(vdev->vpaths);
 
-       /* we are safe to free it now */
-       free_netdev(dev);
-
        vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered",
                        buf);
        vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d  Exiting...", buf,
                             __func__, __LINE__);
+
+       /* we are safe to free it now */
+       free_netdev(dev);
 }
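
The vxge reordering is a use-after-free fix: vdev points into dev's private area (netdev_priv()), so the trace calls reading vdev->level_trace must run before free_netdev(dev). The hazard, reduced to its shape:

    struct vxgedev *vdev = netdev_priv(dev);

    free_netdev(dev);
    vxge_debug_init(vdev->level_trace, "...");  /* BAD: vdev was freed with dev */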
 
 /*
index 742a420..bb3b8a7 100644
@@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
                goto err_pci_disable;
        }
 
-       devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf));
+       devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev);
        if (!devlink) {
                err = -ENOMEM;
                goto err_rel_regions;
index 15078f9..5bfa22a 100644
@@ -3281,17 +3281,12 @@ static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
        for (r = 0; r < nn->max_r_vecs; r++)
                nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
 
-       err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
+       err = netif_set_real_num_queues(nn->dp.netdev,
+                                       nn->dp.num_stack_tx_rings,
+                                       nn->dp.num_rx_rings);
        if (err)
                return err;
 
-       if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
-               err = netif_set_real_num_tx_queues(nn->dp.netdev,
-                                                  nn->dp.num_stack_tx_rings);
-               if (err)
-                       return err;
-       }
-
        return nfp_net_set_config_and_enable(nn);
 }
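
netif_set_real_num_queues(), added to the core in this same cycle, updates the TX and RX queue counts in one call, applying increases before decreases and unwinding the first change if the second fails. The replaced open-coded sequence had no such unwind (sketch):

    err = netif_set_real_num_tx_queues(netdev, txq);
    if (err)
            return err;
    err = netif_set_real_num_rx_queues(netdev, rxq);
    if (err)
            return err;     /* the TX count was already changed */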
 
index a213784..0bf2ff5 100644
@@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
 
        /* Init to unknowns */
        ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
        cmd->base.port = PORT_OTHER;
        cmd->base.speed = SPEED_UNKNOWN;
        cmd->base.duplex = DUPLEX_UNKNOWN;
index 921db40..d10a938 100644
@@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
        if (err)
                goto err_unmap;
 
-       err = devlink_register(devlink, &pf->pdev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto err_app_clean;
 
index cd520e4..c7d0e19 100644
@@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev)
 {
        struct devlink *dl;
 
-       dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic));
+       dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev);
 
        return devlink_priv(dl);
 }
@@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic)
        struct devlink_port_attrs attrs = {};
        int err;
 
-       err = devlink_register(dl, ionic->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_warn(ionic->dev, "devlink_register failed: %d\n", err);
                return err;
index f21f80c..f52c47a 100644
@@ -30,7 +30,7 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = {
                                      */
 };
 
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode);
+static void ionic_lif_rx_mode(struct ionic_lif *lif);
 static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
 static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
 static void ionic_link_status_check(struct ionic_lif *lif);
@@ -54,7 +54,19 @@ static void ionic_dim_work(struct work_struct *work)
        cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
        qcq = container_of(dim, struct ionic_qcq, dim);
        new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec);
-       qcq->intr.dim_coal_hw = new_coal ? new_coal : 1;
+       new_coal = new_coal ? new_coal : 1;
+
+       if (qcq->intr.dim_coal_hw != new_coal) {
+               unsigned int qi = qcq->cq.bound_q->index;
+               struct ionic_lif *lif = qcq->q.lif;
+
+               qcq->intr.dim_coal_hw = new_coal;
+
+               ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+                                    lif->rxqcqs[qi]->intr.index,
+                                    qcq->intr.dim_coal_hw);
+       }
+
        dim->state = DIM_START_MEASURE;
 }
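
ionic_dim_work() now keeps the computed value non-zero and programs the interrupt block immediately through ionic_intr_coal_init(), but only when DIM produced a value different from the cached dim_coal_hw, so steady traffic no longer costs a device write per DIM cycle. The guard, in its general shape (program_hw() illustrative):

    if (cached != new_val) {            /* write-on-change only */
            cached = new_val;
            program_hw(new_val);        /* ionic_intr_coal_init() above */
    }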
 
@@ -78,7 +90,7 @@ static void ionic_lif_deferred_work(struct work_struct *work)
 
                switch (w->type) {
                case IONIC_DW_TYPE_RX_MODE:
-                       ionic_lif_rx_mode(lif, w->rx_mode);
+                       ionic_lif_rx_mode(lif);
                        break;
                case IONIC_DW_TYPE_RX_ADDR_ADD:
                        ionic_lif_addr_add(lif, w->addr);
@@ -1302,10 +1314,8 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
        return 0;
 }
 
-static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add,
-                         bool can_sleep)
+static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
 {
-       struct ionic_deferred_work *work;
        unsigned int nmfilters;
        unsigned int nufilters;
 
@@ -1331,97 +1341,46 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add,
                        lif->nucast--;
        }
 
-       if (!can_sleep) {
-               work = kzalloc(sizeof(*work), GFP_ATOMIC);
-               if (!work)
-                       return -ENOMEM;
-               work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD :
-                                  IONIC_DW_TYPE_RX_ADDR_DEL;
-               memcpy(work->addr, addr, ETH_ALEN);
-               netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
-                          add ? "add" : "del", addr);
-               ionic_lif_deferred_enqueue(&lif->deferred, work);
-       } else {
-               netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
-                          add ? "add" : "del", addr);
-               if (add)
-                       return ionic_lif_addr_add(lif, addr);
-               else
-                       return ionic_lif_addr_del(lif, addr);
-       }
+       netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
+                  add ? "add" : "del", addr);
+       if (add)
+               return ionic_lif_addr_add(lif, addr);
+       else
+               return ionic_lif_addr_del(lif, addr);
 
-       return 0;
 }
 
 static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP);
-}
-
-static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr)
-{
-       return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP);
+       return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR);
 }
 
 static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP);
+       return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR);
 }
 
-static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr)
+static void ionic_lif_rx_mode(struct ionic_lif *lif)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP);
-}
-
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
-{
-       struct ionic_admin_ctx ctx = {
-               .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
-               .cmd.rx_mode_set = {
-                       .opcode = IONIC_CMD_RX_MODE_SET,
-                       .lif_index = cpu_to_le16(lif->index),
-                       .rx_mode = cpu_to_le16(rx_mode),
-               },
-       };
+       struct net_device *netdev = lif->netdev;
+       unsigned int nfilters;
+       unsigned int nd_flags;
        char buf[128];
-       int err;
+       u16 rx_mode;
        int i;
 #define REMAIN(__x) (sizeof(buf) - (__x))
 
-       i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
-                     lif->rx_mode, rx_mode);
-       if (rx_mode & IONIC_RX_MODE_F_UNICAST)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
-       if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
-       if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
-       if (rx_mode & IONIC_RX_MODE_F_PROMISC)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
-       if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
-       netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
-
-       err = ionic_adminq_post_wait(lif, &ctx);
-       if (err)
-               netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n",
-                           rx_mode, err);
-       else
-               lif->rx_mode = rx_mode;
-}
+       mutex_lock(&lif->config_lock);
 
-static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
-{
-       struct ionic_lif *lif = netdev_priv(netdev);
-       struct ionic_deferred_work *work;
-       unsigned int nfilters;
-       unsigned int rx_mode;
+       /* grab the flags once for local use */
+       nd_flags = netdev->flags;
 
        rx_mode = IONIC_RX_MODE_F_UNICAST;
-       rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
-       rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
-       rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
-       rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
+       rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
+       rx_mode |= (nd_flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
+       rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
+       rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
 
        /* sync unicast addresses
         * next check to see if we're in an overflow state
@@ -1430,49 +1389,83 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
         *       we remove our overflow flag and check the netdev flags
         *       to see if we can disable NIC PROMISC
         */
-       if (can_sleep)
-               __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
-       else
-               __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
+       __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
        nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
        if (netdev_uc_count(netdev) + 1 > nfilters) {
                rx_mode |= IONIC_RX_MODE_F_PROMISC;
                lif->uc_overflow = true;
        } else if (lif->uc_overflow) {
                lif->uc_overflow = false;
-               if (!(netdev->flags & IFF_PROMISC))
+               if (!(nd_flags & IFF_PROMISC))
                        rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
        }
 
        /* same for multicast */
-       if (can_sleep)
-               __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
-       else
-               __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
+       __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
        nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
        if (netdev_mc_count(netdev) > nfilters) {
                rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
                lif->mc_overflow = true;
        } else if (lif->mc_overflow) {
                lif->mc_overflow = false;
-               if (!(netdev->flags & IFF_ALLMULTI))
+               if (!(nd_flags & IFF_ALLMULTI))
                        rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
        }
 
+       i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
+                     lif->rx_mode, rx_mode);
+       if (rx_mode & IONIC_RX_MODE_F_UNICAST)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
+       if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
+       if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
+       if (rx_mode & IONIC_RX_MODE_F_PROMISC)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
+       if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
+       if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER");
+       netdev_dbg(netdev, "lif%d %s\n", lif->index, buf);
+
        if (lif->rx_mode != rx_mode) {
-               if (!can_sleep) {
-                       work = kzalloc(sizeof(*work), GFP_ATOMIC);
-                       if (!work) {
-                               netdev_err(lif->netdev, "rxmode change dropped\n");
-                               return;
-                       }
-                       work->type = IONIC_DW_TYPE_RX_MODE;
-                       work->rx_mode = rx_mode;
-                       netdev_dbg(lif->netdev, "deferred: rx_mode\n");
-                       ionic_lif_deferred_enqueue(&lif->deferred, work);
-               } else {
-                       ionic_lif_rx_mode(lif, rx_mode);
+               struct ionic_admin_ctx ctx = {
+                       .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+                       .cmd.rx_mode_set = {
+                               .opcode = IONIC_CMD_RX_MODE_SET,
+                               .lif_index = cpu_to_le16(lif->index),
+                       },
+               };
+               int err;
+
+               ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode);
+               err = ionic_adminq_post_wait(lif, &ctx);
+               if (err)
+                       netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n",
+                                   rx_mode, err);
+               else
+                       lif->rx_mode = rx_mode;
+       }
+
+       mutex_unlock(&lif->config_lock);
+}
+
+static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
+{
+       struct ionic_lif *lif = netdev_priv(netdev);
+       struct ionic_deferred_work *work;
+
+       if (!can_sleep) {
+               work = kzalloc(sizeof(*work), GFP_ATOMIC);
+               if (!work) {
+                       netdev_err(lif->netdev, "rxmode change dropped\n");
+                       return;
                }
+               work->type = IONIC_DW_TYPE_RX_MODE;
+               netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+               ionic_lif_deferred_enqueue(&lif->deferred, work);
+       } else {
+               ionic_lif_rx_mode(lif);
        }
 }
 
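The rework above folds the old can_sleep plumbing into one place: all sleepable work now lives in ionic_lif_rx_mode() under the new config_lock, and the atomic path merely queues a work item. A minimal sketch of that defer-when-atomic idiom, using the generic workqueue API and hypothetical my_* names rather than the driver's private deferred-work list:

#include <linux/workqueue.h>
#include <linux/mutex.h>

struct my_lif {
	struct mutex config_lock;	/* serializes configuration changes */
	struct work_struct rx_mode_work; /* INIT_WORK()ed at lif init time */
};

static void my_rx_mode(struct my_lif *lif)
{
	mutex_lock(&lif->config_lock);
	/* sleepable work: sync filters, post an admin command, ... */
	mutex_unlock(&lif->config_lock);
}

static void my_rx_mode_work(struct work_struct *work)
{
	my_rx_mode(container_of(work, struct my_lif, rx_mode_work));
}

static void my_set_rx_mode(struct my_lif *lif, bool can_sleep)
{
	if (!can_sleep)
		schedule_work(&lif->rx_mode_work); /* finish in process context */
	else
		my_rx_mode(lif);
}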
@@ -3074,6 +3067,7 @@ void ionic_lif_deinit(struct ionic_lif *lif)
        ionic_lif_qcq_deinit(lif, lif->notifyqcq);
        ionic_lif_qcq_deinit(lif, lif->adminqcq);
 
+       mutex_destroy(&lif->config_lock);
        mutex_destroy(&lif->queue_lock);
        ionic_lif_reset(lif);
 }
@@ -3201,7 +3195,7 @@ static int ionic_station_set(struct ionic_lif *lif)
                 */
                if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
                                      netdev->dev_addr))
-                       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+                       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
        } else {
                /* Update the netdev mac with the device's mac */
                memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
@@ -3218,7 +3212,7 @@ static int ionic_station_set(struct ionic_lif *lif)
 
        netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
                   netdev->dev_addr);
-       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
 
        return 0;
 }
@@ -3241,6 +3235,7 @@ int ionic_lif_init(struct ionic_lif *lif)
 
        lif->hw_index = le16_to_cpu(comp.hw_index);
        mutex_init(&lif->queue_lock);
+       mutex_init(&lif->config_lock);
 
        /* now that we have the hw_index we can figure out our doorbell page */
        lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
index 346506f..69ab59f 100644 (file)
@@ -108,7 +108,6 @@ struct ionic_deferred_work {
        struct list_head list;
        enum ionic_deferred_work_type type;
        union {
-               unsigned int rx_mode;
                u8 addr[ETH_ALEN];
                u8 fw_status;
        };
@@ -179,6 +178,7 @@ struct ionic_lif {
        unsigned int index;
        unsigned int hw_index;
        struct mutex queue_lock;        /* lock for queue structures */
+       struct mutex config_lock;       /* lock for config actions */
        spinlock_t adminq_lock;         /* lock for AdminQ operations */
        struct ionic_qcq *adminqcq;
        struct ionic_qcq *notifyqcq;
@@ -199,7 +199,7 @@ struct ionic_lif {
        unsigned int nrxq_descs;
        u32 rx_copybreak;
        u64 rxq_features;
-       unsigned int rx_mode;
+       u16 rx_mode;
        u64 hw_features;
        bool registered;
        bool mc_overflow;
@@ -302,7 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
 int ionic_lif_size(struct ionic *ionic);
 
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif);
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif);
 int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
 ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
@@ -311,10 +311,7 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif);
 void ionic_lif_alloc_phc(struct ionic_lif *lif);
 void ionic_lif_free_phc(struct ionic_lif *lif);
 #else
-static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
-{
-       return -EOPNOTSUPP;
-}
+static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {}
 
 static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 {
index 736ebc5..afc45da 100644 (file)
@@ -188,6 +188,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
        struct hwtstamp_config config;
        int err;
 
+       if (!lif->phc || !lif->phc->ptp)
+               return -EOPNOTSUPP;
+
        if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
                return -EFAULT;
 
@@ -203,15 +206,16 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
        return 0;
 }
 
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif)
 {
        int err;
 
+       if (!lif->phc || !lif->phc->ptp)
+               return;
+
        err = ionic_lif_hwstamp_set_ts_config(lif, NULL);
        if (err)
                netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err);
-
-       return err;
 }
 
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
index 2ba1924..37c3958 100644 (file)
@@ -269,12 +269,11 @@ static void ionic_rx_clean(struct ionic_queue *q,
                }
        }
 
-       if (likely(netdev->features & NETIF_F_RXCSUM)) {
-               if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
-                       skb->ip_summed = CHECKSUM_COMPLETE;
-                       skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
-                       stats->csum_complete++;
-               }
+       if (likely(netdev->features & NETIF_F_RXCSUM) &&
+           (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) {
+               skb->ip_summed = CHECKSUM_COMPLETE;
+               skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
+               stats->csum_complete++;
        } else {
                stats->csum_none++;
        }
@@ -446,11 +445,12 @@ void ionic_rx_empty(struct ionic_queue *q)
        q->tail_idx = 0;
 }
 
-static void ionic_dim_update(struct ionic_qcq *qcq)
+static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode)
 {
        struct dim_sample dim_sample;
        struct ionic_lif *lif;
        unsigned int qi;
+       u64 pkts, bytes;
 
        if (!qcq->intr.dim_coal_hw)
                return;
@@ -458,14 +458,23 @@ static void ionic_dim_update(struct ionic_qcq *qcq)
        lif = qcq->q.lif;
        qi = qcq->cq.bound_q->index;
 
-       ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-                            lif->rxqcqs[qi]->intr.index,
-                            qcq->intr.dim_coal_hw);
+       switch (napi_mode) {
+       case IONIC_LIF_F_TX_DIM_INTR:
+               pkts = lif->txqstats[qi].pkts;
+               bytes = lif->txqstats[qi].bytes;
+               break;
+       case IONIC_LIF_F_RX_DIM_INTR:
+               pkts = lif->rxqstats[qi].pkts;
+               bytes = lif->rxqstats[qi].bytes;
+               break;
+       default:
+               pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts;
+               bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes;
+               break;
+       }
 
        dim_update_sample(qcq->cq.bound_intr->rearm_count,
-                         lif->txqstats[qi].pkts,
-                         lif->txqstats[qi].bytes,
-                         &dim_sample);
+                         pkts, bytes, &dim_sample);
 
        net_dim(&qcq->dim, dim_sample);
 }
@@ -486,7 +495,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
                                     ionic_tx_service, NULL, NULL);
 
        if (work_done < budget && napi_complete_done(napi, work_done)) {
-               ionic_dim_update(qcq);
+               ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR);
                flags |= IONIC_INTR_CRED_UNMASK;
                cq->bound_intr->rearm_count++;
        }
@@ -525,7 +534,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
                ionic_rx_fill(cq->bound_q);
 
        if (work_done < budget && napi_complete_done(napi, work_done)) {
-               ionic_dim_update(qcq);
+               ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR);
                flags |= IONIC_INTR_CRED_UNMASK;
                cq->bound_intr->rearm_count++;
        }
@@ -571,7 +580,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
                ionic_rx_fill(rxcq->bound_q);
 
        if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) {
-               ionic_dim_update(qcq);
+               ionic_dim_update(qcq, 0);
                flags |= IONIC_INTR_CRED_UNMASK;
                rxcq->bound_intr->rearm_count++;
        }
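For context on the hunks above: net_dim() consumes (event count, packets, bytes) samples and walks a moderation profile, eventually scheduling dim->work to reprogram the coalescing registers; the change here only selects which counters feed the sample. A hedged sketch of the sampling side, with a hypothetical my_queue type:

#include <linux/dim.h>

struct my_queue {
	struct dim dim;		/* net_dim state machine */
	u16 rearm_count;	/* interrupt rearm events seen */
};

static void my_dim_update(struct my_queue *q, u64 pkts, u64 bytes)
{
	struct dim_sample sample;

	dim_update_sample(q->rearm_count, pkts, bytes, &sample);
	net_dim(&q->dim, sample);	/* may schedule dim.work to retune */
}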
index cf7f4da..4c7501b 100644 (file)
@@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev)
        struct devlink *dl;
        int rc;
 
-       dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink));
+       dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink),
+                          &cdev->pdev->dev);
        if (!dl)
                return ERR_PTR(-ENOMEM);
 
        qdevlink = devlink_priv(dl);
        qdevlink->cdev = cdev;
 
-       rc = devlink_register(dl, &cdev->pdev->dev);
+       rc = devlink_register(dl);
        if (rc)
                goto err_free;
 
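These qed hunks follow a devlink core API change: the owning struct device is now given to devlink_alloc() up front, and devlink_register() takes only the instance. A minimal sketch of the new calling convention (my_priv and the empty ops table are placeholders):

#include <linux/err.h>
#include <net/devlink.h>

struct my_priv { void *drvdata; };
static const struct devlink_ops my_devlink_ops;

static struct devlink *my_devlink_create(struct device *dev)
{
	struct devlink *dl;
	int err;

	dl = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv), dev);
	if (!dl)
		return ERR_PTR(-ENOMEM);

	err = devlink_register(dl);	/* no device argument anymore */
	if (err) {
		devlink_free(dl);
		return ERR_PTR(err);
	}
	return dl;
}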
index 578935f..ab6d4f7 100644 (file)
@@ -464,12 +464,19 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
        u32 int_sts, first_drop_reason, details, address, all_drops_reason;
        struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
 
+       int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+       if (int_sts == 0xdeadbeaf) {
+               DP_NOTICE(p_hwfn->cdev,
+                         "DORQ is being reset, skipping int_sts handler\n");
+
+               return 0;
+       }
+
        /* int_sts may be zero since all PFs were interrupted for doorbell
         * overflow but another one already handled it. Can abort here. If
         * this PF also requires overflow recovery we will be interrupted again.
         * The masked almost full indication may also be set. Ignoring.
         */
-       int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
        if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
                return 0;
 
@@ -528,6 +535,9 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
 
 static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
 {
+       if (p_hwfn->cdev->recov_in_prog)
+               return 0;
+
        p_hwfn->db_recovery_info.dorq_attn = true;
        qed_dorq_attn_overflow(p_hwfn);
 
index a998611..fc8b3e6 100644 (file)
@@ -1624,8 +1624,6 @@ qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
        static const u32 ip_zero[4] = { 0, 0, 0, 0 };
        bool found = false;
 
-       qed_iwarp_print_cm_info(p_hwfn, cm_info);
-
        list_for_each_entry(listener,
                            &p_hwfn->p_rdma_info->iwarp.listen_list,
                            list_entry) {
index aa48b1b..6871d89 100644 (file)
@@ -1215,6 +1215,10 @@ static void qed_slowpath_task(struct work_struct *work)
 
        if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
                               &hwfn->slowpath_task_flags)) {
+               /* skip qed_db_rec_handler during recovery/unload */
+               if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active)
+                       goto out;
+
                qed_db_rec_handler(hwfn, ptt);
                if (hwfn->periodic_db_rec_count--)
                        qed_slowpath_delayed_work(hwfn,
@@ -1222,6 +1226,7 @@ static void qed_slowpath_task(struct work_struct *work)
                                                  QED_PERIODIC_DB_REC_INTERVAL);
        }
 
+out:
        qed_ptt_release(hwfn, ptt);
 }
 
index c1dd71d..3b84d00 100644 (file)
@@ -4,7 +4,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/types.h>
index 8693117..66c69f0 100644 (file)
@@ -492,6 +492,7 @@ struct qede_fastpath {
 #define QEDE_SP_HW_ERR                  4
 #define QEDE_SP_ARFS_CONFIG             5
 #define QEDE_SP_AER                    7
+#define QEDE_SP_DISABLE                        8
 
 #ifdef CONFIG_RFS_ACCEL
 int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
index c59b72c..a2e4dfb 100644 (file)
@@ -831,7 +831,7 @@ int qede_configure_vlan_filters(struct qede_dev *edev)
 int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
        struct qede_dev *edev = netdev_priv(dev);
-       struct qede_vlan *vlan = NULL;
+       struct qede_vlan *vlan;
        int rc = 0;
 
        DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
@@ -842,7 +842,7 @@ int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
                if (vlan->vid == vid)
                        break;
 
-       if (!vlan || (vlan->vid != vid)) {
+       if (list_entry_is_head(vlan, &edev->vlan_list, list)) {
                DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
                           "Vlan isn't configured\n");
                goto out;
index 033bf2c..d400e9b 100644 (file)
@@ -1005,6 +1005,13 @@ static void qede_sp_task(struct work_struct *work)
        struct qede_dev *edev = container_of(work, struct qede_dev,
                                             sp_task.work);
 
+       /* Disable execution of this deferred work once qede
+        * removal is in progress; this stops any future
+        * scheduling of sp_task.
+        */
+       if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags))
+               return;
+
        /* The locking scheme depends on the specific flag:
         * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
         * ensure that ongoing flows are ended and new ones are not started.
@@ -1292,6 +1299,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
        qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
 
        if (mode != QEDE_REMOVE_RECOVERY) {
+               set_bit(QEDE_SP_DISABLE, &edev->sp_flags);
                unregister_netdev(ndev);
 
                cancel_delayed_work_sync(&edev->sp_task);
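QEDE_SP_DISABLE closes a small shutdown race: the bit is set before unregister_netdev() and cancel_delayed_work_sync(), so a work item that fires inside that window bails out instead of touching a half-torn-down device. The idiom in isolation (illustrative names):

#include <linux/bitops.h>
#include <linux/workqueue.h>

#define MY_SP_DISABLE	0

static unsigned long my_sp_flags;
static struct delayed_work my_sp_task;

static void my_sp_work(struct work_struct *work)
{
	if (test_bit(MY_SP_DISABLE, &my_sp_flags))
		return;			/* removal in progress */
	/* ... normal slow-path handling ... */
}

static void my_remove(void)
{
	set_bit(MY_SP_DISABLE, &my_sp_flags);	/* stop future runs first */
	cancel_delayed_work_sync(&my_sp_task);	/* then flush any current one */
}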
index 2376b27..c00ad57 100644 (file)
@@ -154,7 +154,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev)
                                      "driver lock acquired\n");
                        return 1;
                }
-               ssleep(1);
+               mdelay(1000);
        } while (++i < 10);
 
        netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n");
@@ -3274,7 +3274,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev)
                if ((value & ISP_CONTROL_SR) == 0)
                        break;
 
-               ssleep(1);
+               mdelay(1000);
        } while ((--max_wait_time));
 
        /*
@@ -3310,7 +3310,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev)
                                                   ispControlStatus);
                        if ((value & ISP_CONTROL_FSR) == 0)
                                break;
-                       ssleep(1);
+                       mdelay(1000);
                } while ((--max_wait_time));
        }
        if (max_wait_time == 0)
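The ssleep() to mdelay() conversions here are about context, not style: ssleep() sleeps and is only legal in process context, while mdelay() busy-waits and is safe with a lock held, which appears to be the situation in these qla3xxx reset paths. As a rule of thumb:

#include <linux/delay.h>

static void my_wait_can_sleep(void)
{
	msleep(1000);	/* yields the CPU; process context only */
}

static void my_wait_atomic(void)
{
	mdelay(1000);	/* burns the CPU; legal under a spinlock, keep it short */
}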
index d8f0863..f6b6651 100644 (file)
@@ -1021,7 +1021,7 @@ clear_diag_irq:
 
 static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[])
 {
-       unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
+       static const unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
 
        memset(data, 0x4e, QLCNIC_ILB_PKT_SIZE);
 
index ec6f7f9..60a0c0e 100644 (file)
@@ -443,7 +443,7 @@ static int sis900_probe(struct pci_dev *pci_dev,
 #endif
 
        /* setup various bits in PCI command register */
-       ret = pci_enable_device(pci_dev);
+       ret = pcim_enable_device(pci_dev);
        if(ret) return ret;
 
        i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32));
@@ -469,7 +469,7 @@ static int sis900_probe(struct pci_dev *pci_dev,
        ioaddr = pci_iomap(pci_dev, 0, 0);
        if (!ioaddr) {
                ret = -ENOMEM;
-               goto err_out_cleardev;
+               goto err_out;
        }
 
        sis_priv = netdev_priv(net_dev);
@@ -581,8 +581,6 @@ err_unmap_tx:
                          sis_priv->tx_ring_dma);
 err_out_unmap:
        pci_iounmap(pci_dev, ioaddr);
-err_out_cleardev:
-       pci_release_regions(pci_dev);
  err_out:
        free_netdev(net_dev);
        return ret;
@@ -2499,7 +2497,6 @@ static void sis900_remove(struct pci_dev *pci_dev)
                          sis_priv->tx_ring_dma);
        pci_iounmap(pci_dev, sis_priv->ioaddr);
        free_netdev(net_dev);
-       pci_release_regions(pci_dev);
 }
 
 static int __maybe_unused sis900_suspend(struct device *dev)
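Switching sis900 to pcim_enable_device() puts the device under devres management; region requests made afterwards are released automatically on detach, which is why the err_out_cleardev label and the pci_release_regions() calls can simply be deleted. A rough sketch of the managed probe shape (my_probe is illustrative):

#include <linux/pci.h>

static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int err;

	err = pcim_enable_device(pdev); /* auto-disabled when the driver detaches */
	if (err)
		return err;

	/* on a managed device this request is undone automatically, so
	 * neither the error paths nor remove() call pci_release_regions()
	 */
	err = pci_request_regions(pdev, "my_drv");
	if (err)
		return err;

	/* ... BAR mapping and the rest of device setup ... */
	return 0;
}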
index c52a38d..72e42a8 100644 (file)
@@ -23,6 +23,7 @@ config SMC9194
        tristate "SMC 9194 support"
        depends on ISA
        select CRC32
+       select NETDEV_LEGACY_INIT
        help
          This is support for the SMC9xxx based Ethernet cards. Choose this
          option if you have a DELL laptop with the docking station, or
index bf7c8c8..0ce403f 100644 (file)
@@ -1508,7 +1508,7 @@ MODULE_PARM_DESC(io, "SMC 99194 I/O base address");
 MODULE_PARM_DESC(irq, "SMC 99194 IRQ number");
 MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)");
 
-int __init init_module(void)
+static int __init smc_init_module(void)
 {
        if (io == 0)
                printk(KERN_WARNING
@@ -1518,13 +1518,15 @@ int __init init_module(void)
        devSMC9194 = smc_init(-1);
        return PTR_ERR_OR_ZERO(devSMC9194);
 }
+module_init(smc_init_module);
 
-void __exit cleanup_module(void)
+static void __exit smc_cleanup_module(void)
 {
        unregister_netdev(devSMC9194);
        free_irq(devSMC9194->irq, devSMC9194);
        release_region(devSMC9194->base_addr, SMC_IO_EXTENT);
        free_netdev(devSMC9194);
 }
+module_exit(smc_cleanup_module);
 
 #endif /* MODULE */
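The smc9194 change is the standard modernization of legacy module entry points: the magic init_module()/cleanup_module() names become static functions registered through module_init()/module_exit(). The smallest complete form looks like:

#include <linux/module.h>

static int __init my_init(void)
{
	return 0;	/* registration would go here */
}
module_init(my_init);

static void __exit my_exit(void)
{
	/* unregistration/teardown */
}
module_exit(my_exit);

MODULE_LICENSE("GPL");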
index 28dd0ed..f7dc845 100644 (file)
@@ -289,10 +289,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL;
                break;
        default:
-               dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
-                       phy_modes(gmac->phy_mode));
-               err = -EINVAL;
-               goto err_remove_config_dt;
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val);
 
@@ -309,10 +306,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                        NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id);
                break;
        default:
-               dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
-                       phy_modes(gmac->phy_mode));
-               err = -EINVAL;
-               goto err_remove_config_dt;
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val);
 
@@ -329,8 +323,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                                NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id);
                break;
        default:
-               /* We don't get here; the switch above will have errored out */
-               unreachable();
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val);
 
@@ -361,6 +354,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 
        return 0;
 
+err_unsupported_phy:
+       dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
+               phy_modes(gmac->phy_mode));
+       err = -EINVAL;
+
 err_remove_config_dt:
        stmmac_remove_config_dt(pdev, plat_dat);
 
index 67ba083..b217453 100644 (file)
@@ -1249,6 +1249,7 @@ const struct stmmac_ops dwmac410_ops = {
        .config_l3_filter = dwmac4_config_l3_filter,
        .config_l4_filter = dwmac4_config_l4_filter,
        .est_configure = dwmac5_est_configure,
+       .est_irq_status = dwmac5_est_irq_status,
        .fpe_configure = dwmac5_fpe_configure,
        .fpe_send_mpacket = dwmac5_fpe_send_mpacket,
        .fpe_irq_status = dwmac5_fpe_irq_status,
@@ -1300,6 +1301,7 @@ const struct stmmac_ops dwmac510_ops = {
        .config_l3_filter = dwmac4_config_l3_filter,
        .config_l4_filter = dwmac4_config_l4_filter,
        .est_configure = dwmac5_est_configure,
+       .est_irq_status = dwmac5_est_irq_status,
        .fpe_configure = dwmac5_fpe_configure,
        .fpe_send_mpacket = dwmac5_fpe_send_mpacket,
        .fpe_irq_status = dwmac5_fpe_irq_status,
index 006fd42..1501e89 100644 (file)
@@ -8191,8 +8191,9 @@ static int niu_pci_vpd_fetch(struct niu *np, u32 start)
                err = niu_pci_vpd_scan_props(np, here, end);
                if (err < 0)
                        return err;
+               /* err == 1 is not an error */
                if (err == 1)
-                       return -EINVAL;
+                       return 0;
        }
        return 0;
 }
@@ -9207,7 +9208,7 @@ static int niu_get_of_props(struct niu *np)
        else
                dp = pci_device_to_OF_node(np->pdev);
 
-       phy_type = of_get_property(dp, "phy-type", &prop_len);
+       phy_type = of_get_property(dp, "phy-type", NULL);
        if (!phy_type) {
                netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp);
                return -EINVAL;
@@ -9241,12 +9242,12 @@ static int niu_get_of_props(struct niu *np)
                return -EINVAL;
        }
 
-       model = of_get_property(dp, "model", &prop_len);
+       model = of_get_property(dp, "model", NULL);
 
        if (model)
                strcpy(np->vpd.model, model);
 
-       if (of_find_property(dp, "hot-swappable-phy", &prop_len)) {
+       if (of_find_property(dp, "hot-swappable-phy", NULL)) {
                np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER |
                        NIU_FLAGS_HOTPLUG_PHY);
        }
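The niu hunks drop the &prop_len argument because nothing reads the length back; both of_get_property() and of_find_property() accept a NULL length pointer. For example:

#include <linux/of.h>

static const char *my_get_model(const struct device_node *np)
{
	/* NULL length pointer: only the value (or existence) matters */
	return of_get_property(np, "model", NULL);
}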
index 7ac8e5e..affcf92 100644 (file)
@@ -64,7 +64,6 @@ config TI_CPSW
 config TI_CPSW_SWITCHDEV
        tristate "TI CPSW Switch Support with switchdev"
        depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
-       depends on BRIDGE || BRIDGE=n
        depends on NET_SWITCHDEV
        depends on TI_CPTS || !TI_CPTS
        select PAGE_POOL
@@ -110,7 +109,6 @@ config TI_K3_AM65_CPSW_NUSS
 config TI_K3_AM65_CPSW_SWITCHDEV
        bool "TI K3 AM654x/J721E CPSW Switch mode support"
        depends on TI_K3_AM65_CPSW_NUSS
-       depends on BRIDGE || BRIDGE=n
        depends on NET_SWITCHDEV
        help
         This enables switchdev support for TI K3 CPSWxG Ethernet
index 4f67d1a..130346f 100644 (file)
@@ -7,7 +7,6 @@
 
 #include <linux/clk.h>
 #include <linux/etherdevice.h>
-#include <linux/if_bridge.h>
 #include <linux/if_vlan.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
@@ -28,6 +27,7 @@
 #include <linux/sys_soc.h>
 #include <linux/dma/ti-cppi5.h>
 #include <linux/dma/k3-udma-glue.h>
+#include <net/switchdev.h>
 
 #include "cpsw_ale.h"
 #include "cpsw_sl.h"
@@ -519,6 +519,10 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common,
        }
 
        napi_enable(&common->napi_rx);
+       if (common->rx_irq_disabled) {
+               common->rx_irq_disabled = false;
+               enable_irq(common->rx_chns.irq);
+       }
 
        dev_dbg(common->dev, "cpsw_nuss started\n");
        return 0;
@@ -872,8 +876,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
 
        dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget);
 
-       if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
-               enable_irq(common->rx_chns.irq);
+       if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
+               if (common->rx_irq_disabled) {
+                       common->rx_irq_disabled = false;
+                       enable_irq(common->rx_chns.irq);
+               }
+       }
 
        return num_rx;
 }
@@ -1078,19 +1086,20 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget)
        else
                num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget);
 
-       num_tx = min(num_tx, budget);
-       if (num_tx < budget) {
-               napi_complete(napi_tx);
+       if (num_tx >= budget)
+               return budget;
+
+       if (napi_complete_done(napi_tx, num_tx))
                enable_irq(tx_chn->irq);
-       }
 
-       return num_tx;
+       return 0;
 }
 
 static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id)
 {
        struct am65_cpsw_common *common = dev_id;
 
+       common->rx_irq_disabled = true;
        disable_irq_nosync(irq);
        napi_schedule(&common->napi_rx);
 
@@ -2061,8 +2070,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm
 
        for (i = 1; i <= common->port_num; i++) {
                struct am65_cpsw_port *port = am65_common_get_port(common, i);
-               struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev);
+               struct am65_cpsw_ndev_priv *priv;
+
+               if (!port->ndev)
+                       continue;
 
+               priv = am65_ndev_to_priv(port->ndev);
                priv->offload_fwd_mark = set_val;
        }
 }
@@ -2409,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
        int i;
 
        common->devlink =
-               devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv));
+               devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev);
        if (!common->devlink)
                return -ENOMEM;
 
        dl_priv = devlink_priv(common->devlink);
        dl_priv->common = common;
 
-       ret = devlink_register(common->devlink, dev);
+       ret = devlink_register(common->devlink);
        if (ret) {
                dev_err(dev, "devlink reg fail ret:%d\n", ret);
                goto dl_free;
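The new rx_irq_disabled flag keeps the disable_irq_nosync()/enable_irq() calls balanced: the hard IRQ handler records that it masked the line before scheduling NAPI, and both the poll-complete path and the open path re-enable only when the flag says the line is actually off. Condensed (types abbreviated):

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct my_common {
	struct napi_struct napi_rx;
	bool rx_irq_disabled;
	int rx_irq;
};

static irqreturn_t my_rx_irq(int irq, void *dev_id)
{
	struct my_common *c = dev_id;

	c->rx_irq_disabled = true;	/* remember we masked the line */
	disable_irq_nosync(irq);
	napi_schedule(&c->napi_rx);
	return IRQ_HANDLED;
}

static void my_rx_poll_done(struct my_common *c)
{
	if (c->rx_irq_disabled) {	/* re-enable only if we masked it */
		c->rx_irq_disabled = false;
		enable_irq(c->rx_irq);
	}
}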
index 5d93e34..048ed10 100644 (file)
@@ -126,6 +126,8 @@ struct am65_cpsw_common {
        struct am65_cpsw_rx_chn rx_chns;
        struct napi_struct      napi_rx;
 
+       bool                    rx_irq_disabled;
+
        u32                     nuss_ver;
        u32                     cpsw_ver;
        unsigned long           bus_freq;
index abf9a2a..9f70e40 100644 (file)
@@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        skb->protocol = eth_type_trans(skb, ndev);
 
        /* mark skb for recycling */
-       skb_mark_for_recycle(skb, page, pool);
+       skb_mark_for_recycle(skb);
        netif_receive_skb(skb);
 
        ndev->stats.rx_bytes += len;
@@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
        struct cpdma_chan *txch;
        int ret, q_idx;
 
-       if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) {
+       if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) {
                cpsw_err(priv, tx_err, "packet pad failed\n");
                ndev->stats.tx_dropped++;
                return NET_XMIT_DROP;
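skb_padto() only guarantees tailroom and zero padding; it does not touch skb->len, so a sub-minimum frame can still reach the hardware with its short length. skb_put_padto() additionally advances skb->len to the padded size, which is what these TX paths want. Both free the skb and return nonzero on allocation failure, hence the bare error return above. Roughly:

#include <linux/skbuff.h>

static int my_pad_for_tx(struct sk_buff *skb, unsigned int min_len)
{
	/* zero-pads short frames and sets skb->len = min_len;
	 * on failure the skb has already been freed
	 */
	if (skb_put_padto(skb, min_len))
		return -ENOMEM;

	return 0;
}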
index b4f55ff..ff3a96b 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/irqreturn.h>
 #include <linux/interrupt.h>
-#include <linux/if_bridge.h>
 #include <linux/if_ether.h>
 #include <linux/etherdevice.h>
 #include <linux/net_tstamp.h>
@@ -29,6 +28,7 @@
 #include <linux/kmemleak.h>
 #include <linux/sys_soc.h>
 
+#include <net/switchdev.h>
 #include <net/page_pool.h>
 #include <net/pkt_cls.h>
 #include <net/devlink.h>
@@ -375,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        skb->protocol = eth_type_trans(skb, ndev);
 
        /* mark skb for recycling */
-       skb_mark_for_recycle(skb, page, pool);
+       skb_mark_for_recycle(skb);
        netif_receive_skb(skb);
 
        ndev->stats.rx_bytes += len;
@@ -1800,14 +1800,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw)
        struct cpsw_devlink *dl_priv;
        int ret = 0;
 
-       cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv));
+       cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev);
        if (!cpsw->devlink)
                return -ENOMEM;
 
        dl_priv = devlink_priv(cpsw->devlink);
        dl_priv->cpsw = cpsw;
 
-       ret = devlink_register(cpsw->devlink, dev);
+       ret = devlink_register(cpsw->devlink);
        if (ret) {
                dev_err(dev, "DL reg fail ret:%d\n", ret);
                goto dl_free;
index 6377966..b1c5cbe 100644 (file)
@@ -943,7 +943,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev)
                goto fail_tx;
        }
 
-       ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
+       ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
        if (unlikely(ret_code < 0)) {
                if (netif_msg_tx_err(priv) && net_ratelimit())
                        dev_err(emac_dev, "DaVinci EMAC: packet pad failed");
index 3de67ba..a2fcdb1 100644 (file)
@@ -198,77 +198,6 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id)
        gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id));
 }
 
-/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */
-static void gsi_irq_setup(struct gsi *gsi)
-{
-       /* Disable all interrupt types */
-       gsi_irq_type_update(gsi, 0);
-
-       /* Clear all type-specific interrupt masks */
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
-
-       /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
-       if (gsi->version > IPA_VERSION_3_1) {
-               u32 offset;
-
-               /* These registers are in the non-adjusted address range */
-               offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
-               iowrite32(0, gsi->virt_raw + offset);
-               offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
-               iowrite32(0, gsi->virt_raw + offset);
-       }
-
-       iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
-}
-
-/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
-static int gsi_ring_setup(struct gsi *gsi)
-{
-       struct device *dev = gsi->dev;
-       u32 count;
-       u32 val;
-
-       if (gsi->version < IPA_VERSION_3_5_1) {
-               /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
-               gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
-               gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
-
-               return 0;
-       }
-
-       val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
-
-       count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
-       if (!count) {
-               dev_err(dev, "GSI reports zero channels supported\n");
-               return -EINVAL;
-       }
-       if (count > GSI_CHANNEL_COUNT_MAX) {
-               dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
-                        GSI_CHANNEL_COUNT_MAX, count);
-               count = GSI_CHANNEL_COUNT_MAX;
-       }
-       gsi->channel_count = count;
-
-       count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
-       if (!count) {
-               dev_err(dev, "GSI reports zero event rings supported\n");
-               return -EINVAL;
-       }
-       if (count > GSI_EVT_RING_COUNT_MAX) {
-               dev_warn(dev,
-                        "limiting to %u event rings; hardware supports %u\n",
-                        GSI_EVT_RING_COUNT_MAX, count);
-               count = GSI_EVT_RING_COUNT_MAX;
-       }
-       gsi->evt_ring_count = count;
-
-       return 0;
-}
-
 /* Event ring commands are performed one at a time.  Their completion
  * is signaled by the event ring control GSI interrupt type, which is
  * only enabled when we issue an event ring command.  Only the event
@@ -920,12 +849,13 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
        /* All done! */
 }
 
-static int __gsi_channel_start(struct gsi_channel *channel, bool start)
+static int __gsi_channel_start(struct gsi_channel *channel, bool resume)
 {
        struct gsi *gsi = channel->gsi;
        int ret;
 
-       if (!start)
+       /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+       if (resume && gsi->version < IPA_VERSION_4_0)
                return 0;
 
        mutex_lock(&gsi->mutex);
@@ -947,7 +877,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id)
        napi_enable(&channel->napi);
        gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
 
-       ret = __gsi_channel_start(channel, true);
+       ret = __gsi_channel_start(channel, false);
        if (ret) {
                gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
                napi_disable(&channel->napi);
@@ -971,7 +901,7 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel)
        return ret;
 }
 
-static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
+static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend)
 {
        struct gsi *gsi = channel->gsi;
        int ret;
@@ -979,7 +909,8 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
        /* Wait for any underway transactions to complete before stopping. */
        gsi_channel_trans_quiesce(channel);
 
-       if (!stop)
+       /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+       if (suspend && gsi->version < IPA_VERSION_4_0)
                return 0;
 
        mutex_lock(&gsi->mutex);
@@ -997,7 +928,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
        struct gsi_channel *channel = &gsi->channel[channel_id];
        int ret;
 
-       ret = __gsi_channel_stop(channel, true);
+       ret = __gsi_channel_stop(channel, false);
        if (ret)
                return ret;
 
@@ -1026,13 +957,13 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell)
        mutex_unlock(&gsi->mutex);
 }
 
-/* Stop a STARTED channel for suspend (using stop if requested) */
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
+/* Stop a started channel for suspend */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id)
 {
        struct gsi_channel *channel = &gsi->channel[channel_id];
        int ret;
 
-       ret = __gsi_channel_stop(channel, stop);
+       ret = __gsi_channel_stop(channel, true);
        if (ret)
                return ret;
 
@@ -1042,12 +973,24 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
        return 0;
 }
 
-/* Resume a suspended channel (starting will be requested if STOPPED) */
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start)
+/* Resume a suspended channel (starting if stopped) */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id)
 {
        struct gsi_channel *channel = &gsi->channel[channel_id];
 
-       return __gsi_channel_start(channel, start);
+       return __gsi_channel_start(channel, true);
+}
+
+/* Prevent all GSI interrupts while suspended */
+void gsi_suspend(struct gsi *gsi)
+{
+       disable_irq(gsi->irq);
+}
+
+/* Allow all GSI interrupts again when resuming */
+void gsi_resume(struct gsi *gsi)
+{
+       enable_irq(gsi->irq);
 }
 
 /**
@@ -1372,33 +1315,20 @@ static irqreturn_t gsi_isr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */
 static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
 {
-       struct device *dev = &pdev->dev;
-       unsigned int irq;
        int ret;
 
        ret = platform_get_irq_byname(pdev, "gsi");
        if (ret <= 0)
                return ret ? : -EINVAL;
 
-       irq = ret;
-
-       ret = request_irq(irq, gsi_isr, 0, "gsi", gsi);
-       if (ret) {
-               dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret);
-               return ret;
-       }
-       gsi->irq = irq;
+       gsi->irq = ret;
 
        return 0;
 }
 
-static void gsi_irq_exit(struct gsi *gsi)
-{
-       free_irq(gsi->irq, gsi);
-}
-
 /* Return the transaction associated with a transfer completion event */
 static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
                                         struct gsi_event *event)
@@ -1876,6 +1806,93 @@ static void gsi_channel_teardown(struct gsi *gsi)
        gsi_irq_disable(gsi);
 }
 
+/* Turn off all GSI interrupts initially */
+static int gsi_irq_setup(struct gsi *gsi)
+{
+       int ret;
+
+       /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
+       iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+
+       /* Disable all interrupt types */
+       gsi_irq_type_update(gsi, 0);
+
+       /* Clear all type-specific interrupt masks */
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
+
+       /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
+       if (gsi->version > IPA_VERSION_3_1) {
+               u32 offset;
+
+               /* These registers are in the non-adjusted address range */
+               offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
+               iowrite32(0, gsi->virt_raw + offset);
+               offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
+               iowrite32(0, gsi->virt_raw + offset);
+       }
+
+       iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
+
+       ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi);
+       if (ret)
+               dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret);
+
+       return ret;
+}
+
+static void gsi_irq_teardown(struct gsi *gsi)
+{
+       free_irq(gsi->irq, gsi);
+}
+
+/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
+static int gsi_ring_setup(struct gsi *gsi)
+{
+       struct device *dev = gsi->dev;
+       u32 count;
+       u32 val;
+
+       if (gsi->version < IPA_VERSION_3_5_1) {
+               /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
+               gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
+               gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
+
+               return 0;
+       }
+
+       val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
+
+       count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
+       if (!count) {
+               dev_err(dev, "GSI reports zero channels supported\n");
+               return -EINVAL;
+       }
+       if (count > GSI_CHANNEL_COUNT_MAX) {
+               dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
+                        GSI_CHANNEL_COUNT_MAX, count);
+               count = GSI_CHANNEL_COUNT_MAX;
+       }
+       gsi->channel_count = count;
+
+       count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
+       if (!count) {
+               dev_err(dev, "GSI reports zero event rings supported\n");
+               return -EINVAL;
+       }
+       if (count > GSI_EVT_RING_COUNT_MAX) {
+               dev_warn(dev,
+                        "limiting to %u event rings; hardware supports %u\n",
+                        GSI_EVT_RING_COUNT_MAX, count);
+               count = GSI_EVT_RING_COUNT_MAX;
+       }
+       gsi->evt_ring_count = count;
+
+       return 0;
+}
+
 /* Setup function for GSI.  GSI firmware must be loaded and initialized */
 int gsi_setup(struct gsi *gsi)
 {
@@ -1889,25 +1906,34 @@ int gsi_setup(struct gsi *gsi)
                return -EIO;
        }
 
-       gsi_irq_setup(gsi);             /* No matching teardown required */
+       ret = gsi_irq_setup(gsi);
+       if (ret)
+               return ret;
 
        ret = gsi_ring_setup(gsi);      /* No matching teardown required */
        if (ret)
-               return ret;
+               goto err_irq_teardown;
 
        /* Initialize the error log */
        iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET);
 
-       /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
-       iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+       ret = gsi_channel_setup(gsi);
+       if (ret)
+               goto err_irq_teardown;
 
-       return gsi_channel_setup(gsi);
+       return 0;
+
+err_irq_teardown:
+       gsi_irq_teardown(gsi);
+
+       return ret;
 }
 
 /* Inverse of gsi_setup() */
 void gsi_teardown(struct gsi *gsi)
 {
        gsi_channel_teardown(gsi);
+       gsi_irq_teardown(gsi);
 }
 
 /* Initialize a channel's event ring */
@@ -2204,20 +2230,18 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev,
 
        init_completion(&gsi->completion);
 
-       ret = gsi_irq_init(gsi, pdev);
+       ret = gsi_irq_init(gsi, pdev);  /* No matching exit required */
        if (ret)
                goto err_iounmap;
 
        ret = gsi_channel_init(gsi, count, data);
        if (ret)
-               goto err_irq_exit;
+               goto err_iounmap;
 
        mutex_init(&gsi->mutex);
 
        return 0;
 
-err_irq_exit:
-       gsi_irq_exit(gsi);
 err_iounmap:
        iounmap(gsi->virt_raw);
 
@@ -2229,7 +2253,6 @@ void gsi_exit(struct gsi *gsi)
 {
        mutex_destroy(&gsi->mutex);
        gsi_channel_exit(gsi);
-       gsi_irq_exit(gsi);
        iounmap(gsi->virt_raw);
 }
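The gsi_suspend()/gsi_resume() helpers added above gate interrupts at the line level. Worth noting: disable_irq() waits for any in-flight handler to finish, so once gsi_suspend() returns no GSI interrupt code can still be running. The pairing, generically (my_dev is hypothetical):

#include <linux/interrupt.h>

struct my_dev { int irq; };

static void my_suspend_irqs(struct my_dev *d)
{
	disable_irq(d->irq);	/* masks the line and waits for handlers */
}

static void my_resume_irqs(struct my_dev *d)
{
	enable_irq(d->irq);	/* unmask; a pending interrupt may be resent */
}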
 
index 81cd7b0..88b80dc 100644 (file)
@@ -232,8 +232,35 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id);
  */
 void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell);
 
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop);
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start);
+/**
+ * gsi_suspend() - Prepare the GSI subsystem for suspend
+ * @gsi:       GSI pointer
+ */
+void gsi_suspend(struct gsi *gsi);
+
+/**
+ * gsi_resume() - Resume the GSI subsystem following suspend
+ * @gsi:       GSI pointer
+ */
+void gsi_resume(struct gsi *gsi);
+
+/**
+ * gsi_channel_suspend() - Suspend a GSI channel
+ * @gsi:       GSI pointer
+ * @channel_id:        Channel to suspend
+ *
+ * For IPA v4.0+, suspend is implemented by stopping the channel.
+ */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id);
+
+/**
+ * gsi_channel_resume() - Resume a suspended GSI channel
+ * @gsi:       GSI pointer
+ * @channel_id:        Channel to resume
+ *
+ * For IPA v4.0+, the stopped channel is started again.
+ */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id);
 
 /**
  * gsi_init() - Initialize the GSI subsystem
index 71ba996..34152fe 100644 (file)
@@ -27,20 +27,9 @@ struct ipa_clock;
 struct ipa_smp2p;
 struct ipa_interrupt;
 
-/**
- * enum ipa_flag - IPA state flags
- * @IPA_FLAG_RESUMED:  Whether resume from suspend has been signaled
- * @IPA_FLAG_COUNT:    Number of defined IPA flags
- */
-enum ipa_flag {
-       IPA_FLAG_RESUMED,
-       IPA_FLAG_COUNT,         /* Last; not a flag */
-};
-
 /**
  * struct ipa - IPA information
  * @gsi:               Embedded GSI structure
- * @flags:             Boolean state flags
  * @version:           IPA hardware version
  * @pdev:              Platform device
  * @completion:                Used to signal pipeline clear transfer complete
@@ -83,7 +72,6 @@ enum ipa_flag {
  */
 struct ipa {
        struct gsi gsi;
-       DECLARE_BITMAP(flags, IPA_FLAG_COUNT);
        enum ipa_version version;
        struct platform_device *pdev;
        struct completion completion;
index 69ef6ea..a67b613 100644 (file)
@@ -9,9 +9,12 @@
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/interconnect.h>
+#include <linux/pm.h>
+#include <linux/bitops.h>
 
 #include "ipa.h"
 #include "ipa_clock.h"
+#include "ipa_endpoint.h"
 #include "ipa_modem.h"
 #include "ipa_data.h"
 
@@ -42,11 +45,22 @@ struct ipa_interconnect {
        u32 peak_bandwidth;
 };
 
+/**
+ * enum ipa_power_flag - IPA power flags
+ * @IPA_POWER_FLAG_RESUMED:    Whether resume from suspend has been signaled
+ * @IPA_POWER_FLAG_COUNT:      Number of defined power flags
+ */
+enum ipa_power_flag {
+       IPA_POWER_FLAG_RESUMED,
+       IPA_POWER_FLAG_COUNT,           /* Last; not a flag */
+};
+
 /**
  * struct ipa_clock - IPA clocking information
  * @count:             Clocking reference count
  * @mutex:             Protects clock enable/disable
  * @core:              IPA core clock
+ * @flags:             Boolean state flags
  * @interconnect_count:        Number of elements in interconnect[]
  * @interconnect:      Interconnect array
  */
@@ -54,6 +68,7 @@ struct ipa_clock {
        refcount_t count;
        struct mutex mutex; /* protects clock enable/disable */
        struct clk *core;
+       DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
        u32 interconnect_count;
        struct ipa_interconnect *interconnect;
 };
@@ -144,8 +159,12 @@ static int ipa_interconnect_enable(struct ipa *ipa)
                ret = icc_set_bw(interconnect->path,
                                 interconnect->average_bandwidth,
                                 interconnect->peak_bandwidth);
-               if (ret)
+               if (ret) {
+                       dev_err(&ipa->pdev->dev,
+                               "error %d enabling %s interconnect\n",
+                               ret, icc_get_name(interconnect->path));
                        goto out_unwind;
+               }
                interconnect++;
        }
 
@@ -159,10 +178,11 @@ out_unwind:
 }
 
 /* To disable an interconnect, we just set its bandwidth to 0 */
-static void ipa_interconnect_disable(struct ipa *ipa)
+static int ipa_interconnect_disable(struct ipa *ipa)
 {
        struct ipa_interconnect *interconnect;
        struct ipa_clock *clock = ipa->clock;
+       struct device *dev = &ipa->pdev->dev;
        int result = 0;
        u32 count;
        int ret;
@@ -172,13 +192,16 @@ static void ipa_interconnect_disable(struct ipa *ipa)
        while (count--) {
                interconnect--;
                ret = icc_set_bw(interconnect->path, 0, 0);
-               if (ret && !result)
-                       result = ret;
+               if (ret) {
+                       dev_err(dev, "error %d disabling %s interconnect\n",
+                               ret, icc_get_name(interconnect->path));
+                       /* Try to disable all; record only the first error */
+                       if (!result)
+                               result = ret;
+               }
        }
 
-       if (result)
-               dev_err(&ipa->pdev->dev,
-                       "error %d disabling IPA interconnects\n", ret);
+       return result;
 }
 
 /* Turn on IPA clocks, including interconnects */
@@ -191,8 +214,10 @@ static int ipa_clock_enable(struct ipa *ipa)
                return ret;
 
        ret = clk_prepare_enable(ipa->clock->core);
-       if (ret)
-               ipa_interconnect_disable(ipa);
+       if (ret) {
+               dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret);
+               (void)ipa_interconnect_disable(ipa);
+       }
 
        return ret;
 }
@@ -201,7 +226,7 @@ static int ipa_clock_enable(struct ipa *ipa)
 static void ipa_clock_disable(struct ipa *ipa)
 {
        clk_disable_unprepare(ipa->clock->core);
-       ipa_interconnect_disable(ipa);
+       (void)ipa_interconnect_disable(ipa);
 }
 
 /* Get an IPA clock reference, but only if the reference count is
@@ -238,13 +263,8 @@ void ipa_clock_get(struct ipa *ipa)
                goto out_mutex_unlock;
 
        ret = ipa_clock_enable(ipa);
-       if (ret) {
-               dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret);
-               goto out_mutex_unlock;
-       }
-
-       refcount_set(&clock->count, 1);
-
+       if (!ret)
+               refcount_set(&clock->count, 1);
 out_mutex_unlock:
        mutex_unlock(&clock->mutex);
 }
@@ -271,6 +291,40 @@ u32 ipa_clock_rate(struct ipa *ipa)
        return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0;
 }
 
+/**
+ * ipa_suspend_handler() - Handle the suspend IPA interrupt
+ * @ipa:       IPA pointer
+ * @irq_id:    IPA interrupt type (unused)
+ *
+ * If an RX endpoint is suspended, and the IPA has a packet destined for
+ * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP
+ * that it should resume the endpoint.  If we get one of these interrupts
+ * we just wake up the system.
+ */
+static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
+{
+       /* Just report the event, and let system resume handle the rest.
+        * More than one endpoint could signal this; if so, ignore
+        * all but the first.
+        */
+       if (!test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags))
+               pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
+
+       /* Acknowledge/clear the suspend interrupt on all endpoints */
+       ipa_interrupt_suspend_clear_all(ipa->interrupt);
+}
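The test_and_set_bit() guard above makes the wake report idempotent across endpoints: only the first SUSPEND interrupt after a suspend triggers pm_wakeup_dev_event(), and the rest are ignored until the flag is cleared on the next suspend. In isolation (my_* names are placeholders):

#include <linux/pm_wakeup.h>
#include <linux/bitops.h>

#define MY_RESUMED	0

static unsigned long my_flags;

/* Report a wake event once, even if several endpoints signal it */
static void my_wake_once(struct device *dev)
{
	if (!test_and_set_bit(MY_RESUMED, &my_flags))
		pm_wakeup_dev_event(dev, 0, true); /* hard wakeup, no timeout */
}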
+
+void ipa_power_setup(struct ipa *ipa)
+{
+       ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
+                         ipa_suspend_handler);
+}
+
+void ipa_power_teardown(struct ipa *ipa)
+{
+       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+}
+
 /* Initialize IPA clocking */
 struct ipa_clock *
 ipa_clock_init(struct device *dev, const struct ipa_clock_data *data)
@@ -329,3 +383,62 @@ void ipa_clock_exit(struct ipa_clock *clock)
        kfree(clock);
        clk_put(clk);
 }
+
+/**
+ * ipa_suspend() - Power management system suspend callback
+ * @dev:       IPA device structure
+ *
+ * Return:     Always returns zero
+ *
+ * Called by the PM framework when a system suspend operation is invoked.
+ * Suspends endpoints and releases the clock reference held to keep
+ * the IPA clock running until this point.
+ */
+static int ipa_suspend(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+
+       /* Endpoints aren't usable until setup is complete */
+       if (ipa->setup_complete) {
+               __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags);
+               ipa_endpoint_suspend(ipa);
+               gsi_suspend(&ipa->gsi);
+       }
+
+       ipa_clock_put(ipa);
+
+       return 0;
+}
+
+/**
+ * ipa_resume() - Power management system resume callback
+ * @dev:       IPA device structure
+ *
+ * Return:     Always returns 0
+ *
+ * Called by the PM framework when a system resume operation is invoked.
+ * Takes an IPA clock reference to keep the clock running until suspend,
+ * and resumes endpoints.
+ */
+static int ipa_resume(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+
+       /* This clock reference will keep the IPA out of suspend
+        * until we get a power management suspend request.
+        */
+       ipa_clock_get(ipa);
+
+       /* Endpoints aren't usable until setup is complete */
+       if (ipa->setup_complete) {
+               gsi_resume(&ipa->gsi);
+               ipa_endpoint_resume(ipa);
+       }
+
+       return 0;
+}
+
+const struct dev_pm_ops ipa_pm_ops = {
+       .suspend        = ipa_suspend,
+       .resume         = ipa_resume,
+};
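With ipa_suspend()/ipa_resume() relocated here, ipa_pm_ops is exported through ipa_clock.h and the platform driver only needs to reference it. A sketch of the consuming side, assuming a placeholder my-ipa platform driver:

#include <linux/platform_device.h>
#include <linux/pm.h>

extern const struct dev_pm_ops ipa_pm_ops;	/* declared in ipa_clock.h */

static int my_probe(struct platform_device *pdev)
{
	return 0;	/* real setup elsewhere */
}

static struct platform_driver my_driver = {
	.probe	= my_probe,
	.driver	= {
		.name	= "my-ipa",
		.pm	= &ipa_pm_ops,	/* system suspend/resume callbacks */
	},
};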
index 1fe6347..2a0f7ff 100644 (file)
@@ -11,6 +11,9 @@ struct device;
 struct ipa;
 struct ipa_clock_data;
 
+/* IPA device power management function block */
+extern const struct dev_pm_ops ipa_pm_ops;
+
 /**
  * ipa_clock_rate() - Return the current IPA core clock rate
  * @ipa:       IPA structure
@@ -19,6 +22,18 @@ struct ipa_clock_data;
  */
 u32 ipa_clock_rate(struct ipa *ipa);
 
+/**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa:       IPA pointer
+ */
+void ipa_power_setup(struct ipa *ipa);
+
+/**
+ * ipa_power_teardown() - Inverse of ipa_power_setup()
+ * @ipa:       IPA pointer
+ */
+void ipa_power_teardown(struct ipa *ipa);
+
 /**
  * ipa_clock_init() - Initialize IPA clocking
  * @dev:       IPA device
index 6ab9282..8d83e14 100644 (file)
@@ -418,18 +418,13 @@ static const struct ipa_mem_data ipa_mem_data = {
 /* Interconnect rates are in 1000 byte/second units */
 static const struct ipa_interconnect_data ipa_interconnect_data[] = {
        {
-               .name                   = "ipa_to_llcc",
+               .name                   = "memory",
                .peak_bandwidth         = 600000,       /* 600 MBps */
                .average_bandwidth      = 150000,       /* 150 MBps */
        },
-       {
-               .name                   = "llcc_to_ebi1",
-               .peak_bandwidth         = 1804000,      /* 1.804 GBps */
-               .average_bandwidth      = 150000,       /* 150 MBps */
-       },
        /* Average rate is unused for the next interconnect */
        {
-               .name                   = "appss_to_ipa",
+               .name                   = "config",
                .peak_bandwidth         = 74000,        /* 74 MBps */
                .average_bandwidth      = 0,            /* unused */
        },
index 8070d1a..08ee37a 100644 (file)
@@ -1587,7 +1587,6 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
 {
        struct device *dev = &endpoint->ipa->pdev->dev;
        struct gsi *gsi = &endpoint->ipa->gsi;
-       bool stop_channel;
        int ret;
 
        if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1598,11 +1597,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
                (void)ipa_endpoint_program_suspend(endpoint, true);
        }
 
-       /* Starting with IPA v4.0, endpoints are suspended by stopping the
-        * underlying GSI channel rather than using endpoint suspend mode.
-        */
-       stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
-       ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel);
+       ret = gsi_channel_suspend(gsi, endpoint->channel_id);
        if (ret)
                dev_err(dev, "error %d suspending channel %u\n", ret,
                        endpoint->channel_id);
@@ -1612,7 +1607,6 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 {
        struct device *dev = &endpoint->ipa->pdev->dev;
        struct gsi *gsi = &endpoint->ipa->gsi;
-       bool start_channel;
        int ret;
 
        if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1621,11 +1615,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
        if (!endpoint->toward_ipa)
                (void)ipa_endpoint_program_suspend(endpoint, false);
 
-       /* Starting with IPA v4.0, the underlying GSI channel must be
-        * restarted for resume.
-        */
-       start_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
-       ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
+       ret = gsi_channel_resume(gsi, endpoint->channel_id);
        if (ret)
                dev_err(dev, "error %d resuming channel %u\n", ret,
                        endpoint->channel_id);
index 2e728d4..25bbb45 100644 (file)
 /* Divider for 19.2 MHz crystal oscillator clock to get common timer clock */
 #define IPA_XO_CLOCK_DIVIDER   192     /* 1 is subtracted where used */
 
-/**
- * ipa_suspend_handler() - Handle the suspend IPA interrupt
- * @ipa:       IPA pointer
- * @irq_id:    IPA interrupt type (unused)
- *
- * If an RX endpoint is in suspend state, and the IPA has a packet
- * destined for that endpoint, the IPA generates a SUSPEND interrupt
- * to inform the AP that it should resume the endpoint.  If we get
- * one of these interrupts we just resume everything.
- */
-static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
-{
-       /* Just report the event, and let system resume handle the rest.
-        * More than one endpoint could signal this; if so, ignore
-        * all but the first.
-        */
-       if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags))
-               pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
-
-       /* Acknowledge/clear the suspend interrupt on all endpoints */
-       ipa_interrupt_suspend_clear_all(ipa->interrupt);
-}
-
 /**
  * ipa_setup() - Set up IPA hardware
  * @ipa:       IPA pointer
@@ -124,12 +101,11 @@ int ipa_setup(struct ipa *ipa)
        if (ret)
                return ret;
 
-       ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
-                         ipa_suspend_handler);
+       ipa_power_setup(ipa);
 
        ret = device_init_wakeup(dev, true);
        if (ret)
-               goto err_interrupt_remove;
+               goto err_gsi_teardown;
 
        ipa_endpoint_setup(ipa);
 
@@ -177,9 +153,9 @@ err_command_disable:
        ipa_endpoint_disable_one(command_endpoint);
 err_endpoint_teardown:
        ipa_endpoint_teardown(ipa);
+       ipa_power_teardown(ipa);
        (void)device_init_wakeup(dev, false);
-err_interrupt_remove:
-       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+err_gsi_teardown:
        gsi_teardown(&ipa->gsi);
 
        return ret;
@@ -204,8 +180,8 @@ static void ipa_teardown(struct ipa *ipa)
        command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX];
        ipa_endpoint_disable_one(command_endpoint);
        ipa_endpoint_teardown(ipa);
+       ipa_power_teardown(ipa);
        (void)device_init_wakeup(&ipa->pdev->dev, false);
-       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
        gsi_teardown(&ipa->gsi);
 }
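
The relabeling to err_gsi_teardown (and err_uc_deconfig in the next hunk) follows the usual kernel convention that an error label is named after the first cleanup step performed at it, so the unwind path reads as the setup sequence in reverse. A generic illustration (step_a/step_b and friends are placeholders, not IPA functions):

static int example_setup(struct example *ex)
{
        int ret;

        ret = step_a(ex);
        if (ret)
                return ret;

        ret = step_b(ex);
        if (ret)
                goto err_undo_a;        /* label names the next cleanup */

        ret = step_c(ex);
        if (ret)
                goto err_undo_b;

        return 0;

err_undo_b:
        undo_b(ex);
err_undo_a:
        undo_a(ex);
        return ret;
}
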
 
@@ -474,7 +450,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 
        ret = ipa_endpoint_config(ipa);
        if (ret)
-               goto err_interrupt_deconfig;
+               goto err_uc_deconfig;
 
        ipa_table_config(ipa);          /* No deconfig required */
 
@@ -491,7 +467,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 
 err_endpoint_deconfig:
        ipa_endpoint_deconfig(ipa);
-err_interrupt_deconfig:
+err_uc_deconfig:
        ipa_uc_deconfig(ipa);
        ipa_interrupt_deconfig(ipa->interrupt);
        ipa->interrupt = NULL;
@@ -874,62 +850,6 @@ static void ipa_shutdown(struct platform_device *pdev)
                dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret);
 }
 
-/**
- * ipa_suspend() - Power management system suspend callback
- * @dev:       IPA device structure
- *
- * Return:     Always returns zero
- *
- * Called by the PM framework when a system suspend operation is invoked.
- * Suspends endpoints and releases the clock reference held to keep
- * the IPA clock running until this point.
- */
-static int ipa_suspend(struct device *dev)
-{
-       struct ipa *ipa = dev_get_drvdata(dev);
-
-       /* Endpoints aren't usable until setup is complete */
-       if (ipa->setup_complete) {
-               __clear_bit(IPA_FLAG_RESUMED, ipa->flags);
-               ipa_endpoint_suspend(ipa);
-       }
-
-       ipa_clock_put(ipa);
-
-       return 0;
-}
-
-/**
- * ipa_resume() - Power management system resume callback
- * @dev:       IPA device structure
- *
- * Return:     Always returns 0
- *
- * Called by the PM framework when a system resume operation is invoked.
- * Takes an IPA clock reference to keep the clock running until suspend,
- * and resumes endpoints.
- */
-static int ipa_resume(struct device *dev)
-{
-       struct ipa *ipa = dev_get_drvdata(dev);
-
-       /* This clock reference will keep the IPA out of suspend
-        * until we get a power management suspend request.
-        */
-       ipa_clock_get(ipa);
-
-       /* Endpoints aren't usable until setup is complete */
-       if (ipa->setup_complete)
-               ipa_endpoint_resume(ipa);
-
-       return 0;
-}
-
-static const struct dev_pm_ops ipa_pm_ops = {
-       .suspend        = ipa_suspend,
-       .resume         = ipa_resume,
-};
-
 static const struct attribute_group *ipa_attribute_groups[] = {
        &ipa_attribute_group,
        &ipa_feature_attribute_group,
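
With ipa_suspend()/ipa_resume() and the dev_pm_ops table gone from this file, the system sleep hooks presumably move into the new power module set up by ipa_power_setup() above. A sketch of how such relocated hooks would be wired up (the ipa_power_suspend/ipa_power_resume names are assumptions):

/* Sketch only: what a relocated ipa_power.c could export */
const struct dev_pm_ops ipa_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(ipa_power_suspend, ipa_power_resume)
};
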
index 4ea8287..ad4019e 100644 (file)
@@ -178,6 +178,9 @@ void ipa_modem_suspend(struct net_device *netdev)
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
 
+       if (!(netdev->flags & IFF_UP))
+               return;
+
        netif_stop_queue(netdev);
 
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
@@ -194,6 +197,9 @@ void ipa_modem_resume(struct net_device *netdev)
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
 
+       if (!(netdev->flags & IFF_UP))
+               return;
+
        ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
        ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
 
@@ -225,13 +231,15 @@ int ipa_modem_start(struct ipa *ipa)
        SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
        priv = netdev_priv(netdev);
        priv->ipa = ipa;
+       ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
+       ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
+       ipa->modem_netdev = netdev;
 
        ret = register_netdev(netdev);
-       if (!ret) {
-               ipa->modem_netdev = netdev;
-               ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
-               ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
-       } else {
+       if (ret) {
+               ipa->modem_netdev = NULL;
+               ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
+               ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
                free_netdev(netdev);
        }
 
@@ -265,13 +273,15 @@ int ipa_modem_stop(struct ipa *ipa)
        /* Prevent the modem from triggering a call to ipa_setup() */
        ipa_smp2p_disable(ipa);
 
-       /* Stop the queue and disable the endpoints if it's open */
+       /* Clean up the netdev and endpoints if the netdev was started */
        if (netdev) {
-               (void)ipa_stop(netdev);
+               /* If it was opened, stop it first */
+               if (netdev->flags & IFF_UP)
+                       (void)ipa_stop(netdev);
+               unregister_netdev(netdev);
+               ipa->modem_netdev = NULL;
                ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
                ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
-               ipa->modem_netdev = NULL;
-               unregister_netdev(netdev);
                free_netdev(netdev);
        }
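
Two related fixes above: the endpoint netdev pointers are now published before register_netdev(), since the stack may invoke callbacks (and, via wakeup handling, the suspend/resume paths guarded by the new IFF_UP checks) as soon as registration succeeds; on failure the pointers are rolled back before the netdev is freed. The bare pattern (example_priv is a placeholder):

static int example_start(struct example_priv *priv, struct net_device *netdev)
{
        int ret;

        /* Publish first: callbacks may run once registration succeeds */
        priv->netdev = netdev;

        ret = register_netdev(netdev);
        if (ret) {
                /* Unwind the published state on failure */
                priv->netdev = NULL;
                free_netdev(netdev);
        }

        return ret;
}
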
 
diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile
deleted file mode 100644 (file)
index f71b9f8..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_MHI_NET) += mhi_net.o
-
-mhi_net-y := net.o proto_mbim.o
diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h
deleted file mode 100644 (file)
index 1d0c499..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-struct mhi_net_stats {
-       u64_stats_t rx_packets;
-       u64_stats_t rx_bytes;
-       u64_stats_t rx_errors;
-       u64_stats_t rx_dropped;
-       u64_stats_t rx_length_errors;
-       u64_stats_t tx_packets;
-       u64_stats_t tx_bytes;
-       u64_stats_t tx_errors;
-       u64_stats_t tx_dropped;
-       struct u64_stats_sync tx_syncp;
-       struct u64_stats_sync rx_syncp;
-};
-
-struct mhi_net_dev {
-       struct mhi_device *mdev;
-       struct net_device *ndev;
-       struct sk_buff *skbagg_head;
-       struct sk_buff *skbagg_tail;
-       const struct mhi_net_proto *proto;
-       void *proto_data;
-       struct delayed_work rx_refill;
-       struct mhi_net_stats stats;
-       u32 rx_queue_sz;
-       int msg_enable;
-       unsigned int mru;
-};
-
-struct mhi_net_proto {
-       int (*init)(struct mhi_net_dev *mhi_netdev);
-       struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-       void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-};
-
-extern const struct mhi_net_proto proto_mbim;
diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
deleted file mode 100644 (file)
index 0cc7dcd..0000000
+++ /dev/null
@@ -1,487 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-#include <linux/if_arp.h>
-#include <linux/mhi.h>
-#include <linux/mod_devicetable.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/wwan.h>
-
-#include "mhi.h"
-
-#define MHI_NET_MIN_MTU                ETH_MIN_MTU
-#define MHI_NET_MAX_MTU                0xffff
-#define MHI_NET_DEFAULT_MTU    0x4000
-
-/* When set to false, the default netdev (link 0) is not created, and it is
- * up to the user to create the link (via wwan rtnetlink).
- */
-static bool create_default_iface = true;
-module_param(create_default_iface, bool, 0);
-
-struct mhi_device_info {
-       const char *netname;
-       const struct mhi_net_proto *proto;
-};
-
-static int mhi_ndo_open(struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       /* Feed the rx buffer pool */
-       schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-
-       /* Carrier is established via an out-of-band channel (e.g. QMI) */
-       netif_carrier_on(ndev);
-
-       netif_start_queue(ndev);
-
-       return 0;
-}
-
-static int mhi_ndo_stop(struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       netif_stop_queue(ndev);
-       netif_carrier_off(ndev);
-       cancel_delayed_work_sync(&mhi_netdev->rx_refill);
-
-       return 0;
-}
-
-static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       const struct mhi_net_proto *proto = mhi_netdev->proto;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       int err;
-
-       if (proto && proto->tx_fixup) {
-               skb = proto->tx_fixup(mhi_netdev, skb);
-               if (unlikely(!skb))
-                       goto exit_drop;
-       }
-
-       err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
-       if (unlikely(err)) {
-               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
-                                   ndev->name, err);
-               dev_kfree_skb_any(skb);
-               goto exit_drop;
-       }
-
-       if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
-               netif_stop_queue(ndev);
-
-       return NETDEV_TX_OK;
-
-exit_drop:
-       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
-       u64_stats_inc(&mhi_netdev->stats.tx_dropped);
-       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
-       return NETDEV_TX_OK;
-}
-
-static void mhi_ndo_get_stats64(struct net_device *ndev,
-                               struct rtnl_link_stats64 *stats)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       unsigned int start;
-
-       do {
-               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
-               stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
-               stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
-               stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
-               stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped);
-               stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors);
-       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
-
-       do {
-               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
-               stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
-               stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
-               stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
-               stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
-       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
-}
-
-static const struct net_device_ops mhi_netdev_ops = {
-       .ndo_open               = mhi_ndo_open,
-       .ndo_stop               = mhi_ndo_stop,
-       .ndo_start_xmit         = mhi_ndo_xmit,
-       .ndo_get_stats64        = mhi_ndo_get_stats64,
-};
-
-static void mhi_net_setup(struct net_device *ndev)
-{
-       ndev->header_ops = NULL;  /* No header */
-       ndev->type = ARPHRD_RAWIP;
-       ndev->hard_header_len = 0;
-       ndev->addr_len = 0;
-       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
-       ndev->netdev_ops = &mhi_netdev_ops;
-       ndev->mtu = MHI_NET_DEFAULT_MTU;
-       ndev->min_mtu = MHI_NET_MIN_MTU;
-       ndev->max_mtu = MHI_NET_MAX_MTU;
-       ndev->tx_queue_len = 1000;
-}
-
-static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
-                                      struct sk_buff *skb)
-{
-       struct sk_buff *head = mhi_netdev->skbagg_head;
-       struct sk_buff *tail = mhi_netdev->skbagg_tail;
-
-       /* This is non-paged skb chaining using frag_list */
-       if (!head) {
-               mhi_netdev->skbagg_head = skb;
-               return skb;
-       }
-
-       if (!skb_shinfo(head)->frag_list)
-               skb_shinfo(head)->frag_list = skb;
-       else
-               tail->next = skb;
-
-       head->len += skb->len;
-       head->data_len += skb->len;
-       head->truesize += skb->truesize;
-
-       mhi_netdev->skbagg_tail = skb;
-
-       return mhi_netdev->skbagg_head;
-}
-
-static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
-                               struct mhi_result *mhi_res)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       const struct mhi_net_proto *proto = mhi_netdev->proto;
-       struct sk_buff *skb = mhi_res->buf_addr;
-       int free_desc_count;
-
-       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
-       if (unlikely(mhi_res->transaction_status)) {
-               switch (mhi_res->transaction_status) {
-               case -EOVERFLOW:
-                       /* Packet cannot fit in one MHI buffer and has been
-                        * split over multiple MHI transfers, so re-aggregate it.
-                        * That usually means the device side MTU is larger than
-                        * the host side MTU/MRU. Since this is not optimal,
-                        * print a warning (once).
-                        */
-                       netdev_warn_once(mhi_netdev->ndev,
-                                        "Fragmented packets received, fix MTU?\n");
-                       skb_put(skb, mhi_res->bytes_xferd);
-                       mhi_net_skb_agg(mhi_netdev, skb);
-                       break;
-               case -ENOTCONN:
-                       /* MHI layer stopping/resetting the DL channel */
-                       dev_kfree_skb_any(skb);
-                       return;
-               default:
-                       /* Unknown error, simply drop */
-                       dev_kfree_skb_any(skb);
-                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-                       u64_stats_inc(&mhi_netdev->stats.rx_errors);
-                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-               }
-       } else {
-               skb_put(skb, mhi_res->bytes_xferd);
-
-               if (mhi_netdev->skbagg_head) {
-                       /* Aggregate the final fragment */
-                       skb = mhi_net_skb_agg(mhi_netdev, skb);
-                       mhi_netdev->skbagg_head = NULL;
-               }
-
-               switch (skb->data[0] & 0xf0) {
-               case 0x40:
-                       skb->protocol = htons(ETH_P_IP);
-                       break;
-               case 0x60:
-                       skb->protocol = htons(ETH_P_IPV6);
-                       break;
-               default:
-                       skb->protocol = htons(ETH_P_MAP);
-                       break;
-               }
-
-               if (proto && proto->rx) {
-                       proto->rx(mhi_netdev, skb);
-               } else {
-                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-                       u64_stats_inc(&mhi_netdev->stats.rx_packets);
-                       u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
-                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-                       netif_rx(skb);
-               }
-       }
-
-       /* Refill if RX buffers queue becomes low */
-       if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
-               schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-}
-
-static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
-                               struct mhi_result *mhi_res)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       struct net_device *ndev = mhi_netdev->ndev;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       struct sk_buff *skb = mhi_res->buf_addr;
-
-       /* Hardware has consumed the buffer, so free the skb (which is not
-        * freed by the MHI stack) and perform accounting.
-        */
-       dev_consume_skb_any(skb);
-
-       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
-       if (unlikely(mhi_res->transaction_status)) {
-
-               /* MHI layer stopping/resetting the UL channel */
-               if (mhi_res->transaction_status == -ENOTCONN) {
-                       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-                       return;
-               }
-
-               u64_stats_inc(&mhi_netdev->stats.tx_errors);
-       } else {
-               u64_stats_inc(&mhi_netdev->stats.tx_packets);
-               u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
-       }
-       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
-       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
-               netif_wake_queue(ndev);
-}
-
-static void mhi_net_rx_refill_work(struct work_struct *work)
-{
-       struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
-                                                     rx_refill.work);
-       struct net_device *ndev = mhi_netdev->ndev;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       struct sk_buff *skb;
-       unsigned int size;
-       int err;
-
-       size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
-
-       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
-               skb = netdev_alloc_skb(ndev, size);
-               if (unlikely(!skb))
-                       break;
-
-               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
-               if (unlikely(err)) {
-                       net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
-                                           ndev->name, err);
-                       kfree_skb(skb);
-                       break;
-               }
-
-               /* Do not hog the CPU if rx buffers are consumed faster than
-                * queued (unlikely).
-                */
-               cond_resched();
-       }
-
-       /* If we're still starved of rx buffers, reschedule later */
-       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
-               schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
-}
-
-static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
-                          struct netlink_ext_ack *extack)
-{
-       const struct mhi_device_info *info;
-       struct mhi_device *mhi_dev = ctxt;
-       struct mhi_net_dev *mhi_netdev;
-       int err;
-
-       info = (struct mhi_device_info *)mhi_dev->id->driver_data;
-
-       /* For now we only support one link (link context 0); the driver must
-        * be reworked to break the 1:1 relationship for MBIM networking and to
-        * forward the setup call to rmnet (QMAP) otherwise.
-        */
-       if (if_id != 0)
-               return -EINVAL;
-
-       if (dev_get_drvdata(&mhi_dev->dev))
-               return -EBUSY;
-
-       mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
-       mhi_netdev->ndev = ndev;
-       mhi_netdev->mdev = mhi_dev;
-       mhi_netdev->skbagg_head = NULL;
-       mhi_netdev->proto = info->proto;
-       mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
-
-       INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
-       u64_stats_init(&mhi_netdev->stats.rx_syncp);
-       u64_stats_init(&mhi_netdev->stats.tx_syncp);
-
-       /* Start MHI channels */
-       err = mhi_prepare_for_transfer(mhi_dev);
-       if (err)
-               goto out_err;
-
-       /* Number of transfer descriptors determines size of the queue */
-       mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
-       if (extack)
-               err = register_netdevice(ndev);
-       else
-               err = register_netdev(ndev);
-       if (err)
-               goto out_err;
-
-       if (mhi_netdev->proto) {
-               err = mhi_netdev->proto->init(mhi_netdev);
-               if (err)
-                       goto out_err_proto;
-       }
-
-       return 0;
-
-out_err_proto:
-       unregister_netdevice(ndev);
-out_err:
-       free_netdev(ndev);
-       return err;
-}
-
-static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
-                           struct list_head *head)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       struct mhi_device *mhi_dev = ctxt;
-
-       if (head)
-               unregister_netdevice_queue(ndev, head);
-       else
-               unregister_netdev(ndev);
-
-       mhi_unprepare_from_transfer(mhi_dev);
-
-       kfree_skb(mhi_netdev->skbagg_head);
-
-       dev_set_drvdata(&mhi_dev->dev, NULL);
-}
-
-static const struct wwan_ops mhi_wwan_ops = {
-       .priv_size = sizeof(struct mhi_net_dev),
-       .setup = mhi_net_setup,
-       .newlink = mhi_net_newlink,
-       .dellink = mhi_net_dellink,
-};
-
-static int mhi_net_probe(struct mhi_device *mhi_dev,
-                        const struct mhi_device_id *id)
-{
-       const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
-       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-       struct net_device *ndev;
-       int err;
-
-       err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev,
-                               WWAN_NO_DEFAULT_LINK);
-       if (err)
-               return err;
-
-       if (!create_default_iface)
-               return 0;
-
-       /* Create a default interface, which is used as either the RMNET
-        * real-dev, MBIM link 0, or IP link 0.
-        */
-       ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
-                           NET_NAME_PREDICTABLE, mhi_net_setup);
-       if (!ndev) {
-               err = -ENOMEM;
-               goto err_unregister;
-       }
-
-       SET_NETDEV_DEV(ndev, &mhi_dev->dev);
-
-       err = mhi_net_newlink(mhi_dev, ndev, 0, NULL);
-       if (err)
-               goto err_release;
-
-       return 0;
-
-err_release:
-       free_netdev(ndev);
-err_unregister:
-       wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
-       return err;
-}
-
-static void mhi_net_remove(struct mhi_device *mhi_dev)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-
-       /* WWAN core takes care of removing remaining links */
-       wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
-       if (create_default_iface)
-               mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL);
-}
-
-static const struct mhi_device_info mhi_hwip0 = {
-       .netname = "mhi_hwip%d",
-};
-
-static const struct mhi_device_info mhi_swip0 = {
-       .netname = "mhi_swip%d",
-};
-
-static const struct mhi_device_info mhi_hwip0_mbim = {
-       .netname = "mhi_mbim%d",
-       .proto = &proto_mbim,
-};
-
-static const struct mhi_device_id mhi_net_id_table[] = {
-       /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
-       { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
-       /* Software data PATH (to modem CPU) */
-       { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
-       /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
-       { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim },
-       {}
-};
-MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
-
-static struct mhi_driver mhi_net_driver = {
-       .probe = mhi_net_probe,
-       .remove = mhi_net_remove,
-       .dl_xfer_cb = mhi_net_dl_callback,
-       .ul_xfer_cb = mhi_net_ul_callback,
-       .id_table = mhi_net_id_table,
-       .driver = {
-               .name = "mhi_net",
-               .owner = THIS_MODULE,
-       },
-};
-
-module_mhi_driver(mhi_net_driver);
-
-MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
-MODULE_DESCRIPTION("Network over MHI");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c
deleted file mode 100644 (file)
index 761d90b..0000000
+++ /dev/null
@@ -1,310 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- *
- * This driver copies some code from cdc_ncm, which is:
- * Copyright (C) ST-Ericsson 2010-2012
- * and cdc_mbim, which is:
- * Copyright (c) 2012  Smith Micro Software, Inc.
- * Copyright (c) 2012  Bjørn Mork <bjorn@mork.no>
- *
- */
-
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/mii.h>
-#include <linux/netdevice.h>
-#include <linux/wwan.h>
-#include <linux/skbuff.h>
-#include <linux/usb.h>
-#include <linux/usb/cdc.h>
-#include <linux/usb/usbnet.h>
-#include <linux/usb/cdc_ncm.h>
-
-#include "mhi.h"
-
-#define MBIM_NDP16_SIGN_MASK 0x00ffffff
-
-/* Usual WWAN MTU */
-#define MHI_MBIM_DEFAULT_MTU 1500
-
-/* An MRU of 3500 optimizes skb allocation: the skbs will basically fit in
- * one 4K page. Large MBIM packets will simply be split over several MHI
- * transfers and chained by the MHI net layer (zerocopy).
- */
-#define MHI_MBIM_DEFAULT_MRU 3500
-
-struct mbim_context {
-       u16 rx_seq;
-       u16 tx_seq;
-};
-
-static void __mbim_length_errors_inc(struct mhi_net_dev *dev)
-{
-       u64_stats_update_begin(&dev->stats.rx_syncp);
-       u64_stats_inc(&dev->stats.rx_length_errors);
-       u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static void __mbim_errors_inc(struct mhi_net_dev *dev)
-{
-       u64_stats_update_begin(&dev->stats.rx_syncp);
-       u64_stats_inc(&dev->stats.rx_errors);
-       u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static int mbim_rx_verify_nth16(struct sk_buff *skb)
-{
-       struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
-       struct mbim_context *ctx = dev->proto_data;
-       struct usb_cdc_ncm_nth16 *nth16;
-       int len;
-
-       if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
-                       sizeof(struct usb_cdc_ncm_ndp16)) {
-               netif_dbg(dev, rx_err, dev->ndev, "frame too short\n");
-               __mbim_length_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
-
-       if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "invalid NTH16 signature <%#010x>\n",
-                         le32_to_cpu(nth16->dwSignature));
-               __mbim_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       /* No limit on the block length, except the size of the data pkt */
-       len = le16_to_cpu(nth16->wBlockLength);
-       if (len > skb->len) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "NTB does not fit into the skb %u/%u\n", len,
-                         skb->len);
-               __mbim_length_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
-           (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) &&
-           !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "sequence number glitch prev=%d curr=%d\n",
-                         ctx->rx_seq, le16_to_cpu(nth16->wSequence));
-       }
-       ctx->rx_seq = le16_to_cpu(nth16->wSequence);
-
-       return le16_to_cpu(nth16->wNdpIndex);
-}
-
-static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
-{
-       struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
-       int ret;
-
-       if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
-               netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n",
-                         le16_to_cpu(ndp16->wLength));
-               return -EINVAL;
-       }
-
-       ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
-                       / sizeof(struct usb_cdc_ncm_dpe16));
-       ret--; /* Last entry is always a NULL terminator */
-
-       if (sizeof(struct usb_cdc_ncm_ndp16) +
-            ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "Invalid nframes = %d\n", ret);
-               return -EINVAL;
-       }
-
-       return ret;
-}
-
-static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
-{
-       struct net_device *ndev = mhi_netdev->ndev;
-       int ndpoffset;
-
-       /* Check NTB header and retrieve first NDP offset */
-       ndpoffset = mbim_rx_verify_nth16(skb);
-       if (ndpoffset < 0) {
-               net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name);
-               goto error;
-       }
-
-       /* Process each NDP */
-       while (1) {
-               struct usb_cdc_ncm_ndp16 ndp16;
-               struct usb_cdc_ncm_dpe16 dpe16;
-               int nframes, n, dpeoffset;
-
-               if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
-                       net_err_ratelimited("%s: Incorrect NDP offset (%u)\n",
-                                           ndev->name, ndpoffset);
-                       __mbim_length_errors_inc(mhi_netdev);
-                       goto error;
-               }
-
-               /* Check NDP header and retrieve number of datagrams */
-               nframes = mbim_rx_verify_ndp16(skb, &ndp16);
-               if (nframes < 0) {
-                       net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name);
-                       __mbim_length_errors_inc(mhi_netdev);
-                       goto error;
-               }
-
-                /* Only IP data type supported, no DSS in MHI context */
-               if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
-                               != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
-                       net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name);
-                       __mbim_errors_inc(mhi_netdev);
-                       goto next_ndp;
-               }
-
-               /* Only primary IP session 0 (0x00) supported for now */
-               if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) {
-                       net_err_ratelimited("%s: bad packet session\n", ndev->name);
-                       __mbim_errors_inc(mhi_netdev);
-                       goto next_ndp;
-               }
-
-               /* de-aggregate and deliver IP packets */
-               dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
-               for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
-                       u16 dgram_offset, dgram_len;
-                       struct sk_buff *skbn;
-
-                       if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
-                               break;
-
-                       dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
-                       dgram_len = le16_to_cpu(dpe16.wDatagramLength);
-
-                       if (!dgram_offset || !dgram_len)
-                               break; /* null terminator */
-
-                       skbn = netdev_alloc_skb(ndev, dgram_len);
-                       if (!skbn)
-                               continue;
-
-                       skb_put(skbn, dgram_len);
-                       skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
-
-                       switch (skbn->data[0] & 0xf0) {
-                       case 0x40:
-                               skbn->protocol = htons(ETH_P_IP);
-                               break;
-                       case 0x60:
-                               skbn->protocol = htons(ETH_P_IPV6);
-                               break;
-                       default:
-                               net_err_ratelimited("%s: unknown protocol\n",
-                                                   ndev->name);
-                               __mbim_errors_inc(mhi_netdev);
-                               dev_kfree_skb_any(skbn);
-                               continue;
-                       }
-
-                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-                       u64_stats_inc(&mhi_netdev->stats.rx_packets);
-                       u64_stats_add(&mhi_netdev->stats.rx_bytes, skbn->len);
-                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-                       netif_rx(skbn);
-               }
-next_ndp:
-               /* Other NDP to process? */
-               ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
-               if (!ndpoffset)
-                       break;
-       }
-
-       /* free skb */
-       dev_consume_skb_any(skb);
-       return;
-error:
-       dev_kfree_skb_any(skb);
-}
-
-struct mbim_tx_hdr {
-       struct usb_cdc_ncm_nth16 nth16;
-       struct usb_cdc_ncm_ndp16 ndp16;
-       struct usb_cdc_ncm_dpe16 dpe16[2];
-} __packed;
-
-static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev,
-                                    struct sk_buff *skb)
-{
-       struct mbim_context *ctx = mhi_netdev->proto_data;
-       unsigned int dgram_size = skb->len;
-       struct usb_cdc_ncm_nth16 *nth16;
-       struct usb_cdc_ncm_ndp16 *ndp16;
-       struct mbim_tx_hdr *mbim_hdr;
-
-       /* For now, this is a partial implementation of CDC MBIM: only one NDP
-        * is sent, containing the IP packet (no aggregation).
-        */
-
-       /* Ensure we have enough headroom for crafting MBIM header */
-       if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
-               dev_kfree_skb_any(skb);
-               return NULL;
-       }
-
-       mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
-
-       /* Fill NTB header */
-       nth16 = &mbim_hdr->nth16;
-       nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
-       nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-       nth16->wSequence = cpu_to_le16(ctx->tx_seq++);
-       nth16->wBlockLength = cpu_to_le16(skb->len);
-       nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-
-       /* Fill the unique NDP */
-       ndp16 = &mbim_hdr->ndp16;
-       ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN);
-       ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
-                                       + sizeof(struct usb_cdc_ncm_dpe16) * 2);
-       ndp16->wNextNdpIndex = 0;
-
-       /* Datagram follows the mbim header */
-       ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
-       ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
-
-       /* null termination */
-       ndp16->dpe16[1].wDatagramIndex = 0;
-       ndp16->dpe16[1].wDatagramLength = 0;
-
-       return skb;
-}
-
-static int mbim_init(struct mhi_net_dev *mhi_netdev)
-{
-       struct net_device *ndev = mhi_netdev->ndev;
-
-       mhi_netdev->proto_data = devm_kzalloc(&ndev->dev,
-                                             sizeof(struct mbim_context),
-                                             GFP_KERNEL);
-       if (!mhi_netdev->proto_data)
-               return -ENOMEM;
-
-       ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
-       ndev->mtu = MHI_MBIM_DEFAULT_MTU;
-
-       if (!mhi_netdev->mru)
-               mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU;
-
-       return 0;
-}
-
-const struct mhi_net_proto proto_mbim = {
-       .init = mbim_init,
-       .rx = mbim_rx,
-       .tx_fixup = mbim_tx_fixup,
-};
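
The MRU comment in the file above deserves one line of arithmetic. Roughly (typical x86-64 values, which are assumptions rather than anything stated in the patch): netdev_alloc_skb() adds NET_SKB_PAD headroom and a struct skb_shared_info tail, so a 3500-byte MRU keeps the whole allocation inside one 4 KiB kmalloc bucket:

#define EXAMPLE_MRU             3500
#define EXAMPLE_NET_SKB_PAD     64      /* typical NET_SKB_PAD */
#define EXAMPLE_SHINFO_SIZE     320     /* typical sizeof(struct skb_shared_info) */

/* Approximate memory behind netdev_alloc_skb(ndev, EXAMPLE_MRU) */
static unsigned int example_mru_footprint(void)
{
        /* 3500 + 64 + 320 = 3884 < 4096, so one 4K page suffices */
        return EXAMPLE_MRU + EXAMPLE_NET_SKB_PAD + EXAMPLE_SHINFO_SIZE;
}
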
diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c
new file mode 100644 (file)
index 0000000..975f7f9
--- /dev/null
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI Network driver - Network over MHI bus
+ *
+ * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <linux/if_arp.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+
+#define MHI_NET_MIN_MTU                ETH_MIN_MTU
+#define MHI_NET_MAX_MTU                0xffff
+#define MHI_NET_DEFAULT_MTU    0x4000
+
+struct mhi_net_stats {
+       u64_stats_t rx_packets;
+       u64_stats_t rx_bytes;
+       u64_stats_t rx_errors;
+       u64_stats_t tx_packets;
+       u64_stats_t tx_bytes;
+       u64_stats_t tx_errors;
+       u64_stats_t tx_dropped;
+       struct u64_stats_sync tx_syncp;
+       struct u64_stats_sync rx_syncp;
+};
+
+struct mhi_net_dev {
+       struct mhi_device *mdev;
+       struct net_device *ndev;
+       struct sk_buff *skbagg_head;
+       struct sk_buff *skbagg_tail;
+       struct delayed_work rx_refill;
+       struct mhi_net_stats stats;
+       u32 rx_queue_sz;
+       int msg_enable;
+       unsigned int mru;
+};
+
+struct mhi_device_info {
+       const char *netname;
+};
+
+static int mhi_ndo_open(struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       /* Feed the rx buffer pool */
+       schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+
+       /* Carrier is established via an out-of-band channel (e.g. QMI) */
+       netif_carrier_on(ndev);
+
+       netif_start_queue(ndev);
+
+       return 0;
+}
+
+static int mhi_ndo_stop(struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       netif_stop_queue(ndev);
+       netif_carrier_off(ndev);
+       cancel_delayed_work_sync(&mhi_netdev->rx_refill);
+
+       return 0;
+}
+
+static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       int err;
+
+       err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+       if (unlikely(err)) {
+               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+                                   ndev->name, err);
+               dev_kfree_skb_any(skb);
+               goto exit_drop;
+       }
+
+       if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+               netif_stop_queue(ndev);
+
+       return NETDEV_TX_OK;
+
+exit_drop:
+       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+       u64_stats_inc(&mhi_netdev->stats.tx_dropped);
+       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+       return NETDEV_TX_OK;
+}
+
+static void mhi_ndo_get_stats64(struct net_device *ndev,
+                               struct rtnl_link_stats64 *stats)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+       unsigned int start;
+
+       do {
+               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
+               stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
+               stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
+               stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
+       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
+
+       do {
+               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
+               stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
+               stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
+               stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
+               stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
+       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
+}
+
+static const struct net_device_ops mhi_netdev_ops = {
+       .ndo_open               = mhi_ndo_open,
+       .ndo_stop               = mhi_ndo_stop,
+       .ndo_start_xmit         = mhi_ndo_xmit,
+       .ndo_get_stats64        = mhi_ndo_get_stats64,
+};
+
+static void mhi_net_setup(struct net_device *ndev)
+{
+       ndev->header_ops = NULL;  /* No header */
+       ndev->type = ARPHRD_RAWIP;
+       ndev->hard_header_len = 0;
+       ndev->addr_len = 0;
+       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+       ndev->netdev_ops = &mhi_netdev_ops;
+       ndev->mtu = MHI_NET_DEFAULT_MTU;
+       ndev->min_mtu = MHI_NET_MIN_MTU;
+       ndev->max_mtu = MHI_NET_MAX_MTU;
+       ndev->tx_queue_len = 1000;
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
+                                      struct sk_buff *skb)
+{
+       struct sk_buff *head = mhi_netdev->skbagg_head;
+       struct sk_buff *tail = mhi_netdev->skbagg_tail;
+
+       /* This is non-paged skb chaining using frag_list */
+       if (!head) {
+               mhi_netdev->skbagg_head = skb;
+               return skb;
+       }
+
+       if (!skb_shinfo(head)->frag_list)
+               skb_shinfo(head)->frag_list = skb;
+       else
+               tail->next = skb;
+
+       head->len += skb->len;
+       head->data_len += skb->len;
+       head->truesize += skb->truesize;
+
+       mhi_netdev->skbagg_tail = skb;
+
+       return mhi_netdev->skbagg_head;
+}
+
+static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
+                               struct mhi_result *mhi_res)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       int free_desc_count;
+
+       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       if (unlikely(mhi_res->transaction_status)) {
+               switch (mhi_res->transaction_status) {
+               case -EOVERFLOW:
+                       /* Packet cannot fit in one MHI buffer and has been
+                        * split over multiple MHI transfers, so re-aggregate it.
+                        * That usually means the device side MTU is larger than
+                        * the host side MTU/MRU. Since this is not optimal,
+                        * print a warning (once).
+                        */
+                       netdev_warn_once(mhi_netdev->ndev,
+                                        "Fragmented packets received, fix MTU?\n");
+                       skb_put(skb, mhi_res->bytes_xferd);
+                       mhi_net_skb_agg(mhi_netdev, skb);
+                       break;
+               case -ENOTCONN:
+                       /* MHI layer stopping/resetting the DL channel */
+                       dev_kfree_skb_any(skb);
+                       return;
+               default:
+                       /* Unknown error, simply drop */
+                       dev_kfree_skb_any(skb);
+                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+                       u64_stats_inc(&mhi_netdev->stats.rx_errors);
+                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+               }
+       } else {
+               skb_put(skb, mhi_res->bytes_xferd);
+
+               if (mhi_netdev->skbagg_head) {
+                       /* Aggregate the final fragment */
+                       skb = mhi_net_skb_agg(mhi_netdev, skb);
+                       mhi_netdev->skbagg_head = NULL;
+               }
+
+               switch (skb->data[0] & 0xf0) {
+               case 0x40:
+                       skb->protocol = htons(ETH_P_IP);
+                       break;
+               case 0x60:
+                       skb->protocol = htons(ETH_P_IPV6);
+                       break;
+               default:
+                       skb->protocol = htons(ETH_P_MAP);
+                       break;
+               }
+
+               u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+               u64_stats_inc(&mhi_netdev->stats.rx_packets);
+               u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
+               u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+               netif_rx(skb);
+       }
+
+       /* Refill if RX buffers queue becomes low */
+       if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
+               schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+}
+
+static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
+                               struct mhi_result *mhi_res)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+       struct net_device *ndev = mhi_netdev->ndev;
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       struct sk_buff *skb = mhi_res->buf_addr;
+
+       /* Hardware has consumed the buffer, so free the skb (which is not
+        * freed by the MHI stack) and perform accounting.
+        */
+       dev_consume_skb_any(skb);
+
+       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+       if (unlikely(mhi_res->transaction_status)) {
+               /* MHI layer stopping/resetting the UL channel */
+               if (mhi_res->transaction_status == -ENOTCONN) {
+                       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+                       return;
+               }
+
+               u64_stats_inc(&mhi_netdev->stats.tx_errors);
+       } else {
+               u64_stats_inc(&mhi_netdev->stats.tx_packets);
+               u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
+       }
+       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+               netif_wake_queue(ndev);
+}
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+       struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
+                                                     rx_refill.work);
+       struct net_device *ndev = mhi_netdev->ndev;
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       struct sk_buff *skb;
+       unsigned int size;
+       int err;
+
+       size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
+
+       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+               skb = netdev_alloc_skb(ndev, size);
+               if (unlikely(!skb))
+                       break;
+
+               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
+               if (unlikely(err)) {
+                       net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
+                                           ndev->name, err);
+                       kfree_skb(skb);
+                       break;
+               }
+
+               /* Do not hog the CPU if rx buffers are consumed faster than
+                * queued (unlikely).
+                */
+               cond_resched();
+       }
+
+       /* If we're still starved of rx buffers, reschedule later */
+       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
+               schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
+}
+
+static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev;
+       int err;
+
+       mhi_netdev = netdev_priv(ndev);
+
+       dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
+       mhi_netdev->ndev = ndev;
+       mhi_netdev->mdev = mhi_dev;
+       mhi_netdev->skbagg_head = NULL;
+       mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
+
+       INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
+       u64_stats_init(&mhi_netdev->stats.rx_syncp);
+       u64_stats_init(&mhi_netdev->stats.tx_syncp);
+
+       /* Start MHI channels */
+       err = mhi_prepare_for_transfer(mhi_dev, 0);
+       if (err)
+               goto out_err;
+
+       /* Number of transfer descriptors determines size of the queue */
+       mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       err = register_netdev(ndev);
+       if (err)
+               return err;
+
+       return 0;
+
+out_err:
+       free_netdev(ndev);
+       return err;
+}
+
+static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       unregister_netdev(ndev);
+
+       mhi_unprepare_from_transfer(mhi_dev);
+
+       kfree_skb(mhi_netdev->skbagg_head);
+
+       dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static int mhi_net_probe(struct mhi_device *mhi_dev,
+                        const struct mhi_device_id *id)
+{
+       const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
+       struct net_device *ndev;
+       int err;
+
+       ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
+                           NET_NAME_PREDICTABLE, mhi_net_setup);
+       if (!ndev)
+               return -ENOMEM;
+
+       SET_NETDEV_DEV(ndev, &mhi_dev->dev);
+
+       err = mhi_net_newlink(mhi_dev, ndev);
+       if (err) {
+               free_netdev(ndev);
+               return err;
+       }
+
+       return 0;
+}
+
+static void mhi_net_remove(struct mhi_device *mhi_dev)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+
+       mhi_net_dellink(mhi_dev, mhi_netdev->ndev);
+}
+
+static const struct mhi_device_info mhi_hwip0 = {
+       .netname = "mhi_hwip%d",
+};
+
+static const struct mhi_device_info mhi_swip0 = {
+       .netname = "mhi_swip%d",
+};
+
+static const struct mhi_device_id mhi_net_id_table[] = {
+       /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
+       { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
+       /* Software data PATH (to modem CPU) */
+       { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
+       {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
+
+static struct mhi_driver mhi_net_driver = {
+       .probe = mhi_net_probe,
+       .remove = mhi_net_remove,
+       .dl_xfer_cb = mhi_net_dl_callback,
+       .ul_xfer_cb = mhi_net_ul_callback,
+       .id_table = mhi_net_id_table,
+       .driver = {
+               .name = "mhi_net",
+               .owner = THIS_MODULE,
+       },
+};
+
+module_mhi_driver(mhi_net_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network over MHI");
+MODULE_LICENSE("GPL v2");
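
The rewritten driver keeps the u64_stats pattern for its counters: 64-bit statistics that remain tear-free on 32-bit SMP without taking a lock on the fast path. Writers bracket updates with u64_stats_update_begin()/end(); readers loop until the sequence count is stable. Distilled (example_stats is a placeholder):

struct example_stats {
        u64_stats_t packets;
        struct u64_stats_sync syncp;    /* u64_stats_init() at setup */
};

static void example_count(struct example_stats *s)
{
        u64_stats_update_begin(&s->syncp);
        u64_stats_inc(&s->packets);
        u64_stats_update_end(&s->syncp);
}

static u64 example_read(struct example_stats *s)
{
        unsigned int start;
        u64 packets;

        do {
                start = u64_stats_fetch_begin_irq(&s->syncp);
                packets = u64_stats_read(&s->packets);
        } while (u64_stats_fetch_retry_irq(&s->syncp, start));

        return packets;
}
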
index ff01e5b..62d033a 100644 (file)
@@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr,
               const char *buf, size_t count)
 {
        struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
-       struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
-       struct devlink *devlink;
        unsigned int port_index;
        int ret;
 
@@ -195,12 +193,15 @@ new_port_store(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       devlink = priv_to_devlink(nsim_dev);
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EBUSY;
+
+       if (nsim_bus_dev->in_reload) {
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+               return -EBUSY;
+       }
 
-       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
-       devlink_reload_disable(devlink);
        ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
-       devlink_reload_enable(devlink);
        mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return ret ? ret : count;
 }
@@ -212,8 +213,6 @@ del_port_store(struct device *dev, struct device_attribute *attr,
               const char *buf, size_t count)
 {
        struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
-       struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
-       struct devlink *devlink;
        unsigned int port_index;
        int ret;
 
@@ -224,12 +223,15 @@ del_port_store(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       devlink = priv_to_devlink(nsim_dev);
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EBUSY;
+
+       if (nsim_bus_dev->in_reload) {
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+               return -EBUSY;
+       }
 
-       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
-       devlink_reload_disable(devlink);
        ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
-       devlink_reload_enable(devlink);
        mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return ret ? ret : count;
 }
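
Both sysfs handlers switch from an unconditional mutex_lock() plus devlink_reload_disable()/enable() to a trylock plus an in_reload flag: rather than blocking (and risking a deadlock against a reload that takes the same lock), port addition and deletion now simply fail with -EBUSY while a reload is in flight. The guard, in isolation (example names are placeholders):

static ssize_t example_store(struct example_dev *ed)
{
        ssize_t ret;

        if (!mutex_trylock(&ed->lock)) /* don't block, just refuse */
                return -EBUSY;

        if (ed->in_reload) {           /* window spans reload down..up */
                mutex_unlock(&ed->lock);
                return -EBUSY;
        }

        ret = example_do_work(ed);
        mutex_unlock(&ed->lock);
        return ret;
}
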
index d538a39..54313bd 100644 (file)
@@ -864,16 +864,24 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
                                struct netlink_ext_ack *extack)
 {
        struct nsim_dev *nsim_dev = devlink_priv(devlink);
+       struct nsim_bus_dev *nsim_bus_dev;
+
+       nsim_bus_dev = nsim_dev->nsim_bus_dev;
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EOPNOTSUPP;
 
        if (nsim_dev->dont_allow_reload) {
                /* For testing purposes, user set debugfs dont_allow_reload
                 * value to true. So forbid it.
                 */
                NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes");
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
                return -EOPNOTSUPP;
        }
+       nsim_bus_dev->in_reload = true;
 
        nsim_dev_reload_destroy(nsim_dev);
+       mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return 0;
 }
 
@@ -882,17 +890,26 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio
                              struct netlink_ext_ack *extack)
 {
        struct nsim_dev *nsim_dev = devlink_priv(devlink);
+       struct nsim_bus_dev *nsim_bus_dev;
+       int ret;
+
+       nsim_bus_dev = nsim_dev->nsim_bus_dev;
+       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
+       nsim_bus_dev->in_reload = false;
 
        if (nsim_dev->fail_reload) {
                /* For testing purposes, user set debugfs fail_reload
                 * value to true. Fail right away.
                 */
                NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes");
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
                return -EINVAL;
        }
 
        *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
-       return nsim_dev_reload_create(nsim_dev, extack);
+       ret = nsim_dev_reload_create(nsim_dev, extack);
+       mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+       return ret;
 }
 
 static int nsim_dev_info_get(struct devlink *devlink,
@@ -1432,7 +1449,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
        int err;
 
        devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev),
-                                  nsim_bus_dev->initial_net);
+                                nsim_bus_dev->initial_net, &nsim_bus_dev->dev);
        if (!devlink)
                return -ENOMEM;
        nsim_dev = devlink_priv(devlink);
@@ -1453,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
        if (err)
                goto err_devlink_free;
 
-       err = devlink_register(devlink, &nsim_bus_dev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto err_resources_unregister;
 
index 213d3e5..4300261 100644 (file)
@@ -1441,7 +1441,7 @@ static u64 nsim_fib_nexthops_res_occ_get(void *priv)
 static void nsim_fib_set_max_all(struct nsim_fib_data *data,
                                 struct devlink *devlink)
 {
-       enum nsim_resource_id res_ids[] = {
+       static const enum nsim_resource_id res_ids[] = {
                NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
                NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES,
                NSIM_RESOURCE_NEXTHOPS,
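
Marking the lookup table static const moves it from a per-call stack construction into .rodata: the array is built once at compile time instead of being re-initialized every time the function runs. The same micro-optimization in miniature (example_lookup is a placeholder helper):

static void example_lookup_all(void)
{
        /* One read-only copy in .rodata, nothing rebuilt per call */
        static const int ids[] = { 1, 2, 3 };
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(ids); i++)
                example_lookup(ids[i]);
}
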
index 1c20bcb..793c86d 100644 (file)
@@ -362,6 +362,7 @@ struct nsim_bus_dev {
        struct nsim_vf_config *vfconfigs;
        /* Lock for devlink->reload_enabled in netdevsim module */
        struct mutex nsim_bus_reload_lock;
+       bool in_reload;
        bool init;
 };
 
index 7bf3011..83aea5c 100644 (file)
@@ -288,7 +288,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
        if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) {
                if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E ||
                    BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 ||
-                   BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E)
+                   BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811)
                        val |= BCM54XX_SHD_SCR3_RXCTXC_DIS;
                else
                        val |= BCM54XX_SHD_SCR3_TRDDAPD;
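
This one-liner fixes a classic copy-and-paste defect: the OR-chain tested PHY_ID_BCM54210E twice, so PHY_ID_BCM54811 could never reach the BCM54XX_SHD_SCR3_RXCTXC_DIS branch it was meant to share; the duplicate is replaced by the intended model ID.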
index 4d53886..53bdd67 100644 (file)
@@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev)
 }
 
 static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
-                                           const u32 ksz_phy_id)
+                                           const bool ksz_8051)
 {
        int ret;
 
-       if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id)
+       if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051)
                return 0;
 
        ret = phy_read(phydev, MII_BMSR);
@@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
         * the switch does not.
         */
        ret &= BMSR_ERCAP;
-       if (ksz_phy_id == PHY_ID_KSZ8051)
+       if (ksz_8051)
                return ret;
        else
                return !ret;
@@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
 
 static int ksz8051_match_phy_device(struct phy_device *phydev)
 {
-       return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051);
+       return ksz8051_ksz8795_match_phy_device(phydev, true);
 }
 
 static int ksz8081_config_init(struct phy_device *phydev)
@@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev)
 
 static int ksz8795_match_phy_device(struct phy_device *phydev)
 {
-       return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX);
+       return ksz8051_ksz8795_match_phy_device(phydev, false);
 }
 
 static int ksz9021_load_values_from_of(struct phy_device *phydev,
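
The micrel refactor above replaces the PHY-ID argument with a bool: both callers match the same PHY_ID_KSZ8051 register value, and what actually tells the KSZ8051 PHY apart from the KSZ8795 switch port is the BMSR_ERCAP capability bit. A condensed sketch of that probe logic, assuming the surrounding driver context:

	static int demo_match(struct phy_device *phydev, bool want_ksz8051)
	{
		int bmsr;

		if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051)
			return 0;	/* different ID space entirely */

		bmsr = phy_read(phydev, MII_BMSR);
		if (bmsr < 0)
			return bmsr;

		/* the KSZ8051 PHY advertises extended-register capability,
		 * the KSZ8795 switch port does not
		 */
		return want_ksz8051 ? !!(bmsr & BMSR_ERCAP) : !(bmsr & BMSR_ERCAP);
	}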
index 924ed5b..edb9516 100644 (file)
@@ -506,7 +506,7 @@ static int vsc85xx_ptp_cmp_init(struct phy_device *phydev, enum ts_blk blk)
 {
        struct vsc8531_private *vsc8531 = phydev->priv;
        bool base = phydev->mdio.addr == vsc8531->ts_base_addr;
-       u8 msgs[] = {
+       static const u8 msgs[] = {
                PTP_MSGTYPE_SYNC,
                PTP_MSGTYPE_DELAY_REQ
        };
@@ -847,7 +847,7 @@ static int vsc85xx_ts_ptp_action_flow(struct phy_device *phydev, enum ts_blk blk
 static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk,
                            bool one_step, bool enable)
 {
-       u8 msgs[] = {
+       static const u8 msgs[] = {
                PTP_MSGTYPE_SYNC,
                PTP_MSGTYPE_DELAY_REQ
        };
@@ -1268,8 +1268,8 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev)
 static int __vsc8584_init_ptp(struct phy_device *phydev)
 {
        struct vsc8531_private *vsc8531 = phydev->priv;
-       u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
-       u8  ltc_seq_a[] = { 8, 6, 5, 4, 2 };
+       static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
+       static const u8  ltc_seq_a[] = { 8, 6, 5, 4, 2 };
        u32 val;
 
        if (!vsc8584_is_1588_input_clk_configured(phydev)) {
index 207e59e..06e2181 100644 (file)
@@ -443,7 +443,7 @@ static int ipheth_probe(struct usb_interface *intf,
 
        netdev->netdev_ops = &ipheth_netdev_ops;
        netdev->watchdog_timeo = IPHETH_TX_TIMEOUT;
-       strcpy(netdev->name, "eth%d");
+       strscpy(netdev->name, "eth%d", sizeof(netdev->name));
 
        dev = netdev_priv(netdev);
        dev->udev = udev;
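
strscpy() is the preferred replacement for strcpy() into fixed-size buffers: it is bounded by the destination size, always NUL-terminates, and returns -E2BIG when the source had to be truncated. The usbnet hunk further down makes the same conversion for its "usb%d"/"eth%d"/"wlan%d"/"wwan%d" name templates. Usage sketch:

	char name[IFNAMSIZ];

	if (strscpy(name, "eth%d", sizeof(name)) < 0)
		pr_warn("name template truncated\n");	/* name is still NUL-terminated */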
index 13f8636..4e8d3c2 100644 (file)
@@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 {
        struct phy_device *phydev = dev->net->phydev;
        struct ethtool_link_ksettings ecmd;
-       int ladv, radv, ret;
+       int ladv, radv, ret, link;
        u32 buf;
 
        /* clear LAN78xx interrupt status */
@@ -1162,9 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
        if (unlikely(ret < 0))
                return -EIO;
 
+       mutex_lock(&phydev->lock);
        phy_read_status(phydev);
+       link = phydev->link;
+       mutex_unlock(&phydev->lock);
 
-       if (!phydev->link && dev->link_on) {
+       if (!link && dev->link_on) {
                dev->link_on = false;
 
                /* reset MAC */
@@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                        return -EIO;
 
                del_timer(&dev->stat_monitor);
-       } else if (phydev->link && !dev->link_on) {
+       } else if (link && !dev->link_on) {
                dev->link_on = true;
 
                phy_ethtool_ksettings_get(phydev, &ecmd);
@@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata)
 
 static u32 lan78xx_get_link(struct net_device *net)
 {
+       u32 link;
+
+       mutex_lock(&net->phydev->lock);
        phy_read_status(net->phydev);
+       link = net->phydev->link;
+       mutex_unlock(&net->phydev->lock);
 
-       return net->phydev->link;
+       return link;
 }
 
 static void lan78xx_get_drvinfo(struct net_device *net,
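
Both lan78xx hunks follow one recipe: phy_read_status() refreshes phydev->link (and speed/duplex) and must be serialized against the phylib state machine, so the driver now takes phydev->lock around the call and snapshots the link state while still holding it, instead of reading phydev->link unlocked afterwards:

	int link;

	mutex_lock(&phydev->lock);
	phy_read_status(phydev);	/* updates phydev->link et al. */
	link = phydev->link;		/* snapshot under the lock */
	mutex_unlock(&phydev->lock);

	/* act on 'link', a value that cannot change underneath us */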
index 0475ef0..36dafcb 100644 (file)
@@ -1,31 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- *  Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com)
+ *  Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com)
  *
- *     ChangeLog:
- *             ....    Most of the time spent on reading sources & docs.
- *             v0.2.x  First official release for the Linux kernel.
- *             v0.3.0  Beutified and structured, some bugs fixed.
- *             v0.3.x  URBifying bulk requests and bugfixing. First relatively
- *                     stable release. Still can touch device's registers only
- *                     from top-halves.
- *             v0.4.0  Control messages remained unurbified are now URBs.
- *                     Now we can touch the HW at any time.
- *             v0.4.9  Control urbs again use process context to wait. Argh...
- *                     Some long standing bugs (enable_net_traffic) fixed.
- *                     Also nasty trick about resubmiting control urb from
- *                     interrupt context used. Please let me know how it
- *                     behaves. Pegasus II support added since this version.
- *                     TODO: suppressing HCD warnings spewage on disconnect.
- *             v0.4.13 Ethernet address is now set at probe(), not at open()
- *                     time as this seems to break dhcpd.
- *             v0.5.0  branch to 2.5.x kernels
- *             v0.5.1  ethtool support added
- *             v0.5.5  rx socket buffers are in a pool and the their allocation
- *                     is out of the interrupt routine.
- *             ...
- *             v0.9.3  simplified [get|set]_register(s), async update registers
- *                     logic revisited, receive skb_pool removed.
  */
 
 #include <linux/sched.h>
@@ -45,7 +21,6 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.9.3 (2013/04/25)"
 #define DRIVER_AUTHOR "Petko Manolov <petkan@nucleusys.com>"
 #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver"
 
@@ -132,9 +107,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data)
 static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
                         const void *data)
 {
-       return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
+       int ret;
+
+       ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
                                    PEGASUS_REQT_WRITE, 0, indx, data, size,
                                    1000, GFP_NOIO);
+       if (ret < 0)
+               netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+       return ret;
 }
 
 /*
@@ -145,10 +126,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
 static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data)
 {
        void *buf = &data;
+       int ret;
 
-       return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
+       ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
                                    PEGASUS_REQT_WRITE, data, indx, buf, 1,
                                    1000, GFP_NOIO);
+       if (ret < 0)
+               netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+       return ret;
 }
 
 static int update_eth_regs_async(pegasus_t *pegasus)
@@ -188,10 +174,9 @@ static int update_eth_regs_async(pegasus_t *pegasus)
 
 static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd)
 {
-       int i;
-       __u8 data[4] = { phy, 0, 0, indx };
+       int i, ret;
        __le16 regdi;
-       int ret = -ETIMEDOUT;
+       __u8 data[4] = { phy, 0, 0, indx };
 
        if (cmd & PHY_WRITE) {
                __le16 *t = (__le16 *) & data[1];
@@ -207,12 +192,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd)
                if (data[0] & PHY_DONE)
                        break;
        }
-       if (i >= REG_TIMEOUT)
+       if (i >= REG_TIMEOUT) {
+               ret = -ETIMEDOUT;
                goto fail;
+       }
        if (cmd & PHY_READ) {
                ret = get_registers(p, PhyData, 2, &regdi);
+               if (ret < 0)
+                       goto fail;
                *regd = le16_to_cpu(regdi);
-               return ret;
        }
        return 0;
 fail:
@@ -235,9 +223,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd)
 static int mdio_read(struct net_device *dev, int phy_id, int loc)
 {
        pegasus_t *pegasus = netdev_priv(dev);
+       int ret;
        u16 res;
 
-       read_mii_word(pegasus, phy_id, loc, &res);
+       ret = read_mii_word(pegasus, phy_id, loc, &res);
+       if (ret < 0)
+               return ret;
+
        return (int)res;
 }
 
@@ -251,10 +243,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val)
 
 static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 {
-       int i;
-       __u8 tmp = 0;
+       int ret, i;
        __le16 retdatai;
-       int ret;
+       __u8 tmp = 0;
 
        set_register(pegasus, EpromCtrl, 0);
        set_register(pegasus, EpromOffset, index);
@@ -262,21 +253,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 
        for (i = 0; i < REG_TIMEOUT; i++) {
                ret = get_registers(pegasus, EpromCtrl, 1, &tmp);
+               if (ret < 0)
+                       goto fail;
                if (tmp & EPROM_DONE)
                        break;
-               if (ret == -ESHUTDOWN)
-                       goto fail;
        }
-       if (i >= REG_TIMEOUT)
+       if (i >= REG_TIMEOUT) {
+               ret = -ETIMEDOUT;
                goto fail;
+       }
 
        ret = get_registers(pegasus, EpromData, 2, &retdatai);
+       if (ret < 0)
+               goto fail;
        *retdata = le16_to_cpu(retdatai);
        return ret;
 
 fail:
-       netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
-       return -ETIMEDOUT;
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+       return ret;
 }
 
 #ifdef PEGASUS_WRITE_EEPROM
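
The pegasus changes share one theme: register accessors now log failures at debug level and propagate the real errno (mdio_read() likewise returns the error instead of stale data), and the busy-wait loops distinguish an I/O error, propagated immediately, from a genuine timeout, which yields -ETIMEDOUT only when the loop runs out. A hypothetical helper condensing the polling pattern:

	static int poll_done_bit(pegasus_t *pegasus, __u16 reg, __u8 done)
	{
		__u8 tmp;
		int i, ret;

		for (i = 0; i < REG_TIMEOUT; i++) {
			ret = get_registers(pegasus, reg, 1, &tmp);
			if (ret < 0)
				return ret;	/* I/O error beats timeout */
			if (tmp & done)
				return 0;
		}
		return -ETIMEDOUT;
	}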
@@ -324,10 +319,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data)
        return ret;
 
 fail:
-       netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
        return -ETIMEDOUT;
 }
-#endif                         /* PEGASUS_WRITE_EEPROM */
+#endif /* PEGASUS_WRITE_EEPROM */
 
 static inline int get_node_id(pegasus_t *pegasus, u8 *id)
 {
@@ -367,19 +362,21 @@ static void set_ethernet_addr(pegasus_t *pegasus)
        return;
 err:
        eth_hw_addr_random(pegasus->net);
-       dev_info(&pegasus->intf->dev, "software assigned MAC address.\n");
+       netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n");
 
        return;
 }
 
 static inline int reset_mac(pegasus_t *pegasus)
 {
+       int ret, i;
        __u8 data = 0x8;
-       int i;
 
        set_register(pegasus, EthCtrl1, data);
        for (i = 0; i < REG_TIMEOUT; i++) {
-               get_registers(pegasus, EthCtrl1, 1, &data);
+               ret = get_registers(pegasus, EthCtrl1, 1, &data);
+               if (ret < 0)
+                       goto fail;
                if (~data & 0x08) {
                        if (loopback)
                                break;
@@ -402,22 +399,29 @@ static inline int reset_mac(pegasus_t *pegasus)
        }
        if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) {
                __u16 auxmode;
-               read_mii_word(pegasus, 3, 0x1b, &auxmode);
+               ret = read_mii_word(pegasus, 3, 0x1b, &auxmode);
+               if (ret < 0)
+                       goto fail;
                auxmode |= 4;
                write_mii_word(pegasus, 3, 0x1b, &auxmode);
        }
 
        return 0;
+fail:
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+       return ret;
 }
 
 static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
 {
-       __u16 linkpart;
-       __u8 data[4];
        pegasus_t *pegasus = netdev_priv(dev);
        int ret;
+       __u16 linkpart;
+       __u8 data[4];
 
-       read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+       ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+       if (ret < 0)
+               goto fail;
        data[0] = 0xc8; /* TX & RX enable, append status, no CRC */
        data[1] = 0;
        if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL))
@@ -435,11 +439,16 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
            usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 ||
            usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) {
                u16 auxmode;
-               read_mii_word(pegasus, 0, 0x1b, &auxmode);
+               ret = read_mii_word(pegasus, 0, 0x1b, &auxmode);
+               if (ret < 0)
+                       goto fail;
                auxmode |= 4;
                write_mii_word(pegasus, 0, 0x1b, &auxmode);
        }
 
+       return 0;
+fail:
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
        return ret;
 }
 
@@ -447,9 +456,9 @@ static void read_bulk_callback(struct urb *urb)
 {
        pegasus_t *pegasus = urb->context;
        struct net_device *net;
+       u8 *buf = urb->transfer_buffer;
        int rx_status, count = urb->actual_length;
        int status = urb->status;
-       u8 *buf = urb->transfer_buffer;
        __u16 pkt_len;
 
        if (!pegasus)
@@ -735,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus)
        set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp);
 }
 
-static inline void get_interrupt_interval(pegasus_t *pegasus)
+static inline int get_interrupt_interval(pegasus_t *pegasus)
 {
        u16 data;
        u8 interval;
+       int ret;
+
+       ret = read_eprom_word(pegasus, 4, &data);
+       if (ret < 0)
+               return ret;
 
-       read_eprom_word(pegasus, 4, &data);
        interval = data >> 8;
        if (pegasus->usb->speed != USB_SPEED_HIGH) {
                if (interval < 0x80) {
@@ -755,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus)
                }
        }
        pegasus->intr_interval = interval;
+
+       return 0;
 }
 
 static void set_carrier(struct net_device *net)
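
get_interrupt_interval() shows the same conversion one level up: a formerly void helper now returns int so that a failed EEPROM read aborts probing (the pegasus_probe() hunk below checks the result and unwinds) rather than programming an interrupt interval derived from garbage.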
@@ -880,7 +895,6 @@ static void pegasus_get_drvinfo(struct net_device *dev,
        pegasus_t *pegasus = netdev_priv(dev);
 
        strlcpy(info->driver, driver_name, sizeof(info->driver));
-       strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
        usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info));
 }
 
@@ -999,8 +1013,7 @@ static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq,
                data[0] = pegasus->phy;
                fallthrough;
        case SIOCDEVPRIVATE + 1:
-               read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
-               res = 0;
+               res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
                break;
        case SIOCDEVPRIVATE + 2:
                if (!capable(CAP_NET_ADMIN))
@@ -1034,22 +1047,25 @@ static void pegasus_set_multicast(struct net_device *net)
 
 static __u8 mii_phy_probe(pegasus_t *pegasus)
 {
-       int i;
+       int i, ret;
        __u16 tmp;
 
        for (i = 0; i < 32; i++) {
-               read_mii_word(pegasus, i, MII_BMSR, &tmp);
+               ret = read_mii_word(pegasus, i, MII_BMSR, &tmp);
+               if (ret < 0)
+                       goto fail;
                if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0)
                        continue;
                else
                        return i;
        }
-
+fail:
        return 0xff;
 }
 
 static inline void setup_pegasus_II(pegasus_t *pegasus)
 {
+       int ret;
        __u8 data = 0xa5;
 
        set_register(pegasus, Reg1d, 0);
@@ -1061,7 +1077,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus)
                set_register(pegasus, Reg7b, 2);
 
        set_register(pegasus, 0x83, data);
-       get_registers(pegasus, 0x83, 1, &data);
+       ret = get_registers(pegasus, 0x83, 1, &data);
+       if (ret < 0)
+               goto fail;
 
        if (data == 0xa5)
                pegasus->chip = 0x8513;
@@ -1076,6 +1094,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus)
                set_register(pegasus, Reg81, 6);
        else
                set_register(pegasus, Reg81, 2);
+
+       return;
+fail:
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
 }
 
 static void check_carrier(struct work_struct *work)
@@ -1150,7 +1172,9 @@ static int pegasus_probe(struct usb_interface *intf,
                                | NETIF_MSG_PROBE | NETIF_MSG_LINK);
 
        pegasus->features = usb_dev_id[dev_index].private;
-       get_interrupt_interval(pegasus);
+       res = get_interrupt_interval(pegasus);
+       if (res)
+               goto out2;
        if (reset_mac(pegasus)) {
                dev_err(&intf->dev, "can't reset MAC\n");
                res = -EIO;
@@ -1297,7 +1321,7 @@ static void __init parse_id(char *id)
 
 static int __init pegasus_init(void)
 {
-       pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION);
+       pr_info("%s: " DRIVER_DESC "\n", driver_name);
        if (devid)
                parse_id(devid);
        return usb_register(&pegasus_driver);
index 470e1c1..840c1c2 100644 (file)
@@ -1725,7 +1725,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        dev->interrupt_count = 0;
 
        dev->net = net;
-       strcpy (net->name, "usb%d");
+       strscpy(net->name, "usb%d", sizeof(net->name));
        memcpy (net->dev_addr, node_id, sizeof node_id);
 
        /* rx and tx sides can use different message sizes;
@@ -1752,13 +1752,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
                if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
                    ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
                     (net->dev_addr [0] & 0x02) == 0))
-                       strcpy (net->name, "eth%d");
+                       strscpy(net->name, "eth%d", sizeof(net->name));
                /* WLAN devices should always be named "wlan%d" */
                if ((dev->driver_info->flags & FLAG_WLAN) != 0)
-                       strcpy(net->name, "wlan%d");
+                       strscpy(net->name, "wlan%d", sizeof(net->name));
                /* WWAN devices should always be named "wwan%d" */
                if ((dev->driver_info->flags & FLAG_WWAN) != 0)
-                       strcpy(net->name, "wwan%d");
+                       strscpy(net->name, "wwan%d", sizeof(net->name));
 
                /* devices that cannot do ARP */
                if ((dev->driver_info->flags & FLAG_NOARP) != 0)
index 56c3f85..2e42210 100644 (file)
@@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct page *page, unsigned int offset,
                                   unsigned int len, unsigned int truesize,
                                   bool hdr_valid, unsigned int metasize,
-                                  bool whole_page)
+                                  unsigned int headroom)
 {
        struct sk_buff *skb;
        struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -398,28 +398,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
        else
                hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-       /* If whole_page, there is an offset between the beginning of the
+       /* If headroom is not 0, there is an offset between the beginning of the
         * data and the allocated space, otherwise the data and the allocated
         * space are aligned.
         *
         * Buffers with headroom use PAGE_SIZE as alloc size, see
         * add_recvbuf_mergeable() + get_mergeable_buf_len()
         */
-       if (whole_page) {
-               /* Buffers with whole_page use PAGE_SIZE as alloc size,
-                * see add_recvbuf_mergeable() + get_mergeable_buf_len()
-                */
-               truesize = PAGE_SIZE;
-
-               /* page maybe head page, so we should get the buf by p, not the
-                * page
-                */
-               tailroom = truesize - len - offset_in_page(p);
-               buf = (char *)((unsigned long)p & PAGE_MASK);
-       } else {
-               tailroom = truesize - len;
-               buf = p;
-       }
+       truesize = headroom ? PAGE_SIZE : truesize;
+       tailroom = truesize - len - headroom;
+       buf = p - headroom;
 
        len -= hdr_len;
        offset += hdr_padded_len;
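
With the whole_page flag replaced by the actual headroom, the buffer geometry falls out of plain arithmetic instead of PAGE_MASK tricks; a nonzero headroom currently implies a PAGE_SIZE allocation (see add_recvbuf_mergeable()). The layout being computed:

	/*
	 *  buf = p - headroom       p                p + len
	 *   |<----- headroom ----->|<----- len ----->|<-- tailroom -->|
	 *   |<---------------------- truesize ----------------------->|
	 *
	 * tailroom = truesize - len - headroom;
	 */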
@@ -978,7 +966,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                put_page(page);
                                head_skb = page_to_skb(vi, rq, xdp_page, offset,
                                                       len, PAGE_SIZE, false,
-                                                      metasize, true);
+                                                      metasize,
+                                                      VIRTIO_XDP_HEADROOM);
                                return head_skb;
                        }
                        break;
@@ -1029,7 +1018,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        rcu_read_unlock();
 
        head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-                              metasize, !!headroom);
+                              metasize, headroom);
        curr_skb = head_skb;
 
        if (unlikely(!curr_skb))
@@ -2208,14 +2197,14 @@ static int virtnet_set_channels(struct net_device *dev,
        if (vi->rq[0].xdp_prog)
                return -EINVAL;
 
-       get_online_cpus();
+       cpus_read_lock();
        err = _virtnet_set_queues(vi, queue_pairs);
        if (err) {
-               put_online_cpus();
+               cpus_read_unlock();
                goto err;
        }
        virtnet_set_affinity(vi);
-       put_online_cpus();
+       cpus_read_unlock();
 
        netif_set_real_num_tx_queues(dev, queue_pairs);
        netif_set_real_num_rx_queues(dev, queue_pairs);
@@ -2970,9 +2959,9 @@ static int init_vqs(struct virtnet_info *vi)
        if (ret)
                goto err_free;
 
-       get_online_cpus();
+       cpus_read_lock();
        virtnet_set_affinity(vi);
-       put_online_cpus();
+       cpus_read_unlock();
 
        return 0;
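
get_online_cpus()/put_online_cpus() were the legacy names for the CPU-hotplug read lock; both virtio-net call sites move to the current cpus_read_lock()/cpus_read_unlock() spelling. While the lock is held the set of online CPUs cannot change, which is what makes the affinity setup safe:

	int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu)
		configure_cpu(cpu);	/* hypothetical per-CPU setup; mask is stable */
	cpus_read_unlock();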
 
index 2b1b944..662e261 100644 (file)
@@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
        bool is_v6gw = false;
-       int ret = -EINVAL;
 
        nf_reset_ct(skb);
 
        /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
-               if (!skb2) {
-                       ret = -ENOMEM;
-                       goto err;
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb) {
+                       dev->stats.tx_errors++;
+                       return -ENOMEM;
                }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
 
        neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
+               int ret;
+
                sock_confirm_neigh(skb, neigh);
                /* if crossing protocols, can not use the cached header */
                ret = neigh_output(neigh, skb, is_v6gw);
@@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        }
 
        rcu_read_unlock_bh();
-err:
        vrf_tx_error(skb->dev, skb);
-       return ret;
+       return -EINVAL;
 }
 
 static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
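
The vrf rewrite drops the open-coded skb_realloc_headroom() + skb_set_owner_w() + consume_skb() dance in favor of skb_expand_head(), which grows the headroom, preserves socket ownership, and frees the original skb itself on failure, so the caller only has to handle NULL:

	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			dev->stats.tx_errors++;
			return -ENOMEM;	/* old skb already freed */
		}
	}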
index 473df25..592a838 100644 (file)
@@ -290,30 +290,6 @@ config SLIC_DS26522
          To compile this driver as a module, choose M here: the
          module will be called slic_ds26522.
 
-config DSCC4_PCISYNC
-       bool "Etinc PCISYNC features"
-       depends on DSCC4
-       help
-         Due to Etinc's design choice for its PCISYNC cards, some operations
-         are only allowed on specific ports of the DSCC4. This option is the
-         only way for the driver to know that it shouldn't return a success
-         code for these operations.
-
-         Please say Y if your card is an Etinc's PCISYNC.
-
-config DSCC4_PCI_RST
-       bool "Hard reset support"
-       depends on DSCC4
-       help
-         Various DSCC4 bugs forbid any reliable software reset of the ASIC.
-         As a replacement, some vendors provide a way to assert the PCI #RST
-         pin of DSCC4 through the GPIO port of the card. If you choose Y,
-         the driver will make use of this feature before module removal
-         (i.e. rmmod). The feature is known to be available on Commtech's
-         cards. Contact your manufacturer for details.
-
-         Say Y if your card supports this feature.
-
 config IXP4XX_HSS
        tristate "Intel IXP4xx HSS (synchronous serial port) support"
        depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR
@@ -337,33 +313,6 @@ config LAPBETHER
          To compile this driver as a module, choose M here: the
          module will be called lapbether.
 
-         If unsure, say N.
-
-config SBNI
-       tristate "Granch SBNI12 Leased Line adapter support"
-       depends on X86
-       help
-         Driver for ISA SBNI12-xx cards which are low cost alternatives to
-         leased line modems.
-
-         You can find more information and last versions of drivers and
-         utilities at <http://www.granch.ru/>. If you have any question you
-         can send email to <sbni@granch.ru>.
-
-         To compile this driver as a module, choose M here: the
-         module will be called sbni.
-
-         If unsure, say N.
-
-config SBNI_MULTILINE
-       bool "Multiple line feature support"
-       depends on SBNI
-       help
-         Schedule traffic for some parallel lines, via SBNI12 adapters.
-
-         If you have two computers connected with two parallel lines it's
-         possible to increase transfer rate nearly twice. You should have
-         a program named 'sbniconfig' to configure adapters.
 
          If unsure, say N.
 
index 081666c..f6b92ef 100644 (file)
@@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC)         += farsync.o
 obj-$(CONFIG_LANMEDIA)         += lmc/
 
 obj-$(CONFIG_LAPBETHER)                += lapbether.o
-obj-$(CONFIG_SBNI)             += sbni.o
 obj-$(CONFIG_N2)               += n2.o
 obj-$(CONFIG_C101)             += c101.o
 obj-$(CONFIG_WANXL)            += wanxl.o
index 15a7543..e985e54 100644 (file)
@@ -319,16 +319,18 @@ MODULE_DESCRIPTION("Modular driver for the Comtrol Hostess SV11");
 
 static struct z8530_dev *sv11_unit;
 
-int init_module(void)
+static int sv11_module_init(void)
 {
        sv11_unit = sv11_init(io, irq);
        if (!sv11_unit)
                return -ENODEV;
        return 0;
 }
+module_init(sv11_module_init);
 
-void cleanup_module(void)
+static void sv11_module_cleanup(void)
 {
        if (sv11_unit)
                sv11_shutdown(sv11_unit);
 }
+module_exit(sv11_module_cleanup);
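
Defining functions literally named init_module()/cleanup_module() is the ancient module style; registering static functions via module_init()/module_exit() keeps the symbols file-local. A minimal sketch with hypothetical demo names (adding the usual __init/__exit tags, which this particular patch does not):

	#include <linux/module.h>

	static int __init demo_init(void)
	{
		return 0;	/* a negative errno here aborts module load */
	}
	module_init(demo_init);

	static void __exit demo_exit(void)
	{
	}
	module_exit(demo_exit);

	MODULE_LICENSE("GPL");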
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
deleted file mode 100644 (file)
index 469fe97..0000000
+++ /dev/null
@@ -1,1639 +0,0 @@
-/* sbni.c:  Granch SBNI12 leased line adapters driver for linux
- *
- *     Written 2001 by Denis I.Timofeev (timofeev@granch.ru)
- *
- *     Previous versions were written by Yaroslav Polyakov,
- *     Alexey Zverev and Max Khon.
- *
- *     Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and
- *     double-channel, PCI and ISA modifications.
- *     More info and useful utilities to work with SBNI12 cards you can find
- *     at http://www.granch.com (English) or http://www.granch.ru (Russian)
- *
- *     This software may be used and distributed according to the terms
- *     of the GNU General Public License.
- *
- *
- *  5.0.1      Jun 22 2001
- *       - Fixed bug in probe
- *  5.0.0      Jun 06 2001
- *       - Driver was completely redesigned by Denis I.Timofeev,
- *       - now PCI/Dual, ISA/Dual (with single interrupt line) models are
- *       - supported
- *  3.3.0      Thu Feb 24 21:30:28 NOVT 2000 
- *        - PCI cards support
- *  3.2.0      Mon Dec 13 22:26:53 NOVT 1999
- *       - Completely rebuilt all the packet storage system
- *       -    to work in Ethernet-like style.
- *  3.1.1      just fixed some bugs (5 aug 1999)
- *  3.1.0      added balancing feature (26 apr 1999)
- *  3.0.1      just fixed some bugs (14 apr 1999).
- *  3.0.0      Initial Revision, Yaroslav Polyakov (24 Feb 1999)
- *        - added pre-calculation for CRC, fixed bug with "len-2" frames, 
- *        - removed outbound fragmentation (MTU=1000), written CRC-calculation 
- *        - on asm, added work with hard_headers and now we have our own cache 
- *        - for them, optionally supported word-interchange on some chipsets,
- * 
- *     Known problem: this driver wasn't tested on multiprocessor machine.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/fcntl.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/pci.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-
-#include <net/net_namespace.h>
-#include <net/arp.h>
-#include <net/Space.h>
-
-#include <asm/io.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-
-#include "sbni.h"
-
-/* device private data */
-
-struct net_local {
-       struct timer_list       watchdog;
-       struct net_device       *watchdog_dev;
-
-       spinlock_t      lock;
-       struct sk_buff  *rx_buf_p;              /* receive buffer ptr */
-       struct sk_buff  *tx_buf_p;              /* transmit buffer ptr */
-       
-       unsigned int    framelen;               /* current frame length */
-       unsigned int    maxframe;               /* maximum valid frame length */
-       unsigned int    state;
-       unsigned int    inppos, outpos;         /* positions in rx/tx buffers */
-
-       /* transmitting frame number - from frames qty to 1 */
-       unsigned int    tx_frameno;
-
-       /* expected number of next receiving frame */
-       unsigned int    wait_frameno;
-
-       /* count of failed attempts to frame send - 32 attempts do before
-          error - while receiver tunes on opposite side of wire */
-       unsigned int    trans_errors;
-
-       /* idle time; send pong when limit exceeded */
-       unsigned int    timer_ticks;
-
-       /* fields used for receive level autoselection */
-       int     delta_rxl;
-       unsigned int    cur_rxl_index, timeout_rxl;
-       unsigned long   cur_rxl_rcvd, prev_rxl_rcvd;
-
-       struct sbni_csr1        csr1;           /* current value of CSR1 */
-       struct sbni_in_stats    in_stats;       /* internal statistics */ 
-
-       struct net_device               *second;        /* for ISA/dual cards */
-
-#ifdef CONFIG_SBNI_MULTILINE
-       struct net_device               *master;
-       struct net_device               *link;
-#endif
-};
-
-
-static int  sbni_card_probe( unsigned long );
-static int  sbni_pci_probe( struct net_device  * );
-static struct net_device  *sbni_probe1(struct net_device *, unsigned long, int);
-static int  sbni_open( struct net_device * );
-static int  sbni_close( struct net_device * );
-static netdev_tx_t sbni_start_xmit(struct sk_buff *,
-                                        struct net_device * );
-static int  sbni_siocdevprivate(struct net_device *, struct ifreq *,
-                               void __user *, int);
-static void  set_multicast_list( struct net_device * );
-
-static irqreturn_t sbni_interrupt( int, void * );
-static void  handle_channel( struct net_device * );
-static int   recv_frame( struct net_device * );
-static void  send_frame( struct net_device * );
-static int   upload_data( struct net_device *,
-                         unsigned, unsigned, unsigned, u32 );
-static void  download_data( struct net_device *, u32 * );
-static void  sbni_watchdog(struct timer_list *);
-static void  interpret_ack( struct net_device *, unsigned );
-static int   append_frame_to_pkt( struct net_device *, unsigned, u32 );
-static void  indicate_pkt( struct net_device * );
-static void  card_start( struct net_device * );
-static void  prepare_to_send( struct sk_buff *, struct net_device * );
-static void  drop_xmit_queue( struct net_device * );
-static void  send_frame_header( struct net_device *, u32 * );
-static int   skip_tail( unsigned int, unsigned int, u32 );
-static int   check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * );
-static void  change_level( struct net_device * );
-static void  timeout_change_level( struct net_device * );
-static u32   calc_crc32( u32, u8 *, u32 );
-static struct sk_buff *  get_rx_buf( struct net_device * );
-static int  sbni_init( struct net_device * );
-
-#ifdef CONFIG_SBNI_MULTILINE
-static int  enslave( struct net_device *, struct net_device * );
-static int  emancipate( struct net_device * );
-#endif
-
-static const char  version[] =
-       "Granch SBNI12 driver ver 5.0.1  Jun 22 2001  Denis I.Timofeev.\n";
-
-static bool skip_pci_probe     __initdata = false;
-static int  scandone   __initdata = 0;
-static int  num                __initdata = 0;
-
-static unsigned char  rxl_tab[];
-static u32  crc32tab[];
-
-/* A list of all installed devices, for removing the driver module. */
-static struct net_device  *sbni_cards[ SBNI_MAX_NUM_CARDS ];
-
-/* Lists of device's parameters */
-static u32     io[   SBNI_MAX_NUM_CARDS ] __initdata =
-       { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32     irq[  SBNI_MAX_NUM_CARDS ] __initdata;
-static u32     baud[ SBNI_MAX_NUM_CARDS ] __initdata;
-static u32     rxl[  SBNI_MAX_NUM_CARDS ] __initdata =
-       { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32     mac[  SBNI_MAX_NUM_CARDS ] __initdata;
-
-#ifndef MODULE
-typedef u32  iarr[];
-static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac };
-#endif
-
-/* A zero-terminated list of I/O addresses to be probed on ISA bus */
-static unsigned int  netcard_portlist[ ] __initdata = { 
-       0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254,
-       0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4,
-       0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4,
-       0 };
-
-#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock)
-
-/*
- * Look for SBNI card which addr stored in dev->base_addr, if nonzero.
- * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA.
- */
-
-static inline int __init
-sbni_isa_probe( struct net_device  *dev )
-{
-       if( dev->base_addr > 0x1ff &&
-           request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) &&
-           sbni_probe1( dev, dev->base_addr, dev->irq ) )
-
-               return  0;
-       else {
-               pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n",
-                      dev->base_addr);
-               return  -ENODEV;
-       }
-}
-
-static const struct net_device_ops sbni_netdev_ops = {
-       .ndo_open               = sbni_open,
-       .ndo_stop               = sbni_close,
-       .ndo_start_xmit         = sbni_start_xmit,
-       .ndo_set_rx_mode        = set_multicast_list,
-       .ndo_siocdevprivate     = sbni_siocdevprivate,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static void __init sbni_devsetup(struct net_device *dev)
-{
-       ether_setup( dev );
-       dev->netdev_ops = &sbni_netdev_ops;
-}
-
-int __init sbni_probe(int unit)
-{
-       struct net_device *dev;
-       int err;
-
-       dev = alloc_netdev(sizeof(struct net_local), "sbni",
-                          NET_NAME_UNKNOWN, sbni_devsetup);
-       if (!dev)
-               return -ENOMEM;
-
-       dev->netdev_ops = &sbni_netdev_ops;
-
-       sprintf(dev->name, "sbni%d", unit);
-       netdev_boot_setup_check(dev);
-
-       err = sbni_init(dev);
-       if (err) {
-               free_netdev(dev);
-               return err;
-       }
-
-       err = register_netdev(dev);
-       if (err) {
-               release_region( dev->base_addr, SBNI_IO_EXTENT );
-               free_netdev(dev);
-               return err;
-       }
-       pr_info_once("%s", version);
-       return 0;
-}
-
-static int __init sbni_init(struct net_device *dev)
-{
-       int  i;
-       if( dev->base_addr )
-               return  sbni_isa_probe( dev );
-       /* otherwise we have to perform search our adapter */
-
-       if( io[ num ] != -1 ) {
-               dev->base_addr  = io[ num ];
-               dev->irq        = irq[ num ];
-       } else if( scandone  ||  io[ 0 ] != -1 ) {
-               return  -ENODEV;
-       }
-
-       /* if io[ num ] contains non-zero address, then that is on ISA bus */
-       if( dev->base_addr )
-               return  sbni_isa_probe( dev );
-
-       /* ...otherwise - scan PCI first */
-       if( !skip_pci_probe  &&  !sbni_pci_probe( dev ) )
-               return  0;
-
-       if( io[ num ] == -1 ) {
-               /* Auto-scan will be stopped when first ISA card were found */
-               scandone = 1;
-               if( num > 0 )
-                       return  -ENODEV;
-       }
-
-       for( i = 0;  netcard_portlist[ i ];  ++i ) {
-               int  ioaddr = netcard_portlist[ i ];
-               if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) &&
-                   sbni_probe1( dev, ioaddr, 0 ))
-                       return 0;
-       }
-
-       return  -ENODEV;
-}
-
-
-static int __init
-sbni_pci_probe( struct net_device  *dev )
-{
-       struct pci_dev  *pdev = NULL;
-
-       while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev ))
-              != NULL ) {
-               int  pci_irq_line;
-               unsigned long  pci_ioaddr;
-
-               if( pdev->vendor != SBNI_PCI_VENDOR &&
-                   pdev->device != SBNI_PCI_DEVICE )
-                       continue;
-
-               pci_ioaddr = pci_resource_start( pdev, 0 );
-               pci_irq_line = pdev->irq;
-
-               /* Avoid already found cards from previous calls */
-               if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) {
-                       if (pdev->subsystem_device != 2)
-                               continue;
-
-                       /* Dual adapter is present */
-                       if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT,
-                                                       dev->name ) )
-                               continue;
-               }
-
-               if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
-                       pr_warn(
-"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n"
-"You should use the PCI BIOS setup to assign a valid IRQ line.\n",
-                               pci_irq_line );
-
-               /* avoiding re-enable dual adapters */
-               if( (pci_ioaddr & 7) == 0  &&  pci_enable_device( pdev ) ) {
-                       release_region( pci_ioaddr, SBNI_IO_EXTENT );
-                       pci_dev_put( pdev );
-                       return  -EIO;
-               }
-               if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) {
-                       SET_NETDEV_DEV(dev, &pdev->dev);
-                       /* not the best thing to do, but this is all messed up 
-                          for hotplug systems anyway... */
-                       pci_dev_put( pdev );
-                       return  0;
-               }
-       }
-       return  -ENODEV;
-}
-
-
-static struct net_device * __init
-sbni_probe1( struct net_device  *dev,  unsigned long  ioaddr,  int  irq )
-{
-       struct net_local  *nl;
-
-       if( sbni_card_probe( ioaddr ) ) {
-               release_region( ioaddr, SBNI_IO_EXTENT );
-               return NULL;
-       }
-
-       outb( 0, ioaddr + CSR0 );
-
-       if( irq < 2 ) {
-               unsigned long irq_mask;
-
-               irq_mask = probe_irq_on();
-               outb( EN_INT | TR_REQ, ioaddr + CSR0 );
-               outb( PR_RES, ioaddr + CSR1 );
-               mdelay(50);
-               irq = probe_irq_off(irq_mask);
-               outb( 0, ioaddr + CSR0 );
-
-               if( !irq ) {
-                       pr_err("%s: can't detect device irq!\n", dev->name);
-                       release_region( ioaddr, SBNI_IO_EXTENT );
-                       return NULL;
-               }
-       } else if( irq == 2 )
-               irq = 9;
-
-       dev->irq = irq;
-       dev->base_addr = ioaddr;
-
-       /* Fill in sbni-specific dev fields. */
-       nl = netdev_priv(dev);
-       if( !nl ) {
-               pr_err("%s: unable to get memory!\n", dev->name);
-               release_region( ioaddr, SBNI_IO_EXTENT );
-               return NULL;
-       }
-
-       memset( nl, 0, sizeof(struct net_local) );
-       spin_lock_init( &nl->lock );
-
-       /* store MAC address (generate if that isn't known) */
-       *(__be16 *)dev->dev_addr = htons( 0x00ff );
-       *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 |
-               ((mac[num] ?
-               mac[num] :
-               (u32)((long)netdev_priv(dev))) & 0x00ffffff));
-
-       /* store link settings (speed, receive level ) */
-       nl->maxframe  = DEFAULT_FRAME_LEN;
-       nl->csr1.rate = baud[ num ];
-
-       if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) {
-               /* autotune rxl */
-               nl->cur_rxl_index = DEF_RXL;
-               nl->delta_rxl = DEF_RXL_DELTA;
-       } else {
-               nl->delta_rxl = 0;
-       }
-       nl->csr1.rxl  = rxl_tab[ nl->cur_rxl_index ];
-       if( inb( ioaddr + CSR0 ) & 0x01 )
-               nl->state |= FL_SLOW_MODE;
-
-       pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n",
-                 dev->name, dev->base_addr, dev->irq,
-                 ((u8 *)dev->dev_addr)[3],
-                 ((u8 *)dev->dev_addr)[4],
-                 ((u8 *)dev->dev_addr)[5]);
-
-       pr_notice("%s: speed %d",
-                 dev->name,
-                 ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000)
-                 / (1 << nl->csr1.rate));
-
-       if( nl->delta_rxl == 0 )
-               pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index);
-       else
-               pr_cont(", receive level (auto)\n");
-
-#ifdef CONFIG_SBNI_MULTILINE
-       nl->master = dev;
-       nl->link   = NULL;
-#endif
-   
-       sbni_cards[ num++ ] = dev;
-       return  dev;
-}
-
-/* -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_device  *p;
-
-       netif_stop_queue( dev );
-
-       /* Looking for idle device in the list */
-       for( p = dev;  p; ) {
-               struct net_local  *nl = netdev_priv(p);
-               spin_lock( &nl->lock );
-               if( nl->tx_buf_p  ||  (nl->state & FL_LINE_DOWN) ) {
-                       p = nl->link;
-                       spin_unlock( &nl->lock );
-               } else {
-                       /* Idle dev is found */
-                       prepare_to_send( skb, p );
-                       spin_unlock( &nl->lock );
-                       netif_start_queue( dev );
-                       return NETDEV_TX_OK;
-               }
-       }
-
-       return NETDEV_TX_BUSY;
-}
-
-#else  /* CONFIG_SBNI_MULTILINE */
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-
-       netif_stop_queue( dev );
-       spin_lock( &nl->lock );
-
-       prepare_to_send( skb, dev );
-
-       spin_unlock( &nl->lock );
-       return NETDEV_TX_OK;
-}
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-/* -------------------------------------------------------------------------- */
-
-/* interrupt handler */
-
-/*
- *     SBNI12D-10, -11/ISA boards within "common interrupt" mode could not
- * be looked as two independent single-channel devices. Every channel seems
- * as Ethernet interface but interrupt handler must be common. Really, first
- * channel ("master") driver only registers the handler. In its struct net_local
- * it has got pointer to "slave" channel's struct net_local and handles that's
- * interrupts too.
- *     dev of successfully attached ISA SBNI boards is linked to list.
- * While next board driver is initialized, it scans this list. If one
- * has found dev with same irq and ioaddr different by 4 then it assumes
- * this board to be "master".
- */ 
-
-static irqreturn_t
-sbni_interrupt( int  irq,  void  *dev_id )
-{
-       struct net_device         *dev = dev_id;
-       struct net_local  *nl  = netdev_priv(dev);
-       int     repeat;
-
-       spin_lock( &nl->lock );
-       if( nl->second )
-               spin_lock(&NET_LOCAL_LOCK(nl->second));
-
-       do {
-               repeat = 0;
-               if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) {
-                       handle_channel( dev );
-                       repeat = 1;
-               }
-               if( nl->second  &&      /* second channel present */
-                   (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) {
-                       handle_channel( nl->second );
-                       repeat = 1;
-               }
-       } while( repeat );
-
-       if( nl->second )
-               spin_unlock(&NET_LOCAL_LOCK(nl->second));
-       spin_unlock( &nl->lock );
-       return IRQ_HANDLED;
-}
-
-
-static void
-handle_channel( struct net_device  *dev )
-{
-       struct net_local        *nl    = netdev_priv(dev);
-       unsigned long           ioaddr = dev->base_addr;
-
-       int  req_ans;
-       unsigned char  csr0;
-
-#ifdef CONFIG_SBNI_MULTILINE
-       /* Lock the master device because we going to change its local data */
-       if( nl->state & FL_SLAVE )
-               spin_lock(&NET_LOCAL_LOCK(nl->master));
-#endif
-
-       outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 );
-
-       nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
-       for(;;) {
-               csr0 = inb( ioaddr + CSR0 );
-               if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 )
-                       break;
-
-               req_ans = !(nl->state & FL_PREV_OK);
-
-               if( csr0 & RC_RDY )
-                       req_ans = recv_frame( dev );
-
-               /*
-                * TR_RDY always equals 1 here because we have owned the marker,
-                * and we set TR_REQ when disabled interrupts
-                */
-               csr0 = inb( ioaddr + CSR0 );
-               if( !(csr0 & TR_RDY)  ||  (csr0 & RC_RDY) )
-                       netdev_err(dev, "internal error!\n");
-
-               /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */
-               if( req_ans  ||  nl->tx_frameno != 0 )
-                       send_frame( dev );
-               else
-                       /* send marker without any data */
-                       outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 );
-       }
-
-       outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 );
-
-#ifdef CONFIG_SBNI_MULTILINE
-       if( nl->state & FL_SLAVE )
-               spin_unlock(&NET_LOCAL_LOCK(nl->master));
-#endif
-}
-
-
-/*
- * Routine returns 1 if it needs to acknowledge received frame.
- * Empty frame received without errors won't be acknowledged.
- */
-
-static int
-recv_frame( struct net_device  *dev )
-{
-       struct net_local  *nl   = netdev_priv(dev);
-       unsigned long  ioaddr   = dev->base_addr;
-
-       u32  crc = CRC32_INITIAL;
-
-       unsigned  framelen = 0, frameno, ack;
-       unsigned  is_first, frame_ok = 0;
-
-       if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) {
-               frame_ok = framelen > 4
-                       ?  upload_data( dev, framelen, frameno, is_first, crc )
-                       :  skip_tail( ioaddr, framelen, crc );
-               if( frame_ok )
-                       interpret_ack( dev, ack );
-       }
-
-       outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 );
-       if( frame_ok ) {
-               nl->state |= FL_PREV_OK;
-               if( framelen > 4 )
-                       nl->in_stats.all_rx_number++;
-       } else {
-               nl->state &= ~FL_PREV_OK;
-               change_level( dev );
-               nl->in_stats.all_rx_number++;
-               nl->in_stats.bad_rx_number++;
-       }
-
-       return  !frame_ok  ||  framelen > 4;
-}
-
-
-static void
-send_frame( struct net_device  *dev )
-{
-       struct net_local  *nl    = netdev_priv(dev);
-
-       u32  crc = CRC32_INITIAL;
-
-       if( nl->state & FL_NEED_RESEND ) {
-
-               /* if frame was sended but not ACK'ed - resend it */
-               if( nl->trans_errors ) {
-                       --nl->trans_errors;
-                       if( nl->framelen != 0 )
-                               nl->in_stats.resend_tx_number++;
-               } else {
-                       /* cannot xmit with many attempts */
-#ifdef CONFIG_SBNI_MULTILINE
-                       if( (nl->state & FL_SLAVE)  ||  nl->link )
-#endif
-                       nl->state |= FL_LINE_DOWN;
-                       drop_xmit_queue( dev );
-                       goto  do_send;
-               }
-       } else
-               nl->trans_errors = TR_ERROR_COUNT;
-
-       send_frame_header( dev, &crc );
-       nl->state |= FL_NEED_RESEND;
-       /*
-        * FL_NEED_RESEND will be cleared after ACK, but if empty
-        * frame sended then in prepare_to_send next frame
-        */
-
-
-       if( nl->framelen ) {
-               download_data( dev, &crc );
-               nl->in_stats.all_tx_number++;
-               nl->state |= FL_WAIT_ACK;
-       }
-
-       outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc );
-
-do_send:
-       outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 );
-
-       if( nl->tx_frameno )
-               /* next frame exists - we request card to send it */
-               outb( inb( dev->base_addr + CSR0 ) | TR_REQ,
-                     dev->base_addr + CSR0 );
-}
-
-
-/*
- * Write the frame data into adapter's buffer memory, and calculate CRC.
- * Do padding if necessary.
- */
-
-static void
-download_data( struct net_device  *dev,  u32  *crc_p )
-{
-       struct net_local  *nl    = netdev_priv(dev);
-       struct sk_buff    *skb   = nl->tx_buf_p;
-
-       unsigned  len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen);
-
-       outsb( dev->base_addr + DAT, skb->data + nl->outpos, len );
-       *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len );
-
-       /* if packet too short we should write some more bytes to pad */
-       for( len = nl->framelen - len;  len--; ) {
-               outb( 0, dev->base_addr + DAT );
-               *crc_p = CRC32( 0, *crc_p );
-       }
-}
-
-
-static int
-upload_data( struct net_device  *dev,  unsigned  framelen,  unsigned  frameno,
-            unsigned  is_first,  u32  crc )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       int  frame_ok;
-
-       if( is_first ) {
-               nl->wait_frameno = frameno;
-               nl->inppos = 0;
-       }
-
-       if( nl->wait_frameno == frameno ) {
-
-               if( nl->inppos + framelen  <=  ETHER_MAX_LEN )
-                       frame_ok = append_frame_to_pkt( dev, framelen, crc );
-
-               /*
-                * if CRC is right but framelen incorrect then transmitter
-                * error was occurred... drop entire packet
-                */
-               else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc ))
-                        != 0 ) {
-                       nl->wait_frameno = 0;
-                       nl->inppos = 0;
-#ifdef CONFIG_SBNI_MULTILINE
-                       nl->master->stats.rx_errors++;
-                       nl->master->stats.rx_missed_errors++;
-#else
-                       dev->stats.rx_errors++;
-                       dev->stats.rx_missed_errors++;
-#endif
-               }
-                       /* now skip all frames until is_first != 0 */
-       } else
-               frame_ok = skip_tail( dev->base_addr, framelen, crc );
-
-       if( is_first  &&  !frame_ok ) {
-               /*
-                * Frame has been broken, but we had already stored
-                * is_first... Drop entire packet.
-                */
-               nl->wait_frameno = 0;
-#ifdef CONFIG_SBNI_MULTILINE
-               nl->master->stats.rx_errors++;
-               nl->master->stats.rx_crc_errors++;
-#else
-               dev->stats.rx_errors++;
-               dev->stats.rx_crc_errors++;
-#endif
-       }
-
-       return  frame_ok;
-}
-
-
-static inline void
-send_complete( struct net_device *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-#ifdef CONFIG_SBNI_MULTILINE
-       nl->master->stats.tx_packets++;
-       nl->master->stats.tx_bytes += nl->tx_buf_p->len;
-#else
-       dev->stats.tx_packets++;
-       dev->stats.tx_bytes += nl->tx_buf_p->len;
-#endif
-       dev_consume_skb_irq(nl->tx_buf_p);
-
-       nl->tx_buf_p = NULL;
-
-       nl->outpos = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-       nl->framelen   = 0;
-}
-
-
-static void
-interpret_ack( struct net_device  *dev,  unsigned  ack )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( ack == FRAME_SENT_OK ) {
-               nl->state &= ~FL_NEED_RESEND;
-
-               if( nl->state & FL_WAIT_ACK ) {
-                       nl->outpos += nl->framelen;
-
-                       if( --nl->tx_frameno ) {
-                               nl->framelen = min_t(unsigned int,
-                                                  nl->maxframe,
-                                                  nl->tx_buf_p->len - nl->outpos);
-                       } else {
-                               send_complete( dev );
-#ifdef CONFIG_SBNI_MULTILINE
-                               netif_wake_queue( nl->master );
-#else
-                               netif_wake_queue( dev );
-#endif
-                       }
-               }
-       }
-
-       nl->state &= ~FL_WAIT_ACK;
-}
-
-
-/*
- * Glue received frame with previous fragments of packet.
- * Indicate packet when last frame would be accepted.
- */
-
-static int
-append_frame_to_pkt( struct net_device  *dev,  unsigned  framelen,  u32  crc )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       u8  *p;
-
-       if( nl->inppos + framelen  >  ETHER_MAX_LEN )
-               return  0;
-
-       if( !nl->rx_buf_p  &&  !(nl->rx_buf_p = get_rx_buf( dev )) )
-               return  0;
-
-       p = nl->rx_buf_p->data + nl->inppos;
-       insb( dev->base_addr + DAT, p, framelen );
-       if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER )
-               return  0;
-
-       nl->inppos += framelen - 4;
-       if( --nl->wait_frameno == 0 )           /* last frame received */
-               indicate_pkt( dev );
-
-       return  1;
-}
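-
-/*
- * Note that the four trailing CRC octets are read into the rx buffer
- * together with the data (they must pass through calc_crc32() for the
- * residue check), but inppos advances by framelen - 4, so the next
- * fragment overwrites them and they are never delivered.
- */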
-
-
-/*
- * Prepare to start output on the adapter.
- * The transmitter is actually activated once the marker is accepted.
- */
-
-static void
-prepare_to_send( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       unsigned int  len;
-
-       /* nl->tx_buf_p == NULL here! */
-       if( nl->tx_buf_p )
-               netdev_err(dev, "memory leak!\n");
-
-       nl->outpos = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-
-       len = skb->len;
-       if( len < SBNI_MIN_LEN )
-               len = SBNI_MIN_LEN;
-
-       nl->tx_buf_p    = skb;
-       nl->tx_frameno  = DIV_ROUND_UP(len, nl->maxframe);
-       nl->framelen    = len < nl->maxframe  ?  len  :  nl->maxframe;
-
-       outb( inb( dev->base_addr + CSR0 ) | TR_REQ,  dev->base_addr + CSR0 );
-#ifdef CONFIG_SBNI_MULTILINE
-       netif_trans_update(nl->master);
-#else
-       netif_trans_update(dev);
-#endif
-}
-
-
-static void
-drop_xmit_queue( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->tx_buf_p ) {
-               dev_kfree_skb_any( nl->tx_buf_p );
-               nl->tx_buf_p = NULL;
-#ifdef CONFIG_SBNI_MULTILINE
-               nl->master->stats.tx_errors++;
-               nl->master->stats.tx_carrier_errors++;
-#else
-               dev->stats.tx_errors++;
-               dev->stats.tx_carrier_errors++;
-#endif
-       }
-
-       nl->tx_frameno  = 0;
-       nl->framelen    = 0;
-       nl->outpos      = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-#ifdef CONFIG_SBNI_MULTILINE
-       netif_start_queue( nl->master );
-       netif_trans_update(nl->master);
-#else
-       netif_start_queue( dev );
-       netif_trans_update(dev);
-#endif
-}
-
-
-static void
-send_frame_header( struct net_device  *dev,  u32  *crc_p )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-
-       u32  crc = *crc_p;
-       u32  len_field = nl->framelen + 6;      /* CRC + frameno + reserved */
-       u8   value;
-
-       if( nl->state & FL_NEED_RESEND )
-               len_field |= FRAME_RETRY;       /* non-first attempt... */
-
-       if( nl->outpos == 0 )
-               len_field |= FRAME_FIRST;
-
-       len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD;
-       outb( SBNI_SIG, dev->base_addr + DAT );
-
-       value = (u8) len_field;
-       outb( value, dev->base_addr + DAT );
-       crc = CRC32( value, crc );
-       value = (u8) (len_field >> 8);
-       outb( value, dev->base_addr + DAT );
-       crc = CRC32( value, crc );
-
-       outb( nl->tx_frameno, dev->base_addr + DAT );
-       crc = CRC32( nl->tx_frameno, crc );
-       outb( 0, dev->base_addr + DAT );
-       crc = CRC32( 0, crc );
-       *crc_p = crc;
-}
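-
-/*
- * Octet sequence emitted above, as check_fhdr() reads it back on the
- * peer: SBNI_SIG, len_field low byte, len_field high byte, frame
- * number, reserved zero.  len_field packs the data length together with
- * FRAME_FIRST/FRAME_RETRY and the piggy-backed FRAME_SENT_OK or
- * FRAME_SENT_BAD ack for the opposite direction.
- */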
-
-
-/*
- * If the frame tail is not needed (wrong frame number or received
- * twice), it is not stored, but the CRC is still calculated.
- */
-
-static int
-skip_tail( unsigned int  ioaddr,  unsigned int  tail_len,  u32 crc )
-{
-       while( tail_len-- )
-               crc = CRC32( inb( ioaddr + DAT ), crc );
-
-       return  crc == CRC32_REMAINDER;
-}
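-
-/*
- * The comparison above relies on the CRC-32 residue property: when the
- * four trailing CRC octets of a frame are folded into the same running
- * CRC as the data, an undamaged frame always leaves the constant
- * CRC32_REMAINDER (0x2144DF1C), so the check never has to extract the
- * transmitted CRC value itself.  append_frame_to_pkt() does the same.
- */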
-
-
-/*
- * Preliminarily checks that the frame header is correct, folds it into
- * the running CRC and splits it into its simple fields.
- */
-
-static int
-check_fhdr( u32  ioaddr,  u32  *framelen,  u32  *frameno,  u32  *ack,
-           u32  *is_first,  u32  *crc_p )
-{
-       u32  crc = *crc_p;
-       u8   value;
-
-       if( inb( ioaddr + DAT ) != SBNI_SIG )
-               return  0;
-
-       value = inb( ioaddr + DAT );
-       *framelen = (u32)value;
-       crc = CRC32( value, crc );
-       value = inb( ioaddr + DAT );
-       *framelen |= ((u32)value) << 8;
-       crc = CRC32( value, crc );
-
-       *ack = *framelen & FRAME_ACK_MASK;
-       *is_first = (*framelen & FRAME_FIRST) != 0;
-
-       if( (*framelen &= FRAME_LEN_MASK) < 6 ||
-           *framelen > SBNI_MAX_FRAME - 3 )
-               return  0;
-
-       value = inb( ioaddr + DAT );
-       *frameno = (u32)value;
-       crc = CRC32( value, crc );
-
-       crc = CRC32( inb( ioaddr + DAT ), crc );        /* reserved byte */
-       *framelen -= 2;
-
-       *crc_p = crc;
-       return  1;
-}
-
-
-static struct sk_buff *
-get_rx_buf( struct net_device  *dev )
-{
-       /* +2 is to compensate for the alignment fixup below */
-       struct sk_buff  *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 );
-       if( !skb )
-               return  NULL;
-
-       skb_reserve( skb, 2 );          /* Align IP on longword boundaries */
-       return  skb;
-}
-
-
-static void
-indicate_pkt( struct net_device  *dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-       struct sk_buff    *skb = nl->rx_buf_p;
-
-       skb_put( skb, nl->inppos );
-
-#ifdef CONFIG_SBNI_MULTILINE
-       skb->protocol = eth_type_trans( skb, nl->master );
-       netif_rx( skb );
-       ++nl->master->stats.rx_packets;
-       nl->master->stats.rx_bytes += nl->inppos;
-#else
-       skb->protocol = eth_type_trans( skb, dev );
-       netif_rx( skb );
-       ++dev->stats.rx_packets;
-       dev->stats.rx_bytes += nl->inppos;
-#endif
-       nl->rx_buf_p = NULL;    /* protocol driver will clear this sk_buff */
-}
-
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Periodically checks wire activity and regenerates the marker if the
- * connection has been inactive for a long time.
- */
-
-static void
-sbni_watchdog(struct timer_list *t)
-{
-       struct net_local   *nl  = from_timer(nl, t, watchdog);
-       struct net_device  *dev = nl->watchdog_dev;
-       unsigned long      flags;
-       unsigned char      csr0;
-
-       spin_lock_irqsave( &nl->lock, flags );
-
-       csr0 = inb( dev->base_addr + CSR0 );
-       if( csr0 & RC_CHK ) {
-
-               if( nl->timer_ticks ) {
-                       if( csr0 & (RC_RDY | BU_EMP) )
-                               /* receiver is not active */
-                               nl->timer_ticks--;
-               } else {
-                       nl->in_stats.timeout_number++;
-                       if( nl->delta_rxl )
-                               timeout_change_level( dev );
-
-                       outb( *(u_char *)&nl->csr1 | PR_RES,
-                             dev->base_addr + CSR1 );
-                       csr0 = inb( dev->base_addr + CSR0 );
-               }
-       } else
-               nl->state &= ~FL_LINE_DOWN;
-
-       outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); 
-
-       mod_timer(t, jiffies + SBNI_TIMEOUT);
-
-       spin_unlock_irqrestore( &nl->lock, flags );
-}
-
-
-static unsigned char  rxl_tab[] = {
-       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
-       0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f
-};
-
-#define SIZE_OF_TIMEOUT_RXL_TAB 4
-static unsigned char  timeout_rxl_tab[] = {
-       0x03, 0x05, 0x08, 0x0b
-};
-
-/* -------------------------------------------------------------------------- */
-
-static void
-card_start( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-       nl->state |= FL_PREV_OK;
-
-       nl->inppos = nl->outpos = 0;
-       nl->wait_frameno = 0;
-       nl->tx_frameno   = 0;
-       nl->framelen     = 0;
-
-       outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
-       outb( EN_INT, dev->base_addr + CSR0 );
-}
-
-/* -------------------------------------------------------------------------- */
-
-/* Receive level auto-selection */
-
-static void
-change_level( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->delta_rxl == 0 )        /* do not auto-negotiate RxL */
-               return;
-
-       if( nl->cur_rxl_index == 0 )
-               nl->delta_rxl = 1;
-       else if( nl->cur_rxl_index == 15 )
-               nl->delta_rxl = -1;
-       else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd )
-               nl->delta_rxl = -nl->delta_rxl;
-
-       nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ];
-       inb( dev->base_addr + CSR0 );   /* needed for PCI cards */
-       outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 );
-
-       nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
-       nl->cur_rxl_rcvd  = 0;
-}
-
-
-static void
-timeout_change_level( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ];
-       if( ++nl->timeout_rxl >= 4 )
-               nl->timeout_rxl = 0;
-
-       nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
-       inb( dev->base_addr + CSR0 );
-       outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 );
-
-       nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
-       nl->cur_rxl_rcvd  = 0;
-}
-
-/* -------------------------------------------------------------------------- */
-
-/*
- *     Open/initialize the board.
- */
-
-static int
-sbni_open( struct net_device  *dev )
-{
-       struct net_local        *nl = netdev_priv(dev);
-       struct timer_list       *w  = &nl->watchdog;
-
-       /*
-        * For dual ISA adapters in "common irq" mode, we have to
-        * determine whether the primary or secondary channel is being
-        * initialized, and install the irq handler only in the first case.
-        */
-       if( dev->base_addr < 0x400 ) {          /* ISA only */
-               struct net_device  **p = sbni_cards;
-               for( ;  *p  &&  p < sbni_cards + SBNI_MAX_NUM_CARDS;  ++p )
-                       if( (*p)->irq == dev->irq &&
-                           ((*p)->base_addr == dev->base_addr + 4 ||
-                            (*p)->base_addr == dev->base_addr - 4) &&
-                           (*p)->flags & IFF_UP ) {
-
-                               ((struct net_local *) (netdev_priv(*p)))
-                                       ->second = dev;
-                               netdev_notice(dev, "using shared irq with %s\n",
-                                             (*p)->name);
-                               nl->state |= FL_SECONDARY;
-                               goto  handler_attached;
-                       }
-       }
-
-       if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) {
-               netdev_err(dev, "unable to get IRQ %d\n", dev->irq);
-               return  -EAGAIN;
-       }
-
-handler_attached:
-
-       spin_lock( &nl->lock );
-       memset( &dev->stats, 0, sizeof(struct net_device_stats) );
-       memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-
-       card_start( dev );
-
-       netif_start_queue( dev );
-
-       /* set timer watchdog */
-       nl->watchdog_dev = dev;
-       timer_setup(w, sbni_watchdog, 0);
-       w->expires      = jiffies + SBNI_TIMEOUT;
-       add_timer( w );
-   
-       spin_unlock( &nl->lock );
-       return 0;
-}
-
-
-static int
-sbni_close( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->second  &&  nl->second->flags & IFF_UP ) {
-               netdev_notice(dev, "Secondary channel (%s) is active!\n",
-                             nl->second->name);
-               return  -EBUSY;
-       }
-
-#ifdef CONFIG_SBNI_MULTILINE
-       if( nl->state & FL_SLAVE )
-               emancipate( dev );
-       else
-               while( nl->link )       /* it's master device! */
-                       emancipate( nl->link );
-#endif
-
-       spin_lock( &nl->lock );
-
-       nl->second = NULL;
-       drop_xmit_queue( dev ); 
-       netif_stop_queue( dev );
-   
-       del_timer( &nl->watchdog );
-
-       outb( 0, dev->base_addr + CSR0 );
-
-       if( !(nl->state & FL_SECONDARY) )
-               free_irq( dev->irq, dev );
-       nl->state &= FL_SECONDARY;
-
-       spin_unlock( &nl->lock );
-       return 0;
-}
-
-
-/*
-       Valid combinations in CSR0 (for probing):
-
-       VALID_DECODER   0000,0011,1011,1010
-
-                                       ; 0   ; -
-                               TR_REQ  ; 1   ; +
-                       TR_RDY          ; 2   ; -
-                       TR_RDY  TR_REQ  ; 3   ; +
-               BU_EMP                  ; 4   ; +
-               BU_EMP          TR_REQ  ; 5   ; +
-               BU_EMP  TR_RDY          ; 6   ; -
-               BU_EMP  TR_RDY  TR_REQ  ; 7   ; +
-       RC_RDY                          ; 8   ; +
-       RC_RDY                  TR_REQ  ; 9   ; +
-       RC_RDY          TR_RDY          ; 10  ; -
-       RC_RDY          TR_RDY  TR_REQ  ; 11  ; -
-       RC_RDY  BU_EMP                  ; 12  ; -
-       RC_RDY  BU_EMP          TR_REQ  ; 13  ; -
-       RC_RDY  BU_EMP  TR_RDY          ; 14  ; -
-       RC_RDY  BU_EMP  TR_RDY  TR_REQ  ; 15  ; -
-*/
-
-#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200)
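-
-/*
- * Each set bit of VALID_DECODER marks one plausible value of CSR0's
- * upper nibble from the table above: 0x02 + 0x08 + 0x10 + 0x20 + 0x80 +
- * 0x100 + 0x200 sets bits 1, 3, 4, 5, 7, 8 and 9, exactly the rows
- * flagged '+'.  sbni_card_probe() tests it with (1 << (csr0 >> 4)).
- */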
-
-
-static int
-sbni_card_probe( unsigned long  ioaddr )
-{
-       unsigned char  csr0;
-
-       csr0 = inb( ioaddr + CSR0 );
-       if( csr0 != 0xff  &&  csr0 != 0x00 ) {
-               csr0 &= ~EN_INT;
-               if( csr0 & BU_EMP )
-                       csr0 |= EN_INT;
-      
-               if( VALID_DECODER & (1 << (csr0 >> 4)) )
-                       return  0;
-       }
-   
-       return  -ENODEV;
-}
-
-/* -------------------------------------------------------------------------- */
-
-static int
-sbni_siocdevprivate(struct net_device  *dev,  struct ifreq  *ifr, void __user *data, int  cmd)
-{
-       struct net_local  *nl = netdev_priv(dev);
-       struct sbni_flags  flags;
-       int  error = 0;
-
-#ifdef CONFIG_SBNI_MULTILINE
-       struct net_device  *slave_dev;
-       char  slave_name[ 8 ];
-#endif
-  
-       switch( cmd ) {
-       case  SIOCDEVGETINSTATS :
-               if (copy_to_user(data, &nl->in_stats,
-                                sizeof(struct sbni_in_stats)))
-                       error = -EFAULT;
-               break;
-
-       case  SIOCDEVRESINSTATS :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-               memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-               break;
-
-       case  SIOCDEVGHWSTATE :
-               flags.mac_addr  = *(u32 *)(dev->dev_addr + 3);
-               flags.rate      = nl->csr1.rate;
-               flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0;
-               flags.rxl       = nl->cur_rxl_index;
-               flags.fixed_rxl = nl->delta_rxl == 0;
-
-               if (copy_to_user(data, &flags, sizeof(flags)))
-                       error = -EFAULT;
-               break;
-
-       case  SIOCDEVSHWSTATE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               spin_lock( &nl->lock );
-               flags = *(struct sbni_flags*) &ifr->ifr_ifru;
-               if( flags.fixed_rxl ) {
-                       nl->delta_rxl = 0;
-                       nl->cur_rxl_index = flags.rxl;
-               } else {
-                       nl->delta_rxl = DEF_RXL_DELTA;
-                       nl->cur_rxl_index = DEF_RXL;
-               }
-
-               nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
-               nl->csr1.rate = flags.rate;
-               outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
-               spin_unlock( &nl->lock );
-               break;
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-       case  SIOCDEVENSLAVE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               if (copy_from_user(slave_name, data, sizeof(slave_name)))
-                       return -EFAULT;
-               slave_dev = dev_get_by_name(&init_net, slave_name );
-               if( !slave_dev  ||  !(slave_dev->flags & IFF_UP) ) {
-                       netdev_err(dev, "trying to enslave non-active device %s\n",
-                                  slave_name);
-                       if (slave_dev)
-                               dev_put(slave_dev);
-                       return  -EPERM;
-               }
-
-               return  enslave( dev, slave_dev );
-
-       case  SIOCDEVEMANSIPATE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               return  emancipate( dev );
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-       default :
-               return  -EOPNOTSUPP;
-       }
-
-       return  error;
-}
-
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static int
-enslave( struct net_device  *dev,  struct net_device  *slave_dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-       struct net_local  *snl = netdev_priv(slave_dev);
-
-       if( nl->state & FL_SLAVE )      /* This isn't master or free device */
-               return  -EBUSY;
-
-       if( snl->state & FL_SLAVE )     /* That was already enslaved */
-               return  -EBUSY;
-
-       spin_lock( &nl->lock );
-       spin_lock( &snl->lock );
-
-       /* append to list */
-       snl->link = nl->link;
-       nl->link  = slave_dev;
-       snl->master = dev;
-       snl->state |= FL_SLAVE;
-
-       /* Summary statistics of MultiLine operation are stored
-          in the master's counters */
-       memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) );
-       netif_stop_queue( slave_dev );
-       netif_wake_queue( dev );        /* Now we are able to transmit */
-
-       spin_unlock( &snl->lock );
-       spin_unlock( &nl->lock );
-       netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name);
-       return  0;
-}
-
-
-static int
-emancipate( struct net_device  *dev )
-{
-       struct net_local   *snl = netdev_priv(dev);
-       struct net_device  *p   = snl->master;
-       struct net_local   *nl  = netdev_priv(p);
-
-       if( !(snl->state & FL_SLAVE) )
-               return  -EINVAL;
-
-       spin_lock( &nl->lock );
-       spin_lock( &snl->lock );
-       drop_xmit_queue( dev );
-
-       /* exclude from list */
-       for(;;) {       /* must be in list */
-               struct net_local  *t = netdev_priv(p);
-               if( t->link == dev ) {
-                       t->link = snl->link;
-                       break;
-               }
-               p = t->link;
-       }
-
-       snl->link = NULL;
-       snl->master = dev;
-       snl->state &= ~FL_SLAVE;
-
-       netif_start_queue( dev );
-
-       spin_unlock( &snl->lock );
-       spin_unlock( &nl->lock );
-
-       dev_put( dev );
-       return  0;
-}
-
-#endif
-
-static void
-set_multicast_list( struct net_device  *dev )
-{
-       return;         /* sbni always operates in promiscuous mode */
-}
-
-
-#ifdef MODULE
-module_param_hw_array(io, int, ioport, NULL, 0);
-module_param_hw_array(irq, int, irq, NULL, 0);
-module_param_array(baud, int, NULL, 0);
-module_param_array(rxl, int, NULL, 0);
-module_param_array(mac, int, NULL, 0);
-module_param(skip_pci_probe, bool, 0);
-
-MODULE_LICENSE("GPL");
-
-
-int __init init_module( void )
-{
-       struct net_device  *dev;
-       int err;
-
-       while( num < SBNI_MAX_NUM_CARDS ) {
-               dev = alloc_netdev(sizeof(struct net_local), "sbni%d",
-                                  NET_NAME_UNKNOWN, sbni_devsetup);
-               if( !dev)
-                       break;
-
-               sprintf( dev->name, "sbni%d", num );
-
-               err = sbni_init(dev);
-               if (err) {
-                       free_netdev(dev);
-                       break;
-               }
-
-               if( register_netdev( dev ) ) {
-                       release_region( dev->base_addr, SBNI_IO_EXTENT );
-                       free_netdev( dev );
-                       break;
-               }
-       }
-
-       return  *sbni_cards  ?  0  :  -ENODEV;
-}
-
-void
-cleanup_module(void)
-{
-       int i;
-
-       for (i = 0;  i < SBNI_MAX_NUM_CARDS;  ++i) {
-               struct net_device *dev = sbni_cards[i];
-               if (dev != NULL) {
-                       unregister_netdev(dev);
-                       release_region(dev->base_addr, SBNI_IO_EXTENT);
-                       free_netdev(dev);
-               }
-       }
-}
-
-#else  /* MODULE */
-
-static int __init
-sbni_setup( char  *p )
-{
-       int  n, parm;
-
-       if( *p++ != '(' )
-               goto  bad_param;
-
-       for( n = 0, parm = 0;  *p  &&  n < 8; ) {
-               (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 );
-               if( !*p  ||  *p == ')' )
-                       return 1;
-               if( *p == ';' ) {
-                       ++p;
-                       ++n;
-                       parm = 0;
-               } else if( *p++ != ',' ) {
-                       break;
-               } else {
-                       if( ++parm >= 5 )
-                               break;
-               }
-       }
-bad_param:
-       pr_err("Error in sbni kernel parameter!\n");
-       return 0;
-}
-
-__setup( "sbni=", sbni_setup );
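-
-/*
- * Boot-line shape accepted above, e.g. sbni=(0x210,5;0x214,5): ','
- * advances to the next of up to five positional per-card values
- * (presumably io, irq, baud, rxl, mac, assuming dest[] mirrors the
- * module parameter arrays), ';' starts the next card (at most eight),
- * and ')' or end of string finishes parsing.
- */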
-
-#endif /* MODULE */
-
-/* -------------------------------------------------------------------------- */
-
-static u32
-calc_crc32( u32  crc,  u8  *p,  u32  len )
-{
-       while( len-- )
-               crc = CRC32( *p++, crc );
-
-       return  crc;
-}
-
-static u32  crc32tab[] __attribute__ ((aligned(8))) = {
-       0xD202EF8D,  0xA505DF1B,  0x3C0C8EA1,  0x4B0BBE37,
-       0xD56F2B94,  0xA2681B02,  0x3B614AB8,  0x4C667A2E,
-       0xDCD967BF,  0xABDE5729,  0x32D70693,  0x45D03605,
-       0xDBB4A3A6,  0xACB39330,  0x35BAC28A,  0x42BDF21C,
-       0xCFB5FFE9,  0xB8B2CF7F,  0x21BB9EC5,  0x56BCAE53,
-       0xC8D83BF0,  0xBFDF0B66,  0x26D65ADC,  0x51D16A4A,
-       0xC16E77DB,  0xB669474D,  0x2F6016F7,  0x58672661,
-       0xC603B3C2,  0xB1048354,  0x280DD2EE,  0x5F0AE278,
-       0xE96CCF45,  0x9E6BFFD3,  0x0762AE69,  0x70659EFF,
-       0xEE010B5C,  0x99063BCA,  0x000F6A70,  0x77085AE6,
-       0xE7B74777,  0x90B077E1,  0x09B9265B,  0x7EBE16CD,
-       0xE0DA836E,  0x97DDB3F8,  0x0ED4E242,  0x79D3D2D4,
-       0xF4DBDF21,  0x83DCEFB7,  0x1AD5BE0D,  0x6DD28E9B,
-       0xF3B61B38,  0x84B12BAE,  0x1DB87A14,  0x6ABF4A82,
-       0xFA005713,  0x8D076785,  0x140E363F,  0x630906A9,
-       0xFD6D930A,  0x8A6AA39C,  0x1363F226,  0x6464C2B0,
-       0xA4DEAE1D,  0xD3D99E8B,  0x4AD0CF31,  0x3DD7FFA7,
-       0xA3B36A04,  0xD4B45A92,  0x4DBD0B28,  0x3ABA3BBE,
-       0xAA05262F,  0xDD0216B9,  0x440B4703,  0x330C7795,
-       0xAD68E236,  0xDA6FD2A0,  0x4366831A,  0x3461B38C,
-       0xB969BE79,  0xCE6E8EEF,  0x5767DF55,  0x2060EFC3,
-       0xBE047A60,  0xC9034AF6,  0x500A1B4C,  0x270D2BDA,
-       0xB7B2364B,  0xC0B506DD,  0x59BC5767,  0x2EBB67F1,
-       0xB0DFF252,  0xC7D8C2C4,  0x5ED1937E,  0x29D6A3E8,
-       0x9FB08ED5,  0xE8B7BE43,  0x71BEEFF9,  0x06B9DF6F,
-       0x98DD4ACC,  0xEFDA7A5A,  0x76D32BE0,  0x01D41B76,
-       0x916B06E7,  0xE66C3671,  0x7F6567CB,  0x0862575D,
-       0x9606C2FE,  0xE101F268,  0x7808A3D2,  0x0F0F9344,
-       0x82079EB1,  0xF500AE27,  0x6C09FF9D,  0x1B0ECF0B,
-       0x856A5AA8,  0xF26D6A3E,  0x6B643B84,  0x1C630B12,
-       0x8CDC1683,  0xFBDB2615,  0x62D277AF,  0x15D54739,
-       0x8BB1D29A,  0xFCB6E20C,  0x65BFB3B6,  0x12B88320,
-       0x3FBA6CAD,  0x48BD5C3B,  0xD1B40D81,  0xA6B33D17,
-       0x38D7A8B4,  0x4FD09822,  0xD6D9C998,  0xA1DEF90E,
-       0x3161E49F,  0x4666D409,  0xDF6F85B3,  0xA868B525,
-       0x360C2086,  0x410B1010,  0xD80241AA,  0xAF05713C,
-       0x220D7CC9,  0x550A4C5F,  0xCC031DE5,  0xBB042D73,
-       0x2560B8D0,  0x52678846,  0xCB6ED9FC,  0xBC69E96A,
-       0x2CD6F4FB,  0x5BD1C46D,  0xC2D895D7,  0xB5DFA541,
-       0x2BBB30E2,  0x5CBC0074,  0xC5B551CE,  0xB2B26158,
-       0x04D44C65,  0x73D37CF3,  0xEADA2D49,  0x9DDD1DDF,
-       0x03B9887C,  0x74BEB8EA,  0xEDB7E950,  0x9AB0D9C6,
-       0x0A0FC457,  0x7D08F4C1,  0xE401A57B,  0x930695ED,
-       0x0D62004E,  0x7A6530D8,  0xE36C6162,  0x946B51F4,
-       0x19635C01,  0x6E646C97,  0xF76D3D2D,  0x806A0DBB,
-       0x1E0E9818,  0x6909A88E,  0xF000F934,  0x8707C9A2,
-       0x17B8D433,  0x60BFE4A5,  0xF9B6B51F,  0x8EB18589,
-       0x10D5102A,  0x67D220BC,  0xFEDB7106,  0x89DC4190,
-       0x49662D3D,  0x3E611DAB,  0xA7684C11,  0xD06F7C87,
-       0x4E0BE924,  0x390CD9B2,  0xA0058808,  0xD702B89E,
-       0x47BDA50F,  0x30BA9599,  0xA9B3C423,  0xDEB4F4B5,
-       0x40D06116,  0x37D75180,  0xAEDE003A,  0xD9D930AC,
-       0x54D13D59,  0x23D60DCF,  0xBADF5C75,  0xCDD86CE3,
-       0x53BCF940,  0x24BBC9D6,  0xBDB2986C,  0xCAB5A8FA,
-       0x5A0AB56B,  0x2D0D85FD,  0xB404D447,  0xC303E4D1,
-       0x5D677172,  0x2A6041E4,  0xB369105E,  0xC46E20C8,
-       0x72080DF5,  0x050F3D63,  0x9C066CD9,  0xEB015C4F,
-       0x7565C9EC,  0x0262F97A,  0x9B6BA8C0,  0xEC6C9856,
-       0x7CD385C7,  0x0BD4B551,  0x92DDE4EB,  0xE5DAD47D,
-       0x7BBE41DE,  0x0CB97148,  0x95B020F2,  0xE2B71064,
-       0x6FBF1D91,  0x18B82D07,  0x81B17CBD,  0xF6B64C2B,
-       0x68D2D988,  0x1FD5E91E,  0x86DCB8A4,  0xF1DB8832,
-       0x616495A3,  0x1663A535,  0x8F6AF48F,  0xF86DC419,
-       0x660951BA,  0x110E612C,  0x88073096,  0xFF000000
-};
-
diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h
deleted file mode 100644 (file)
index 8426451..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/* sbni.h:  definitions for a Granch SBNI12 driver, version 5.0.0
- * Written 2001 Denis I.Timofeev (timofeev@granch.ru)
- * This file is distributed under the GNU GPL
- */
-
-#ifndef SBNI_H
-#define SBNI_H
-
-#ifdef SBNI_DEBUG
-#define DP( A ) A
-#else
-#define DP( A )
-#endif
-
-
-/* We don't have an official vendor id yet... */
-#define SBNI_PCI_VENDOR        0x55 
-#define SBNI_PCI_DEVICE        0x9f
-
-#define ISA_MODE 0x00
-#define PCI_MODE 0x01
-
-#define        SBNI_IO_EXTENT  4
-
-enum sbni_reg {
-       CSR0 = 0,
-       CSR1 = 1,
-       DAT  = 2
-};
-
-/* CSR0 mapping */
-enum {
-       BU_EMP = 0x02,
-       RC_CHK = 0x04,
-       CT_ZER = 0x08,
-       TR_REQ = 0x10,
-       TR_RDY = 0x20,
-       EN_INT = 0x40,
-       RC_RDY = 0x80
-};
-
-
-/* CSR1 mapping */
-#define PR_RES 0x80
-
-struct sbni_csr1 {
-#ifdef __LITTLE_ENDIAN_BITFIELD
-       u8 rxl  : 5;
-       u8 rate : 2;
-       u8      : 1;
-#else
-       u8      : 1;
-       u8 rate : 2;
-       u8 rxl  : 5;
-#endif
-};
-
-/* fields in frame header */
-#define FRAME_ACK_MASK  (unsigned short)0x7000
-#define FRAME_LEN_MASK  (unsigned short)0x03FF
-#define FRAME_FIRST     (unsigned short)0x8000
-#define FRAME_RETRY     (unsigned short)0x0800
-
-#define FRAME_SENT_BAD  (unsigned short)0x4000
-#define FRAME_SENT_OK   (unsigned short)0x3000
-
-
-/* state flags */
-enum {
-       FL_WAIT_ACK    = 0x01,
-       FL_NEED_RESEND = 0x02,
-       FL_PREV_OK     = 0x04,
-       FL_SLOW_MODE   = 0x08,
-       FL_SECONDARY   = 0x10,
-#ifdef CONFIG_SBNI_MULTILINE
-       FL_SLAVE       = 0x20,
-#endif
-       FL_LINE_DOWN   = 0x40
-};
-
-
-enum {
-       DEFAULT_IOBASEADDR = 0x210,
-       DEFAULT_INTERRUPTNUMBER = 5,
-       DEFAULT_RATE = 0,
-       DEFAULT_FRAME_LEN = 1012
-};
-
-#define DEF_RXL_DELTA  -1
-#define DEF_RXL                0xf
-
-#define SBNI_SIG 0x5a
-
-#define        SBNI_MIN_LEN    60      /* Shortest Ethernet frame without FCS */
-#define SBNI_MAX_FRAME 1023
-#define ETHER_MAX_LEN  1518
-
-#define SBNI_TIMEOUT   (HZ/10)
-
-#define TR_ERROR_COUNT 32
-#define CHANGE_LEVEL_START_TICKS 4
-
-#define SBNI_MAX_NUM_CARDS     16
-
-/* internal SBNI-specific statistics */
-struct sbni_in_stats {
-       u32     all_rx_number;
-       u32     bad_rx_number;
-       u32     timeout_number;
-       u32     all_tx_number;
-       u32     resend_tx_number;
-};
-
-/* SBNI ioctl params */
-#define SIOCDEVGETINSTATS      SIOCDEVPRIVATE
-#define SIOCDEVRESINSTATS      SIOCDEVPRIVATE+1
-#define SIOCDEVGHWSTATE        SIOCDEVPRIVATE+2
-#define SIOCDEVSHWSTATE        SIOCDEVPRIVATE+3
-#define SIOCDEVENSLAVE         SIOCDEVPRIVATE+4
-#define SIOCDEVEMANSIPATE      SIOCDEVPRIVATE+5
-
-
-/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */
-struct sbni_flags {
-       u32     rxl             : 4;
-       u32     rate            : 2;
-       u32     fixed_rxl       : 1;
-       u32     slow_mode       : 1;
-       u32     mac_addr        : 24;
-};
-
-/*
- * CRC-32 stuff
- */
-#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF))
-      /* CRC generator 0xEDB88320 */
-      /* CRC remainder 0x2144DF1C */
-      /* CRC initial value 0x00000000 */
-#define CRC32_REMAINDER 0x2144DF1C
-#define CRC32_INITIAL 0x00000000
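-
-/*
- * One table-driven step of the right-shifting (reflected) CRC-32: index
- * the table with the low byte of (crc ^ c), then fold in the upper 24
- * bits of the previous crc.  A whole buffer is processed byte-wise, as
- * calc_crc32() in sbni.c does:
- *
- *     u32 crc = CRC32_INITIAL;
- *     while (len--)
- *             crc = CRC32(*p++, crc);
- */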
-
-#ifndef __initdata
-#define __initdata
-#endif
-
-#endif
-
index 1df9595..514f2c1 100644 (file)
@@ -136,6 +136,29 @@ static struct ieee80211_supported_band band_5ghz = {
 /* Assigned at module init. Guaranteed locally-administered and unicast. */
 static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {};
 
+static void virt_wifi_inform_bss(struct wiphy *wiphy)
+{
+       u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
+       struct cfg80211_bss *informed_bss;
+       static const struct {
+               u8 tag;
+               u8 len;
+               u8 ssid[8];
+       } __packed ssid = {
+               .tag = WLAN_EID_SSID,
+               .len = 8,
+               .ssid = "VirtWifi",
+       };
+
+       informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
+                                          CFG80211_BSS_FTYPE_PRESP,
+                                          fake_router_bssid, tsf,
+                                          WLAN_CAPABILITY_ESS, 0,
+                                          (void *)&ssid, sizeof(ssid),
+                                          DBM_TO_MBM(-50), GFP_KERNEL);
+       cfg80211_put_bss(wiphy, informed_bss);
+}
+
 /* Called with the rtnl lock held. */
 static int virt_wifi_scan(struct wiphy *wiphy,
                          struct cfg80211_scan_request *request)
@@ -156,28 +179,13 @@ static int virt_wifi_scan(struct wiphy *wiphy,
 /* Acquires and releases the rdev BSS lock. */
 static void virt_wifi_scan_result(struct work_struct *work)
 {
-       struct {
-               u8 tag;
-               u8 len;
-               u8 ssid[8];
-       } __packed ssid = {
-               .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi",
-       };
-       struct cfg80211_bss *informed_bss;
        struct virt_wifi_wiphy_priv *priv =
                container_of(work, struct virt_wifi_wiphy_priv,
                             scan_result.work);
        struct wiphy *wiphy = priv_to_wiphy(priv);
        struct cfg80211_scan_info scan_info = { .aborted = false };
-       u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
 
-       informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
-                                          CFG80211_BSS_FTYPE_PRESP,
-                                          fake_router_bssid, tsf,
-                                          WLAN_CAPABILITY_ESS, 0,
-                                          (void *)&ssid, sizeof(ssid),
-                                          DBM_TO_MBM(-50), GFP_KERNEL);
-       cfg80211_put_bss(wiphy, informed_bss);
+       virt_wifi_inform_bss(wiphy);
 
        /* Schedules work which acquires and releases the rtnl lock. */
        cfg80211_scan_done(priv->scan_request, &scan_info);
@@ -225,10 +233,12 @@ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev,
        if (!could_schedule)
                return -EBUSY;
 
-       if (sme->bssid)
+       if (sme->bssid) {
                ether_addr_copy(priv->connect_requested_bss, sme->bssid);
-       else
+       } else {
+               virt_wifi_inform_bss(wiphy);
                eth_zero_addr(priv->connect_requested_bss);
+       }
 
        wiphy_debug(wiphy, "connect\n");
 
@@ -241,11 +251,13 @@ static void virt_wifi_connect_complete(struct work_struct *work)
        struct virt_wifi_netdev_priv *priv =
                container_of(work, struct virt_wifi_netdev_priv, connect.work);
        u8 *requested_bss = priv->connect_requested_bss;
-       bool has_addr = !is_zero_ether_addr(requested_bss);
        bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid);
        u16 status = WLAN_STATUS_SUCCESS;
 
-       if (!priv->is_up || (has_addr && !right_addr))
+       if (is_zero_ether_addr(requested_bss))
+               requested_bss = NULL;
+
+       if (!priv->is_up || (requested_bss && !right_addr))
                status = WLAN_STATUS_UNSPECIFIED_FAILURE;
        else
                priv->is_connected = true;
index de93843..77dbfc4 100644 (file)
@@ -38,6 +38,18 @@ config MHI_WWAN_CTRL
          To compile this driver as a module, choose M here: the module will be
          called mhi_wwan_ctrl.
 
+config MHI_WWAN_MBIM
+        tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems"
+        depends on MHI_BUS
+        help
+          MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems.
+          It implements MBIM over MHI, for IP data aggregation and muxing.
+          A default wwan0 network interface is created for MBIM data session
+          ID 0. Additional links can be created via the wwan rtnetlink type.
+
+          To compile this driver as a module, choose M here: the module will be
+          called mhi_wwan_mbim.
+
 config RPMSG_WWAN_CTRL
        tristate "RPMSG WWAN control driver"
        depends on RPMSG
index d90ac33..fe51fee 100644 (file)
@@ -9,5 +9,6 @@ wwan-objs += wwan_core.o
 obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
 
 obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
+obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o
 obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
 obj-$(CONFIG_IOSM) += iosm/
index 45e6923..f861994 100644 (file)
 #define IOSM_CP_VERSION 0x0100UL
 
 /* DL dir Aggregation support mask */
-#define DL_AGGR BIT(23)
+#define DL_AGGR BIT(9)
 
 /* UL dir Aggregation support mask */
-#define UL_AGGR BIT(22)
+#define UL_AGGR BIT(8)
 
 /* UL flow credit support mask */
 #define UL_FLOW_CREDIT BIT(21)
index 562de27..bdb2d32 100644 (file)
@@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux,
                return;
        }
 
-       ul_credits = fct->vfl.nr_of_bytes;
+       ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes);
 
        dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d",
                if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits);
@@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux)
                qlt->reserved[0] = 0;
                qlt->reserved[1] = 0;
 
-               qlt->vfl.nr_of_bytes = session->ul_list.qlen;
+               qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen);
 
                /* Add QLT to the transfer list. */
                skb_queue_tail(&ipc_mux->channel->ul_list,
index 4a74e3c..aae83db 100644 (file)
@@ -106,7 +106,7 @@ struct mux_lite_cmdh {
  * @nr_of_bytes:       Number of bytes available to transmit in the queue.
  */
 struct mux_lite_vfl {
-       u32 nr_of_bytes;
+       __le32 nr_of_bytes;
 };
 
 /**
index 91109e2..35d5907 100644 (file)
@@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol,
        }
 
        if (p_td->buffer.address != IPC_CB(skb)->mapping) {
-               dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p",
-                       (void *)p_td->buffer.address, skb->data);
+               dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p",
+                       (unsigned long long)p_td->buffer.address, skb->data);
                ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
                skb = NULL;
                goto ret;
index b2357ad..b571d9c 100644 (file)
@@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev,
 
        RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL);
        /* unregistering includes synchronize_net() */
-       unregister_netdevice(dev);
+       unregister_netdevice_queue(dev, head);
 
 unlock:
        mutex_unlock(&ipc_wwan->if_mutex);
index 1bc6b69..1e18420 100644 (file)
@@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port)
        int ret;
 
        /* Start mhi device's channel(s) */
-       ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev);
+       ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0);
        if (ret)
                return ret;
 
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
new file mode 100644 (file)
index 0000000..377529b
--- /dev/null
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI MBIM Network driver - Network/MBIM over MHI bus
+ *
+ * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
+ *
+ * This driver copies some code from cdc_ncm, which is:
+ * Copyright (C) ST-Ericsson 2010-2012
+ * and cdc_mbim, which is:
+ * Copyright (c) 2012  Smith Micro Software, Inc.
+ * Copyright (c) 2012  Bjørn Mork <bjorn@mork.no>
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/mhi.h>
+#include <linux/mii.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/usb.h>
+#include <linux/usb/cdc.h>
+#include <linux/usb/usbnet.h>
+#include <linux/usb/cdc_ncm.h>
+#include <linux/wwan.h>
+
+/* An MRU of 3500 optimizes skb allocation: the skbs will basically fit
+ * in one 4K page. Large MBIM packets will simply be split over several
+ * MHI transfers and chained by the MHI net layer (zerocopy).
+ */
+#define MHI_DEFAULT_MRU 3500
+
+#define MHI_MBIM_DEFAULT_MTU 1500
+#define MHI_MAX_BUF_SZ 0xffff
+
+#define MBIM_NDP16_SIGN_MASK 0x00ffffff
+
+#define MHI_MBIM_LINK_HASH_SIZE 8
+#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE)
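+
+/* One link per MBIM session ID, kept in a small RCU-protected hash
+ * table; LINK_HASH() just folds the session ID into one of the
+ * MHI_MBIM_LINK_HASH_SIZE buckets.
+ */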
+
+struct mhi_mbim_link {
+       struct mhi_mbim_context *mbim;
+       struct net_device *ndev;
+       unsigned int session;
+
+       /* stats */
+       u64_stats_t rx_packets;
+       u64_stats_t rx_bytes;
+       u64_stats_t rx_errors;
+       u64_stats_t tx_packets;
+       u64_stats_t tx_bytes;
+       u64_stats_t tx_errors;
+       u64_stats_t tx_dropped;
+       struct u64_stats_sync tx_syncp;
+       struct u64_stats_sync rx_syncp;
+
+       struct hlist_node hlnode;
+};
+
+struct mhi_mbim_context {
+       struct mhi_device *mdev;
+       struct sk_buff *skbagg_head;
+       struct sk_buff *skbagg_tail;
+       unsigned int mru;
+       u32 rx_queue_sz;
+       u16 rx_seq;
+       u16 tx_seq;
+       struct delayed_work rx_refill;
+       spinlock_t tx_lock;
+       struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE];
+};
+
+struct mbim_tx_hdr {
+       struct usb_cdc_ncm_nth16 nth16;
+       struct usb_cdc_ncm_ndp16 ndp16;
+       struct usb_cdc_ncm_dpe16 dpe16[2];
+} __packed;
+
+static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim,
+                                                  unsigned int session)
+{
+       struct mhi_mbim_link *link;
+
+       hlist_for_each_entry_rcu(link, &mbim->link_list[LINK_HASH(session)], hlnode) {
+               if (link->session == session)
+                       return link;
+       }
+
+       return NULL;
+}
+
+static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session,
+                                    u16 tx_seq)
+{
+       unsigned int dgram_size = skb->len;
+       struct usb_cdc_ncm_nth16 *nth16;
+       struct usb_cdc_ncm_ndp16 *ndp16;
+       struct mbim_tx_hdr *mbim_hdr;
+
+       /* Only one NDP is sent, containing the IP packet (no aggregation) */
+
+       /* Ensure we have enough headroom for crafting MBIM header */
+       if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
+               dev_kfree_skb_any(skb);
+               return NULL;
+       }
+
+       mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
+
+       /* Fill NTB header */
+       nth16 = &mbim_hdr->nth16;
+       nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
+       nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+       nth16->wSequence = cpu_to_le16(tx_seq);
+       nth16->wBlockLength = cpu_to_le16(skb->len);
+       nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+
+       /* Fill the unique NDP */
+       ndp16 = &mbim_hdr->ndp16;
+       ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24));
+       ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
+                                       + sizeof(struct usb_cdc_ncm_dpe16) * 2);
+       ndp16->wNextNdpIndex = 0;
+
+       /* Datagram follows the mbim header */
+       ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
+       ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
+
+       /* null termination */
+       ndp16->dpe16[1].wDatagramIndex = 0;
+       ndp16->dpe16[1].wDatagramLength = 0;
+
+       return skb;
+}
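+
+/* Layout of the buffer built above (single IP datagram per NTB, no
+ * aggregation):
+ *
+ *   [NTH16][NDP16][DPE16 #0][DPE16 #1 = null terminator][IP datagram]
+ *
+ * wNdpIndex points just past the NTH16, and dpe16[0].wDatagramIndex
+ * just past the whole mbim_tx_hdr, i.e. at the original skb payload.
+ */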
+
+static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       struct mhi_mbim_context *mbim = link->mbim;
+       unsigned long flags;
+       int err = -ENOMEM;
+
+       /* Serialize MHI channel queuing and MBIM seq */
+       spin_lock_irqsave(&mbim->tx_lock, flags);
+
+       skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq);
+       if (unlikely(!skb))
+               goto exit_unlock;
+
+       err = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+
+       if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+               netif_stop_queue(ndev);
+
+       if (!err)
+               mbim->tx_seq++;
+
+exit_unlock:
+       spin_unlock_irqrestore(&mbim->tx_lock, flags);
+
+       if (unlikely(err)) {
+               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+                                   ndev->name, err);
+               dev_kfree_skb_any(skb);
+               goto exit_drop;
+       }
+
+       return NETDEV_TX_OK;
+
+exit_drop:
+       u64_stats_update_begin(&link->tx_syncp);
+       u64_stats_inc(&link->tx_dropped);
+       u64_stats_update_end(&link->tx_syncp);
+
+       return NETDEV_TX_OK;
+}
+
+static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+       struct usb_cdc_ncm_nth16 *nth16;
+       int len;
+
+       if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
+                       sizeof(struct usb_cdc_ncm_ndp16)) {
+               net_err_ratelimited("frame too short\n");
+               return -EINVAL;
+       }
+
+       nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
+
+       if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
+               net_err_ratelimited("invalid NTH16 signature <%#010x>\n",
+                                   le32_to_cpu(nth16->dwSignature));
+               return -EINVAL;
+       }
+
+       /* No limit on the block length, except the size of the data pkt */
+       len = le16_to_cpu(nth16->wBlockLength);
+       if (len > skb->len) {
+               net_err_ratelimited("NTB does not fit into the skb %u/%u\n",
+                                   len, skb->len);
+               return -EINVAL;
+       }
+
+       if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
+           (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) &&
+           !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
+               net_err_ratelimited("sequence number glitch prev=%d curr=%d\n",
+                                   mbim->rx_seq, le16_to_cpu(nth16->wSequence));
+       }
+       mbim->rx_seq = le16_to_cpu(nth16->wSequence);
+
+       return le16_to_cpu(nth16->wNdpIndex);
+}
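+
+/* The sequence test above tolerates exactly two discontinuities: a
+ * restart where both counters are zero, and the 16-bit wraparound from
+ * 0xffff to 0.  Anything else is logged as a glitch, but the NTB is
+ * still processed.
+ */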
+
+static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
+{
+       int ret;
+
+       if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
+               net_err_ratelimited("invalid DPT16 length <%u>\n",
+                                   le16_to_cpu(ndp16->wLength));
+               return -EINVAL;
+       }
+
+       ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
+                       / sizeof(struct usb_cdc_ncm_dpe16));
+       ret--; /* Last entry is always a NULL terminator */
+
+       if (sizeof(struct usb_cdc_ncm_ndp16) +
+            ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
+               net_err_ratelimited("Invalid nframes = %d\n", ret);
+               return -EINVAL;
+       }
+
+       return ret;
+}
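+
+/* wLength covers the NDP16 header plus every DPE16 entry, so dividing
+ * out the entry size and dropping the mandatory null terminator yields
+ * the number of real datagram pointers, which is then bounds-checked
+ * against the skb length.
+ */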
+
+static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+       int ndpoffset;
+
+       /* Check NTB header and retrieve first NDP offset */
+       ndpoffset = mbim_rx_verify_nth16(mbim, skb);
+       if (ndpoffset < 0) {
+               net_err_ratelimited("mbim: Incorrect NTB header\n");
+               goto error;
+       }
+
+       /* Process each NDP */
+       while (1) {
+               struct usb_cdc_ncm_ndp16 ndp16;
+               struct usb_cdc_ncm_dpe16 dpe16;
+               struct mhi_mbim_link *link;
+               int nframes, n, dpeoffset;
+               unsigned int session;
+
+               if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
+                       net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n",
+                                           ndpoffset);
+                       goto error;
+               }
+
+               /* Check NDP header and retrieve number of datagrams */
+               nframes = mbim_rx_verify_ndp16(skb, &ndp16);
+               if (nframes < 0) {
+                       net_err_ratelimited("mbim: Incorrect NDP16\n");
+                       goto error;
+               }
+
+               /* Only IP data type supported, no DSS in MHI context */
+               if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
+                               != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
+                       net_err_ratelimited("mbim: Unsupported NDP type\n");
+                       goto next_ndp;
+               }
+
+               session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24;
+
+               rcu_read_lock();
+
+               link = mhi_mbim_get_link_rcu(mbim, session);
+               if (!link) {
+                       net_err_ratelimited("mbim: bad packet session (%u)\n", session);
+                       goto unlock;
+               }
+
+               /* de-aggregate and deliver IP packets */
+               dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
+               for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
+                       u16 dgram_offset, dgram_len;
+                       struct sk_buff *skbn;
+
+                       if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
+                               break;
+
+                       dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
+                       dgram_len = le16_to_cpu(dpe16.wDatagramLength);
+
+                       if (!dgram_offset || !dgram_len)
+                               break; /* null terminator */
+
+                       skbn = netdev_alloc_skb(link->ndev, dgram_len);
+                       if (!skbn)
+                               continue;
+
+                       skb_put(skbn, dgram_len);
+                       skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
+
+                       switch (skbn->data[0] & 0xf0) {
+                       case 0x40:
+                               skbn->protocol = htons(ETH_P_IP);
+                               break;
+                       case 0x60:
+                               skbn->protocol = htons(ETH_P_IPV6);
+                               break;
+                       default:
+                               net_err_ratelimited("%s: unknown protocol\n",
+                                                   link->ndev->name);
+                               dev_kfree_skb_any(skbn);
+                               u64_stats_update_begin(&link->rx_syncp);
+                               u64_stats_inc(&link->rx_errors);
+                               u64_stats_update_end(&link->rx_syncp);
+                               continue;
+                       }
+
+                       u64_stats_update_begin(&link->rx_syncp);
+                       u64_stats_inc(&link->rx_packets);
+                       u64_stats_add(&link->rx_bytes, skbn->len);
+                       u64_stats_update_end(&link->rx_syncp);
+
+                       netif_rx(skbn);
+               }
+unlock:
+               rcu_read_unlock();
+next_ndp:
+               /* Other NDP to process? */
+               ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
+               if (!ndpoffset)
+                       break;
+       }
+
+       /* free skb */
+       dev_consume_skb_any(skb);
+       return;
+error:
+       dev_kfree_skb_any(skb);
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim,
+                                      struct sk_buff *skb)
+{
+       struct sk_buff *head = mbim->skbagg_head;
+       struct sk_buff *tail = mbim->skbagg_tail;
+
+       /* This is non-paged skb chaining using frag_list */
+       if (!head) {
+               mbim->skbagg_head = skb;
+               return skb;
+       }
+
+       if (!skb_shinfo(head)->frag_list)
+               skb_shinfo(head)->frag_list = skb;
+       else
+               tail->next = skb;
+
+       head->len += skb->len;
+       head->data_len += skb->len;
+       head->truesize += skb->truesize;
+
+       mbim->skbagg_tail = skb;
+
+       return mbim->skbagg_head;
+}
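+
+/* The helper above chains oversized NTBs through the head skb's
+ * frag_list (the first fragment becomes the head, later ones are linked
+ * via tail->next) while keeping len, data_len and truesize consistent,
+ * so mhi_mbim_rx() can walk the reassembled NTB with skb_copy_bits() as
+ * if it were linear.
+ */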
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+       struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context,
+                                                    rx_refill.work);
+       struct mhi_device *mdev = mbim->mdev;
+       int err;
+
+       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+               struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+
+               if (unlikely(!skb))
+                       break;
+
+               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
+                                   MHI_DEFAULT_MRU, MHI_EOT);
+               if (unlikely(err)) {
+                       kfree_skb(skb);
+                       break;
+               }
+
+               /* Do not hog the CPU if rx buffers are consumed faster than
+                * queued (unlikely).
+                */
+               cond_resched();
+       }
+
+       /* If we're still starved of rx buffers, reschedule later */
+       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz)
+               schedule_delayed_work(&mbim->rx_refill, HZ / 2);
+}
+
+static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev,
+                                struct mhi_result *mhi_res)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       int free_desc_count;
+
+       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       if (unlikely(mhi_res->transaction_status)) {
+               switch (mhi_res->transaction_status) {
+               case -EOVERFLOW:
+                       /* Packet has been split over multiple transfers */
+                       skb_put(skb, mhi_res->bytes_xferd);
+                       mhi_net_skb_agg(mbim, skb);
+                       break;
+               case -ENOTCONN:
+                       /* MHI layer stopping/resetting the DL channel */
+                       dev_kfree_skb_any(skb);
+                       return;
+               default:
+                       /* Unknown error, simply drop */
+                       dev_kfree_skb_any(skb);
+               }
+       } else {
+               skb_put(skb, mhi_res->bytes_xferd);
+
+               if (mbim->skbagg_head) {
+                       /* Aggregate the final fragment */
+                       skb = mhi_net_skb_agg(mbim, skb);
+                       mbim->skbagg_head = NULL;
+               }
+
+               mhi_mbim_rx(mbim, skb);
+       }
+
+       /* Refill if RX buffers queue becomes low */
+       if (free_desc_count >= mbim->rx_queue_sz / 2)
+               schedule_delayed_work(&mbim->rx_refill, 0);
+}
+
+static void mhi_mbim_ndo_get_stats64(struct net_device *ndev,
+                                    struct rtnl_link_stats64 *stats)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       unsigned int start;
+
+       do {
+               start = u64_stats_fetch_begin_irq(&link->rx_syncp);
+               stats->rx_packets = u64_stats_read(&link->rx_packets);
+               stats->rx_bytes = u64_stats_read(&link->rx_bytes);
+               stats->rx_errors = u64_stats_read(&link->rx_errors);
+       } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start));
+
+       do {
+               start = u64_stats_fetch_begin_irq(&link->tx_syncp);
+               stats->tx_packets = u64_stats_read(&link->tx_packets);
+               stats->tx_bytes = u64_stats_read(&link->tx_bytes);
+               stats->tx_errors = u64_stats_read(&link->tx_errors);
+               stats->tx_dropped = u64_stats_read(&link->tx_dropped);
+       } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start));
+}
+
+static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev,
+                                struct mhi_result *mhi_res)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       struct net_device *ndev = skb->dev;
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       /* Hardware has consumed the buffer, so free the skb (which is not
+        * freed by the MHI stack) and perform accounting.
+        */
+       dev_consume_skb_any(skb);
+
+       u64_stats_update_begin(&link->tx_syncp);
+       if (unlikely(mhi_res->transaction_status)) {
+               /* MHI layer stopping/resetting the UL channel */
+               if (mhi_res->transaction_status == -ENOTCONN) {
+                       u64_stats_update_end(&link->tx_syncp);
+                       return;
+               }
+
+               u64_stats_inc(&link->tx_errors);
+       } else {
+               u64_stats_inc(&link->tx_packets);
+               u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd);
+       }
+       u64_stats_update_end(&link->tx_syncp);
+
+       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+               netif_wake_queue(ndev);
+}
+
+static int mhi_mbim_ndo_open(struct net_device *ndev)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       /* Feed the MHI rx buffer pool */
+       schedule_delayed_work(&link->mbim->rx_refill, 0);
+
+       /* Carrier is established via out-of-band channel (e.g. qmi) */
+       netif_carrier_on(ndev);
+
+       netif_start_queue(ndev);
+
+       return 0;
+}
+
+static int mhi_mbim_ndo_stop(struct net_device *ndev)
+{
+       netif_stop_queue(ndev);
+       netif_carrier_off(ndev);
+
+       return 0;
+}
+
+static const struct net_device_ops mhi_mbim_ndo = {
+       .ndo_open = mhi_mbim_ndo_open,
+       .ndo_stop = mhi_mbim_ndo_stop,
+       .ndo_start_xmit = mhi_mbim_ndo_xmit,
+       .ndo_get_stats64 = mhi_mbim_ndo_get_stats64,
+};
+
+static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
+                           struct netlink_ext_ack *extack)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       struct mhi_mbim_context *mbim = ctxt;
+
+       link->session = if_id;
+       link->mbim = mbim;
+       link->ndev = ndev;
+       u64_stats_init(&link->rx_syncp);
+       u64_stats_init(&link->tx_syncp);
+
+       rcu_read_lock();
+       if (mhi_mbim_get_link_rcu(mbim, if_id)) {
+               rcu_read_unlock();
+               return -EEXIST;
+       }
+       rcu_read_unlock();
+
+       /* Already protected by RTNL lock */
+       hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]);
+
+       return register_netdevice(ndev);
+}
+
+static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev,
+                            struct list_head *head)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       hlist_del_init_rcu(&link->hlnode);
+       synchronize_rcu();
+
+       unregister_netdevice_queue(ndev, head);
+}
+
+static void mhi_mbim_setup(struct net_device *ndev)
+{
+       ndev->header_ops = NULL;  /* No header */
+       ndev->type = ARPHRD_RAWIP;
+       ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
+       ndev->hard_header_len = 0;
+       ndev->addr_len = 0;
+       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+       ndev->netdev_ops = &mhi_mbim_ndo;
+       ndev->mtu = MHI_MBIM_DEFAULT_MTU;
+       ndev->min_mtu = ETH_MIN_MTU;
+       ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom;
+       ndev->tx_queue_len = 1000;
+}
+
+static const struct wwan_ops mhi_mbim_wwan_ops = {
+       .priv_size = sizeof(struct mhi_mbim_link),
+       .setup = mhi_mbim_setup,
+       .newlink = mhi_mbim_newlink,
+       .dellink = mhi_mbim_dellink,
+};
+
+static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+       struct mhi_mbim_context *mbim;
+       int err;
+
+       mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL);
+       if (!mbim)
+               return -ENOMEM;
+
+       spin_lock_init(&mbim->tx_lock);
+       dev_set_drvdata(&mhi_dev->dev, mbim);
+       mbim->mdev = mhi_dev;
+       mbim->mru = mhi_dev->mhi_cntrl->mru ? mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU;
+
+       INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work);
+
+       /* Start MHI channels */
+       err = mhi_prepare_for_transfer(mhi_dev, 0);
+       if (err)
+               return err;
+
+       /* Number of transfer descriptors determines size of the queue */
+       mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       /* Register WWAN link ops with the MHI controller device representing the WWAN instance */
+       return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0);
+}
+
+static void mhi_mbim_remove(struct mhi_device *mhi_dev)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+
+       mhi_unprepare_from_transfer(mhi_dev);
+       cancel_delayed_work_sync(&mbim->rx_refill);
+       wwan_unregister_ops(&cntrl->mhi_dev->dev);
+       kfree_skb(mbim->skbagg_head);
+       dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static const struct mhi_device_id mhi_mbim_id_table[] = {
+       /* Hardware-accelerated data path (to modem IPA), MBIM protocol */
+       { .chan = "IP_HW0_MBIM", .driver_data = 0 },
+       {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table);
+
+static struct mhi_driver mhi_mbim_driver = {
+       .probe = mhi_mbim_probe,
+       .remove = mhi_mbim_remove,
+       .dl_xfer_cb = mhi_mbim_dl_callback,
+       .ul_xfer_cb = mhi_mbim_ul_callback,
+       .id_table = mhi_mbim_id_table,
+       .driver = {
+               .name = "mhi_wwan_mbim",
+               .owner = THIS_MODULE,
+       },
+};
+
+module_mhi_driver(mhi_mbim_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network/MBIM over MHI");
+MODULE_LICENSE("GPL v2");
index 1575467..85bf8d5 100644 (file)
@@ -192,8 +192,7 @@ static void nfcsim_recv_wq(struct work_struct *work)
 
                if (!IS_ERR(skb))
                        dev_kfree_skb(skb);
-
-               skb = ERR_PTR(-ENODEV);
+               return;
        }
 
        dev->cb(dev->nfc_digital_dev, dev->arg, skb);
index 1421ffd..1af7a1e 100644 (file)
@@ -422,7 +422,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
        tfm = crypto_alloc_shash("sha1", 0, 0);
        if (IS_ERR(tfm)) {
                dev_err(&fw_info->ndev->nfc_dev->dev,
-                       "Cannot allocate shash (code=%d)\n", ret);
+                       "Cannot allocate shash (code=%pe)\n", tfm);
                return PTR_ERR(tfm);
        }
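The s3fwrn5 fix above also switches the format specifier from %d (which was printing an unrelated ret) to %pe, the printk extension for error pointers. With CONFIG_SYMBOLIC_ERRNAME=y this prints the symbolic error name; a one-line illustration at a hypothetical call site:

	#include <linux/err.h>
	#include <linux/printk.h>

	static void demo_report(void)
	{
		void *p = ERR_PTR(-ENOMEM);

		/* with CONFIG_SYMBOLIC_ERRNAME=y this logs "-ENOMEM" */
		pr_err("allocation failed: %pe\n", p);
	}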
 
index 11779be..dfd9dec 100644 (file)
@@ -900,7 +900,10 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
                cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
        cmnd->write_zeroes.length =
                cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-       cmnd->write_zeroes.control = 0;
+       if (nvme_ns_has_pi(ns))
+               cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+       else
+               cmnd->write_zeroes.control = 0;
        return BLK_STS_OK;
 }
 
@@ -3807,6 +3810,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
+       bool last_path = false;
+
        if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
                return;
 
@@ -3815,8 +3820,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
        mutex_lock(&ns->ctrl->subsys->lock);
        list_del_rcu(&ns->siblings);
-       if (list_empty(&ns->head->list))
-               list_del_init(&ns->head->entry);
        mutex_unlock(&ns->ctrl->subsys->lock);
 
        synchronize_rcu(); /* guarantee not available in head->list */
@@ -3836,7 +3839,15 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        list_del_init(&ns->list);
        up_write(&ns->ctrl->namespaces_rwsem);
 
-       nvme_mpath_check_last_path(ns);
+       /* Synchronize with nvme_init_ns_head() */
+       mutex_lock(&ns->head->subsys->lock);
+       if (list_empty(&ns->head->list)) {
+               list_del_init(&ns->head->entry);
+               last_path = true;
+       }
+       mutex_unlock(&ns->head->subsys->lock);
+       if (last_path)
+               nvme_mpath_shutdown_disk(ns->head);
        nvme_put_ns(ns);
 }
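The nvme_ns_remove() rework above is an instance of the decide-under-lock, act-outside-lock shape: the last-path test runs while the subsystem lock stabilizes head->list, but the heavyweight nvme_mpath_shutdown_disk() call is deferred until the lock is dropped. A generic sketch of the pattern (names hypothetical, not the nvme code itself):

	#include <linux/mutex.h>
	#include <linux/rculist.h>

	static void demo_remove_member(struct mutex *lock, struct list_head *node,
				       struct list_head *list, void (*shutdown)(void))
	{
		bool last = false;

		mutex_lock(lock);
		list_del_rcu(node);		/* drop our membership */
		if (list_empty(list))
			last = true;		/* decide while the list is stable */
		mutex_unlock(lock);

		if (last)
			shutdown();		/* act without holding the lock */
	}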
 
index 0ea5298..3f32c5e 100644 (file)
@@ -760,14 +760,21 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 #endif
 }
 
-void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
        if (!head->disk)
                return;
+       kblockd_schedule_work(&head->requeue_work);
        if (head->disk->flags & GENHD_FL_UP) {
                nvme_cdev_del(&head->cdev, &head->cdev_device);
                del_gendisk(head->disk);
        }
+}
+
+void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+{
+       if (!head->disk)
+               return;
        blk_set_queue_dying(head->disk->queue);
        /* make sure all pending bios are cleaned up */
        kblockd_schedule_work(&head->requeue_work);
index 18ef8dd..5cd1fa3 100644 (file)
@@ -716,14 +716,7 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
-
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
-{
-       struct nvme_ns_head *head = ns->head;
-
-       if (head->disk && list_empty(&head->list))
-               kblockd_schedule_work(&head->requeue_work);
-}
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
 
 static inline void nvme_trace_bio_complete(struct request *req)
 {
@@ -772,7 +765,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
 }
 static inline void nvme_trace_bio_complete(struct request *req)
index 320051f..5185208 100644 (file)
@@ -2631,7 +2631,9 @@ static void nvme_reset_work(struct work_struct *work)
        bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
        int result;
 
-       if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
+       if (dev->ctrl.state != NVME_CTRL_RESETTING) {
+               dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
+                        dev->ctrl.state);
                result = -ENODEV;
                goto out;
        }
index daaf700..35bac7a 100644 (file)
@@ -56,7 +56,7 @@ TRACE_EVENT(nvme_setup_cmd,
                __field(u8, fctype)
                __field(u16, cid)
                __field(u32, nsid)
-               __field(u64, metadata)
+               __field(bool, metadata)
                __array(u8, cdw10, 24)
            ),
            TP_fast_assign(
@@ -66,13 +66,13 @@ TRACE_EVENT(nvme_setup_cmd,
                __entry->flags = cmd->common.flags;
                __entry->cid = cmd->common.command_id;
                __entry->nsid = le32_to_cpu(cmd->common.nsid);
-               __entry->metadata = le64_to_cpu(cmd->common.metadata);
+               __entry->metadata = !!blk_integrity_rq(req);
                __entry->fctype = cmd->fabrics.fctype;
                __assign_disk_name(__entry->disk, req->rq_disk);
                memcpy(__entry->cdw10, &cmd->common.cdw10,
                        sizeof(__entry->cdw10));
            ),
-           TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
+           TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)",
                      __entry->ctrl_id, __print_disk_name(__entry->disk),
                      __entry->qid, __entry->cid, __entry->nsid,
                      __entry->flags, __entry->metadata,
index 85887d8..192c904 100644 (file)
@@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev,
        for (i = 0; i < socket_count; i++) {
                sockets[i].card_state = 1; /* 1 = present but empty */
                sockets[i].io_base = pci_resource_start(dev, 0);
+               sockets[i].dev = dev;
                sockets[i].socket.features |= SS_CAP_PCCARD;
                sockets[i].socket.map_size = 0x1000;
                sockets[i].socket.irq_mask = 0;
index b9da58e..3481479 100644 (file)
 #define AMD_PMC_RESULT_CMD_UNKNOWN           0xFE
 #define AMD_PMC_RESULT_FAILED                0xFF
 
+/* FCH SSC Registers */
+#define FCH_S0I3_ENTRY_TIME_L_OFFSET   0x30
+#define FCH_S0I3_ENTRY_TIME_H_OFFSET   0x34
+#define FCH_S0I3_EXIT_TIME_L_OFFSET    0x38
+#define FCH_S0I3_EXIT_TIME_H_OFFSET    0x3C
+#define FCH_SSC_MAPPING_SIZE           0x800
+#define FCH_BASE_PHY_ADDR_LOW          0xFED81100
+#define FCH_BASE_PHY_ADDR_HIGH         0x00000000
+
+/* SMU Message Definitions */
+#define SMU_MSG_GETSMUVERSION          0x02
+#define SMU_MSG_LOG_GETDRAM_ADDR_HI    0x04
+#define SMU_MSG_LOG_GETDRAM_ADDR_LO    0x05
+#define SMU_MSG_LOG_START              0x06
+#define SMU_MSG_LOG_RESET              0x07
+#define SMU_MSG_LOG_DUMP_DATA          0x08
+#define SMU_MSG_GET_SUP_CONSTRAINTS    0x09
 /* List of supported CPU ids */
 #define AMD_CPU_ID_RV                  0x15D0
 #define AMD_CPU_ID_RN                  0x1630
 #define AMD_CPU_ID_PCO                 AMD_CPU_ID_RV
 #define AMD_CPU_ID_CZN                 AMD_CPU_ID_RN
+#define AMD_CPU_ID_YC                  0x14B5
 
-#define AMD_SMU_FW_VERSION             0x0
 #define PMC_MSG_DELAY_MIN_US           100
 #define RESPONSE_REGISTER_LOOP_MAX     200
 
+#define SOC_SUBSYSTEM_IP_MAX   12
+#define DELAY_MIN_US           2000
+#define DELAY_MAX_US           3000
 enum amd_pmc_def {
        MSG_TEST = 0x01,
        MSG_OS_HINT_PCO,
        MSG_OS_HINT_RN,
 };
 
+struct amd_pmc_bit_map {
+       const char *name;
+       u32 bit_mask;
+};
+
+static const struct amd_pmc_bit_map soc15_ip_blk[] = {
+       {"DISPLAY",     BIT(0)},
+       {"CPU",         BIT(1)},
+       {"GFX",         BIT(2)},
+       {"VDD",         BIT(3)},
+       {"ACP",         BIT(4)},
+       {"VCN",         BIT(5)},
+       {"ISP",         BIT(6)},
+       {"NBIO",        BIT(7)},
+       {"DF",          BIT(8)},
+       {"USB0",        BIT(9)},
+       {"USB1",        BIT(10)},
+       {"LAPIC",       BIT(11)},
+       {}
+};
+
 struct amd_pmc_dev {
        void __iomem *regbase;
-       void __iomem *smu_base;
+       void __iomem *smu_virt_addr;
+       void __iomem *fch_virt_addr;
        u32 base_addr;
        u32 cpu_id;
+       u32 active_ips;
        struct device *dev;
+       struct mutex lock; /* generic mutex lock */
 #if IS_ENABLED(CONFIG_DEBUG_FS)
        struct dentry *dbgfs_dir;
 #endif /* CONFIG_DEBUG_FS */
 };
 
 static struct amd_pmc_dev pmc;
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret);
 
 static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset)
 {
@@ -85,18 +130,77 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3
        iowrite32(val, dev->regbase + reg_offset);
 }
 
+struct smu_metrics {
+       u32 table_version;
+       u32 hint_count;
+       u32 s0i3_cyclecount;
+       u32 timein_s0i2;
+       u64 timeentering_s0i3_lastcapture;
+       u64 timeentering_s0i3_totaltime;
+       u64 timeto_resume_to_os_lastcapture;
+       u64 timeto_resume_to_os_totaltime;
+       u64 timein_s0i3_lastcapture;
+       u64 timein_s0i3_totaltime;
+       u64 timein_swdrips_lastcapture;
+       u64 timein_swdrips_totaltime;
+       u64 timecondition_notmet_lastcapture[SOC_SUBSYSTEM_IP_MAX];
+       u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX];
+} __packed;
+
 #ifdef CONFIG_DEBUG_FS
 static int smu_fw_info_show(struct seq_file *s, void *unused)
 {
        struct amd_pmc_dev *dev = s->private;
-       u32 value;
+       struct smu_metrics table;
+       int idx;
+
+       if (dev->cpu_id == AMD_CPU_ID_PCO)
+               return -EINVAL;
+
+       memcpy_fromio(&table, dev->smu_virt_addr, sizeof(struct smu_metrics));
+
+       seq_puts(s, "\n=== SMU Statistics ===\n");
+       seq_printf(s, "Table Version: %d\n", table.table_version);
+       seq_printf(s, "Hint Count: %d\n", table.hint_count);
+       seq_printf(s, "S0i3 Cycle Count: %d\n", table.s0i3_cyclecount);
+       seq_printf(s, "Time (in us) to S0i3: %lld\n", table.timeentering_s0i3_lastcapture);
+       seq_printf(s, "Time (in us) in S0i3: %lld\n", table.timein_s0i3_lastcapture);
+
+       seq_puts(s, "\n=== Active time (in us) ===\n");
+       for (idx = 0 ; idx < SOC_SUBSYSTEM_IP_MAX ; idx++) {
+               if (soc15_ip_blk[idx].bit_mask & dev->active_ips)
+                       seq_printf(s, "%-8s : %lld\n", soc15_ip_blk[idx].name,
+                                  table.timecondition_notmet_lastcapture[idx]);
+       }
 
-       value = ioread32(dev->smu_base + AMD_SMU_FW_VERSION);
-       seq_printf(s, "SMU FW Info: %x\n", value);
        return 0;
 }
 DEFINE_SHOW_ATTRIBUTE(smu_fw_info);
 
+static int s0ix_stats_show(struct seq_file *s, void *unused)
+{
+       struct amd_pmc_dev *dev = s->private;
+       u64 entry_time, exit_time, residency;
+
+       entry_time = ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_H_OFFSET);
+       entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET);
+
+       exit_time = ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_H_OFFSET);
+       exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET);
+
+       /* The counter ticks at 48 MHz (48 ticks per microsecond); convert to microseconds */
+       residency = exit_time - entry_time;
+       do_div(residency, 48);
+
+       seq_puts(s, "=== S0ix statistics ===\n");
+       seq_printf(s, "S0ix Entry Time: %lld\n", entry_time);
+       seq_printf(s, "S0ix Exit Time: %lld\n", exit_time);
+       seq_printf(s, "Residency Time: %lld\n", residency);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(s0ix_stats);
+
 static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 {
        debugfs_remove_recursive(dev->dbgfs_dir);
@@ -107,6 +211,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
        dev->dbgfs_dir = debugfs_create_dir("amd_pmc", NULL);
        debugfs_create_file("smu_fw_info", 0644, dev->dbgfs_dir, dev,
                            &smu_fw_info_fops);
+       debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev,
+                           &s0ix_stats_fops);
 }
 #else
 static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
@@ -118,6 +224,32 @@ static inline void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev)
+{
+       u32 phys_addr_low, phys_addr_hi;
+       u64 smu_phys_addr;
+
+       if (dev->cpu_id == AMD_CPU_ID_PCO)
+               return -EINVAL;
+
+       /* Get the list of active devices from the SMU */
+       amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1);
+
+       /* Get the DRAM address */
+       amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1);
+       amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1);
+       smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low);
+
+       dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics));
+       if (!dev->smu_virt_addr)
+               return -ENOMEM;
+
+       /* Start the logging */
+       amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
+
+       return 0;
+}
+
 static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
 {
        u32 value;
@@ -132,19 +264,19 @@ static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
        dev_dbg(dev->dev, "AMD_PMC_REGISTER_MESSAGE:%x\n", value);
 }
 
-static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret)
 {
        int rc;
-       u8 msg;
        u32 val;
 
+       mutex_lock(&dev->lock);
        /* Wait until we get a valid response */
        rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
-                               val, val > 0, PMC_MSG_DELAY_MIN_US,
+                               val, val != 0, PMC_MSG_DELAY_MIN_US,
                                PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
        if (rc) {
                dev_err(dev->dev, "failed to talk to SMU\n");
-               return rc;
+               goto out_unlock;
        }
 
        /* Write zero to response register */
@@ -154,34 +286,91 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
        amd_pmc_reg_write(dev, AMD_PMC_REGISTER_ARGUMENT, set);
 
        /* Write message ID to message ID register */
-       msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO;
        amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg);
-       return 0;
+
+       /* Wait until we get a valid response */
+       rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
+                               val, val != 0, PMC_MSG_DELAY_MIN_US,
+                               PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
+       if (rc) {
+               dev_err(dev->dev, "SMU response timed out\n");
+               goto out_unlock;
+       }
+
+       switch (val) {
+       case AMD_PMC_RESULT_OK:
+               if (ret) {
+                       /* PMFW may take longer to post the result data */
+                       usleep_range(DELAY_MIN_US, 10 * DELAY_MAX_US);
+                       *data = amd_pmc_reg_read(dev, AMD_PMC_REGISTER_ARGUMENT);
+               }
+               break;
+       case AMD_PMC_RESULT_CMD_REJECT_BUSY:
+               dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val);
+               rc = -EBUSY;
+               goto out_unlock;
+       case AMD_PMC_RESULT_CMD_UNKNOWN:
+               dev_err(dev->dev, "SMU cmd unknown. err: 0x%x\n", val);
+               rc = -EINVAL;
+               goto out_unlock;
+       case AMD_PMC_RESULT_CMD_REJECT_PREREQ:
+       case AMD_PMC_RESULT_FAILED:
+       default:
+               dev_err(dev->dev, "SMU cmd failed. err: 0x%x\n", val);
+               rc = -EIO;
+               goto out_unlock;
+       }
+
+out_unlock:
+       mutex_unlock(&dev->lock);
+       amd_pmc_dump_registers(dev);
+       return rc;
+}
+
+static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
+{
+       switch (dev->cpu_id) {
+       case AMD_CPU_ID_PCO:
+               return MSG_OS_HINT_PCO;
+       case AMD_CPU_ID_RN:
+       case AMD_CPU_ID_YC:
+               return MSG_OS_HINT_RN;
+       }
+       return -EINVAL;
 }
 
 static int __maybe_unused amd_pmc_suspend(struct device *dev)
 {
        struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
        int rc;
+       u8 msg;
+
+       /* Reset and start SMU logging to monitor the S0i3 stats */
+       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
+       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
 
-       rc = amd_pmc_send_cmd(pdev, 1);
+       msg = amd_pmc_get_os_hint(pdev);
+       rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0);
        if (rc)
                dev_err(pdev->dev, "suspend failed\n");
 
-       amd_pmc_dump_registers(pdev);
-       return 0;
+       return rc;
 }
 
 static int __maybe_unused amd_pmc_resume(struct device *dev)
 {
        struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
        int rc;
+       u8 msg;
+
+       /* Let SMU know that we are looking for stats */
+       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0);
 
-       rc = amd_pmc_send_cmd(pdev, 0);
+       msg = amd_pmc_get_os_hint(pdev);
+       rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0);
        if (rc)
                dev_err(pdev->dev, "resume failed\n");
 
-       amd_pmc_dump_registers(pdev);
        return 0;
 }
 
@@ -190,6 +379,7 @@ static const struct dev_pm_ops amd_pmc_pm_ops = {
 };
 
 static const struct pci_device_id pmc_pci_ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) },
@@ -201,9 +391,8 @@ static int amd_pmc_probe(struct platform_device *pdev)
 {
        struct amd_pmc_dev *dev = &pmc;
        struct pci_dev *rdev;
-       u32 base_addr_lo;
-       u32 base_addr_hi;
-       u64 base_addr;
+       u32 base_addr_lo, base_addr_hi;
+       u64 base_addr, fch_phys_addr;
        int err;
        u32 val;
 
@@ -248,16 +437,25 @@ static int amd_pmc_probe(struct platform_device *pdev)
        pci_dev_put(rdev);
        base_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
 
-       dev->smu_base = devm_ioremap(dev->dev, base_addr, AMD_PMC_MAPPING_SIZE);
-       if (!dev->smu_base)
-               return -ENOMEM;
-
        dev->regbase = devm_ioremap(dev->dev, base_addr + AMD_PMC_BASE_ADDR_OFFSET,
                                    AMD_PMC_MAPPING_SIZE);
        if (!dev->regbase)
                return -ENOMEM;
 
-       amd_pmc_dump_registers(dev);
+       mutex_init(&dev->lock);
+
+       /* Use FCH registers to get the S0ix stats */
+       base_addr_lo = FCH_BASE_PHY_ADDR_LOW;
+       base_addr_hi = FCH_BASE_PHY_ADDR_HIGH;
+       fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
+       dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE);
+       if (!dev->fch_virt_addr)
+               return -ENOMEM;
+
+       /* Use SMU to get the s0i3 debug stats */
+       err = amd_pmc_setup_smu_logging(dev);
+       if (err)
+               dev_err(dev->dev, "SMU debugging info not supported on this platform\n");
 
        platform_set_drvdata(pdev, dev);
        amd_pmc_dbgfs_register(dev);
@@ -269,11 +467,14 @@ static int amd_pmc_remove(struct platform_device *pdev)
        struct amd_pmc_dev *dev = platform_get_drvdata(pdev);
 
        amd_pmc_dbgfs_unregister(dev);
+       mutex_destroy(&dev->lock);
        return 0;
 }
 
 static const struct acpi_device_id amd_pmc_acpi_ids[] = {
        {"AMDI0005", 0},
+       {"AMDI0006", 0},
+       {"AMDI0007", 0},
        {"AMD0004", 0},
        { }
 };
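The reworked amd_pmc_send_cmd() above takes the message ID and an optional result pointer, so callers choose between a query and a fire-and-forget style. Two hypothetical callers, assuming only the names defined in this diff:

	/* Query style: ret=1 reads the result back from the argument register. */
	static int demo_get_smu_version(struct amd_pmc_dev *dev, u32 *version)
	{
		return amd_pmc_send_cmd(dev, 0, version, SMU_MSG_GETSMUVERSION, 1);
	}

	/* Fire-and-forget style: ret=0, no result expected. */
	static void demo_restart_logging(struct amd_pmc_dev *dev)
	{
		amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_RESET, 0);
		amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
	}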
index 5529d7b..fbb224a 100644 (file)
@@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
 
 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
+       DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
index 078648a..e5fbe01 100644 (file)
@@ -25,6 +25,7 @@ static const struct acpi_device_id intel_hid_ids[] = {
        {"INT33D5", 0},
        {"INTC1051", 0},
        {"INTC1054", 0},
+       {"INTC1070", 0},
        {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
index 3671b5d..6cfed44 100644 (file)
@@ -571,6 +571,11 @@ static ssize_t current_value_store(struct kobject *kobj,
        else
                ret = tlmi_save_bios_settings("");
 
+       if (!ret && !tlmi_priv.pending_changes) {
+               tlmi_priv.pending_changes = true;
+               /* let userland know it may need to check reboot pending again */
+               kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE);
+       }
 out:
        kfree(auth_str);
        kfree(set_str);
@@ -647,6 +652,14 @@ static struct kobj_type tlmi_pwd_setting_ktype = {
        .sysfs_ops      = &tlmi_kobj_sysfs_ops,
 };
 
+static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr,
+                                  char *buf)
+{
+       return sprintf(buf, "%d\n", tlmi_priv.pending_changes);
+}
+
+static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot);
+
 /* ---- Initialisation --------------------------------------------------------- */
 static void tlmi_release_attr(void)
 {
@@ -659,6 +672,7 @@ static void tlmi_release_attr(void)
                        kobject_put(&tlmi_priv.setting[i]->kobj);
                }
        }
+       sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
        kset_unregister(tlmi_priv.attribute_kset);
 
        /* Authentication structures */
@@ -709,8 +723,8 @@ static int tlmi_sysfs_init(void)
 
                /* Build attribute */
                tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
-               ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,
-                               NULL, "%s", tlmi_priv.setting[i]->display_name);
+               ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL,
+                                 "%s", tlmi_priv.setting[i]->display_name);
                if (ret)
                        goto fail_create_attr;
 
@@ -719,6 +733,10 @@ static int tlmi_sysfs_init(void)
                        goto fail_create_attr;
        }
 
+       ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
+       if (ret)
+               goto fail_create_attr;
+
        /* Create authentication entries */
        tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
                                                                &tlmi_priv.class_dev->kobj);
@@ -727,8 +745,7 @@ static int tlmi_sysfs_init(void)
                goto fail_create_attr;
        }
        tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset;
-       ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype,
-                       NULL, "%s", "Admin");
+       ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin");
        if (ret)
                goto fail_create_attr;
 
@@ -737,8 +754,7 @@ static int tlmi_sysfs_init(void)
                goto fail_create_attr;
 
        tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset;
-       ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype,
-                       NULL, "%s", "System");
+       ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System");
        if (ret)
                goto fail_create_attr;
 
@@ -818,6 +834,7 @@ static int tlmi_analyze(void)
                                pr_info("Error retrieving possible values for %d : %s\n",
                                                i, setting->display_name);
                }
+               kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
                tlmi_priv.setting[i] = setting;
                tlmi_priv.settings_count++;
                kfree(item);
@@ -844,10 +861,12 @@ static int tlmi_analyze(void)
        if (pwdcfg.password_state & TLMI_PAP_PWD)
                tlmi_priv.pwd_admin->valid = true;
 
+       kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype);
+
        tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
        if (!tlmi_priv.pwd_power) {
                ret = -ENOMEM;
-               goto fail_clear_attr;
+               goto fail_free_pwd_admin;
        }
        strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN);
        tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII;
@@ -859,11 +878,19 @@ static int tlmi_analyze(void)
        if (pwdcfg.password_state & TLMI_POP_PWD)
                tlmi_priv.pwd_power->valid = true;
 
+       kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype);
+
        return 0;
 
+fail_free_pwd_admin:
+       kfree(tlmi_priv.pwd_admin);
 fail_clear_attr:
-       for (i = 0; i < TLMI_SETTINGS_COUNT; ++i)
-               kfree(tlmi_priv.setting[i]);
+       for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) {
+               if (tlmi_priv.setting[i]) {
+                       kfree(tlmi_priv.setting[i]->possible_values);
+                       kfree(tlmi_priv.setting[i]);
+               }
+       }
        return ret;
 }
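The think-lmi changes above split kobject setup into two phases: kobject_init() during tlmi_analyze(), so the refcounting machinery is live before anything can fail, and kobject_add() later in tlmi_sysfs_init() once the kset exists; after that, kobject_put() is the single correct cleanup on every path. A minimal sketch of the lifecycle, assuming a ktype whose release callback frees the containing object:

	#include <linux/kobject.h>

	static int demo_register(struct kobject *kobj, struct kobj_type *ktype,
				 struct kset *kset)
	{
		int ret;

		kobject_init(kobj, ktype);	/* phase 1: refcount live */
		kobj->kset = kset;
		ret = kobject_add(kobj, NULL, "%s", "demo");	/* phase 2: in sysfs */
		if (ret)
			kobject_put(kobj);	/* safe: init already ran */
		return ret;
	}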
 
index 6fa8da7..eb59884 100644 (file)
@@ -60,6 +60,7 @@ struct think_lmi {
        bool can_get_bios_selections;
        bool can_set_bios_password;
        bool can_get_password_settings;
+       bool pending_changes;
 
        struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT];
        struct device *class_dev;
index b010e4c..11c60a2 100644 (file)
@@ -78,7 +78,7 @@ static int wl_add(struct acpi_device *device)
 
        err = wireless_input_setup();
        if (err)
-               pr_err("Failed to setup hp wireless hotkeys\n");
+               pr_err("Failed to setup wireless hotkeys\n");
 
        return err;
 }
index 8c20e52..8b08745 100644 (file)
@@ -157,6 +157,13 @@ config PTP_1588_CLOCK_OCP
        tristate "OpenCompute TimeCard as PTP clock"
        depends on PTP_1588_CLOCK
        depends on HAS_IOMEM && PCI
+       depends on SPI && I2C && MTD
+       imply SPI_MEM
+       imply SPI_XILINX
+       imply MTD_SPI_NOR
+       imply I2C_XILINX
+       select SERIAL_8250
+
        default n
        help
          This driver adds support for an OpenCompute time card.
index 0d1034e..92edf77 100644 (file)
@@ -6,15 +6,29 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/serial_8250.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/xilinx_spi.h>
+#include <net/devlink.h>
+#include <linux/i2c.h>
+#include <linux/mtd/mtd.h>
 
-static const struct pci_device_id ptp_ocp_pcidev_id[] = {
-       { PCI_DEVICE(0x1d9b, 0x0400) },
-       { 0 }
-};
-MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+#ifndef PCI_VENDOR_ID_FACEBOOK
+#define PCI_VENDOR_ID_FACEBOOK 0x1d9b
+#endif
 
-#define OCP_REGISTER_OFFSET    0x01000000
+#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD
+#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400
+#endif
+
+static struct class timecard_class = {
+       .owner          = THIS_MODULE,
+       .name           = "timecard",
+};
 
 struct ocp_reg {
        u32     ctrl;
@@ -29,18 +43,29 @@ struct ocp_reg {
        u32     __pad1[2];
        u32     offset_ns;
        u32     offset_window_ns;
+       u32     __pad2[2];
+       u32     drift_ns;
+       u32     drift_window_ns;
+       u32     __pad3[6];
+       u32     servo_offset_p;
+       u32     servo_offset_i;
+       u32     servo_drift_p;
+       u32     servo_drift_i;
 };
 
 #define OCP_CTRL_ENABLE                BIT(0)
 #define OCP_CTRL_ADJUST_TIME   BIT(1)
 #define OCP_CTRL_ADJUST_OFFSET BIT(2)
+#define OCP_CTRL_ADJUST_DRIFT  BIT(3)
+#define OCP_CTRL_ADJUST_SERVO  BIT(8)
 #define OCP_CTRL_READ_TIME_REQ BIT(30)
 #define OCP_CTRL_READ_TIME_DONE        BIT(31)
 
 #define OCP_STATUS_IN_SYNC     BIT(0)
+#define OCP_STATUS_IN_HOLDOVER BIT(1)
 
 #define OCP_SELECT_CLK_NONE    0
-#define OCP_SELECT_CLK_REG     6
+#define OCP_SELECT_CLK_REG     0xfe
 
 struct tod_reg {
        u32     ctrl;
@@ -55,8 +80,6 @@ struct tod_reg {
        u32     leap;
 };
 
-#define TOD_REGISTER_OFFSET    0x01050000
-
 #define TOD_CTRL_PROTOCOL      BIT(28)
 #define TOD_CTRL_DISABLE_FMT_A BIT(17)
 #define TOD_CTRL_DISABLE_FMT_B BIT(16)
@@ -68,16 +91,264 @@ struct tod_reg {
 #define TOD_STATUS_UTC_VALID   BIT(8)
 #define TOD_STATUS_LEAP_VALID  BIT(16)
 
+struct ts_reg {
+       u32     enable;
+       u32     error;
+       u32     polarity;
+       u32     version;
+       u32     __pad0[4];
+       u32     cable_delay;
+       u32     __pad1[3];
+       u32     intr;
+       u32     intr_mask;
+       u32     event_count;
+       u32     __pad2[1];
+       u32     ts_count;
+       u32     time_ns;
+       u32     time_sec;
+       u32     data_width;
+       u32     data;
+};
+
+struct pps_reg {
+       u32     ctrl;
+       u32     status;
+       u32     __pad0[6];
+       u32     cable_delay;
+};
+
+#define PPS_STATUS_FILTER_ERR  BIT(0)
+#define PPS_STATUS_SUPERV_ERR  BIT(1)
+
+struct img_reg {
+       u32     version;
+};
+
+struct ptp_ocp_flash_info {
+       const char *name;
+       int pci_offset;
+       int data_size;
+       void *data;
+};
+
+struct ptp_ocp_ext_info {
+       const char *name;
+       int index;
+       irqreturn_t (*irq_fcn)(int irq, void *priv);
+       int (*enable)(void *priv, bool enable);
+};
+
+struct ptp_ocp_ext_src {
+       void __iomem            *mem;
+       struct ptp_ocp          *bp;
+       struct ptp_ocp_ext_info *info;
+       int                     irq_vec;
+};
+
 struct ptp_ocp {
        struct pci_dev          *pdev;
+       struct device           dev;
        spinlock_t              lock;
-       void __iomem            *base;
        struct ocp_reg __iomem  *reg;
        struct tod_reg __iomem  *tod;
+       struct pps_reg __iomem  *pps_to_ext;
+       struct pps_reg __iomem  *pps_to_clk;
+       struct ptp_ocp_ext_src  *pps;
+       struct ptp_ocp_ext_src  *ts0;
+       struct ptp_ocp_ext_src  *ts1;
+       struct img_reg __iomem  *image;
        struct ptp_clock        *ptp;
        struct ptp_clock_info   ptp_info;
+       struct platform_device  *i2c_ctrl;
+       struct platform_device  *spi_flash;
+       struct clk_hw           *i2c_clk;
+       struct timer_list       watchdog;
+       time64_t                gnss_lost;
+       int                     id;
+       int                     n_irqs;
+       int                     gnss_port;
+       int                     mac_port;       /* miniature atomic clock */
+       u8                      serial[6];
+       int                     flash_start;
+       bool                    has_serial;
 };
 
+struct ocp_resource {
+       unsigned long offset;
+       int size;
+       int irq_vec;
+       int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r);
+       void *extra;
+       unsigned long bp_offset;
+};
+
+static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r);
+static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv);
+static int ptp_ocp_ts_enable(void *priv, bool enable);
+
+#define bp_assign_entry(bp, res, val) ({                               \
+       uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset;            \
+       *(typeof(val) *)addr = val;                                     \
+})
+
+#define OCP_RES_LOCATION(member) \
+       .bp_offset = offsetof(struct ptp_ocp, member)
+
+#define OCP_MEM_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem
+
+#define OCP_SERIAL_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial
+
+#define OCP_I2C_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c
+
+#define OCP_SPI_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi
+
+#define OCP_EXT_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext
+
+/* This is the MSI vector mapping used.
+ * 0: N/C
+ * 1: TS0
+ * 2: TS1
+ * 3: GPS
+ * 4: GPS2 (n/c)
+ * 5: MAC
+ * 6: SPI IMU (inertial measurement unit)
+ * 7: I2C oscillator
+ * 8: HWICAP
+ * 9: SPI Flash
+ */
+
+static struct ocp_resource ocp_fb_resource[] = {
+       {
+               OCP_MEM_RESOURCE(reg),
+               .offset = 0x01000000, .size = 0x10000,
+       },
+       {
+               OCP_EXT_RESOURCE(ts0),
+               .offset = 0x01010000, .size = 0x10000, .irq_vec = 1,
+               .extra = &(struct ptp_ocp_ext_info) {
+                       .name = "ts0", .index = 0,
+                       .irq_fcn = ptp_ocp_ts_irq,
+                       .enable = ptp_ocp_ts_enable,
+               },
+       },
+       {
+               OCP_EXT_RESOURCE(ts1),
+               .offset = 0x01020000, .size = 0x10000, .irq_vec = 2,
+               .extra = &(struct ptp_ocp_ext_info) {
+                       .name = "ts1", .index = 1,
+                       .irq_fcn = ptp_ocp_ts_irq,
+                       .enable = ptp_ocp_ts_enable,
+               },
+       },
+       {
+               OCP_MEM_RESOURCE(pps_to_ext),
+               .offset = 0x01030000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(pps_to_clk),
+               .offset = 0x01040000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(tod),
+               .offset = 0x01050000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(image),
+               .offset = 0x00020000, .size = 0x1000,
+       },
+       {
+               OCP_I2C_RESOURCE(i2c_ctrl),
+               .offset = 0x00150000, .size = 0x10000, .irq_vec = 7,
+       },
+       {
+               OCP_SERIAL_RESOURCE(gnss_port),
+               .offset = 0x00160000 + 0x1000, .irq_vec = 3,
+       },
+       {
+               OCP_SERIAL_RESOURCE(mac_port),
+               .offset = 0x00180000 + 0x1000, .irq_vec = 5,
+       },
+       {
+               OCP_SPI_RESOURCE(spi_flash),
+               .offset = 0x00310000, .size = 0x10000, .irq_vec = 9,
+               .extra = &(struct ptp_ocp_flash_info) {
+                       .name = "xilinx_spi", .pci_offset = 0,
+                       .data_size = sizeof(struct xspi_platform_data),
+                       .data = &(struct xspi_platform_data) {
+                               .num_chipselect = 1,
+                               .bits_per_word = 8,
+                               .num_devices = 1,
+                               .devices = &(struct spi_board_info) {
+                                       .modalias = "spi-nor",
+                               },
+                       },
+               },
+       },
+       {
+               .setup = ptp_ocp_fb_board_init,
+       },
+       { }
+};
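Everything board-specific above is expressed as data: each ocp_resource entry carries a setup callback, the BAR offset/size and MSI vector it needs, and (via OCP_RES_LOCATION()) the offset of the struct ptp_ocp member that should receive the result, so registration later in this diff is a single loop over the table. A sketch of how bp_assign_entry() resolves, assuming the macros above:

	/* For an entry declared with OCP_MEM_RESOURCE(tod), r->bp_offset is
	 * offsetof(struct ptp_ocp, tod), so bp_assign_entry(bp, r, mem)
	 * expands to roughly:
	 *
	 *	*(void __iomem **)((uintptr_t)bp + r->bp_offset) = mem;
	 *
	 * i.e. the ioremapped pointer lands in bp->tod without any
	 * per-member switch statement.
	 */
	static void demo_assign(struct ptp_ocp *bp, struct ocp_resource *r,
				void __iomem *mem)
	{
		bp_assign_entry(bp, r, mem);
	}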
+
+static const struct pci_device_id ptp_ocp_pcidev_id[] = {
+       { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+
+static DEFINE_MUTEX(ptp_ocp_lock);
+static DEFINE_IDR(ptp_ocp_idr);
+
+static struct {
+       const char *name;
+       int value;
+} ptp_ocp_clock[] = {
+       { .name = "NONE",       .value = 0 },
+       { .name = "TOD",        .value = 1 },
+       { .name = "IRIG",       .value = 2 },
+       { .name = "PPS",        .value = 3 },
+       { .name = "PTP",        .value = 4 },
+       { .name = "RTC",        .value = 5 },
+       { .name = "DCF",        .value = 6 },
+       { .name = "REGS",       .value = 0xfe },
+       { .name = "EXT",        .value = 0xff },
+};
+
+static const char *
+ptp_ocp_clock_name_from_val(int val)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++)
+               if (ptp_ocp_clock[i].value == val)
+                       return ptp_ocp_clock[i].name;
+       return NULL;
+}
+
+static int
+ptp_ocp_clock_val_from_name(const char *name)
+{
+       const char *clk;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+               clk = ptp_ocp_clock[i].name;
+               if (!strncasecmp(name, clk, strlen(clk)))
+                       return ptp_ocp_clock[i].value;
+       }
+       return -EINVAL;
+}
+
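ptp_ocp_clock_name_from_val() and ptp_ocp_clock_val_from_name() are the two halves of a string<->register-value map (the sysfs attributes that consume them sit outside this excerpt). A hypothetical consumer, with locking omitted for brevity (the real driver serializes register access with bp->lock):

	/* Hypothetical: select the clock source by name, e.g. "PPS" or "REGS". */
	static int demo_set_clock_source(struct ptp_ocp *bp, const char *name)
	{
		int val = ptp_ocp_clock_val_from_name(name);

		if (val < 0)
			return val;	/* -EINVAL for unknown names */

		iowrite32(val, &bp->reg->select);
		return 0;
	}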
 static int
 __ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts,
                         struct ptp_system_timestamp *sts)
@@ -192,6 +463,45 @@ ptp_ocp_null_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm)
        return -EOPNOTSUPP;
 }
 
+static int
+ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns)
+{
+       return -EOPNOTSUPP;
+}
+
+static int
+ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq,
+              int on)
+{
+       struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info);
+       struct ptp_ocp_ext_src *ext = NULL;
+       int err;
+
+       switch (rq->type) {
+       case PTP_CLK_REQ_EXTTS:
+               switch (rq->extts.index) {
+               case 0:
+                       ext = bp->ts0;
+                       break;
+               case 1:
+                       ext = bp->ts1;
+                       break;
+               }
+               break;
+       case PTP_CLK_REQ_PPS:
+               ext = bp->pps;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       err = -ENXIO;
+       if (ext)
+               err = ext->info->enable(ext, on);
+
+       return err;
+}
+
 static const struct ptp_clock_info ptp_ocp_clock_info = {
        .owner          = THIS_MODULE,
        .name           = KBUILD_MODNAME,
@@ -200,10 +510,57 @@ static const struct ptp_clock_info ptp_ocp_clock_info = {
        .settime64      = ptp_ocp_settime,
        .adjtime        = ptp_ocp_adjtime,
        .adjfine        = ptp_ocp_null_adjfine,
+       .adjphase       = ptp_ocp_adjphase,
+       .enable         = ptp_ocp_enable,
+       .pps            = true,
+       .n_ext_ts       = 2,
 };
 
+static void
+__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp)
+{
+       u32 ctrl, select;
+
+       select = ioread32(&bp->reg->select);
+       iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select);
+
+       iowrite32(0, &bp->reg->drift_ns);
+
+       ctrl = ioread32(&bp->reg->ctrl);
+       ctrl |= OCP_CTRL_ADJUST_DRIFT;
+       iowrite32(ctrl, &bp->reg->ctrl);
+
+       /* restore clock selection */
+       iowrite32(select >> 16, &bp->reg->select);
+}
+
+static void
+ptp_ocp_watchdog(struct timer_list *t)
+{
+       struct ptp_ocp *bp = from_timer(bp, t, watchdog);
+       unsigned long flags;
+       u32 status;
+
+       status = ioread32(&bp->pps_to_clk->status);
+
+       if (status & PPS_STATUS_SUPERV_ERR) {
+               iowrite32(status, &bp->pps_to_clk->status);
+               if (!bp->gnss_lost) {
+                       spin_lock_irqsave(&bp->lock, flags);
+                       __ptp_ocp_clear_drift_locked(bp);
+                       spin_unlock_irqrestore(&bp->lock, flags);
+                       bp->gnss_lost = ktime_get_real_seconds();
+               }
+
+       } else if (bp->gnss_lost) {
+               bp->gnss_lost = 0;
+       }
+
+       mod_timer(&bp->watchdog, jiffies + HZ);
+}
+
 static int
-ptp_ocp_check_clock(struct ptp_ocp *bp)
+ptp_ocp_init_clock(struct ptp_ocp *bp)
 {
        struct timespec64 ts;
        bool sync;
@@ -214,6 +571,17 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
        ctrl |= OCP_CTRL_ENABLE;
        iowrite32(ctrl, &bp->reg->ctrl);
 
+       /* No drift correction: offset_p = 1/8, offset_i = 1/16,
+        * drift_p = 0, drift_i = 0.  The register values appear to be
+        * Q16 fixed point: 0x2000 / 0x10000 = 1/8, 0x1000 / 0x10000 = 1/16.
+        */
+       iowrite32(0x2000, &bp->reg->servo_offset_p);
+       iowrite32(0x1000, &bp->reg->servo_offset_i);
+       iowrite32(0,      &bp->reg->servo_drift_p);
+       iowrite32(0,      &bp->reg->servo_drift_i);
+
+       /* latch servo values */
+       ctrl |= OCP_CTRL_ADJUST_SERVO;
+       iowrite32(ctrl, &bp->reg->ctrl);
+
        if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) {
                dev_err(&bp->pdev->dev, "clock not enabled\n");
                return -ENODEV;
@@ -229,6 +597,9 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
                         ts.tv_sec, ts.tv_nsec,
                         sync ? "in-sync" : "UNSYNCED");
 
+       timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0);
+       mod_timer(&bp->watchdog, jiffies + HZ);
+
        return 0;
 }
 
@@ -278,82 +649,839 @@ ptp_ocp_tod_info(struct ptp_ocp *bp)
                 reg & TOD_STATUS_LEAP_VALID ? 1 : 0);
 }
 
+static int
+ptp_ocp_firstchild(struct device *dev, void *data)
+{
+       return 1;
+}
+
+static int
+ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data)
+{
+       struct i2c_msg msgs[2] = {
+               {
+                       .addr = addr,
+                       .len = 1,
+                       .buf = &reg,
+               },
+               {
+                       .addr = addr,
+                       .flags = I2C_M_RD,
+                       .len = 2,
+                       .buf = data,
+               },
+       };
+       int err;
+       u8 len;
+
+       /* xiic-i2c for some stupid reason only does 2 byte reads. */
+       while (sz) {
+               len = min_t(u8, sz, 2);
+               msgs[1].len = len;
+               err = i2c_transfer(adap, msgs, 2);
+               if (err != msgs[1].len)
+                       return err;
+               msgs[1].buf += len;
+               reg += len;
+               sz -= len;
+       }
+       return 0;
+}
+
+static void
+ptp_ocp_get_serial_number(struct ptp_ocp *bp)
+{
+       struct i2c_adapter *adap;
+       struct device *dev;
+       int err;
+
+       dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild);
+       if (!dev) {
+               dev_err(&bp->pdev->dev, "Can't find I2C adapter\n");
+               return;
+       }
+
+       adap = i2c_verify_adapter(dev);
+       if (!adap) {
+               dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n",
+                       dev_name(dev));
+               goto out;
+       }
+
+       err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial);
+       if (err) {
+               dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err);
+               goto out;
+       }
+
+       bp->has_serial = true;
+
+out:
+       put_device(dev);
+}
+
 static void
 ptp_ocp_info(struct ptp_ocp *bp)
 {
-       static const char * const clock_name[] = {
-               "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT"
-       };
        u32 version, select;
 
        version = ioread32(&bp->reg->version);
        select = ioread32(&bp->reg->select);
        dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n",
                 version >> 24, (version >> 16) & 0xff, version & 0xffff,
-                clock_name[select & 7],
+                ptp_ocp_clock_name_from_val(select >> 16),
                 ptp_clock_index(bp->ptp));
 
        ptp_ocp_tod_info(bp);
 }
 
+static struct device *
+ptp_ocp_find_flash(struct ptp_ocp *bp)
+{
+       struct device *dev, *last;
+
+       last = NULL;
+       dev = &bp->spi_flash->dev;
+
+       while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) {
+               if (!strcmp("mtd", dev_bus_name(dev)))
+                       break;
+               put_device(last);
+               last = dev;
+       }
+       put_device(last);
+
+       return dev;
+}
+
 static int
-ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev,
+                     const struct firmware *fw)
 {
-       struct ptp_ocp *bp;
+       struct mtd_info *mtd = dev_get_drvdata(dev);
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       size_t off, len, resid, wrote;
+       struct erase_info erase;
+       size_t base, blksz;
+       int err;
+
+       off = 0;
+       base = bp->flash_start;
+       blksz = 4096;
+       resid = fw->size;
+
+       while (resid) {
+               devlink_flash_update_status_notify(devlink, "Flashing",
+                                                  NULL, off, fw->size);
+
+               len = min_t(size_t, resid, blksz);
+               erase.addr = base + off;
+               erase.len = blksz;
+
+               err = mtd_erase(mtd, &erase);
+               if (err)
+                       goto out;
+
+               err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]);
+               if (err)
+                       goto out;
+
+               off += blksz;
+               resid -= len;
+       }
+out:
+       return err;
+}
+
+static int
+ptp_ocp_devlink_flash_update(struct devlink *devlink,
+                            struct devlink_flash_update_params *params,
+                            struct netlink_ext_ack *extack)
+{
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       struct device *dev;
+       const char *msg;
+       int err;
+
+       dev = ptp_ocp_find_flash(bp);
+       if (!dev) {
+               dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n");
+               return -ENODEV;
+       }
+
+       devlink_flash_update_status_notify(devlink, "Preparing to flash",
+                                          NULL, 0, 0);
+
+       err = ptp_ocp_devlink_flash(devlink, dev, params->fw);
+
+       msg = err ? "Flash error" : "Flash complete";
+       devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0);
+
+       put_device(dev);
+       return err;
+}
+
+static int
+ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+                        struct netlink_ext_ack *extack)
+{
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       char buf[32];
+       int err;
+
+       err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+       if (err)
+               return err;
+
+       if (bp->image) {
+               u32 ver = ioread32(&bp->image->version);
+
+               if (ver & 0xffff) {
+                       sprintf(buf, "%d", ver);
+                       err = devlink_info_version_running_put(req,
+                                                              "fw",
+                                                              buf);
+               } else {
+                       sprintf(buf, "%d", ver >> 16);
+                       err = devlink_info_version_running_put(req,
+                                                              "loader",
+                                                              buf);
+               }
+               if (err)
+                       return err;
+       }
+
+       if (!bp->has_serial)
+               ptp_ocp_get_serial_number(bp);
+
+       if (bp->has_serial) {
+               sprintf(buf, "%pM", bp->serial);
+               err = devlink_info_serial_number_put(req, buf);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static const struct devlink_ops ptp_ocp_devlink_ops = {
+       .flash_update = ptp_ocp_devlink_flash_update,
+       .info_get = ptp_ocp_devlink_info_get,
+};
+
+static void __iomem *
+__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size)
+{
+       struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp");
+
+       return devm_ioremap_resource(&bp->pdev->dev, &res);
+}
+
+static void __iomem *
+ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       unsigned long start;
+
+       start = pci_resource_start(bp->pdev, 0) + r->offset;
+       return __ptp_ocp_get_mem(bp, start, r->size);
+}
+
+static void
+ptp_ocp_set_irq_resource(struct resource *res, int irq)
+{
+       struct resource r = DEFINE_RES_IRQ(irq);
+       *res = r;
+}
+
+static void
+ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size)
+{
+       struct resource r = DEFINE_RES_MEM(start, size);
+       *res = r;
+}
+
+static int
+ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct ptp_ocp_flash_info *info;
+       struct pci_dev *pdev = bp->pdev;
+       struct platform_device *p;
+       struct resource res[2];
+       unsigned long start;
+       int id;
+
+       /* XXX hack to work around old FPGA */
+       if (bp->n_irqs < 10) {
+               dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n");
+               return 0;
+       }
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "spi device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
+       }
+
+       start = pci_resource_start(pdev, 0) + r->offset;
+       ptp_ocp_set_mem_resource(&res[0], start, r->size);
+       ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+       info = r->extra;
+       id = pci_dev_id(pdev) << 1;
+       id += info->pci_offset;
+
+       p = platform_device_register_resndata(&pdev->dev, info->name, id,
+                                             res, 2, info->data,
+                                             info->data_size);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       bp_assign_entry(bp, r, p);
+
+       return 0;
+}
+
+static struct platform_device *
+ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id)
+{
+       struct resource res[2];
+       unsigned long start;
+
+       start = pci_resource_start(pdev, 0) + r->offset;
+       ptp_ocp_set_mem_resource(&res[0], start, r->size);
+       ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+       return platform_device_register_resndata(&pdev->dev, "xiic-i2c",
+                                                id, res, 2, NULL, 0);
+}
+
+static int
+ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct platform_device *p;
+       struct clk_hw *clk;
+       char buf[32];
+       int id;
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
+       }
+
+       id = pci_dev_id(bp->pdev);
+
+       sprintf(buf, "AXI.%d", id);
+       clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000);
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
+       bp->i2c_clk = clk;
+
+       sprintf(buf, "xiic-i2c.%d", id);
+       devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf);
+       p = ptp_ocp_i2c_bus(bp->pdev, r, id);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       bp_assign_entry(bp, r, p);
+
+       return 0;
+}
+
+static irqreturn_t
+ptp_ocp_ts_irq(int irq, void *priv)
+{
+       struct ptp_ocp_ext_src *ext = priv;
+       struct ts_reg __iomem *reg = ext->mem;
+       struct ptp_clock_event ev;
+       u32 sec, nsec;
+
+       /* XXX should fix API - this converts s/ns -> ts -> s/ns */
+       sec = ioread32(&reg->time_sec);
+       nsec = ioread32(&reg->time_ns);
+
+       ev.type = PTP_CLOCK_EXTTS;
+       ev.index = ext->info->index;
+       ev.timestamp = sec * 1000000000ULL + nsec;
+
+       ptp_clock_event(ext->bp->ptp, &ev);
+
+       iowrite32(1, &reg->intr);       /* write 1 to ack */
+
+       return IRQ_HANDLED;
+}
+
+static int
+ptp_ocp_ts_enable(void *priv, bool enable)
+{
+       struct ptp_ocp_ext_src *ext = priv;
+       struct ts_reg __iomem *reg = ext->mem;
+
+       if (enable) {
+               iowrite32(1, &reg->enable);
+               iowrite32(1, &reg->intr_mask);
+               iowrite32(1, &reg->intr);
+       } else {
+               iowrite32(0, &reg->intr_mask);
+               iowrite32(0, &reg->enable);
+       }
+
+       return 0;
+}
+
+static void
+ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext)
+{
+       ext->info->enable(ext, false);
+       pci_free_irq(ext->bp->pdev, ext->irq_vec, ext);
+       kfree(ext);
+}
+
+static int
+ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct ptp_ocp_ext_src *ext;
        int err;
 
-       bp = kzalloc(sizeof(*bp), GFP_KERNEL);
-       if (!bp)
+       ext = kzalloc(sizeof(*ext), GFP_KERNEL);
+       if (!ext)
                return -ENOMEM;
-       bp->pdev = pdev;
-       pci_set_drvdata(pdev, bp);
 
-       err = pci_enable_device(pdev);
+       err = -EINVAL;
+       ext->mem = ptp_ocp_get_mem(bp, r);
+       if (!ext->mem)
+               goto out;
+
+       ext->bp = bp;
+       ext->info = r->extra;
+       ext->irq_vec = r->irq_vec;
+
+       err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL,
+                             ext, "ocp%d.%s", bp->id, ext->info->name);
        if (err) {
-               dev_err(&pdev->dev, "pci_enable_device\n");
-               goto out_free;
+               dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec);
+               goto out;
        }
 
-       err = pci_request_regions(pdev, KBUILD_MODNAME);
-       if (err) {
-               dev_err(&pdev->dev, "pci_request_region\n");
-               goto out_disable;
+       bp_assign_entry(bp, r, ext);
+
+       return 0;
+
+out:
+       kfree(ext);
+       return err;
+}
+
+static int
+ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct uart_8250_port uart;
+
+       /* Setting UPF_IOREMAP and leaving port.membase unspecified lets
+        * the serial port device claim and release the PCI resource.
+        */
+       memset(&uart, 0, sizeof(uart));
+       uart.port.dev = &pdev->dev;
+       uart.port.iotype = UPIO_MEM;
+       uart.port.regshift = 2;
+       uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset;
+       uart.port.irq = pci_irq_vector(pdev, r->irq_vec);
+       uart.port.uartclk = 50000000;
+       uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP;
+       uart.port.type = PORT_16550A;
+
+       return serial8250_register_8250_port(&uart);
+}
+
+static int
+ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       int port;
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "serial device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
        }
 
-       bp->base = pci_ioremap_bar(pdev, 0);
-       if (!bp->base) {
-               dev_err(&pdev->dev, "io_remap bar0\n");
-               err = -ENOMEM;
-               goto out_release_regions;
+       port = ptp_ocp_serial_line(bp, r);
+       if (port < 0)
+               return port;
+
+       bp_assign_entry(bp, r, port);
+
+       return 0;
+}
+
+static int
+ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       void __iomem *mem;
+
+       mem = ptp_ocp_get_mem(bp, r);
+       if (!mem)
+               return -EINVAL;
+
+       bp_assign_entry(bp, r, mem);
+
+       return 0;
+}
+
+/* FB specific board initializers; last "resource" registered. */
+static int
+ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       bp->flash_start = 1024 * 4096;
+
+       return ptp_ocp_init_clock(bp);
+}
+
+static int
+ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data)
+{
+       struct ocp_resource *r, *table;
+       int err = 0;
+
+       table = (struct ocp_resource *)driver_data;
+       for (r = table; r->setup; r++) {
+               err = r->setup(bp, r);
+               if (err)
+                       break;
+       }
+       return err;
+}
+
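ptp_ocp_register_resources() drives all per-board setup from a flat table: entries run in order until one with a NULL ->setup terminates the walk, so table order is initialization order and the board-init entry must sit last (matching the "last \"resource\" registered" comment above). A minimal sketch of such a table; the offsets, sizes and irq vectors are invented, and any ocp_resource fields beyond those visible here are omitted:

        /* Sketch only, not the driver's real table. */
        static struct ocp_resource ocp_fb_resource[] = {
                { .setup = ptp_ocp_register_i2c,    .offset = 0x150000, .size = 0x10000, .irq_vec = 7 },
                { .setup = ptp_ocp_register_serial, .offset = 0x160000, .size = 0x1000,  .irq_vec = 3 },
                { .setup = ptp_ocp_register_mem,    .offset = 0x130000, .size = 0x1000 },
                { .setup = ptp_ocp_fb_board_init }, /* board init runs last */
                { }     /* ->setup == NULL ends the walk */
        };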
+static ssize_t
+serialnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+       if (!bp->has_serial)
+               ptp_ocp_get_serial_number(bp);
+
+       return sysfs_emit(buf, "%pM\n", bp->serial);
+}
+static DEVICE_ATTR_RO(serialnum);
+
+static ssize_t
+gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       ssize_t ret;
+
+       if (bp->gnss_lost)
+               ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost);
+       else
+               ret = sysfs_emit(buf, "SYNC\n");
+
+       return ret;
+}
+static DEVICE_ATTR_RO(gnss_sync);
+
+static ssize_t
+clock_source_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       const char *p;
+       u32 select;
+
+       select = ioread32(&bp->reg->select);
+       p = ptp_ocp_clock_name_from_val(select >> 16);
+
+       return sysfs_emit(buf, "%s\n", p);
+}
+
+static ssize_t
+clock_source_store(struct device *dev, struct device_attribute *attr,
+                  const char *buf, size_t count)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       unsigned long flags;
+       int val;
+
+       val = ptp_ocp_clock_val_from_name(buf);
+       if (val < 0)
+               return val;
+
+       spin_lock_irqsave(&bp->lock, flags);
+       iowrite32(val, &bp->reg->select);
+       spin_unlock_irqrestore(&bp->lock, flags);
+
+       return count;
+}
+static DEVICE_ATTR_RW(clock_source);
+
+static ssize_t
+available_clock_sources_show(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       const char *clk;
+       ssize_t count;
+       int i;
+
+       count = 0;
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+               clk = ptp_ocp_clock[i].name;
+               count += sysfs_emit_at(buf, count, "%s ", clk);
+       }
+       if (count)
+               count--;
+       count += sysfs_emit_at(buf, count, "\n");
+       return count;
+}
+static DEVICE_ATTR_RO(available_clock_sources);
+
+static struct attribute *timecard_attrs[] = {
+       &dev_attr_serialnum.attr,
+       &dev_attr_gnss_sync.attr,
+       &dev_attr_clock_source.attr,
+       &dev_attr_available_clock_sources.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(timecard);
+
+static void
+ptp_ocp_dev_release(struct device *dev)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+       mutex_lock(&ptp_ocp_lock);
+       idr_remove(&ptp_ocp_idr, bp->id);
+       mutex_unlock(&ptp_ocp_lock);
+}
+
+static int
+ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev)
+{
+       int err;
+
+       mutex_lock(&ptp_ocp_lock);
+       err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL);
+       mutex_unlock(&ptp_ocp_lock);
+       if (err < 0) {
+               dev_err(&pdev->dev, "idr_alloc failed: %d\n", err);
+               return err;
        }
-       bp->reg = bp->base + OCP_REGISTER_OFFSET;
-       bp->tod = bp->base + TOD_REGISTER_OFFSET;
+       bp->id = err;
+
        bp->ptp_info = ptp_ocp_clock_info;
        spin_lock_init(&bp->lock);
+       bp->gnss_port = -1;
+       bp->mac_port = -1;
+       bp->pdev = pdev;
+
+       device_initialize(&bp->dev);
+       dev_set_name(&bp->dev, "ocp%d", bp->id);
+       bp->dev.class = &timecard_class;
+       bp->dev.parent = &pdev->dev;
+       bp->dev.release = ptp_ocp_dev_release;
+       dev_set_drvdata(&bp->dev, bp);
+
+       err = device_add(&bp->dev);
+       if (err) {
+               dev_err(&bp->dev, "device add failed: %d\n", err);
+               goto out;
+       }
+
+       pci_set_drvdata(pdev, bp);
+
+       return 0;
+
+out:
+       ptp_ocp_dev_release(&bp->dev);
+       put_device(&bp->dev);
+       return err;
+}
+
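A lifetime detail worth noting: the idr slot allocated here is released by ptp_ocp_dev_release(), the class device's ->release callback, so the device reference count owns the id. Once device_initialize() has run, the canonical rule is to drop the reference rather than free directly, so that ->release is invoked; a sketch of that rule in isolation:

        device_initialize(&bp->dev);
        ...
        err = device_add(&bp->dev);
        if (err) {
                put_device(&bp->dev);   /* drops the ref; ->release frees the id */
                return err;
        }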
+static void
+ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link)
+{
+       struct device *dev = &bp->dev;
+
+       if (sysfs_create_link(&dev->kobj, &child->kobj, link))
+               dev_err(dev, "%s symlink failed\n", link);
+}
+
+static void
+ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link)
+{
+       struct device *dev, *child;
+
+       dev = &bp->pdev->dev;
+
+       child = device_find_child_by_name(dev, name);
+       if (!child) {
+               dev_err(dev, "Could not find device %s\n", name);
+               return;
+       }
+
+       ptp_ocp_symlink(bp, child, link);
+       put_device(child);
+}
+
+static int
+ptp_ocp_complete(struct ptp_ocp *bp)
+{
+       struct pps_device *pps;
+       char buf[32];
+
+       if (bp->gnss_port != -1) {
+               sprintf(buf, "ttyS%d", bp->gnss_port);
+               ptp_ocp_link_child(bp, buf, "ttyGNSS");
+       }
+       if (bp->mac_port != -1) {
+               sprintf(buf, "ttyS%d", bp->mac_port);
+               ptp_ocp_link_child(bp, buf, "ttyMAC");
+       }
+       sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp));
+       ptp_ocp_link_child(bp, buf, "ptp");
+
+       pps = pps_lookup_dev(bp->ptp);
+       if (pps)
+               ptp_ocp_symlink(bp, pps->dev, "pps");
+
+       if (device_add_groups(&bp->dev, timecard_groups))
+               pr_err("device add groups failed\n");
 
-       err = ptp_ocp_check_clock(bp);
+       return 0;
+}
+
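Assuming a single board registered as ocp0, the symlinks created above give the class directory a stable layout no matter which ttyS/ptp/pps numbers were assigned at probe time; roughly (link targets abbreviated, the real ones point into the PCI device's sysfs subtree):

        /sys/class/timecard/ocp0/
                ttyGNSS -> .../ttyS4
                ttyMAC  -> .../ttyS5
                ptp     -> .../ptp2
                pps     -> .../pps1
                serialnum  gnss_sync  clock_source  available_clock_sources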
+static void
+ptp_ocp_resource_summary(struct ptp_ocp *bp)
+{
+       struct device *dev = &bp->pdev->dev;
+
+       if (bp->image) {
+               u32 ver = ioread32(&bp->image->version);
+
+               dev_info(dev, "version %x\n", ver);
+               if (ver & 0xffff)
+                       dev_info(dev, "regular image, version %d\n",
+                                ver & 0xffff);
+               else
+                       dev_info(dev, "golden image, version %d\n",
+                                ver >> 16);
+       }
+       if (bp->gnss_port != -1)
+               dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port);
+       if (bp->mac_port != -1)
+               dev_info(dev, "MAC @ /dev/ttyS%d   57600\n", bp->mac_port);
+}
+
+static void
+ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
+{
+       struct device *dev = &bp->dev;
+
+       sysfs_remove_link(&dev->kobj, "ttyGNSS");
+       sysfs_remove_link(&dev->kobj, "ttyMAC");
+       sysfs_remove_link(&dev->kobj, "ptp");
+       sysfs_remove_link(&dev->kobj, "pps");
+       device_remove_groups(dev, timecard_groups);
+}
+
+static void
+ptp_ocp_detach(struct ptp_ocp *bp)
+{
+       ptp_ocp_detach_sysfs(bp);
+       if (timer_pending(&bp->watchdog))
+               del_timer_sync(&bp->watchdog);
+       if (bp->ts0)
+               ptp_ocp_unregister_ext(bp->ts0);
+       if (bp->ts1)
+               ptp_ocp_unregister_ext(bp->ts1);
+       if (bp->pps)
+               ptp_ocp_unregister_ext(bp->pps);
+       if (bp->gnss_port != -1)
+               serial8250_unregister_port(bp->gnss_port);
+       if (bp->mac_port != -1)
+               serial8250_unregister_port(bp->mac_port);
+       if (bp->spi_flash)
+               platform_device_unregister(bp->spi_flash);
+       if (bp->i2c_ctrl)
+               platform_device_unregister(bp->i2c_ctrl);
+       if (bp->i2c_clk)
+               clk_hw_unregister_fixed_rate(bp->i2c_clk);
+       if (bp->n_irqs)
+               pci_free_irq_vectors(bp->pdev);
+       if (bp->ptp)
+               ptp_clock_unregister(bp->ptp);
+       device_unregister(&bp->dev);
+}
+
+static int
+ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct devlink *devlink;
+       struct ptp_ocp *bp;
+       int err;
+
+       devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev);
+       if (!devlink) {
+               dev_err(&pdev->dev, "devlink_alloc failed\n");
+               return -ENOMEM;
+       }
+
+       err = devlink_register(devlink);
+       if (err)
+               goto out_free;
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "pci_enable_device\n");
+               goto out_unregister;
+       }
+
+       bp = devlink_priv(devlink);
+       err = ptp_ocp_device_init(bp, pdev);
+       if (err)
+               goto out_unregister;
+
+       /* Compat mode:
+        * older FPGA firmware only returns 2 IRQs.
+        * Allow this; if not all of the IRQs are returned, skip the
+        * extra devices and just register the clock.
+        */
+       err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+       if (err < 0) {
+               dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err);
+               goto out;
+       }
+       bp->n_irqs = err;
+       pci_set_master(pdev);
+
+       err = ptp_ocp_register_resources(bp, id->driver_data);
        if (err)
                goto out;
 
        bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev);
        if (IS_ERR(bp->ptp)) {
-               dev_err(&pdev->dev, "ptp_clock_register\n");
                err = PTR_ERR(bp->ptp);
+               dev_err(&pdev->dev, "ptp_clock_register: %d\n", err);
+               bp->ptp = NULL;
                goto out;
        }
 
+       err = ptp_ocp_complete(bp);
+       if (err)
+               goto out;
+
        ptp_ocp_info(bp);
+       ptp_ocp_resource_summary(bp);
 
        return 0;
 
 out:
-       pci_iounmap(pdev, bp->base);
-out_release_regions:
-       pci_release_regions(pdev);
-out_disable:
+       ptp_ocp_detach(bp);
        pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+out_unregister:
+       devlink_unregister(devlink);
 out_free:
-       kfree(bp);
+       devlink_free(devlink);
 
        return err;
 }
@@ -362,13 +1490,14 @@ static void
 ptp_ocp_remove(struct pci_dev *pdev)
 {
        struct ptp_ocp *bp = pci_get_drvdata(pdev);
+       struct devlink *devlink = priv_to_devlink(bp);
 
-       ptp_clock_unregister(bp->ptp);
-       pci_iounmap(pdev, bp->base);
-       pci_release_regions(pdev);
+       ptp_ocp_detach(bp);
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
-       kfree(bp);
+
+       devlink_unregister(devlink);
+       devlink_free(devlink);
 }
 
 static struct pci_driver ptp_ocp_driver = {
@@ -378,19 +1507,84 @@ static struct pci_driver ptp_ocp_driver = {
        .remove         = ptp_ocp_remove,
 };
 
+static int
+ptp_ocp_i2c_notifier_call(struct notifier_block *nb,
+                         unsigned long action, void *data)
+{
+       struct device *dev, *child = data;
+       struct ptp_ocp *bp;
+       bool add;
+
+       switch (action) {
+       case BUS_NOTIFY_ADD_DEVICE:
+       case BUS_NOTIFY_DEL_DEVICE:
+               add = action == BUS_NOTIFY_ADD_DEVICE;
+               break;
+       default:
+               return 0;
+       }
+
+       if (!i2c_verify_adapter(child))
+               return 0;
+
+       dev = child;
+       while ((dev = dev->parent))
+               if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME))
+                       goto found;
+       return 0;
+
+found:
+       bp = dev_get_drvdata(dev);
+       if (add)
+               ptp_ocp_symlink(bp, child, "i2c");
+       else
+               sysfs_remove_link(&bp->dev.kobj, "i2c");
+
+       return 0;
+}
+
+static struct notifier_block ptp_ocp_i2c_notifier = {
+       .notifier_call = ptp_ocp_i2c_notifier_call,
+};
+
 static int __init
 ptp_ocp_init(void)
 {
+       const char *what;
        int err;
 
+       what = "timecard class";
+       err = class_register(&timecard_class);
+       if (err)
+               goto out;
+
+       what = "i2c notifier";
+       err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+       if (err)
+               goto out_notifier;
+
+       what = "ptp_ocp driver";
        err = pci_register_driver(&ptp_ocp_driver);
+       if (err)
+               goto out_register;
+
+       return 0;
+
+out_register:
+       bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+out_notifier:
+       class_unregister(&timecard_class);
+out:
+       pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err);
        return err;
 }
 
 static void __exit
 ptp_ocp_fini(void)
 {
+       bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
        pci_unregister_driver(&ptp_ocp_driver);
+       class_unregister(&timecard_class);
 }
 
 module_init(ptp_ocp_init);
index cff91b4..9c67b97 100644 (file)
@@ -74,6 +74,7 @@ config QETH_L2
        def_tristate y
        prompt "qeth layer 2 device support"
        depends on QETH
+       depends on BRIDGE || BRIDGE=n
        help
          Select this option to be able to run qeth devices in layer 2 mode.
          To compile as a module, choose M. The module name is qeth_l2.
index 69afc03..4871f71 100644 (file)
@@ -717,6 +717,227 @@ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable)
        return rc;
 }
 
+struct qeth_l2_br2dev_event_work {
+       struct work_struct work;
+       struct net_device *br_dev;
+       struct net_device *lsync_dev;
+       struct net_device *dst_dev;
+       unsigned long event;
+       unsigned char addr[ETH_ALEN];
+};
+
+static const struct net_device_ops qeth_l2_netdev_ops;
+
+static bool qeth_l2_must_learn(struct net_device *netdev,
+                              struct net_device *dstdev)
+{
+       struct qeth_priv *priv;
+
+       priv = netdev_priv(netdev);
+       return (netdev != dstdev &&
+               (priv->brport_features & BR_LEARNING_SYNC) &&
+               !(br_port_flag_is_set(netdev, BR_ISOLATED) &&
+                 br_port_flag_is_set(dstdev, BR_ISOLATED)) &&
+               netdev->netdev_ops == &qeth_l2_netdev_ops);
+}
+
+/**
+ *     qeth_l2_br2dev_worker() - update local MACs
+ *     @work: bridge to device FDB update
+ *
+ *     Update local MACs of a learning_sync bridgeport so it can receive
+ *     messages for a destination port.
+ *     In case of an isolated learning_sync port, also update its isolated
+ *     siblings.
+ */
+static void qeth_l2_br2dev_worker(struct work_struct *work)
+{
+       struct qeth_l2_br2dev_event_work *br2dev_event_work =
+               container_of(work, struct qeth_l2_br2dev_event_work, work);
+       struct net_device *lsyncdev = br2dev_event_work->lsync_dev;
+       struct net_device *dstdev = br2dev_event_work->dst_dev;
+       struct net_device *brdev = br2dev_event_work->br_dev;
+       unsigned long event = br2dev_event_work->event;
+       unsigned char *addr = br2dev_event_work->addr;
+       struct qeth_card *card = lsyncdev->ml_priv;
+       struct net_device *lowerdev;
+       struct list_head *iter;
+       int err = 0;
+
+       kfree(br2dev_event_work);
+       QETH_CARD_TEXT_(card, 4, "b2dw%04x", event);
+       QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr));
+
+       rcu_read_lock();
+       /* Verify preconditions are still valid: */
+       if (!netif_is_bridge_port(lsyncdev) ||
+           brdev != netdev_master_upper_dev_get_rcu(lsyncdev))
+               goto unlock;
+       if (!qeth_l2_must_learn(lsyncdev, dstdev))
+               goto unlock;
+
+       if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) {
+               /* Update lsyncdev and its isolated sibling(s): */
+               iter = &brdev->adj_list.lower;
+               lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+               while (lowerdev) {
+                       if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) {
+                               switch (event) {
+                               case SWITCHDEV_FDB_ADD_TO_DEVICE:
+                                       err = dev_uc_add(lowerdev, addr);
+                                       break;
+                               case SWITCHDEV_FDB_DEL_TO_DEVICE:
+                                       err = dev_uc_del(lowerdev, addr);
+                                       break;
+                               default:
+                                       break;
+                               }
+                               if (err) {
+                                       QETH_CARD_TEXT(card, 2, "b2derris");
+                                       QETH_CARD_TEXT_(card, 2,
+                                                       "err%02x%03d", event,
+                                                       lowerdev->ifindex);
+                               }
+                       }
+                       lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+               }
+       } else {
+               switch (event) {
+               case SWITCHDEV_FDB_ADD_TO_DEVICE:
+                       err = dev_uc_add(lsyncdev, addr);
+                       break;
+               case SWITCHDEV_FDB_DEL_TO_DEVICE:
+                       err = dev_uc_del(lsyncdev, addr);
+                       break;
+               default:
+                       break;
+               }
+               if (err)
+                       QETH_CARD_TEXT_(card, 2, "b2derr%02x", event);
+       }
+
+unlock:
+       rcu_read_unlock();
+       dev_put(brdev);
+       dev_put(lsyncdev);
+       dev_put(dstdev);
+}
+
+static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
+                                    struct net_device *lsyncdev,
+                                    struct net_device *dstdev,
+                                    unsigned long event,
+                                    const unsigned char *addr)
+{
+       struct qeth_l2_br2dev_event_work *worker_data;
+       struct qeth_card *card;
+
+       worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC);
+       if (!worker_data)
+               return -ENOMEM;
+       INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker);
+       worker_data->br_dev = brdev;
+       worker_data->lsync_dev = lsyncdev;
+       worker_data->dst_dev = dstdev;
+       worker_data->event = event;
+       ether_addr_copy(worker_data->addr, addr);
+
+       card = lsyncdev->ml_priv;
+       /* Take a reference on the sw port devices and the bridge */
+       dev_hold(brdev);
+       dev_hold(lsyncdev);
+       dev_hold(dstdev);
+       queue_work(card->event_wq, &worker_data->work);
+       return 0;
+}
+
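Two things keep this handoff safe: the worker copies every field out of worker_data before kfree()ing it, and each dev_hold() taken here is paired with exactly one dev_put() in the worker. A condensed view of the pairing (sketch):

        /* queue side:  dev_hold(brdev); dev_hold(lsyncdev); dev_hold(dstdev);
         * worker side: kfree(br2dev_event_work);  <- members dead from here on,
         *              ... perform the FDB update using the local copies ...
         *              dev_put(brdev); dev_put(lsyncdev); dev_put(dstdev);
         */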
+/* Called under rtnl_lock */
+static int qeth_l2_switchdev_event(struct notifier_block *unused,
+                                  unsigned long event, void *ptr)
+{
+       struct net_device *dstdev, *brdev, *lowerdev;
+       struct switchdev_notifier_fdb_info *fdb_info;
+       struct switchdev_notifier_info *info = ptr;
+       struct list_head *iter;
+       struct qeth_card *card;
+       int rc;
+
+       if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE ||
+             event == SWITCHDEV_FDB_DEL_TO_DEVICE))
+               return NOTIFY_DONE;
+
+       dstdev = switchdev_notifier_info_to_dev(info);
+       brdev = netdev_master_upper_dev_get_rcu(dstdev);
+       if (!brdev || !netif_is_bridge_master(brdev))
+               return NOTIFY_DONE;
+       fdb_info = container_of(info,
+                               struct switchdev_notifier_fdb_info,
+                               info);
+       iter = &brdev->adj_list.lower;
+       lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+       while (lowerdev) {
+               if (qeth_l2_must_learn(lowerdev, dstdev)) {
+                       card = lowerdev->ml_priv;
+                       QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event);
+                       rc = qeth_l2_br2dev_queue_work(brdev, lowerdev,
+                                                      dstdev, event,
+                                                      fdb_info->addr);
+                       if (rc) {
+                               QETH_CARD_TEXT(card, 2, "b2dqwerr");
+                               return NOTIFY_BAD;
+                       }
+               }
+               lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+       }
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block qeth_l2_sw_notifier = {
+               .notifier_call = qeth_l2_switchdev_event,
+};
+
+static refcount_t qeth_l2_switchdev_notify_refcnt;
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_get(void)
+{
+       int rc;
+
+       if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) {
+               rc = register_switchdev_notifier(&qeth_l2_sw_notifier);
+               if (rc) {
+                       QETH_DBF_MESSAGE(2,
+                                        "failed to register qeth_l2_sw_notifier: %d\n",
+                                        rc);
+               } else {
+                       refcount_set(&qeth_l2_switchdev_notify_refcnt, 1);
+                       QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n");
+               }
+       }
+       QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d",
+                      qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_put(void)
+{
+       int rc;
+
+       if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) {
+               rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier);
+               if (rc) {
+                       QETH_DBF_MESSAGE(2,
+                                        "failed to unregister qeth_l2_sw_notifier: %d\n",
+                                        rc);
+               } else {
+                       QETH_DBF_MESSAGE(2,
+                                        "qeth_l2_sw_notifier unregistered\n");
+               }
+       }
+       QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d",
+                      qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
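The get/put pair above implements register-on-first-user: refcount_inc_not_zero() fails only at zero, and both callers run under rtnl_lock, which is what makes the zero-to-one transition safe without extra locking. A minimal generic sketch of the same idiom, with hypothetical helper names:

        static refcount_t users;        /* starts at 0 */

        static int thing_get(void)      /* callers serialized, e.g. by rtnl_lock */
        {
                int rc = 0;

                if (!refcount_inc_not_zero(&users)) {
                        rc = do_one_time_register();    /* hypothetical */
                        if (!rc)
                                refcount_set(&users, 1);
                }
                return rc;
        }

        static void thing_put(void)
        {
                if (refcount_dec_and_test(&users))
                        do_one_time_unregister();       /* hypothetical */
        }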
 static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                  struct net_device *dev, u32 filter_mask,
                                  int nlflags)
@@ -810,16 +1031,19 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
        } else if (enable) {
                qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO);
                rc = qeth_l2_dev2br_an_set(card, true);
-               if (rc)
+               if (rc) {
                        qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
-               else
+               } else {
                        priv->brport_features |= BR_LEARNING_SYNC;
+                       qeth_l2_br2dev_get();
+               }
        } else {
                rc = qeth_l2_dev2br_an_set(card, false);
                if (!rc) {
                        qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
                        priv->brport_features ^= BR_LEARNING_SYNC;
                        qeth_l2_dev2br_fdb_flush(card);
+                       qeth_l2_br2dev_put();
                }
        }
        mutex_unlock(&card->sbp_lock);
@@ -2072,6 +2296,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
 static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
 {
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+       struct qeth_priv *priv;
 
        if (gdev->dev.type != &qeth_l2_devtype)
                device_remove_groups(&gdev->dev, qeth_l2_attr_groups);
@@ -2083,8 +2308,15 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
                qeth_set_offline(card, card->discipline, false);
 
        cancel_work_sync(&card->close_dev_work);
-       if (card->dev->reg_state == NETREG_REGISTERED)
+       if (card->dev->reg_state == NETREG_REGISTERED) {
+               priv = netdev_priv(card->dev);
+               if (priv->brport_features & BR_LEARNING_SYNC) {
+                       rtnl_lock();
+                       qeth_l2_br2dev_put();
+                       rtnl_unlock();
+               }
                unregister_netdev(card->dev);
+       }
 }
 
 static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
@@ -2207,6 +2439,7 @@ EXPORT_SYMBOL_GPL(qeth_l2_discipline);
 static int __init qeth_l2_init(void)
 {
        pr_info("register layer 2 discipline\n");
+       refcount_set(&qeth_l2_switchdev_notify_refcnt, 0);
        return 0;
 }
 
index 84fc7a0..4a84599 100644 (file)
@@ -2642,6 +2642,7 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt)
 //#endif
                clear_bit(SCpnt->device->id * 8 +
                          (u8)(SCpnt->device->lun & 0x7), host->busyluns);
+               fallthrough;
 
        /*
         * We found the command, and cleared it out.  Either
index 6baa9b3..9c4458a 100644 (file)
@@ -1375,6 +1375,7 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne
                case IS_COMPLETE:
                        break;
                }
+               break;
 
        default:
                break;
index 25f6e1a..66652ab 100644 (file)
@@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev,
                if (!h->ctlr)
                        err = SCSI_DH_RES_TEMP_UNAVAIL;
                else {
-                       list_add_rcu(&h->node, &h->ctlr->dh_list);
                        h->sdev = sdev;
+                       list_add_rcu(&h->node, &h->ctlr->dh_list);
                }
                spin_unlock(&list_lock);
                err = SCSI_DH_OK;
@@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev )
        spin_lock(&list_lock);
        if (h->ctlr) {
                list_del_rcu(&h->node);
-               h->sdev = NULL;
                kref_put(&h->ctlr->kref, release_controller);
        }
        spin_unlock(&list_lock);
        sdev->handler_data = NULL;
+       synchronize_rcu();
        kfree(h);
 }
 
index bee1bec..935b01e 100644 (file)
@@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost,
        for (i = 0; i < size; ++i) {
                struct ibmvfc_event *evt = &pool->events[i];
 
+               /*
+                * evt->active states
+                *  1 = in flight
+                *  0 = being completed
+                * -1 = free/freed
+                */
+               atomic_set(&evt->active, -1);
                atomic_set(&evt->free, 1);
                evt->crq.valid = 0x80;
                evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i));
@@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt)
 
        BUG_ON(!ibmvfc_valid_event(pool, evt));
        BUG_ON(atomic_inc_return(&evt->free) != 1);
+       BUG_ON(atomic_dec_and_test(&evt->active));
 
        spin_lock_irqsave(&evt->queue->l_lock, flags);
        list_add_tail(&evt->queue_list, &evt->queue->free);
@@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list)
  **/
 static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
 {
+       /*
+        * Anything we are failing should still be active. Otherwise, it
+        * implies we already got a response for the command and are doing
+        * something bad like double completing it.
+        */
+       BUG_ON(!atomic_dec_and_test(&evt->active));
        if (evt->cmnd) {
                evt->cmnd->result = (error_code << 16);
                evt->done = ibmvfc_scsi_eh_done;
@@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
 
                evt->done(evt);
        } else {
+               atomic_set(&evt->active, 1);
                spin_unlock_irqrestore(&evt->queue->l_lock, flags);
                ibmvfc_trc_start(evt);
        }
@@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
                return;
        }
 
-       if (unlikely(atomic_read(&evt->free))) {
+       if (unlikely(atomic_dec_if_positive(&evt->active))) {
                dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
                        crq->ioba);
                return;
@@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost
                return;
        }
 
-       if (unlikely(atomic_read(&evt->free))) {
+       if (unlikely(atomic_dec_if_positive(&evt->active))) {
                dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
                        crq->ioba);
                return;
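Replacing the atomic_read(&evt->free) test with atomic_dec_if_positive(&evt->active) turns the duplicate check into an atomic claim: atomic_dec_if_positive() stores old - 1 only when old is positive and returns old - 1 either way, so with the tri-state defined at pool init exactly one path can move an event from in-flight to being-completed. A sketch with a hypothetical helper name:

        /* active == 1  -> stores 0, returns 0:  we win and may complete it
         * active == 0  -> no store, returns -1: already being completed
         * active == -1 -> no store, returns -2: already freed
         */
        static bool ibmvfc_claim_completion(struct ibmvfc_event *evt)
        {
                return atomic_dec_if_positive(&evt->active) == 0;
        }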
index 4f0f3ba..92fb889 100644 (file)
@@ -745,6 +745,7 @@ struct ibmvfc_event {
        struct ibmvfc_target *tgt;
        struct scsi_cmnd *cmnd;
        atomic_t free;
+       atomic_t active;
        union ibmvfc_iu *xfer_iu;
        void (*done)(struct ibmvfc_event *evt);
        void (*_done)(struct ibmvfc_event *evt);
index abf7b40..c509440 100644 (file)
@@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval)
        mimd_t          mimd;
        uint32_t        adapno;
        int             iterator;
-
+       bool            is_found;
 
        if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) {
                *rval = -EFAULT;
@@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval)
 
        adapter = NULL;
        iterator = 0;
+       is_found = false;
 
        list_for_each_entry(adapter, &adapters_list_g, list) {
-               if (iterator++ == adapno) break;
+               if (iterator++ == adapno) {
+                       is_found = true;
+                       break;
+               }
        }
 
-       if (!adapter) {
+       if (!is_found) {
                *rval = -ENODEV;
                return NULL;
        }
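The is_found flag is needed because list_for_each_entry() never leaves its cursor NULL: after a full walk the cursor holds container_of() of the list head itself, so testing the cursor cannot detect a miss. Sketch of the pitfall:

        list_for_each_entry(adapter, &adapters_list_g, list)
                if (iterator++ == adapno)
                        break;
        /* adapter is non-NULL here even if nothing matched; a separate
         * flag (or a list_entry_is_head() check) is required.
         */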
@@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc)
        uint32_t        adapno;
        int             iterator;
        mraid_mmadp_t*  adapter;
+       bool            is_found;
 
        /*
         * When the kioc returns from driver, make sure it still doesn't
@@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc)
                iterator        = 0;
                adapter         = NULL;
                adapno          = kioc->adapno;
+               is_found        = false;
 
                con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed "
                                        "ioctl that was timedout before\n"));
 
                list_for_each_entry(adapter, &adapters_list_g, list) {
-                       if (iterator++ == adapno) break;
+                       if (iterator++ == adapno) {
+                               is_found = true;
+                               break;
+                       }
                }
 
                kioc->timedout = 0;
 
-               if (adapter) {
+               if (is_found)
                        mraid_mm_dealloc_kioc( adapter, kioc );
-               }
+
        }
        else {
                wake_up(&wait_q);
index c399552..19b1c0c 100644 (file)
@@ -2983,13 +2983,13 @@ _base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc)
 }
 
 /**
- * _base_free_irq - free irq
+ * mpt3sas_base_free_irq - free irq
  * @ioc: per adapter object
  *
  * Freeing respective reply_queue from the list.
  */
-static void
-_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
 {
        struct adapter_reply_queue *reply_q, *next;
 
@@ -3191,12 +3191,12 @@ _base_check_and_enable_high_iops_queues(struct MPT3SAS_ADAPTER *ioc,
 }
 
 /**
- * _base_disable_msix - disables msix
+ * mpt3sas_base_disable_msix - disables msix
  * @ioc: per adapter object
  *
  */
-static void
-_base_disable_msix(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc)
 {
        if (!ioc->msix_enable)
                return;
@@ -3304,8 +3304,8 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
        for (i = 0; i < ioc->reply_queue_count; i++) {
                r = _base_request_irq(ioc, i);
                if (r) {
-                       _base_free_irq(ioc);
-                       _base_disable_msix(ioc);
+                       mpt3sas_base_free_irq(ioc);
+                       mpt3sas_base_disable_msix(ioc);
                        goto try_ioapic;
                }
        }
@@ -3342,8 +3342,8 @@ mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc)
 
        dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
 
-       _base_free_irq(ioc);
-       _base_disable_msix(ioc);
+       mpt3sas_base_free_irq(ioc);
+       mpt3sas_base_disable_msix(ioc);
 
        kfree(ioc->replyPostRegisterIndex);
        ioc->replyPostRegisterIndex = NULL;
@@ -7613,14 +7613,14 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
 }
 
 /**
- * _base_make_ioc_ready - put controller in READY state
+ * mpt3sas_base_make_ioc_ready - put controller in READY state
  * @ioc: per adapter object
  * @type: FORCE_BIG_HAMMER or SOFT_RESET
  *
  * Return: 0 for success, non-zero for failure.
  */
-static int
-_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type)
+int
+mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type)
 {
        u32 ioc_state;
        int rc;
@@ -7897,7 +7897,7 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc)
        if (ioc->chip_phys && ioc->chip) {
                mpt3sas_base_mask_interrupts(ioc);
                ioc->shost_recovery = 1;
-               _base_make_ioc_ready(ioc, SOFT_RESET);
+               mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
                ioc->shost_recovery = 0;
        }
 
@@ -8017,7 +8017,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
        ioc->build_sg_mpi = &_base_build_sg;
        ioc->build_zero_len_sge_mpi = &_base_build_zero_len_sge;
 
-       r = _base_make_ioc_ready(ioc, SOFT_RESET);
+       r = mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
        if (r)
                goto out_free_resources;
 
@@ -8471,7 +8471,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
        _base_pre_reset_handler(ioc);
        mpt3sas_wait_for_commands_to_complete(ioc);
        mpt3sas_base_mask_interrupts(ioc);
-       r = _base_make_ioc_ready(ioc, type);
+       r = mpt3sas_base_make_ioc_ready(ioc, type);
        if (r)
                goto out;
        _base_clear_outstanding_commands(ioc);
index d4834c8..0c6c3df 100644 (file)
@@ -1730,6 +1730,10 @@ do {     ioc_err(ioc, "In func: %s\n", __func__); \
        status, mpi_request, sz); } while (0)
 
 int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count);
+int
+mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type);
+void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc);
+void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc);
 
 /* scsih shared API */
 struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
index 866d118..8e64a6f 100644 (file)
@@ -11295,7 +11295,12 @@ scsih_shutdown(struct pci_dev *pdev)
 
        _scsih_ir_shutdown(ioc);
        _scsih_nvme_shutdown(ioc);
-       mpt3sas_base_detach(ioc);
+       mpt3sas_base_mask_interrupts(ioc);
+       ioc->shost_recovery = 1;
+       mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
+       ioc->shost_recovery = 0;
+       mpt3sas_base_free_irq(ioc);
+       mpt3sas_base_disable_msix(ioc);
 }
 
 
index 48548a9..32e60f0 100644 (file)
@@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev)
 
 void pm8001_task_done(struct sas_task *task)
 {
-       if (!del_timer(&task->slow_task->timer))
-               return;
+       del_timer(&task->slow_task->timer);
        complete(&task->slow_task->completion);
 }
 
@@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t)
 {
        struct sas_task_slow *slow = from_timer(slow, t, timer);
        struct sas_task *task = slow->task;
+       unsigned long flags;
 
-       task->task_state_flags |= SAS_TASK_STATE_ABORTED;
-       complete(&task->slow_task->completion);
+       spin_lock_irqsave(&task->task_state_lock, flags);
+       if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+               task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+               complete(&task->slow_task->completion);
+       }
+       spin_unlock_irqrestore(&task->task_state_lock, flags);
 }
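Taking task_state_lock and re-checking SAS_TASK_STATE_DONE closes the race between this timeout and a normal completion: flag updates on both sides happen under the lock, so the timer can no longer mark ABORTED and complete() a task that has already been torn down as DONE. A hedged sketch of the invariant, not the exact libsas code:

        /* timeout:    lock; if (!DONE) { set ABORTED; complete(); } unlock;
         * completion: lock; set DONE; unlock; complete();
         * The lock ensures the two sides observe each other's flags.
         */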
 
 #define PM8001_TASK_TIMEOUT 20
@@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
                }
                res = -TMF_RESP_FUNC_FAILED;
                /* Even TMF timed out, return direct. */
-               if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
-                       if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
-                               pm8001_dbg(pm8001_ha, FAIL,
-                                          "TMF task[%x]timeout.\n",
-                                          tmf->tmf);
-                               goto ex_err;
-                       }
+               if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+                       pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
+                                  tmf->tmf);
+                       goto ex_err;
                }
 
                if (task->task_status.resp == SAS_TASK_COMPLETE &&
@@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
                wait_for_completion(&task->slow_task->completion);
                res = TMF_RESP_FUNC_FAILED;
                /* Even TMF timed out, return direct. */
-               if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
-                       if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
-                               pm8001_dbg(pm8001_ha, FAIL,
-                                          "TMF task timeout.\n");
-                               goto ex_err;
-                       }
+               if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+                       pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n");
+                       goto ex_err;
                }
 
                if (task->task_status.resp == SAS_TASK_COMPLETE &&
index b059bf2..5b6996a 100644 (file)
@@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
                error = shost->hostt->target_alloc(starget);
 
                if(error) {
-                       dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error);
+                       if (error != -ENXIO)
+                               dev_err(dev, "target allocation failed, error %d\n", error);
                        /* don't want scsi_target_reap to do the final
                         * put because it will be under the host lock */
                        scsi_target_destroy(starget);
index 32489d2..ae9bfc6 100644 (file)
@@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr,
        mutex_lock(&sdev->state_mutex);
        ret = scsi_device_set_state(sdev, state);
        /*
-        * If the device state changes to SDEV_RUNNING, we need to run
-        * the queue to avoid I/O hang.
+        * If the device state changes to SDEV_RUNNING, we need to
+        * rescan the device to revalidate it, and run the queue to
+        * avoid I/O hang.
         */
-       if (ret == 0 && state == SDEV_RUNNING)
+       if (ret == 0 && state == SDEV_RUNNING) {
+               scsi_rescan_device(dev);
                blk_mq_run_hw_queues(sdev->request_queue, true);
+       }
        mutex_unlock(&sdev->state_mutex);
 
        return ret == 0 ? count : -EINVAL;
index b07105a..d8b05d8 100644 (file)
@@ -439,39 +439,10 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
        struct device *dev = container_of(kobj, struct device, kobj);
        struct iscsi_iface *iface = iscsi_dev_to_iface(dev);
        struct iscsi_transport *t = iface->transport;
-       int param;
-       int param_type;
+       int param = -1;
 
        if (attr == &dev_attr_iface_enabled.attr)
                param = ISCSI_NET_PARAM_IFACE_ENABLE;
-       else if (attr == &dev_attr_iface_vlan_id.attr)
-               param = ISCSI_NET_PARAM_VLAN_ID;
-       else if (attr == &dev_attr_iface_vlan_priority.attr)
-               param = ISCSI_NET_PARAM_VLAN_PRIORITY;
-       else if (attr == &dev_attr_iface_vlan_enabled.attr)
-               param = ISCSI_NET_PARAM_VLAN_ENABLED;
-       else if (attr == &dev_attr_iface_mtu.attr)
-               param = ISCSI_NET_PARAM_MTU;
-       else if (attr == &dev_attr_iface_port.attr)
-               param = ISCSI_NET_PARAM_PORT;
-       else if (attr == &dev_attr_iface_ipaddress_state.attr)
-               param = ISCSI_NET_PARAM_IPADDR_STATE;
-       else if (attr == &dev_attr_iface_delayed_ack_en.attr)
-               param = ISCSI_NET_PARAM_DELAYED_ACK_EN;
-       else if (attr == &dev_attr_iface_tcp_nagle_disable.attr)
-               param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE;
-       else if (attr == &dev_attr_iface_tcp_wsf_disable.attr)
-               param = ISCSI_NET_PARAM_TCP_WSF_DISABLE;
-       else if (attr == &dev_attr_iface_tcp_wsf.attr)
-               param = ISCSI_NET_PARAM_TCP_WSF;
-       else if (attr == &dev_attr_iface_tcp_timer_scale.attr)
-               param = ISCSI_NET_PARAM_TCP_TIMER_SCALE;
-       else if (attr == &dev_attr_iface_tcp_timestamp_en.attr)
-               param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN;
-       else if (attr == &dev_attr_iface_cache_id.attr)
-               param = ISCSI_NET_PARAM_CACHE_ID;
-       else if (attr == &dev_attr_iface_redirect_en.attr)
-               param = ISCSI_NET_PARAM_REDIRECT_EN;
        else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr)
                param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO;
        else if (attr == &dev_attr_iface_header_digest.attr)
@@ -508,6 +479,38 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
                param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN;
        else if (attr == &dev_attr_iface_initiator_name.attr)
                param = ISCSI_IFACE_PARAM_INITIATOR_NAME;
+
+       if (param != -1)
+               return t->attr_is_visible(ISCSI_IFACE_PARAM, param);
+
+       if (attr == &dev_attr_iface_vlan_id.attr)
+               param = ISCSI_NET_PARAM_VLAN_ID;
+       else if (attr == &dev_attr_iface_vlan_priority.attr)
+               param = ISCSI_NET_PARAM_VLAN_PRIORITY;
+       else if (attr == &dev_attr_iface_vlan_enabled.attr)
+               param = ISCSI_NET_PARAM_VLAN_ENABLED;
+       else if (attr == &dev_attr_iface_mtu.attr)
+               param = ISCSI_NET_PARAM_MTU;
+       else if (attr == &dev_attr_iface_port.attr)
+               param = ISCSI_NET_PARAM_PORT;
+       else if (attr == &dev_attr_iface_ipaddress_state.attr)
+               param = ISCSI_NET_PARAM_IPADDR_STATE;
+       else if (attr == &dev_attr_iface_delayed_ack_en.attr)
+               param = ISCSI_NET_PARAM_DELAYED_ACK_EN;
+       else if (attr == &dev_attr_iface_tcp_nagle_disable.attr)
+               param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE;
+       else if (attr == &dev_attr_iface_tcp_wsf_disable.attr)
+               param = ISCSI_NET_PARAM_TCP_WSF_DISABLE;
+       else if (attr == &dev_attr_iface_tcp_wsf.attr)
+               param = ISCSI_NET_PARAM_TCP_WSF;
+       else if (attr == &dev_attr_iface_tcp_timer_scale.attr)
+               param = ISCSI_NET_PARAM_TCP_TIMER_SCALE;
+       else if (attr == &dev_attr_iface_tcp_timestamp_en.attr)
+               param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN;
+       else if (attr == &dev_attr_iface_cache_id.attr)
+               param = ISCSI_NET_PARAM_CACHE_ID;
+       else if (attr == &dev_attr_iface_redirect_en.attr)
+               param = ISCSI_NET_PARAM_REDIRECT_EN;
        else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) {
                if (attr == &dev_attr_ipv4_iface_ipaddress.attr)
                        param = ISCSI_NET_PARAM_IPV4_ADDR;
@@ -598,32 +601,7 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
                return 0;
        }
 
-       switch (param) {
-       case ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO:
-       case ISCSI_IFACE_PARAM_HDRDGST_EN:
-       case ISCSI_IFACE_PARAM_DATADGST_EN:
-       case ISCSI_IFACE_PARAM_IMM_DATA_EN:
-       case ISCSI_IFACE_PARAM_INITIAL_R2T_EN:
-       case ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN:
-       case ISCSI_IFACE_PARAM_PDU_INORDER_EN:
-       case ISCSI_IFACE_PARAM_ERL:
-       case ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH:
-       case ISCSI_IFACE_PARAM_FIRST_BURST:
-       case ISCSI_IFACE_PARAM_MAX_R2T:
-       case ISCSI_IFACE_PARAM_MAX_BURST:
-       case ISCSI_IFACE_PARAM_CHAP_AUTH_EN:
-       case ISCSI_IFACE_PARAM_BIDI_CHAP_EN:
-       case ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL:
-       case ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN:
-       case ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN:
-       case ISCSI_IFACE_PARAM_INITIATOR_NAME:
-               param_type = ISCSI_IFACE_PARAM;
-               break;
-       default:
-               param_type = ISCSI_NET_PARAM;
-       }
-
-       return t->attr_is_visible(param_type, param);
+       return t->attr_is_visible(ISCSI_NET_PARAM, param);
 }
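The rework resolves iface-scoped attributes first and returns early, so the tail of the function can hard-code ISCSI_NET_PARAM instead of recovering a param_type from the long switch it replaces; param == -1 acts as the "not matched yet" sentinel. Condensed control flow, with hypothetical lookup helpers:

        int param = iface_param_from_attr(attr);        /* hypothetical */
        if (param != -1)
                return t->attr_is_visible(ISCSI_IFACE_PARAM, param);

        param = net_param_from_attr(attr, iface);       /* hypothetical */
        if (param == -1)
                return 0;       /* unknown attribute */
        return t->attr_is_visible(ISCSI_NET_PARAM, param);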
 
 static struct attribute *iscsi_iface_attrs[] = {
index 94c254e..a6d3ac0 100644 (file)
@@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
        else if (med->media_event_code == 2)
                return DISK_EVENT_MEDIA_CHANGE;
        else if (med->media_event_code == 3)
-               return DISK_EVENT_EJECT_REQUEST;
+               return DISK_EVENT_MEDIA_CHANGE;
        return 0;
 }
 
index 19a02e9..8fcdf89 100644 (file)
@@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev,
        static int cards_found;
        int err;
 
-       devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));
+       devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter),
+                               &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
@@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev,
                goto netdev_free;
        }
 
-       err = devlink_register(devlink, &pdev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto netdev_free;
 
index b32f4ee..ca1b231 100644 (file)
@@ -25,7 +25,7 @@
 #include "target_core_alua.h"
 
 static sense_reason_t
-sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool);
+sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char, u32, bool);
 static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd);
 
 static sense_reason_t
@@ -279,14 +279,14 @@ static inline unsigned long long transport_lba_64_ext(unsigned char *cdb)
 }
 
 static sense_reason_t
-sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *ops)
+sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops)
 {
        struct se_device *dev = cmd->se_dev;
        sector_t end_lba = dev->transport->get_blocks(dev) + 1;
        unsigned int sectors = sbc_get_write_same_sectors(cmd);
        sense_reason_t ret;
 
-       if ((flags[0] & 0x04) || (flags[0] & 0x02)) {
+       if ((flags & 0x04) || (flags & 0x02)) {
                pr_err("WRITE_SAME PBDATA and LBDATA"
                        " bits not supported for Block Discard"
                        " Emulation\n");
@@ -308,7 +308,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
        }
 
        /* We always have ANC_SUP == 0 so setting ANCHOR is always an error */
-       if (flags[0] & 0x10) {
+       if (flags & 0x10) {
                pr_warn("WRITE SAME with ANCHOR not supported\n");
                return TCM_INVALID_CDB_FIELD;
        }
@@ -316,7 +316,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
         * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting
         * translated into block discard requests within backend code.
         */
-       if (flags[0] & 0x08) {
+       if (flags & 0x08) {
                if (!ops->execute_unmap)
                        return TCM_UNSUPPORTED_SCSI_OPCODE;
 
@@ -331,7 +331,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
        if (!ops->execute_write_same)
                return TCM_UNSUPPORTED_SCSI_OPCODE;
 
-       ret = sbc_check_prot(dev, cmd, &cmd->t_task_cdb[0], sectors, true);
+       ret = sbc_check_prot(dev, cmd, flags >> 5, sectors, true);
        if (ret)
                return ret;
 
@@ -717,10 +717,9 @@ sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_
 }
 
 static sense_reason_t
-sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
+sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char protect,
               u32 sectors, bool is_write)
 {
-       u8 protect = cdb[1] >> 5;
        int sp_ops = cmd->se_sess->sup_prot_ops;
        int pi_prot_type = dev->dev_attrib.pi_prot_type;
        bool fabric_prot = false;
@@ -768,7 +767,7 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
                fallthrough;
        default:
                pr_err("Unable to determine pi_prot_type for CDB: 0x%02x "
-                      "PROTECT: 0x%02x\n", cdb[0], protect);
+                      "PROTECT: 0x%02x\n", cmd->t_task_cdb[0], protect);
                return TCM_INVALID_CDB_FIELD;
        }
 
@@ -843,7 +842,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
                if (ret)
                        return ret;
 
@@ -857,7 +856,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
                if (ret)
                        return ret;
 
@@ -871,7 +870,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
                if (ret)
                        return ret;
 
@@ -892,7 +891,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
                if (ret)
                        return ret;
 
@@ -906,7 +905,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
                if (ret)
                        return ret;
 
@@ -921,7 +920,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
                if (ret)
                        return ret;
 
@@ -980,7 +979,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                        size = sbc_get_size(cmd, 1);
                        cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
 
-                       ret = sbc_setup_write_same(cmd, &cdb[10], ops);
+                       ret = sbc_setup_write_same(cmd, cdb[10], ops);
                        if (ret)
                                return ret;
                        break;
@@ -1079,7 +1078,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                size = sbc_get_size(cmd, 1);
                cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
 
-               ret = sbc_setup_write_same(cmd, &cdb[1], ops);
+               ret = sbc_setup_write_same(cmd, cdb[1], ops);
                if (ret)
                        return ret;
                break;
@@ -1097,7 +1096,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                 * Follow sbcr26 with WRITE_SAME (10) and check for the existence
                 * of byte 1 bit 3 UNMAP instead of original reserved field
                 */
-               ret = sbc_setup_write_same(cmd, &cdb[1], ops);
+               ret = sbc_setup_write_same(cmd, cdb[1], ops);
                if (ret)
                        return ret;
                break;
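Passing the flag byte by value (and, for sbc_check_prot(), the already-extracted 3-bit PROTECT field) makes every call site explicit about which CDB byte it consumes, instead of handing down a pointer and re-indexing. For reference, the WRITE SAME byte-1 bits tested in sbc_setup_write_same() decode as follows (sketch, per SBC):

        /* cdb[1] for WRITE SAME, as used above:
         *   0x02 LBDATA, 0x04 PBDATA - rejected (not emulated)
         *   0x08 UNMAP               - translated to block discard
         *   0x10 ANCHOR              - rejected (ANC_SUP == 0)
         *   bits 7:5 WRPROTECT       - passed on as "flags >> 5"
         */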
index 7e35edd..26ceabe 100644 (file)
@@ -886,7 +886,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
        INIT_WORK(&cmd->work, success ? target_complete_ok_work :
                  target_complete_failure_work);
 
-       if (wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID)
+       if (!wwn || wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID)
                cpu = cmd->cpuid;
        else
                cpu = wwn->cmd_compl_affinity;
index fdf79bc..35d5908 100644 (file)
@@ -824,7 +824,7 @@ static struct usb_class_driver wdm_class = {
 };
 
 /* --- WWAN framework integration --- */
-#ifdef CONFIG_WWAN
+#ifdef CONFIG_WWAN_CORE
 static int wdm_wwan_port_start(struct wwan_port *port)
 {
        struct wdm_device *desc = wwan_port_get_drvdata(port);
@@ -963,11 +963,11 @@ static void wdm_wwan_rx(struct wdm_device *desc, int length)
        /* inbuf has been copied, it is safe to check for outstanding data */
        schedule_work(&desc->service_outs_intr);
 }
-#else /* CONFIG_WWAN */
+#else /* CONFIG_WWAN_CORE */
 static void wdm_wwan_init(struct wdm_device *desc) {}
 static void wdm_wwan_deinit(struct wdm_device *desc) {}
 static void wdm_wwan_rx(struct wdm_device *desc, int length) {}
-#endif /* CONFIG_WWAN */
+#endif /* CONFIG_WWAN_CORE */
 
 /* --- error handling --- */
 static void wdm_rxwork(struct work_struct *work)
index b974644..9618ba6 100644 (file)
@@ -1133,7 +1133,7 @@ static int do_proc_control(struct usb_dev_state *ps,
                "wIndex=%04x wLength=%04x\n",
                ctrl->bRequestType, ctrl->bRequest, ctrl->wValue,
                ctrl->wIndex, ctrl->wLength);
-       if (ctrl->bRequestType & 0x80) {
+       if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) {
                pipe = usb_rcvctrlpipe(dev, 0);
                snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT, NULL, 0);
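
The tightened condition treats a control request as a read only when the direction bit and a nonzero wLength agree; a zero-length IN request has no data stage and should go through the send path. A minimal sketch, assuming only the standard USB_DIR_IN value of 0x80 (helper name hypothetical):

#include <stdint.h>
#include <stdio.h>

#define USB_DIR_IN 0x80	/* bit 7 of bmRequestType */

/* A control transfer behaves as a read only when it is IN *and*
 * carries a data stage; wLength == 0 leaves nothing to read. */
static int is_control_read(uint8_t bRequestType, uint16_t wLength)
{
	return (bRequestType & USB_DIR_IN) && wLength;
}

int main(void)
{
	printf("%d\n", is_control_read(0x80, 0));	/* 0: zero-length IN */
	printf("%d\n", is_control_read(0xc0, 8));	/* 1: vendor IN read */
	return 0;
}
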
 
index d1efc71..86658a8 100644 (file)
@@ -48,6 +48,7 @@
 
 #define USB_TP_TRANSMISSION_DELAY      40      /* ns */
 #define USB_TP_TRANSMISSION_DELAY_MAX  65535   /* ns */
+#define USB_PING_RESPONSE_TIME         400     /* ns */
 
 /* Protect struct usb_device->state and ->children members
  * Note: Both are also protected by ->dev.sem, except that ->state can
@@ -182,8 +183,9 @@ int usb_device_supports_lpm(struct usb_device *udev)
 }
 
 /*
- * Set the Maximum Exit Latency (MEL) for the host to initiate a transition from
- * either U1 or U2.
+ * Set the Maximum Exit Latency (MEL) for the host to wake up the path from
+ * U1/U2, send a PING to the device and receive a PING_RESPONSE.
+ * See USB 3.1 section C.1.5.2
  */
 static void usb_set_lpm_mel(struct usb_device *udev,
                struct usb3_lpm_parameters *udev_lpm_params,
@@ -193,35 +195,37 @@ static void usb_set_lpm_mel(struct usb_device *udev,
                unsigned int hub_exit_latency)
 {
        unsigned int total_mel;
-       unsigned int device_mel;
-       unsigned int hub_mel;
 
        /*
-        * Calculate the time it takes to transition all links from the roothub
-        * to the parent hub into U0.  The parent hub must then decode the
-        * packet (hub header decode latency) to figure out which port it was
-        * bound for.
-        *
-        * The Hub Header decode latency is expressed in 0.1us intervals (0x1
-        * means 0.1us).  Multiply that by 100 to get nanoseconds.
+        * tMEL1. Time to transition the path from host to device into U0.
+        * MEL for parent already contains the delay up to parent, so only add
+        * the exit latency for the last link (pick the slower exit latency),
+        * and the hub header decode latency. See USB 3.1 section C 2.2.1.
+        * Store MEL in nanoseconds.
         */
        total_mel = hub_lpm_params->mel +
-               (hub->descriptor->u.ss.bHubHdrDecLat * 100);
+               max(udev_exit_latency, hub_exit_latency) * 1000 +
+               hub->descriptor->u.ss.bHubHdrDecLat * 100;
 
        /*
-        * How long will it take to transition the downstream hub's port into
-        * U0?  The greater of either the hub exit latency or the device exit
-        * latency.
-        *
-        * The BOS U1/U2 exit latencies are expressed in 1us intervals.
-        * Multiply that by 1000 to get nanoseconds.
+        * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for
+        * each link + wHubDelay for each hub. Add only for last link.
+        * tMEL4, the time for PING_RESPONSE to traverse upstream, is similar.
+        * Multiply by 2 to include it as well.
         */
-       device_mel = udev_exit_latency * 1000;
-       hub_mel = hub_exit_latency * 1000;
-       if (device_mel > hub_mel)
-               total_mel += device_mel;
-       else
-               total_mel += hub_mel;
+       total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) +
+                     USB_TP_TRANSMISSION_DELAY) * 2;
+
+       /*
+        * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE
+        * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4
+        * to cover the delay if the PING_RESPONSE is queued behind a Max Packet
+        * Size DP.
+        * Note these delays should be added only once for the entire path, so
+        * add them to the MEL of the device connected to the roothub.
+        */
+       if (!hub->hdev->parent)
+               total_mel += USB_PING_RESPONSE_TIME + 2100;
 
        udev_lpm_params->mel = total_mel;
 }
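
Putting the tMEL pieces together, here is a standalone sketch of the same arithmetic with made-up latencies (every number below is hypothetical; only the formula mirrors the hunk above):

#include <stdio.h>

int main(void)
{
	unsigned int parent_mel = 2100;		/* ns, MEL accumulated up to the parent hub */
	unsigned int udev_el = 10, hub_el = 8;	/* us, BOS U1/U2 exit latencies */
	unsigned int hdr_dec_lat = 4;		/* bHubHdrDecLat, 0.1 us units */
	unsigned int hub_delay = 200, tp_delay = 40;	/* ns */
	int on_roothub = 1;			/* device attached to the roothub */

	unsigned int total = parent_mel
		+ (udev_el > hub_el ? udev_el : hub_el) * 1000	/* tMEL1 */
		+ hdr_dec_lat * 100
		+ (hub_delay + tp_delay) * 2;			/* tMEL2 + tMEL4 */
	if (on_roothub)
		total += 400 + 2100;				/* tMEL3 + queuing allowance */

	printf("total MEL = %u ns\n", total);
	return 0;
}
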
@@ -4112,6 +4116,47 @@ static int usb_set_lpm_timeout(struct usb_device *udev,
        return 0;
 }
 
+/*
+ * Don't allow device-initiated U1/U2 if the system exit latency + one bus
+ * interval is greater than the minimum service interval of any active
+ * periodic endpoint. See USB 3.2 section 9.4.9
+ */
+static bool usb_device_may_initiate_lpm(struct usb_device *udev,
+                                       enum usb3_link_state state)
+{
+       unsigned int sel;               /* us */
+       int i, j;
+
+       if (state == USB3_LPM_U1)
+               sel = DIV_ROUND_UP(udev->u1_params.sel, 1000);
+       else if (state == USB3_LPM_U2)
+               sel = DIV_ROUND_UP(udev->u2_params.sel, 1000);
+       else
+               return false;
+
+       for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
+               struct usb_interface *intf;
+               struct usb_endpoint_descriptor *desc;
+               unsigned int interval;
+
+               intf = udev->actconfig->interface[i];
+               if (!intf)
+                       continue;
+
+               for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) {
+                       desc = &intf->cur_altsetting->endpoint[j].desc;
+
+                       if (usb_endpoint_xfer_int(desc) ||
+                           usb_endpoint_xfer_isoc(desc)) {
+                               interval = (1 << (desc->bInterval - 1)) * 125;
+                               if (sel + 125 > interval)
+                                       return false;
+                       }
+               }
+       }
+       return true;
+}
+
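
As a worked instance of the interval check above (numbers hypothetical): a periodic endpoint with bInterval = 4 has a service interval of 2^(4-1) * 125 = 1000 us, so a 900 us exit latency is rejected because 900 + 125 > 1000.

#include <stdio.h>

int main(void)
{
	unsigned int bInterval = 4;	/* hypothetical periodic endpoint */
	unsigned int interval = (1u << (bInterval - 1)) * 125;	/* us */
	unsigned int sel = 900;		/* us, hypothetical exit latency */

	/* Mirror of the rejection test: sel plus one bus interval
	 * (125 us) must not exceed the endpoint service interval. */
	printf("interval=%u us allowed=%d\n", interval, sel + 125 <= interval);
	return 0;
}
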
 /*
  * Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated
  * U1/U2 entry.
@@ -4184,20 +4229,23 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev,
         * U1/U2_ENABLE
         */
        if (udev->actconfig &&
-           usb_set_device_initiated_lpm(udev, state, true) == 0) {
-               if (state == USB3_LPM_U1)
-                       udev->usb3_lpm_u1_enabled = 1;
-               else if (state == USB3_LPM_U2)
-                       udev->usb3_lpm_u2_enabled = 1;
-       } else {
-               /* Don't request U1/U2 entry if the device
-                * cannot transition to U1/U2.
-                */
-               usb_set_lpm_timeout(udev, state, 0);
-               hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state);
+           usb_device_may_initiate_lpm(udev, state)) {
+               if (usb_set_device_initiated_lpm(udev, state, true)) {
+                       /*
+                        * Request to enable device-initiated U1/U2 failed;
+                        * better to turn off LPM in this case.
+                        */
+                       usb_set_lpm_timeout(udev, state, 0);
+                       hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state);
+                       return;
+               }
        }
-}
 
+       if (state == USB3_LPM_U1)
+               udev->usb3_lpm_u1_enabled = 1;
+       else if (state == USB3_LPM_U2)
+               udev->usb3_lpm_u2_enabled = 1;
+}
 /*
  * Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated
  * U1/U2 entry.
index 6114cf8..8239fe7 100644 (file)
@@ -501,10 +501,6 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* DJI CineSSD */
        { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
 
-       /* Fibocom L850-GL LTE Modem */
-       { USB_DEVICE(0x2cb7, 0x0007), .driver_info =
-                       USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
        /* INTEL VALUE SSD */
        { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
 
index ab6b815..483de2b 100644 (file)
@@ -383,6 +383,9 @@ enum dwc2_ep0_state {
  *                     0 - No (default)
  *                     1 - Partial power down
  *                     2 - Hibernation
+ * @no_clock_gating:   Specifies whether to avoid the clock gating feature.
+ *                     0 - No (use clock gating)
+ *                     1 - Yes (avoid it)
  * @lpm:               Enable LPM support.
  *                     0 - No
  *                     1 - Yes
@@ -480,6 +483,7 @@ struct dwc2_core_params {
 #define DWC2_POWER_DOWN_PARAM_NONE             0
 #define DWC2_POWER_DOWN_PARAM_PARTIAL          1
 #define DWC2_POWER_DOWN_PARAM_HIBERNATION      2
+       bool no_clock_gating;
 
        bool lpm;
        bool lpm_clock_gating;
index a5ab038..a5c52b2 100644 (file)
@@ -556,7 +556,8 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg *hsotg)
                                 * If neither hibernation nor partial power down is supported,
                                 * clock gating is used to save power.
                                 */
-                               dwc2_gadget_enter_clock_gating(hsotg);
+                               if (!hsotg->params.no_clock_gating)
+                                       dwc2_gadget_enter_clock_gating(hsotg);
                        }
 
                        /*
index c581ee4..3146df6 100644 (file)
@@ -2749,12 +2749,14 @@ static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg,
                return;
        }
 
-       /* Zlp for all endpoints, for ep0 only in DATA IN stage */
+       /* ZLP for all endpoints in non-DDMA mode, for ep0 only in DATA IN stage */
        if (hs_ep->send_zlp) {
-               dwc2_hsotg_program_zlp(hsotg, hs_ep);
                hs_ep->send_zlp = 0;
-               /* transfer will be completed on next complete interrupt */
-               return;
+               if (!using_desc_dma(hsotg)) {
+                       dwc2_hsotg_program_zlp(hsotg, hs_ep);
+                       /* transfer will be completed on next complete interrupt */
+                       return;
+               }
        }
 
        if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_DATA_IN) {
@@ -3900,9 +3902,27 @@ static void dwc2_hsotg_ep_stop_xfr(struct dwc2_hsotg *hsotg,
                                         __func__);
                }
        } else {
+               /* Mask GINTSTS_GOUTNAKEFF interrupt */
+               dwc2_hsotg_disable_gsint(hsotg, GINTSTS_GOUTNAKEFF);
+
                if (!(dwc2_readl(hsotg, GINTSTS) & GINTSTS_GOUTNAKEFF))
                        dwc2_set_bit(hsotg, DCTL, DCTL_SGOUTNAK);
 
+               if (!using_dma(hsotg)) {
+                       /* Wait for GINTSTS_RXFLVL interrupt */
+                       if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS,
+                                                   GINTSTS_RXFLVL, 100)) {
+                               dev_warn(hsotg->dev, "%s: timeout GINTSTS.RXFLVL\n",
+                                        __func__);
+                       } else {
+                               /*
+                                * Pop GLOBAL OUT NAK status packet from RxFIFO
+                                * to assert GOUTNAKEFF interrupt
+                                */
+                               dwc2_readl(hsotg, GRXSTSP);
+                       }
+               }
+
                /* Wait for global nak to take effect */
                if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS,
                                            GINTSTS_GOUTNAKEFF, 100))
@@ -4348,6 +4368,9 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now)
                epctl = dwc2_readl(hs, epreg);
 
                if (value) {
+                       /* Unmask GOUTNAKEFF interrupt */
+                       dwc2_hsotg_en_gsint(hs, GINTSTS_GOUTNAKEFF);
+
                        if (!(dwc2_readl(hs, GINTSTS) & GINTSTS_GOUTNAKEFF))
                                dwc2_set_bit(hs, DCTL, DCTL_SGOUTNAK);
                        // STALL bit will be set in GOUTNAKEFF interrupt handler
index 035d491..2a78289 100644 (file)
@@ -3338,7 +3338,8 @@ int dwc2_port_suspend(struct dwc2_hsotg *hsotg, u16 windex)
                 * If neither hibernation nor partial power down is supported,
                 * clock gating is used to save power.
                 */
-               dwc2_host_enter_clock_gating(hsotg);
+               if (!hsotg->params.no_clock_gating)
+                       dwc2_host_enter_clock_gating(hsotg);
                break;
        }
 
@@ -4402,7 +4403,8 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd)
                 * If neither hibernation nor partial power down is supported,
                 * clock gating is used to save power.
                 */
-               dwc2_host_enter_clock_gating(hsotg);
+               if (!hsotg->params.no_clock_gating)
+                       dwc2_host_enter_clock_gating(hsotg);
 
                /* After entering suspend, hardware is not accessible */
                clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
index 67c5eb1..59e1193 100644 (file)
@@ -76,6 +76,7 @@ static void dwc2_set_s3c6400_params(struct dwc2_hsotg *hsotg)
        struct dwc2_core_params *p = &hsotg->params;
 
        p->power_down = DWC2_POWER_DOWN_PARAM_NONE;
+       p->no_clock_gating = true;
        p->phy_utmi_width = 8;
 }
 
index dccdf13..5991766 100644 (file)
@@ -1279,6 +1279,7 @@ struct dwc3 {
        unsigned                dis_metastability_quirk:1;
 
        unsigned                dis_split_quirk:1;
+       unsigned                async_callbacks:1;
 
        u16                     imod_interval;
 };
index 3cd2942..2f9e45e 100644 (file)
@@ -597,11 +597,13 @@ static int dwc3_ep0_set_address(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
 
 static int dwc3_ep0_delegate_req(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
 {
-       int ret;
+       int ret = -EINVAL;
 
-       spin_unlock(&dwc->lock);
-       ret = dwc->gadget_driver->setup(dwc->gadget, ctrl);
-       spin_lock(&dwc->lock);
+       if (dwc->async_callbacks) {
+               spin_unlock(&dwc->lock);
+               ret = dwc->gadget_driver->setup(dwc->gadget, ctrl);
+               spin_lock(&dwc->lock);
+       }
        return ret;
 }
 
index af6d7f1..45f2bc0 100644 (file)
@@ -2585,6 +2585,16 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA)
        return ret;
 }
 
+static void dwc3_gadget_async_callbacks(struct usb_gadget *g, bool enable)
+{
+       struct dwc3             *dwc = gadget_to_dwc(g);
+       unsigned long           flags;
+
+       spin_lock_irqsave(&dwc->lock, flags);
+       dwc->async_callbacks = enable;
+       spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
 static const struct usb_gadget_ops dwc3_gadget_ops = {
        .get_frame              = dwc3_gadget_get_frame,
        .wakeup                 = dwc3_gadget_wakeup,
@@ -2596,6 +2606,7 @@ static const struct usb_gadget_ops dwc3_gadget_ops = {
        .udc_set_ssp_rate       = dwc3_gadget_set_ssp_rate,
        .get_config_params      = dwc3_gadget_config_params,
        .vbus_draw              = dwc3_gadget_vbus_draw,
+       .udc_async_callbacks    = dwc3_gadget_async_callbacks,
 };
 
 /* -------------------------------------------------------------------------- */
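
The async_callbacks bit acts as a gate: the event-handling paths only call into the gadget driver once the UDC core has enabled callbacks through ->udc_async_callbacks(). A simplified userspace model of that gate (all names hypothetical):

#include <stdio.h>

struct gadget {
	int async_callbacks;		/* flipped by the core, as above */
	void (*disconnect)(void);
};

static void on_disconnect(void)
{
	puts("disconnect delivered");
}

/* Event paths consult the gate first, so a callback can never race
 * with a gadget driver that is still binding. */
static void notify_disconnect(struct gadget *g)
{
	if (g->async_callbacks && g->disconnect)
		g->disconnect();
}

int main(void)
{
	struct gadget g = { .async_callbacks = 0, .disconnect = on_disconnect };

	notify_disconnect(&g);		/* suppressed: gate still closed */
	g.async_callbacks = 1;
	notify_disconnect(&g);		/* delivered */
	return 0;
}
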
@@ -3231,7 +3242,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
 
 static void dwc3_disconnect_gadget(struct dwc3 *dwc)
 {
-       if (dwc->gadget_driver && dwc->gadget_driver->disconnect) {
+       if (dwc->async_callbacks && dwc->gadget_driver->disconnect) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->disconnect(dwc->gadget);
                spin_lock(&dwc->lock);
@@ -3240,7 +3251,7 @@ static void dwc3_disconnect_gadget(struct dwc3 *dwc)
 
 static void dwc3_suspend_gadget(struct dwc3 *dwc)
 {
-       if (dwc->gadget_driver && dwc->gadget_driver->suspend) {
+       if (dwc->async_callbacks && dwc->gadget_driver->suspend) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->suspend(dwc->gadget);
                spin_lock(&dwc->lock);
@@ -3249,7 +3260,7 @@ static void dwc3_suspend_gadget(struct dwc3 *dwc)
 
 static void dwc3_resume_gadget(struct dwc3 *dwc)
 {
-       if (dwc->gadget_driver && dwc->gadget_driver->resume) {
+       if (dwc->async_callbacks && dwc->gadget_driver->resume) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->resume(dwc->gadget);
                spin_lock(&dwc->lock);
@@ -3261,7 +3272,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
        if (!dwc->gadget_driver)
                return;
 
-       if (dwc->gadget->speed != USB_SPEED_UNKNOWN) {
+       if (dwc->async_callbacks && dwc->gadget->speed != USB_SPEED_UNKNOWN) {
                spin_unlock(&dwc->lock);
                usb_gadget_udc_reset(dwc->gadget, dwc->gadget_driver);
                spin_lock(&dwc->lock);
@@ -3585,7 +3596,7 @@ static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc)
         * implemented.
         */
 
-       if (dwc->gadget_driver && dwc->gadget_driver->resume) {
+       if (dwc->async_callbacks && dwc->gadget_driver->resume) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->resume(dwc->gadget);
                spin_lock(&dwc->lock);
index bffef8e..281ca76 100644 (file)
@@ -1198,7 +1198,7 @@ void gserial_free_line(unsigned char port_num)
        struct gs_port  *port;
 
        mutex_lock(&ports[port_num].lock);
-       if (WARN_ON(!ports[port_num].port)) {
+       if (!ports[port_num].port) {
                mutex_unlock(&ports[port_num].lock);
                return;
        }
index a54d1ce..c0ca714 100644 (file)
@@ -3853,6 +3853,7 @@ static int tegra_xudc_probe(struct platform_device *pdev)
        return 0;
 
 free_eps:
+       pm_runtime_disable(&pdev->dev);
        tegra_xudc_free_eps(xudc);
 free_event_ring:
        tegra_xudc_free_event_ring(xudc);
index 36f5bf6..10b0365 100644 (file)
@@ -703,24 +703,28 @@ EXPORT_SYMBOL_GPL(ehci_setup);
 static irqreturn_t ehci_irq (struct usb_hcd *hcd)
 {
        struct ehci_hcd         *ehci = hcd_to_ehci (hcd);
-       u32                     status, masked_status, pcd_status = 0, cmd;
+       u32                     status, current_status, masked_status, pcd_status = 0;
+       u32                     cmd;
        int                     bh;
 
        spin_lock(&ehci->lock);
 
-       status = ehci_readl(ehci, &ehci->regs->status);
+       status = 0;
+       current_status = ehci_readl(ehci, &ehci->regs->status);
+restart:
 
        /* e.g. cardbus physical eject */
-       if (status == ~(u32) 0) {
+       if (current_status == ~(u32) 0) {
                ehci_dbg (ehci, "device removed\n");
                goto dead;
        }
+       status |= current_status;
 
        /*
         * We don't use STS_FLR, but some controllers don't like it to
         * remain on, so mask it out along with the other status bits.
         */
-       masked_status = status & (INTR_MASK | STS_FLR);
+       masked_status = current_status & (INTR_MASK | STS_FLR);
 
        /* Shared IRQ? */
        if (!masked_status || unlikely(ehci->rh_state == EHCI_RH_HALTED)) {
@@ -730,6 +734,12 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd)
 
        /* clear (just) interrupts */
        ehci_writel(ehci, masked_status, &ehci->regs->status);
+
+       /* For edge interrupts, don't race with an interrupt bit being raised */
+       current_status = ehci_readl(ehci, &ehci->regs->status);
+       if (current_status & INTR_MASK)
+               goto restart;
+
        cmd = ehci_readl(ehci, &ehci->regs->command);
        bh = 0;
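
The restart loop guards against edge-triggered interrupt lines: after acking the bits just read, the status register is read again, and any freshly raised bit restarts processing rather than being lost until a next edge that may never come. A userspace model with a mocked register (values hypothetical):

#include <stdio.h>

#define INTR_MASK 0x7

static unsigned int fake_sts[] = { 0x3, 0x4, 0x0 };	/* successive reads */
static int idx;

static unsigned int read_sts(void)
{
	return fake_sts[idx++];
}

int main(void)
{
	unsigned int status = 0, current_status = read_sts();

	/* Accumulate and re-read until nothing is pending, so a bit that
	 * was raised while acking the previous ones is still serviced. */
	do {
		status |= current_status;
		/* ...ack (current_status & INTR_MASK) here... */
		current_status = read_sts();
	} while (current_status & INTR_MASK);

	printf("accumulated status = 0x%x\n", status);	/* 0x7 */
	return 0;
}
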
 
index e7a8e06..59cc1bc 100644 (file)
@@ -153,8 +153,6 @@ struct max3421_hcd {
         */
        struct urb *curr_urb;
        enum scheduling_pass sched_pass;
-       struct usb_device *loaded_dev;  /* dev that's loaded into the chip */
-       int loaded_epnum;               /* epnum whose toggles are loaded */
        int urb_done;                   /* > 0 -> no errors, < 0: errno */
        size_t curr_len;
        u8 hien;
@@ -492,39 +490,17 @@ max3421_set_speed(struct usb_hcd *hcd, struct usb_device *dev)
  * Caller must NOT hold HCD spinlock.
  */
 static void
-max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum,
-                   int force_toggles)
+max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum)
 {
-       struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
-       int old_epnum, same_ep, rcvtog, sndtog;
-       struct usb_device *old_dev;
+       int rcvtog, sndtog;
        u8 hctl;
 
-       old_dev = max3421_hcd->loaded_dev;
-       old_epnum = max3421_hcd->loaded_epnum;
-
-       same_ep = (dev == old_dev && epnum == old_epnum);
-       if (same_ep && !force_toggles)
-               return;
-
-       if (old_dev && !same_ep) {
-               /* save the old end-points toggles: */
-               u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
-
-               rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1;
-               sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
-
-               /* no locking: HCD (i.e., we) own toggles, don't we? */
-               usb_settoggle(old_dev, old_epnum, 0, rcvtog);
-               usb_settoggle(old_dev, old_epnum, 1, sndtog);
-       }
        /* setup new endpoint's toggle bits: */
        rcvtog = usb_gettoggle(dev, epnum, 0);
        sndtog = usb_gettoggle(dev, epnum, 1);
        hctl = (BIT(rcvtog + MAX3421_HCTL_RCVTOG0_BIT) |
                BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT));
 
-       max3421_hcd->loaded_epnum = epnum;
        spi_wr8(hcd, MAX3421_REG_HCTL, hctl);
 
        /*
@@ -532,7 +508,6 @@ max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum,
         * address-assignment so it's best to just always load the
         * address whenever the end-point changed/was forced.
         */
-       max3421_hcd->loaded_dev = dev;
        spi_wr8(hcd, MAX3421_REG_PERADDR, dev->devnum);
 }
 
@@ -667,7 +642,7 @@ max3421_select_and_start_urb(struct usb_hcd *hcd)
        struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
        struct urb *urb, *curr_urb = NULL;
        struct max3421_ep *max3421_ep;
-       int epnum, force_toggles = 0;
+       int epnum;
        struct usb_host_endpoint *ep;
        struct list_head *pos;
        unsigned long flags;
@@ -777,7 +752,6 @@ done:
                        usb_settoggle(urb->dev, epnum, 0, 1);
                        usb_settoggle(urb->dev, epnum, 1, 1);
                        max3421_ep->pkt_state = PKT_STATE_SETUP;
-                       force_toggles = 1;
                } else
                        max3421_ep->pkt_state = PKT_STATE_TRANSFER;
        }
@@ -785,7 +759,7 @@ done:
        spin_unlock_irqrestore(&max3421_hcd->lock, flags);
 
        max3421_ep->last_active = max3421_hcd->frame_number;
-       max3421_set_address(hcd, urb->dev, epnum, force_toggles);
+       max3421_set_address(hcd, urb->dev, epnum);
        max3421_set_speed(hcd, urb->dev);
        max3421_next_transfer(hcd, 0);
        return 1;
@@ -1379,6 +1353,16 @@ max3421_urb_done(struct usb_hcd *hcd)
                status = 0;
        urb = max3421_hcd->curr_urb;
        if (urb) {
+               /* save the old end-points toggles: */
+               u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
+               int rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1;
+               int sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
+               int epnum = usb_endpoint_num(&urb->ep->desc);
+
+               /* no locking: HCD (i.e., we) own toggles, don't we? */
+               usb_settoggle(urb->dev, epnum, 0, rcvtog);
+               usb_settoggle(urb->dev, epnum, 1, sndtog);
+
                max3421_hcd->curr_urb = NULL;
                spin_lock_irqsave(&max3421_hcd->lock, flags);
                usb_hcd_unlink_urb_from_ep(hcd, urb);
index e9b18fc..151e93c 100644 (file)
@@ -1638,11 +1638,12 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf)
         * Inform the usbcore about resume-in-progress by returning
         * a non-zero value even if there are no status changes.
         */
+       spin_lock_irqsave(&xhci->lock, flags);
+
        status = bus_state->resuming_ports;
 
        mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC;
 
-       spin_lock_irqsave(&xhci->lock, flags);
        /* For each port, did anything change?  If so, set that bit in buf. */
        for (i = 0; i < max_ports; i++) {
                temp = readl(ports[i]->addr);
index 1da6479..5923844 100644 (file)
@@ -207,8 +207,7 @@ static int renesas_check_rom_state(struct pci_dev *pdev)
                        return 0;
 
                case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */
-                       dev_dbg(&pdev->dev, "Unknown ROM status ...\n");
-                       break;
+                       return 0;
 
                case RENESAS_ROM_STATUS_ERROR: /* Error State */
                default: /* All other states are marked as "Reserved states" */
@@ -225,12 +224,13 @@ static int renesas_fw_check_running(struct pci_dev *pdev)
        u8 fw_state;
        int err;
 
-       /*
-        * Only if device has ROM and loaded FW we can skip loading and
-        * return success. Otherwise (even unknown state), attempt to load FW.
-        */
-       if (renesas_check_rom(pdev) && !renesas_check_rom_state(pdev))
-               return 0;
+       /* Check if the device has a ROM with firmware already loaded; if so, skip everything */
+       err = renesas_check_rom(pdev);
+       if (err) { /* we have a ROM */
+               err = renesas_check_rom_state(pdev);
+               if (!err)
+                       return err;
+       }
 
        /*
         * Test if the device actually needs the firmware. As most
index 18c2bbd..1c9a795 100644 (file)
@@ -636,7 +636,14 @@ static const struct pci_device_id pci_ids[] = {
        { /* end: all zeroes */ }
 };
 MODULE_DEVICE_TABLE(pci, pci_ids);
+
+/*
+ * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't
+ * load firmware, so don't encumber the xhci-pci driver with it.
+ */
+#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS)
 MODULE_FIRMWARE("renesas_usb_fw.mem");
+#endif
 
 /* pci driver glue; this is a "new style" PCI driver module */
 static struct pci_driver xhci_pci_driver = {
index 83ed508..1b24492 100644 (file)
@@ -86,10 +86,10 @@ static struct usb_phy *__device_to_usb_phy(struct device *dev)
 
        list_for_each_entry(usb_phy, &phy_list, head) {
                if (usb_phy->dev == dev)
-                       break;
+                       return usb_phy;
        }
 
-       return usb_phy;
+       return NULL;
 }
 
 static void usb_phy_set_default_current(struct usb_phy *usb_phy)
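
The rewrite matters because list_for_each_entry() leaves its cursor pointing at a bogus container of the list head when the walk finishes without a match; returning the entry from inside the loop and NULL afterwards is the safe shape, and the uevent caller below can then detect the miss. A generic userspace sketch of the pattern:

#include <stddef.h>
#include <stdio.h>

struct node {
	int key;
	struct node *next;
};

static struct node *find(struct node *head, int key)
{
	for (struct node *n = head; n; n = n->next)
		if (n->key == key)
			return n;	/* return the match directly... */
	return NULL;			/* ...so a miss is detectable */
}

int main(void)
{
	struct node b = { 2, NULL };
	struct node a = { 1, &b };

	printf("%p\n", (void *)find(&a, 3));	/* (nil): no false hit */
	return 0;
}
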
@@ -150,8 +150,14 @@ static int usb_phy_uevent(struct device *dev, struct kobj_uevent_env *env)
        struct usb_phy *usb_phy;
        char uchger_state[50] = { 0 };
        char uchger_type[50] = { 0 };
+       unsigned long flags;
 
+       spin_lock_irqsave(&phy_lock, flags);
        usb_phy = __device_to_usb_phy(dev);
+       spin_unlock_irqrestore(&phy_lock, flags);
+
+       if (!usb_phy)
+               return -ENODEV;
 
        snprintf(uchger_state, ARRAY_SIZE(uchger_state),
                 "USB_CHARGER_STATE=%s", usb_chger_state[usb_phy->chg_state]);
index b5e7991..a3c2b01 100644 (file)
@@ -101,6 +101,8 @@ static struct dma_chan *usbhsf_dma_chan_get(struct usbhs_fifo *fifo,
 #define usbhsf_dma_map(p)      __usbhsf_dma_map_ctrl(p, 1)
 #define usbhsf_dma_unmap(p)    __usbhsf_dma_map_ctrl(p, 0)
 static int __usbhsf_dma_map_ctrl(struct usbhs_pkt *pkt, int map);
+static void usbhsf_tx_irq_ctrl(struct usbhs_pipe *pipe, int enable);
+static void usbhsf_rx_irq_ctrl(struct usbhs_pipe *pipe, int enable);
 struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt)
 {
        struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe);
@@ -123,6 +125,11 @@ struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt)
                if (chan) {
                        dmaengine_terminate_all(chan);
                        usbhsf_dma_unmap(pkt);
+               } else {
+                       if (usbhs_pipe_is_dir_in(pipe))
+                               usbhsf_rx_irq_ctrl(pipe, 0);
+                       else
+                               usbhsf_tx_irq_ctrl(pipe, 0);
                }
 
                usbhs_pipe_clear_without_sequence(pipe, 0, 0);
index 09b845d..3c80bfb 100644 (file)
@@ -155,6 +155,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */
        { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */
        { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
+       { USB_DEVICE(0x10C4, 0x8A5B) }, /* CEL EM3588 ZigBee USB Stick */
        { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
        { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */
        { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
@@ -202,8 +203,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */
        { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */
        { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */
-       { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */
-       { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */
+       { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 M.2 Key E serial interface */
+       { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 Display serial interface */
        { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */
        { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
        { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
index 7608584..0fbe253 100644 (file)
@@ -238,6 +238,7 @@ static void option_instat_callback(struct urb *urb);
 #define QUECTEL_PRODUCT_UC15                   0x9090
 /* These u-blox products use Qualcomm's vendor ID */
 #define UBLOX_PRODUCT_R410M                    0x90b2
+#define UBLOX_PRODUCT_R6XX                     0x90fa
 /* These Yuga products use Qualcomm's vendor ID */
 #define YUGA_PRODUCT_CLM920_NC5                        0x9625
 
@@ -1101,6 +1102,8 @@ static const struct usb_device_id option_ids[] = {
        /* u-blox products using Qualcomm vendor ID */
        { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M),
          .driver_info = RSVD(1) | RSVD(3) },
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX),
+         .driver_info = RSVD(3) },
        /* Quectel products using Quectel vendor ID */
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff),
          .driver_info = NUMEP2 },
index f9677a5..c35a6db 100644 (file)
@@ -45,6 +45,13 @@ UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME),
 
+/* Reported-by: Julian Sikorski <belegdol@gmail.com> */
+UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999,
+               "LaCie",
+               "Rugged USB3-FW",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_IGNORE_UAS),
+
 /*
  * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI
  * commands in UAS mode.  Observed with the 1.28 firmware; are there others?
index 6eaeba9..e7745d1 100644 (file)
@@ -685,6 +685,15 @@ static int stusb160x_probe(struct i2c_client *client)
        if (!fwnode)
                return -ENODEV;
 
+       /*
+        * This fwnode has a "compatible" property, but is never populated as a
+        * struct device. Instead we simply parse it to read the properties.
+        * This breaks fw_devlink=on. To maintain backward compatibility
+        * with existing DT files, we work around this by deleting any
+        * fwnode_links to/from this fwnode.
+        */
+       fw_devlink_purge_absent_suppliers(fwnode);
+
        /*
         * When both VDD and VSYS power supplies are present, the low power
         * supply VSYS is selected when VSYS voltage is above 3.1 V.
@@ -739,10 +748,6 @@ static int stusb160x_probe(struct i2c_client *client)
        typec_set_pwr_opmode(chip->port, chip->pwr_opmode);
 
        if (client->irq) {
-               ret = stusb160x_irq_init(chip, client->irq);
-               if (ret)
-                       goto port_unregister;
-
                chip->role_sw = fwnode_usb_role_switch_get(fwnode);
                if (IS_ERR(chip->role_sw)) {
                        ret = PTR_ERR(chip->role_sw);
@@ -752,6 +757,10 @@ static int stusb160x_probe(struct i2c_client *client)
                                        ret);
                        goto port_unregister;
                }
+
+               ret = stusb160x_irq_init(chip, client->irq);
+               if (ret)
+                       goto role_sw_put;
        } else {
                /*
                 * If Source or Dual power role, need to enable VDD supply
@@ -775,6 +784,9 @@ static int stusb160x_probe(struct i2c_client *client)
 
        return 0;
 
+role_sw_put:
+       if (chip->role_sw)
+               usb_role_switch_put(chip->role_sw);
 port_unregister:
        typec_unregister_port(chip->port);
 all_reg_disable:
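
The reordering follows the usual acquire-in-order, unwind-in-reverse shape: the interrupt is requested last because it may fire as soon as it is registered, and its handler can already need the role switch. A compact userspace model of the goto-unwind idiom (all functions hypothetical):

#include <stdio.h>

static int get_role_switch(void)   { puts("role_sw get");  return 0; }
static void put_role_switch(void)  { puts("role_sw put"); }
static int init_irq(void)          { puts("irq init");     return -1; }
static void unregister_port(void)  { puts("port unregister"); }

int main(void)
{
	int ret;

	ret = get_role_switch();	/* acquire A */
	if (ret)
		goto port_unregister;

	ret = init_irq();		/* acquire B last: may fire at once */
	if (ret)
		goto role_sw_put;	/* unwind in reverse order */

	return 0;

role_sw_put:
	put_role_switch();
port_unregister:
	unregister_port();
	return ret;
}
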
index 938219b..21b3ae2 100644 (file)
@@ -629,6 +629,15 @@ static int tps6598x_probe(struct i2c_client *client)
        if (!fwnode)
                return -ENODEV;
 
+       /*
+        * This fwnode has a "compatible" property, but is never populated as a
+        * struct device. Instead we simply parse it to read the properties.
+        * This breaks fw_devlink=on. To maintain backward compatibility
+        * with existing DT files, we work around this by deleting any
+        * fwnode_links to/from this fwnode.
+        */
+       fw_devlink_purge_absent_suppliers(fwnode);
+
        tps->role_sw = fwnode_usb_role_switch_get(fwnode);
        if (IS_ERR(tps->role_sw)) {
                ret = PTR_ERR(tps->role_sw);
index 2a31467..de8e8a1 100644 (file)
@@ -573,7 +573,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
        MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
 
        err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
index 06fb7a9..4d5ae61 100644 (file)
@@ -168,21 +168,6 @@ config OSF4_COMPAT
          with v4 shared libraries freely available from Compaq. If you're
          going to use shared libraries from Tru64 version 5.0 or later, say N.
 
-config BINFMT_EM86
-       tristate "Kernel support for Linux/Intel ELF binaries"
-       depends on ALPHA
-       help
-         Say Y here if you want to be able to execute Linux/Intel ELF
-         binaries just like native Alpha binaries on your Alpha machine. For
-         this to work, you need to have the emulator /usr/bin/em86 in place.
-
-         You can get the same functionality by saying N here and saying Y to
-         "Kernel support for MISC binaries".
-
-         You may answer M to compile the emulation support as a module and
-         later load the module when you want to use a Linux/Intel binary. The
-         module will be called binfmt_em86. If unsure, say Y.
-
 config BINFMT_MISC
        tristate "Kernel support for MISC binaries"
        help
index 9c708e1..f98f3e6 100644 (file)
@@ -39,7 +39,6 @@ obj-$(CONFIG_FS_ENCRYPTION)   += crypto/
 obj-$(CONFIG_FS_VERITY)                += verity/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_BINFMT_AOUT)      += binfmt_aout.o
-obj-$(CONFIG_BINFMT_EM86)      += binfmt_em86.o
 obj-$(CONFIG_BINFMT_MISC)      += binfmt_misc.o
 obj-$(CONFIG_BINFMT_SCRIPT)    += binfmt_script.o
 obj-$(CONFIG_BINFMT_ELF)       += binfmt_elf.o
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
deleted file mode 100644 (file)
index 06b9b9f..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  linux/fs/binfmt_em86.c
- *
- *  Based on linux/fs/binfmt_script.c
- *  Copyright (C) 1996  Martin von Löwis
- *  original #!-checking implemented by tytso.
- *
- *  em86 changes Copyright (C) 1997  Jim Paradis
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/binfmts.h>
-#include <linux/elf.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/errno.h>
-
-
-#define EM86_INTERP    "/usr/bin/em86"
-#define EM86_I_NAME    "em86"
-
-static int load_em86(struct linux_binprm *bprm)
-{
-       const char *i_name, *i_arg;
-       char *interp;
-       struct file * file;
-       int retval;
-       struct elfhdr   elf_ex;
-
-       /* Make sure this is a Linux/Intel ELF executable... */
-       elf_ex = *((struct elfhdr *)bprm->buf);
-
-       if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
-               return  -ENOEXEC;
-
-       /* First of all, some simple consistency checks */
-       if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
-               (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
-               !bprm->file->f_op->mmap) {
-                       return -ENOEXEC;
-       }
-
-       /* Need to be able to load the file after exec */
-       if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
-               return -ENOENT;
-
-       /* Unlike in the script case, we don't have to do any hairy
-        * parsing to find our interpreter... it's hardcoded!
-        */
-       interp = EM86_INTERP;
-       i_name = EM86_I_NAME;
-       i_arg = NULL;           /* We reserve the right to add an arg later */
-
-       /*
-        * Splice in (1) the interpreter's name for argv[0]
-        *           (2) (optional) argument to interpreter
-        *           (3) filename of emulated file (replace argv[0])
-        *
-        * This is done in reverse order, because of how the
-        * user environment and arguments are stored.
-        */
-       remove_arg_zero(bprm);
-       retval = copy_string_kernel(bprm->filename, bprm);
-       if (retval < 0) return retval; 
-       bprm->argc++;
-       if (i_arg) {
-               retval = copy_string_kernel(i_arg, bprm);
-               if (retval < 0) return retval; 
-               bprm->argc++;
-       }
-       retval = copy_string_kernel(i_name, bprm);
-       if (retval < 0) return retval;
-       bprm->argc++;
-
-       /*
-        * OK, now restart the process with the interpreter's inode.
-        * Note that we use open_exec() as the name is now in kernel
-        * space, and we don't need to copy it.
-        */
-       file = open_exec(interp);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
-
-       bprm->interpreter = file;
-       return 0;
-}
-
-static struct linux_binfmt em86_format = {
-       .module         = THIS_MODULE,
-       .load_binary    = load_em86,
-};
-
-static int __init init_em86_binfmt(void)
-{
-       register_binfmt(&em86_format);
-       return 0;
-}
-
-static void __exit exit_em86_binfmt(void)
-{
-       unregister_binfmt(&em86_format);
-}
-
-core_initcall(init_em86_binfmt);
-module_exit(exit_em86_binfmt);
-MODULE_LICENSE("GPL");
index 0c424a0..9ef4f1f 100644 (file)
@@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode)
        free_percpu(bdev->bd_stats);
        kfree(bdev->bd_meta_info);
 
+       if (!bdev_is_partition(bdev))
+               kfree(bdev->bd_disk);
        kmem_cache_free(bdev_cachep, BDEV_I(inode));
 }
 
index 7a8a2fc..78b202d 100644 (file)
@@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info, u64 bytenr,
                         u64 time_seq, struct ulist **roots,
-                        bool ignore_offset)
+                        bool ignore_offset, bool skip_commit_root_sem)
 {
        int ret;
 
-       if (!trans)
+       if (!trans && !skip_commit_root_sem)
                down_read(&fs_info->commit_root_sem);
        ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
                                        time_seq, roots, ignore_offset);
-       if (!trans)
+       if (!trans && !skip_commit_root_sem)
                up_read(&fs_info->commit_root_sem);
        return ret;
 }
index 17abde7..ff5f07f 100644 (file)
@@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
                         const u64 *extent_item_pos, bool ignore_offset);
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info, u64 bytenr,
-                        u64 time_seq, struct ulist **roots, bool ignore_offset);
+                        u64 time_seq, struct ulist **roots, bool ignore_offset,
+                        bool skip_commit_root_sem);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                        u32 name_len, unsigned long name_off,
                        struct extent_buffer *eb_in, u64 parent,
index 9a023ae..30d82cd 100644 (file)
@@ -352,7 +352,7 @@ static void end_compressed_bio_write(struct bio *bio)
        btrfs_record_physical_zoned(inode, cb->start, bio);
        btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
                        cb->start, cb->start + cb->len - 1,
-                       bio->bi_status == BLK_STS_OK);
+                       !cb->errors);
 
        end_compressed_writeback(inode, cb);
        /* note, our inode could be gone now */
index 06bc842..ca848b1 100644 (file)
@@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 
        if (qrecord_inserted)
-               btrfs_qgroup_trace_extent_post(fs_info, record);
+               btrfs_qgroup_trace_extent_post(trans, record);
 
        return 0;
 }
@@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 
 
        if (qrecord_inserted)
-               return btrfs_qgroup_trace_extent_post(fs_info, record);
+               return btrfs_qgroup_trace_extent_post(trans, record);
        return 0;
 }
 
index b117dd3..a59ab7b 100644 (file)
@@ -209,7 +209,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
 static void csum_tree_block(struct extent_buffer *buf, u8 *result)
 {
        struct btrfs_fs_info *fs_info = buf->fs_info;
-       const int num_pages = fs_info->nodesize >> PAGE_SHIFT;
+       const int num_pages = num_extent_pages(buf);
        const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        char *kaddr;
index d296483..268ce58 100644 (file)
@@ -6019,6 +6019,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
        devices = &fs_info->fs_devices->devices;
        list_for_each_entry(device, devices, dev_list) {
+               if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+                       continue;
+
                ret = btrfs_trim_free_extents(device, &group_trimmed);
                if (ret) {
                        dev_failed++;
index 8f60314..0117d86 100644 (file)
@@ -2992,7 +2992,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
-       if (ordered_extent->disk)
+       if (ordered_extent->bdev)
                btrfs_rewrite_logical_zoned(ordered_extent);
 
        btrfs_free_io_failure_record(inode, start, end);
index 6eb41b7..5c0f848 100644 (file)
@@ -190,8 +190,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
        entry->truncated_len = (u64)-1;
        entry->qgroup_rsv = ret;
        entry->physical = (u64)-1;
-       entry->disk = NULL;
-       entry->partno = (u8)-1;
 
        ASSERT(type == BTRFS_ORDERED_REGULAR ||
               type == BTRFS_ORDERED_NOCOW ||
index 5664720..b2d88ab 100644 (file)
@@ -145,8 +145,7 @@ struct btrfs_ordered_extent {
         * command in a workqueue context
         */
        u64 physical;
-       struct gendisk *disk;
-       u8 partno;
+       struct block_device *bdev;
 };
 
 /*
index 07ec06d..0fa1211 100644 (file)
@@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                   struct btrfs_qgroup_extent_record *qrecord)
 {
        struct ulist *old_root;
        u64 bytenr = qrecord->bytenr;
        int ret;
 
-       ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
+       /*
+        * We are always called in a context where we are already holding a
+        * transaction handle. Often we are called when adding a data delayed
+        * reference from btrfs_truncate_inode_items() (truncating or unlinking),
+        * in which case we will be holding a write lock on extent buffer from a
+        * subvolume tree. In this case we can't allow btrfs_find_all_roots() to
+        * acquire fs_info->commit_root_sem, because that is a higher level lock
+        * that must be acquired before locking any extent buffers.
+        *
+        * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
+        * but we can't pass it a non-NULL transaction handle, because otherwise
+        * it would not use commit roots and would lock extent buffers, causing
+        * a deadlock if it ends up trying to read lock the same extent buffer
+        * that was previously write locked at btrfs_truncate_inode_items().
+        *
+        * So pass a NULL transaction handle to btrfs_find_all_roots() and
+        * explicitly tell it to not acquire the commit_root_sem - if we are
+        * holding a transaction handle we don't need its protection.
+        */
+       ASSERT(trans != NULL);
+
+       ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
+                                  false, true);
        if (ret < 0) {
-               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-               btrfs_warn(fs_info,
+               trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+               btrfs_warn(trans->fs_info,
 "error accounting new delayed refs extent (err code: %d), quota inconsistent",
                        ret);
                return 0;
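
The long comment above is a lock-ordering argument: commit_root_sem ranks above extent buffer locks, so a path already holding an extent buffer write lock must never take commit_root_sem afterwards. A toy pthreads model of the permitted order (lock names borrowed for illustration only):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t commit_root_sem = PTHREAD_RWLOCK_INITIALIZER;
static pthread_rwlock_t extent_buffer_lock = PTHREAD_RWLOCK_INITIALIZER;

int main(void)
{
	/* Permitted order: commit_root_sem first, buffer lock second. */
	pthread_rwlock_rdlock(&commit_root_sem);
	pthread_rwlock_wrlock(&extent_buffer_lock);
	pthread_rwlock_unlock(&extent_buffer_lock);
	pthread_rwlock_unlock(&commit_root_sem);

	/* The qgroup path already holds an extent buffer write lock, so
	 * taking commit_root_sem afterwards would invert this order;
	 * hence the skip_commit_root_sem flag. */
	puts("lock order respected");
	return 0;
}
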
@@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
                kfree(record);
                return 0;
        }
-       return btrfs_qgroup_trace_extent_post(fs_info, record);
+       return btrfs_qgroup_trace_extent_post(trans, record);
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                                /* Search commit root to find old_roots */
                                ret = btrfs_find_all_roots(NULL, fs_info,
                                                record->bytenr, 0,
-                                               &record->old_roots, false);
+                                               &record->old_roots, false, false);
                                if (ret < 0)
                                        goto cleanup;
                        }
@@ -2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                         * current root. It's safe inside commit_transaction().
                         */
                        ret = btrfs_find_all_roots(trans, fs_info,
-                               record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
+                          record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
                        if (ret < 0)
                                goto cleanup;
                        if (qgroup_to_skip) {
@@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
                        num_bytes = found.offset;
 
                ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
-                                          &roots, false);
+                                          &roots, false, false);
                if (ret < 0)
                        goto out;
                /* For rescan, just pass old_roots as NULL */
index 7283e4f..880e9df 100644 (file)
@@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock(
  * using current root, then we can move all expensive backref walk out of
  * transaction committing, but not now as qgroup accounting will be wrong again.
  */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                   struct btrfs_qgroup_extent_record *qrecord);
 
 /*
index f313728..98b5aab 100644 (file)
@@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
         * quota.
         */
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
        new_roots = NULL;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                return -EINVAL;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root,
        }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root,
        }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root *root,
        }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
index dc6eb08..e6430ac 100644 (file)
@@ -5526,16 +5526,29 @@ log_extents:
                spin_lock(&inode->lock);
                inode->logged_trans = trans->transid;
                /*
-                * Don't update last_log_commit if we logged that an inode exists
-                * after it was loaded to memory (full_sync bit set).
-                * This is to prevent data loss when we do a write to the inode,
-                * then the inode gets evicted after all delalloc was flushed,
-                * then we log it exists (due to a rename for example) and then
-                * fsync it. This last fsync would do nothing (not logging the
-                * extents previously written).
+                * Don't update last_log_commit if we logged that an inode exists.
+                * We do this for two reasons:
+                *
+                * 1) We might have had buffered writes to this inode that were
+                *    flushed and had their ordered extents completed in this
+                *    transaction, but we did not previously log the inode with
+                *    LOG_INODE_ALL. Later the inode was evicted and after that
+                *    it was loaded again and this LOG_INODE_EXISTS log operation
+                *    happened. We must make sure that if an explicit fsync against
+                *    the inode is performed later, it logs the new extents, an
+                *    updated inode item, etc, and syncs the log. The same logic
+                *    applies to direct IO writes instead of buffered writes.
+                *
+                * 2) When we log the inode with LOG_INODE_EXISTS, its inode item
+                *    is logged with an i_size of 0 or whatever value was logged
+                *    before. If later the i_size of the inode is increased by a
+                *    truncate operation, the log is synced through an fsync of
+                *    some other inode and then finally an explicit fsync against
+                *    this inode is made, we must make sure this fsync logs the
+                *    inode with the new i_size, the hole between old i_size and
+                *    the new i_size, and syncs the log.
                 */
-               if (inode_only != LOG_INODE_EXISTS ||
-                   !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+               if (inode_only != LOG_INODE_EXISTS)
                        inode->last_log_commit = inode->last_sub_trans;
                spin_unlock(&inode->lock);
        }
@@ -6490,8 +6503,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
         * if this inode hasn't been logged and directory we're renaming it
         * from hasn't been logged, we don't need to log it
         */
-       if (inode->logged_trans < trans->transid &&
-           (!old_dir || old_dir->logged_trans < trans->transid))
+       if (!inode_logged(trans, inode) &&
+           (!old_dir || !inode_logged(trans, old_dir)))
                return;
 
        /*
index 1e4d43f..70f94b7 100644 (file)
@@ -1078,6 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
                if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                        list_del_init(&device->dev_alloc_list);
                        clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+                       fs_devices->rw_devices--;
                }
                list_del_init(&device->dev_list);
                fs_devices->num_devices--;
index 297c0b1..907c2cc 100644 (file)
@@ -1349,8 +1349,7 @@ void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
                return;
 
        ordered->physical = physical;
-       ordered->disk = bio->bi_bdev->bd_disk;
-       ordered->partno = bio->bi_bdev->bd_partno;
+       ordered->bdev = bio->bi_bdev;
 
        btrfs_put_ordered_extent(ordered);
 }
@@ -1362,18 +1361,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
        struct extent_map_tree *em_tree;
        struct extent_map *em;
        struct btrfs_ordered_sum *sum;
-       struct block_device *bdev;
        u64 orig_logical = ordered->disk_bytenr;
        u64 *logical = NULL;
        int nr, stripe_len;
 
        /* Zoned devices should not have partitions. So, we can assume it is 0 */
-       ASSERT(ordered->partno == 0);
-       bdev = bdgrab(ordered->disk->part0);
-       if (WARN_ON(!bdev))
+       ASSERT(!bdev_is_partition(ordered->bdev));
+       if (WARN_ON(!ordered->bdev))
                return;
 
-       if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev,
+       if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev,
                                     ordered->physical, &logical, &nr,
                                     &stripe_len)))
                goto out;
@@ -1402,7 +1399,6 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
 
 out:
        kfree(logical);
-       bdput(bdev);
 }
 
 bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
index a818213..9db1b39 100644 (file)
@@ -4456,7 +4456,7 @@ bool check_session_state(struct ceph_mds_session *s)
                break;
        case CEPH_MDS_SESSION_CLOSING:
                /* Should never reach this when we're unmounting */
-               WARN_ON_ONCE(true);
+               WARN_ON_ONCE(s->s_ttl);
                fallthrough;
        case CEPH_MDS_SESSION_NEW:
        case CEPH_MDS_SESSION_RESTARTING:
index f72e3b3..65d1a65 100644 (file)
@@ -873,8 +873,11 @@ PsxDelete:
                                InformationLevel) - 4;
        offset = param_offset + params;
 
-       /* Setup pointer to Request Data (inode type) */
-       pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset);
+       /* Setup pointer to Request Data (inode type).
+        * Note that SMB offsets are from the beginning of SMB which is 4 bytes
+        * in, after RFC1001 field
+        */
+       pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4);
        pRqD->type = cpu_to_le16(type);
        pSMB->ParameterOffset = cpu_to_le16(param_offset);
        pSMB->DataOffset = cpu_to_le16(offset);
@@ -1081,7 +1084,8 @@ PsxCreat:
        param_offset = offsetof(struct smb_com_transaction2_spi_req,
                                InformationLevel) - 4;
        offset = param_offset + params;
-       pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
+       /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */
+       pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4);
        pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
        pdata->Permissions = cpu_to_le64(mode);
        pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
index 1b04d6e..3781eee 100644 (file)
@@ -220,7 +220,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 #ifdef CONFIG_CIFS_DFS_UPCALL
        struct super_block *sb = NULL;
        struct cifs_sb_info *cifs_sb = NULL;
-       struct dfs_cache_tgt_list tgt_list = {0};
+       struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
        struct dfs_cache_tgt_iterator *tgt_it = NULL;
 #endif
 
@@ -3130,7 +3130,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 {
        int rc;
        char *npath = NULL;
-       struct dfs_cache_tgt_list tgt_list = {0};
+       struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
        struct dfs_cache_tgt_iterator *tgt_it = NULL;
        struct smb3_fs_context tmp_ctx = {NULL};
 
index 7c17697..2837455 100644 (file)
@@ -19,6 +19,7 @@
 #include "cifs_debug.h"
 #include "cifs_unicode.h"
 #include "smb2glob.h"
+#include "dns_resolve.h"
 
 #include "dfs_cache.h"
 
@@ -911,6 +912,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl)
 
 err_free_it:
        list_for_each_entry_safe(it, nit, head, it_list) {
+               list_del(&it->it_list);
                kfree(it->it_name);
                kfree(it);
        }
@@ -1293,6 +1295,194 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
        return 0;
 }
 
+static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, const char *s2)
+{
+       char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0};
+       const char *host;
+       size_t hostlen;
+       char *ip = NULL;
+       struct sockaddr sa;
+       bool match;
+       int rc;
+
+       if (strcasecmp(s1, s2))
+               return false;
+
+       /*
+        * Resolve the share's hostname and check if the server address matches.  Otherwise just
+        * ignore it, as we either could not upcall to resolve the hostname or failed to convert
+        * the ip address.
+        */
+       match = true;
+       extract_unc_hostname(s1, &host, &hostlen);
+       scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host);
+
+       rc = dns_resolve_server_name_to_ip(unc, &ip, NULL);
+       if (rc < 0) {
+               cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n",
+                        __func__, (int)hostlen, host);
+               return true;
+       }
+
+       if (!cifs_convert_address(&sa, ip, strlen(ip))) {
+               cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n",
+                        __func__, ip);
+       } else {
+               mutex_lock(&server->srv_mutex);
+               match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa);
+               mutex_unlock(&server->srv_mutex);
+       }
+
+       kfree(ip);
+       return match;
+}
+
+/*
+ * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new
+ * target shares in @refs.
+ */
+static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl,
+                                        const struct dfs_info3_param *refs, int numrefs)
+{
+       struct dfs_cache_tgt_iterator *it;
+       int i;
+
+       for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) {
+               for (i = 0; i < numrefs; i++) {
+                       if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it),
+                                              refs[i].node_name))
+                               return;
+               }
+       }
+
+       cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
+       for (i = 0; i < tcon->ses->chan_count; i++) {
+               spin_lock(&GlobalMid_Lock);
+               if (tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+                       tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+               spin_unlock(&GlobalMid_Lock);
+       }
+}
+
+/* Refresh dfs referral of tcon and mark it for reconnect if needed */
+static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+{
+       const char *path = tcon->dfs_path + 1;
+       struct cifs_ses *ses;
+       struct cache_entry *ce;
+       struct dfs_info3_param *refs = NULL;
+       int numrefs = 0;
+       bool needs_refresh = false;
+       struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+       int rc = 0;
+       unsigned int xid;
+
+       ses = find_ipc_from_server_path(sessions, path);
+       if (IS_ERR(ses)) {
+               cifs_dbg(FYI, "%s: could not find ipc session\n", __func__);
+               return PTR_ERR(ses);
+       }
+
+       down_read(&htable_rw_lock);
+       ce = lookup_cache_entry(path);
+       needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
+       if (!IS_ERR(ce)) {
+               rc = get_targets(ce, &tl);
+               if (rc)
+                       cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc);
+       }
+       up_read(&htable_rw_lock);
+
+       if (!needs_refresh) {
+               rc = 0;
+               goto out;
+       }
+
+       xid = get_xid();
+       rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
+       free_xid(xid);
+
+       /* Create or update a cache entry with the new referral */
+       if (!rc) {
+               dump_refs(refs, numrefs);
+
+               down_write(&htable_rw_lock);
+               ce = lookup_cache_entry(path);
+               if (IS_ERR(ce))
+                       add_cache_entry_locked(refs, numrefs);
+               else if (force_refresh || cache_entry_expired(ce))
+                       update_cache_entry_locked(ce, refs, numrefs);
+               up_write(&htable_rw_lock);
+
+               mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs);
+       }
+
+out:
+       dfs_cache_free_tgts(&tl);
+       free_dfs_info_array(refs, numrefs);
+       return rc;
+}
+
+/**
+ * dfs_cache_remount_fs - remount a DFS share
+ *
+ * Reconfigure the dfs mount by forcing a new DFS referral and, if the currently cached targets
+ * do not match any of the new targets, mark it for reconnect.
+ *
+ * @cifs_sb: cifs superblock.
+ *
+ * Return zero if remounted, otherwise non-zero.
+ */
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
+{
+       struct cifs_tcon *tcon;
+       struct mount_group *mg;
+       struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
+       int rc;
+
+       if (!cifs_sb || !cifs_sb->master_tlink)
+               return -EINVAL;
+
+       tcon = cifs_sb_master_tcon(cifs_sb);
+       if (!tcon->dfs_path) {
+               cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__);
+               return 0;
+       }
+
+       if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
+               cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__);
+               return -EINVAL;
+       }
+
+       mutex_lock(&mount_group_list_lock);
+       mg = find_mount_group_locked(&cifs_sb->dfs_mount_id);
+       if (IS_ERR(mg)) {
+               mutex_unlock(&mount_group_list_lock);
+               cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__);
+               return PTR_ERR(mg);
+       }
+       kref_get(&mg->refcount);
+       mutex_unlock(&mount_group_list_lock);
+
+       spin_lock(&mg->lock);
+       memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0]));
+       spin_unlock(&mg->lock);
+
+       /*
+        * After reconnecting to a different server, unique ids won't match anymore, so we disable
+        * serverino. This prevents dentry revalidation from thinking the dentries are stale (ESTALE).
+        */
+       cifs_autodisable_serverino(cifs_sb);
+       /*
+        * Force the use of prefix path to support failover on DFS paths that resolve to targets
+        * that have different prefix paths.
+        */
+       cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+       rc = refresh_tcon(sessions, tcon, true);
+
+       kref_put(&mg->refcount, mount_group_release);
+       return rc;
+}
+
 /*
  * Refresh all active dfs mounts regardless of whether they are in cache or not.
  * (cache can be cleared)
@@ -1303,7 +1493,6 @@ static void refresh_mounts(struct cifs_ses **sessions)
        struct cifs_ses *ses;
        struct cifs_tcon *tcon, *ntcon;
        struct list_head tcons;
-       unsigned int xid;
 
        INIT_LIST_HEAD(&tcons);
 
@@ -1321,44 +1510,8 @@ static void refresh_mounts(struct cifs_ses **sessions)
        spin_unlock(&cifs_tcp_ses_lock);
 
        list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
-               const char *path = tcon->dfs_path + 1;
-               struct cache_entry *ce;
-               struct dfs_info3_param *refs = NULL;
-               int numrefs = 0;
-               bool needs_refresh = false;
-               int rc = 0;
-
                list_del_init(&tcon->ulist);
-
-               ses = find_ipc_from_server_path(sessions, path);
-               if (IS_ERR(ses))
-                       goto next_tcon;
-
-               down_read(&htable_rw_lock);
-               ce = lookup_cache_entry(path);
-               needs_refresh = IS_ERR(ce) || cache_entry_expired(ce);
-               up_read(&htable_rw_lock);
-
-               if (!needs_refresh)
-                       goto next_tcon;
-
-               xid = get_xid();
-               rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
-               free_xid(xid);
-
-               /* Create or update a cache entry with the new referral */
-               if (!rc) {
-                       down_write(&htable_rw_lock);
-                       ce = lookup_cache_entry(path);
-                       if (IS_ERR(ce))
-                               add_cache_entry_locked(refs, numrefs);
-                       else if (cache_entry_expired(ce))
-                               update_cache_entry_locked(ce, refs, numrefs);
-                       up_write(&htable_rw_lock);
-               }
-
-next_tcon:
-               free_dfs_info_array(refs, numrefs);
+               refresh_tcon(sessions, tcon, false);
                cifs_put_tcon(tcon);
        }
 }
index b29d3ae..52070d1 100644 (file)
@@ -13,6 +13,8 @@
 #include <linux/uuid.h>
 #include "cifsglob.h"
 
+#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), }
+
 struct dfs_cache_tgt_list {
        int tl_numtgts;
        struct list_head tl_list;
@@ -44,6 +46,7 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
 void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
 void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
 char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap);
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb);
 
 static inline struct dfs_cache_tgt_iterator *
 dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl,
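
The DFS_CACHE_TGT_LIST_INIT() macro exists because the "{0}" initializers it replaces above left tl_list as a zeroed list_head rather than an empty list; a list head must point at itself. A minimal usage sketch:

	/* correct: the list head references itself */
	struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);

	/* equivalent open-coded form */
	struct dfs_cache_tgt_list tl2 = {
		.tl_numtgts = 0,
		.tl_list = LIST_HEAD_INIT(tl2.tl_list),
	};
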
index cd10860..0a72840 100644 (file)
@@ -4619,7 +4619,7 @@ read_complete:
 
 static int cifs_readpage(struct file *file, struct page *page)
 {
-       loff_t offset = (loff_t)page->index << PAGE_SHIFT;
+       loff_t offset = page_file_offset(page);
        int rc = -EACCES;
        unsigned int xid;
 
index 553adfb..eed59bc 100644 (file)
@@ -13,6 +13,9 @@
 #include <linux/magic.h>
 #include <linux/security.h>
 #include <net/net_namespace.h>
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
 */
 
 #include <linux/ctype.h>
@@ -779,6 +782,10 @@ static int smb3_reconfigure(struct fs_context *fc)
        smb3_cleanup_fs_context_contents(cifs_sb->ctx);
        rc = smb3_fs_context_dup(cifs_sb->ctx, ctx);
        smb3_update_mnt_flags(cifs_sb);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+       if (!rc)
+               rc = dfs_cache_remount_fs(cifs_sb);
+#endif
 
        return rc;
 }
@@ -918,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
                ctx->cred_uid = uid;
                ctx->cruid_specified = true;
                break;
+       case Opt_backupuid:
+               uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(uid))
+                       goto cifs_parse_mount_err;
+               ctx->backupuid = uid;
+               ctx->backupuid_specified = true;
+               break;
        case Opt_backupgid:
                gid = make_kgid(current_user_ns(), result.uint_32);
                if (!gid_valid(gid))
index ba3c58e..2dfd0d8 100644 (file)
@@ -3618,6 +3618,7 @@ static int smb3_simple_fallocate_write_range(unsigned int xid,
 {
        struct cifs_io_parms io_parms = {0};
        int nbytes;
+       int rc = 0;
        struct kvec iov[2];
 
        io_parms.netfid = cfile->fid.netfid;
@@ -3625,13 +3626,25 @@ static int smb3_simple_fallocate_write_range(unsigned int xid,
        io_parms.tcon = tcon;
        io_parms.persistent_fid = cfile->fid.persistent_fid;
        io_parms.volatile_fid = cfile->fid.volatile_fid;
-       io_parms.offset = off;
-       io_parms.length = len;
 
-       /* iov[0] is reserved for smb header */
-       iov[1].iov_base = buf;
-       iov[1].iov_len = io_parms.length;
-       return SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+       while (len) {
+               io_parms.offset = off;
+               io_parms.length = len;
+               if (io_parms.length > SMB2_MAX_BUFFER_SIZE)
+                       io_parms.length = SMB2_MAX_BUFFER_SIZE;
+               /* iov[0] is reserved for smb header */
+               iov[1].iov_base = buf;
+               iov[1].iov_len = io_parms.length;
+               rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+               if (rc)
+                       break;
+               if (nbytes > len)
+                       return -EINVAL;
+               buf += nbytes;
+               off += nbytes;
+               len -= nbytes;
+       }
+       return rc;
 }
 
 static int smb3_simple_fallocate_range(unsigned int xid,
@@ -3655,11 +3668,6 @@ static int smb3_simple_fallocate_range(unsigned int xid,
                        (char **)&out_data, &out_data_len);
        if (rc)
                goto out;
-       /*
-        * It is already all allocated
-        */
-       if (out_data_len == 0)
-               goto out;
 
        buf = kzalloc(1024 * 1024, GFP_KERNEL);
        if (buf == NULL) {
@@ -3782,6 +3790,24 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
                goto out;
        }
 
+       if (keep_size == true) {
+               /*
+                * We can not preallocate pages beyond the end of the file
+                * in SMB2
+                */
+               if (off >= i_size_read(inode)) {
+                       rc = 0;
+                       goto out;
+               }
+               /*
+                * For fallocates that are partially beyond the end of file,
+                * clamp len so we only fallocate up to the end of file.
+                */
+               if (off + len > i_size_read(inode)) {
+                       len = i_size_read(inode) - off;
+               }
+       }
+
        if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
                /*
                 * At this point, we are trying to fallocate an internal
index 14292db..2c2f179 100644 (file)
@@ -106,12 +106,11 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
        return err;
 }
 
-static bool ext2_check_page(struct page *page, int quiet)
+static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
 {
        struct inode *dir = page->mapping->host;
        struct super_block *sb = dir->i_sb;
        unsigned chunk_size = ext2_chunk_size(dir);
-       char *kaddr = page_address(page);
        u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
        unsigned offs, rec_len;
        unsigned limit = PAGE_SIZE;
@@ -205,7 +204,8 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n,
        if (!IS_ERR(page)) {
                *page_addr = kmap_local_page(page);
                if (unlikely(!PageChecked(page))) {
-                       if (PageError(page) || !ext2_check_page(page, quiet))
+                       if (PageError(page) || !ext2_check_page(page, quiet,
+                                                               *page_addr))
                                goto fail;
                }
        }
@@ -584,10 +584,10 @@ out_unlock:
  * ext2_delete_entry deletes a directory entry by merging it with the
  * previous entry. Page is up-to-date.
  */
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
+int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page,
+                       char *kaddr)
 {
        struct inode *inode = page->mapping->host;
-       char *kaddr = page_address(page);
        unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
        unsigned to = ((char *)dir - kaddr) +
                                ext2_rec_len_from_disk(dir->rec_len);
@@ -607,7 +607,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
                de = ext2_next_entry(de);
        }
        if (pde)
-               from = (char*)pde - (char*)page_address(page);
+               from = (char *)pde - kaddr;
        pos = page_offset(page) + from;
        lock_page(page);
        err = ext2_prepare_chunk(page, pos, to - from);
index b0a6948..e512630 100644 (file)
@@ -740,7 +740,8 @@ extern int ext2_inode_by_name(struct inode *dir,
 extern int ext2_make_empty(struct inode *, struct inode *);
 extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *,
                                                struct page **, void **res_page_addr);
-extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page,
+                            char *kaddr);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *,
index 1f69b81..5f6b756 100644 (file)
@@ -293,7 +293,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
                goto out;
        }
 
-       err = ext2_delete_entry (de, page);
+       err = ext2_delete_entry (de, page, page_addr);
        ext2_put_page(page, page_addr);
        if (err)
                goto out;
@@ -397,7 +397,7 @@ static int ext2_rename (struct user_namespace * mnt_userns,
        old_inode->i_ctime = current_time(old_inode);
        mark_inode_dirty(old_inode);
 
-       ext2_delete_entry(old_de, old_page);
+       ext2_delete_entry(old_de, old_page, old_page_addr);
 
        if (dir_de) {
                if (old_dir != new_dir)
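
The ext2 hunks all follow one pattern: the caller already holds a kmap_local_page() mapping, so the mapped address is threaded down to the callee instead of letting it call page_address(), which does not see temporary kmap_local mappings on highmem configurations. A sketch of the calling convention (illustrative; in the real code ext2_put_page() drops the mapping):

	void *kaddr = kmap_local_page(page);
	/* callee works on kaddr instead of calling page_address(page) */
	err = ext2_delete_entry(de, page, kaddr);
	kunmap_local(kaddr);
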
index 06d04a7..4c33705 100644 (file)
@@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode,
         */
        smp_mb();
 
+       if (IS_DAX(inode))
+               return false;
+
        /* while holding I_WB_SWITCH, no one else can update the association */
        spin_lock(&inode->i_lock);
        if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
index 926eeb9..cdfb1ae 100644 (file)
@@ -77,7 +77,7 @@ enum hugetlb_param {
 static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
        fsparam_u32   ("gid",           Opt_gid),
        fsparam_string("min_size",      Opt_min_size),
-       fsparam_u32   ("mode",          Opt_mode),
+       fsparam_u32oct("mode",          Opt_mode),
        fsparam_string("nr_inodes",     Opt_nr_inodes),
        fsparam_string("pagesize",      Opt_pagesize),
        fsparam_string("size",          Opt_size),
index 3ce8edb..82e8eb3 100644 (file)
@@ -61,7 +61,6 @@ extern void __init chrdev_init(void);
  */
 extern const struct fs_context_operations legacy_fs_context_ops;
 extern int parse_monolithic_mount_data(struct fs_context *, void *);
-extern void fc_drop_locked(struct fs_context *);
 extern void vfs_clean_context(struct fs_context *fc);
 extern int finish_clean_context(struct fs_context *fc);
 
index 843d4a7..cf086b0 100644 (file)
@@ -731,7 +731,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
        int work_flags;
        unsigned long flags;
 
-       if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
+       /*
+        * If io-wq is exiting for this task, or if the request has explicitly
+        * been marked as one that should not get executed, cancel it here.
+        */
+       if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
+           (work->flags & IO_WQ_WORK_CANCEL)) {
                io_run_cancel(work, wqe);
                return;
        }
index 0cac361..bf548af 100644 (file)
@@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req)
 {
        struct io_kiocb *cur;
 
-       io_for_each_link(cur, req)
-               io_prep_async_work(cur);
+       if (req->flags & REQ_F_LINK_TIMEOUT) {
+               struct io_ring_ctx *ctx = req->ctx;
+
+               spin_lock_irq(&ctx->completion_lock);
+               io_for_each_link(cur, req)
+                       io_prep_async_work(cur);
+               spin_unlock_irq(&ctx->completion_lock);
+       } else {
+               io_for_each_link(cur, req)
+                       io_prep_async_work(cur);
+       }
 }
 
 static void io_queue_async_work(struct io_kiocb *req)
@@ -1294,6 +1303,17 @@ static void io_queue_async_work(struct io_kiocb *req)
 
        /* init ->work of the whole link before punting */
        io_prep_async_link(req);
+
+       /*
+        * Not expected to happen, but if we do have a bug where this _can_
+        * happen, catch it here and ensure the request is marked as
+        * canceled. That will make io-wq go through the usual work cancel
+        * procedure rather than attempt to run this request (or create a new
+        * worker for it).
+        */
+       if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+               req->work.flags |= IO_WQ_WORK_CANCEL;
+
        trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
                                        &req->work, req->flags);
        io_wq_enqueue(tctx->io_wq, &req->work);
@@ -1939,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb)
                        node = next;
                }
                if (wq_list_empty(&tctx->task_list)) {
+                       spin_lock_irq(&tctx->task_lock);
                        clear_bit(0, &tctx->task_state);
-                       if (wq_list_empty(&tctx->task_list))
+                       if (wq_list_empty(&tctx->task_list)) {
+                               spin_unlock_irq(&tctx->task_lock);
                                break;
+                       }
+                       spin_unlock_irq(&tctx->task_lock);
                        /* another tctx_task_work() is enqueued, yield */
                        if (test_and_set_bit(0, &tctx->task_state))
                                break;
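
The tctx_task_work() hunk closes a race between the consumer clearing task_state and producers appending to task_list: both steps must observe a consistent view under task_lock. A generic sketch of the pattern, condensed from the hunk above (hypothetical, not the verbatim control flow):

	spin_lock_irq(&tctx->task_lock);
	clear_bit(0, &tctx->task_state);
	empty = wq_list_empty(&tctx->task_list);
	spin_unlock_irq(&tctx->task_lock);
	if (empty)
		break;	/* nothing slipped in while the bit was still set */
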
@@ -2036,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req)
        io_req_task_work_add(req);
 }
 
+static void io_req_task_queue_reissue(struct io_kiocb *req)
+{
+       req->io_task_work.func = io_queue_async_work;
+       io_req_task_work_add(req);
+}
+
 static inline void io_queue_next(struct io_kiocb *req)
 {
        struct io_kiocb *nxt = io_req_find_next(req);
@@ -2205,7 +2235,7 @@ static inline bool io_run_task_work(void)
  * Find and free completed poll iocbs
  */
 static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                              struct list_head *done)
+                              struct list_head *done, bool resubmit)
 {
        struct req_batch rb;
        struct io_kiocb *req;
@@ -2220,11 +2250,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
                req = list_first_entry(done, struct io_kiocb, inflight_entry);
                list_del(&req->inflight_entry);
 
-               if (READ_ONCE(req->result) == -EAGAIN &&
+               if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
                    !(req->flags & REQ_F_DONT_REISSUE)) {
                        req->iopoll_completed = 0;
                        req_ref_get(req);
-                       io_queue_async_work(req);
+                       io_req_task_queue_reissue(req);
                        continue;
                }
 
@@ -2244,7 +2274,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 }
 
 static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                       long min)
+                       long min, bool resubmit)
 {
        struct io_kiocb *req, *tmp;
        LIST_HEAD(done);
@@ -2287,7 +2317,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
        }
 
        if (!list_empty(&done))
-               io_iopoll_complete(ctx, nr_events, &done);
+               io_iopoll_complete(ctx, nr_events, &done, resubmit);
 
        return ret;
 }
@@ -2305,7 +2335,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
        while (!list_empty(&ctx->iopoll_list)) {
                unsigned int nr_events = 0;
 
-               io_do_iopoll(ctx, &nr_events, 0);
+               io_do_iopoll(ctx, &nr_events, 0, false);
 
                /* let it sleep and repeat later if can't complete a request */
                if (nr_events == 0)
@@ -2367,7 +2397,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
                            list_empty(&ctx->iopoll_list))
                                break;
                }
-               ret = io_do_iopoll(ctx, &nr_events, min);
+               ret = io_do_iopoll(ctx, &nr_events, min, true);
        } while (!ret && nr_events < min && !need_resched());
 out:
        mutex_unlock(&ctx->uring_lock);
@@ -2417,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
         */
        if (percpu_ref_is_dying(&ctx->refs))
                return false;
+       /*
+        * Play it safe and assume not safe to re-import and reissue if we're
+        * not in the original thread group (or in task context).
+        */
+       if (!same_thread_group(req->task, current) || !in_task())
+               return false;
        return true;
 }
 #else
@@ -2747,7 +2783,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
                req->flags &= ~REQ_F_REISSUE;
                if (io_resubmit_prep(req)) {
                        req_ref_get(req);
-                       io_queue_async_work(req);
+                       io_req_task_queue_reissue(req);
                } else {
                        int cflags = 0;
 
@@ -4802,6 +4838,7 @@ IO_NETOP_FN(recv);
 struct io_poll_table {
        struct poll_table_struct pt;
        struct io_kiocb *req;
+       int nr_entries;
        int error;
 };
 
@@ -4902,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
        if (req->poll.events & EPOLLONESHOT)
                flags = 0;
        if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-               io_poll_remove_waitqs(req);
                req->poll.done = true;
                flags = 0;
        }
@@ -4925,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req)
 
                done = io_poll_complete(req, req->result);
                if (done) {
+                       io_poll_remove_double(req);
                        hash_del(&req->hash_node);
                } else {
                        req->result = 0;
@@ -4995,11 +5032,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
        struct io_kiocb *req = pt->req;
 
        /*
-        * If poll->head is already set, it's because the file being polled
-        * uses multiple waitqueues for poll handling (eg one for read, one
-        * for write). Setup a separate io_poll_iocb if this happens.
+        * The file being polled uses multiple waitqueues for poll handling
+        * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+        * if this happens.
         */
-       if (unlikely(poll->head)) {
+       if (unlikely(pt->nr_entries)) {
                struct io_poll_iocb *poll_one = poll;
 
                /* already have a 2nd entry, fail a third attempt */
@@ -5027,7 +5064,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                *poll_ptr = poll;
        }
 
-       pt->error = 0;
+       pt->nr_entries++;
        poll->head = head;
 
        if (poll->events & EPOLLEXCLUSIVE)
@@ -5104,11 +5141,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
 
        ipt->pt._key = mask;
        ipt->req = req;
-       ipt->error = -EINVAL;
+       ipt->error = 0;
+       ipt->nr_entries = 0;
 
        mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+       if (unlikely(!ipt->nr_entries) && !ipt->error)
+               ipt->error = -EINVAL;
 
        spin_lock_irq(&ctx->completion_lock);
+       if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
+               io_poll_remove_double(req);
        if (likely(poll->head)) {
                spin_lock(&poll->head->lock);
                if (unlikely(list_empty(&poll->wait.entry))) {
@@ -5179,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req)
        ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
                                        io_async_wake);
        if (ret || ipt.error) {
-               io_poll_remove_double(req);
                spin_unlock_irq(&ctx->completion_lock);
                if (ret)
                        return IO_APOLL_READY;
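
Taken together, the poll hunks replace "poll->head != NULL" bookkeeping with an explicit nr_entries counter and move the -EINVAL default out of the table callback. A condensed view of the flow spread across the hunks above (sketch):

	ipt->error = 0;
	ipt->nr_entries = 0;			/* bumped once per waitqueue attached */
	mask = vfs_poll(req->file, &ipt->pt) & poll->events;
	if (unlikely(!ipt->nr_entries) && !ipt->error)
		ipt->error = -EINVAL;		/* vfs_poll() attached no waitqueue at all */
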
@@ -6792,7 +6833,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 
                mutex_lock(&ctx->uring_lock);
                if (!list_empty(&ctx->iopoll_list))
-                       io_do_iopoll(ctx, &nr_events, 0);
+                       io_do_iopoll(ctx, &nr_events, 0, true);
 
                /*
                 * Don't submit if refs are dying, good for io_uring_register(),
@@ -7899,15 +7940,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
        struct io_wq_data data;
        unsigned int concurrency;
 
+       mutex_lock(&ctx->uring_lock);
        hash = ctx->hash_map;
        if (!hash) {
                hash = kzalloc(sizeof(*hash), GFP_KERNEL);
-               if (!hash)
+               if (!hash) {
+                       mutex_unlock(&ctx->uring_lock);
                        return ERR_PTR(-ENOMEM);
+               }
                refcount_set(&hash->refs, 1);
                init_waitqueue_head(&hash->wait);
                ctx->hash_map = hash;
        }
+       mutex_unlock(&ctx->uring_lock);
 
        data.hash = hash;
        data.task = task;
@@ -7981,9 +8026,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
                f = fdget(p->wq_fd);
                if (!f.file)
                        return -ENXIO;
-               fdput(f);
-               if (f.file->f_op != &io_uring_fops)
+               if (f.file->f_op != &io_uring_fops) {
+                       fdput(f);
                        return -EINVAL;
+               }
+               fdput(f);
        }
        if (ctx->flags & IORING_SETUP_SQPOLL) {
                struct task_struct *tsk;
index 7756579..54d7843 100644 (file)
@@ -1529,6 +1529,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
        }
 }
 
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zero starts, will be made upper block aligned.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ *      is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+                                       u64 start, u64 len)
+{
+       int ret;
+       u64 start_block, end_block, nr_blocks;
+       u64 p_block, offset;
+       u32 cluster, p_cluster, nr_clusters;
+       struct super_block *sb = inode->i_sb;
+       u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+       if (start + len < end)
+               end = start + len;
+
+       start_block = ocfs2_blocks_for_bytes(sb, start);
+       end_block = ocfs2_blocks_for_bytes(sb, end);
+       nr_blocks = end_block - start_block;
+       if (!nr_blocks)
+               return 0;
+
+       cluster = ocfs2_bytes_to_clusters(sb, start);
+       ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+                               &nr_clusters, NULL);
+       if (ret)
+               return ret;
+       if (!p_cluster)
+               return 0;
+
+       offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+       p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+       return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
 static int ocfs2_zero_partial_clusters(struct inode *inode,
                                       u64 start, u64 len)
 {
@@ -1538,6 +1577,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        unsigned int csize = osb->s_clustersize;
        handle_t *handle;
+       loff_t isize = i_size_read(inode);
 
        /*
         * The "start" and "end" values are NOT necessarily part of
@@ -1558,6 +1598,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
        if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
                goto out;
 
+       /* No page cache for EOF blocks, issue zero out to disk. */
+       if (end > isize) {
+               /*
+                * Zero out eof blocks in the last cluster starting from
+                * "isize", even when "start" > "isize", because zeroing
+                * out exactly at "start" is complicated: "start" may not
+                * be aligned with the block size, a buffered write would
+                * be required to do that, and buffered writes beyond eof
+                * are not supported.
+                */
+               ret = ocfs2_zeroout_partial_cluster(inode, isize,
+                                       end - isize);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+               if (start >= isize)
+                       goto out;
+               end = isize;
+       }
        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
@@ -1855,45 +1915,6 @@ out:
        return ret;
 }
 
-/*
- * zero out partial blocks of one cluster.
- *
- * start: file offset where zero starts, will be made upper block aligned.
- * len: it will be trimmed to the end of current cluster if "start + len"
- *      is bigger than it.
- */
-static int ocfs2_zeroout_partial_cluster(struct inode *inode,
-                                       u64 start, u64 len)
-{
-       int ret;
-       u64 start_block, end_block, nr_blocks;
-       u64 p_block, offset;
-       u32 cluster, p_cluster, nr_clusters;
-       struct super_block *sb = inode->i_sb;
-       u64 end = ocfs2_align_bytes_to_clusters(sb, start);
-
-       if (start + len < end)
-               end = start + len;
-
-       start_block = ocfs2_blocks_for_bytes(sb, start);
-       end_block = ocfs2_blocks_for_bytes(sb, end);
-       nr_blocks = end_block - start_block;
-       if (!nr_blocks)
-               return 0;
-
-       cluster = ocfs2_bytes_to_clusters(sb, start);
-       ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
-                               &nr_clusters, NULL);
-       if (ret)
-               return ret;
-       if (!p_cluster)
-               return 0;
-
-       offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
-       p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
-       return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
-}
-
 /*
  * Parts of this function taken from xfs_change_file_space()
  */
@@ -1935,7 +1956,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                goto out_inode_unlock;
        }
 
-       orig_isize = i_size_read(inode);
        switch (sr->l_whence) {
        case 0: /*SEEK_SET*/
                break;
@@ -1943,7 +1963,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                sr->l_start += f_pos;
                break;
        case 2: /*SEEK_END*/
-               sr->l_start += orig_isize;
+               sr->l_start += i_size_read(inode);
                break;
        default:
                ret = -EINVAL;
@@ -1998,6 +2018,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                ret = -EINVAL;
        }
 
+       orig_isize = i_size_read(inode);
        /* zeroout eof blocks in the cluster. */
        if (!ret && change_size && orig_isize < size) {
                ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
index bfd946a..8e6ef62 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
 
 #include "internal.h"
 
+/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
 /*
  * The max size that a non-root user is allowed to grow the pipe. Can
  * be set by root in /proc/sys/fs/pipe-max-size
@@ -429,20 +444,20 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 #endif
 
        /*
-        * Only wake up if the pipe started out empty, since
-        * otherwise there should be no readers waiting.
+        * Epoll nonsensically wants a wakeup whether the pipe
+        * was already empty or not.
         *
         * If it wasn't empty we try to merge new data into
         * the last buffer.
         *
         * That naturally merges small writes, but it also
-        * page-aligs the rest of the writes for large writes
+        * page-aligns the rest of the writes for large writes
         * spanning multiple pages.
         */
        head = pipe->head;
-       was_empty = pipe_empty(head, pipe->tail);
+       was_empty = true;
        chars = total_len & (PAGE_SIZE-1);
-       if (chars && !was_empty) {
+       if (chars && !pipe_empty(head, pipe->tail)) {
                unsigned int mask = pipe->ring_size - 1;
                struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
                int offset = buf->offset + buf->len;
@@ -781,8 +796,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
        user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
 
        if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
-               user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
-               pipe_bufs = 1;
+               user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
+               pipe_bufs = PIPE_MIN_DEF_BUFFERS;
        }
 
        if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
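
The PIPE_MIN_DEF_BUFFERS comment describes pipes used as counting semaphores. A minimal user-space sketch of that jobserver pattern (illustrative only, not kernel code):

	#include <unistd.h>

	static int fds[2];

	static void jobserver_setup(int jobs)
	{
		pipe(fds);
		for (int i = 0; i < jobs; i++)
			write(fds[1], "+", 1);	/* one token per job slot */
	}

	static void jobserver_worker(void)
	{
		char tok;

		read(fds[0], &tok, 1);		/* acquire a slot */
		/* ... run the job ... */
		write(fds[1], &tok, 1);		/* release: with a 1-buffer pipe
						 * this write could block on a
						 * non-full pipe */
	}
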
index 476a7ff..ef42729 100644 (file)
@@ -387,6 +387,24 @@ void pathrelse(struct treepath *search_path)
        search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 }
 
+static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
+{
+       struct reiserfs_de_head *deh;
+       int i;
+
+       deh = B_I_DEH(bh, ih);
+       for (i = 0; i < ih_entry_count(ih); i++) {
+               if (deh_location(&deh[i]) > ih_item_len(ih)) {
+                       reiserfs_warning(NULL, "reiserfs-5094",
+                                        "directory entry location seems wrong %h",
+                                        &deh[i]);
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
 {
        struct block_head *blkh;
@@ -454,11 +472,14 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
                                         "(second one): %h", ih);
                        return 0;
                }
-               if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) {
-                       reiserfs_warning(NULL, "reiserfs-5093",
-                                        "item entry count seems wrong %h",
-                                        ih);
-                       return 0;
+               if (is_direntry_le_ih(ih)) {
+                       if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
+                               reiserfs_warning(NULL, "reiserfs-5093",
+                                                "item entry count seems wrong %h",
+                                                ih);
+                               return 0;
+                       }
+                       return has_valid_deh_location(bh, ih);
                }
                prev_location = ih_location(ih);
        }
index 3ffafc7..58481f8 100644 (file)
@@ -2082,6 +2082,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
                unlock_new_inode(root_inode);
        }
 
+       if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) ||
+           !root_inode->i_size) {
+               SWARN(silent, s, "", "corrupt root inode, run fsck");
+               iput(root_inode);
+               errval = -EUCLEAN;
+               goto error;
+       }
+
        s->s_root = d_make_root(root_inode);
        if (!s->s_root)
                goto error;
index f6e0f0c..5c2d806 100644 (file)
@@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
 }
 
 static __always_inline int validate_range(struct mm_struct *mm,
-                                         __u64 *start, __u64 len)
+                                         __u64 start, __u64 len)
 {
        __u64 task_size = mm->task_size;
 
-       *start = untagged_addr(*start);
-
-       if (*start & ~PAGE_MASK)
+       if (start & ~PAGE_MASK)
                return -EINVAL;
        if (len & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return -EINVAL;
-       if (*start < mmap_min_addr)
+       if (start < mmap_min_addr)
                return -EINVAL;
-       if (*start >= task_size)
+       if (start >= task_size)
                return -EINVAL;
-       if (len > task_size - *start)
+       if (len > task_size - start)
                return -EINVAL;
        return 0;
 }
@@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                vm_flags |= VM_UFFD_MINOR;
        }
 
-       ret = validate_range(mm, &uffdio_register.range.start,
+       ret = validate_range(mm, uffdio_register.range.start,
                             uffdio_register.range.len);
        if (ret)
                goto out;
@@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
                goto out;
 
-       ret = validate_range(mm, &uffdio_unregister.start,
+       ret = validate_range(mm, uffdio_unregister.start,
                             uffdio_unregister.len);
        if (ret)
                goto out;
@@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
+       ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
        if (ret)
                goto out;
 
@@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_copy)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
+       ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
        if (ret)
                goto out;
        /*
@@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_zeropage)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
+       ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
                             uffdio_zeropage.range.len);
        if (ret)
                goto out;
@@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
                           sizeof(struct uffdio_writeprotect)))
                return -EFAULT;
 
-       ret = validate_range(ctx->mm, &uffdio_wp.range.start,
+       ret = validate_range(ctx->mm, uffdio_wp.range.start,
                             uffdio_wp.range.len);
        if (ret)
                return ret;
@@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
                           sizeof(uffdio_continue) - (sizeof(__s64))))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_continue.range.start,
+       ret = validate_range(ctx->mm, uffdio_continue.range.start,
                             uffdio_continue.range.len);
        if (ret)
                goto out;
index d548ea4..2c5bcbc 100644 (file)
@@ -411,7 +411,16 @@ struct xfs_log_dinode {
        /* start of the extended dinode, writable fields */
        uint32_t        di_crc;         /* CRC of the inode */
        uint64_t        di_changecount; /* number of attribute changes */
-       xfs_lsn_t       di_lsn;         /* flush sequence */
+
+       /*
+        * The LSN we write to this field during formatting is not a reflection
+        * of the current on-disk LSN. It should never be used for recovery
+        * sequencing, nor should it be recovered into the on-disk inode at all.
+        * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
+        * for details.
+        */
+       xfs_lsn_t       di_lsn;
+
        uint64_t        di_flags2;      /* more random flags */
        uint32_t        di_cowextsize;  /* basic cow extent size for file */
        uint8_t         di_pad2[12];    /* more padding for future expansion */
index d44e8b4..4775485 100644 (file)
@@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer(
 static xfs_lsn_t
 xlog_recover_get_buf_lsn(
        struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
+       struct xfs_buf          *bp,
+       struct xfs_buf_log_format *buf_f)
 {
        uint32_t                magic32;
        uint16_t                magic16;
@@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn(
        void                    *blk = bp->b_addr;
        uuid_t                  *uuid;
        xfs_lsn_t               lsn = -1;
+       uint16_t                blft;
 
        /* v4 filesystems always recover immediately */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                goto recover_immediately;
 
+       /*
+        * realtime bitmap and summary file blocks do not have magic numbers or
+        * UUIDs, so we must recover them immediately.
+        */
+       blft = xfs_blft_from_flags(buf_f);
+       if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+               goto recover_immediately;
+
        magic32 = be32_to_cpu(*(__be32 *)blk);
        switch (magic32) {
        case XFS_ABTB_CRC_MAGIC:
@@ -796,6 +806,7 @@ xlog_recover_get_buf_lsn(
        switch (magicda) {
        case XFS_DIR3_LEAF1_MAGIC:
        case XFS_DIR3_LEAFN_MAGIC:
+       case XFS_ATTR3_LEAF_MAGIC:
        case XFS_DA3_NODE_MAGIC:
                lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
                uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
@@ -919,7 +930,7 @@ xlog_recover_buf_commit_pass2(
         * the verifier will be reset to match whatever recover turns that
         * buffer into.
         */
-       lsn = xlog_recover_get_buf_lsn(mp, bp);
+       lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f);
        if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
                trace_xfs_log_recover_buf_skip(log, buf_f);
                xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
index 7b79518..e0072a6 100644 (file)
@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
 STATIC void
 xfs_log_dinode_to_disk(
        struct xfs_log_dinode   *from,
-       struct xfs_dinode       *to)
+       struct xfs_dinode       *to,
+       xfs_lsn_t               lsn)
 {
        to->di_magic = cpu_to_be16(from->di_magic);
        to->di_mode = cpu_to_be16(from->di_mode);
@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
                to->di_flags2 = cpu_to_be64(from->di_flags2);
                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
                to->di_ino = cpu_to_be64(from->di_ino);
-               to->di_lsn = cpu_to_be64(from->di_lsn);
+               to->di_lsn = cpu_to_be64(lsn);
                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
                uuid_copy(&to->di_uuid, &from->di_uuid);
                to->di_flushiter = 0;
@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
        }
 
        /*
-        * If the inode has an LSN in it, recover the inode only if it's less
-        * than the lsn of the transaction we are replaying. Note: we still
-        * need to replay an owner change even though the inode is more recent
-        * than the transaction as there is no guarantee that all the btree
-        * blocks are more recent than this transaction, too.
+        * If the inode has an LSN in it, recover the inode only if the on-disk
+        * inode's LSN is older than the lsn of the transaction we are
+        * replaying. We can have multiple checkpoints with the same start LSN,
+        * so the current LSN being equal to the on-disk LSN doesn't necessarily
+        * mean that the on-disk inode is more recent than the change being
+        * replayed.
+        *
+        * We must check the current_lsn against the on-disk inode here
+        * because we can't trust the log dinode to contain a valid LSN
+        * (see comment below before replaying the log dinode for details).
+        *
+        * Note: we still need to replay an owner change even though the inode
+        * is more recent than the transaction as there is no guarantee that all
+        * the btree blocks are more recent than this transaction, too.
         */
        if (dip->di_version >= 3) {
                xfs_lsn_t       lsn = be64_to_cpu(dip->di_lsn);
 
-               if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+               if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
                        trace_xfs_log_recover_inode_skip(log, in_f);
                        error = 0;
                        goto out_owner_change;
@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
                goto out_release;
        }
 
-       /* recover the log dinode inode into the on disk inode */
-       xfs_log_dinode_to_disk(ldip, dip);
+       /*
+        * Recover the log dinode inode into the on disk inode.
+        *
+        * The LSN in the log dinode is garbage - it can be zero or reflect
+        * stale in-memory runtime state that isn't coherent with the changes
+        * logged in this transaction or the changes written to the on-disk
+        * inode.  Hence we write the current LSN into the inode because that
+        * matches what xfs_iflush() would write into the inode when flushing
+        * the changes in this transaction.
+        */
+       xfs_log_dinode_to_disk(ldip, dip, current_lsn);
 
        fields = in_f->ilf_fields;
        if (fields & XFS_ILOG_DEV)
index 36fa265..60ac5fd 100644 (file)
@@ -78,13 +78,12 @@ xlog_verify_iclog(
 STATIC void
 xlog_verify_tail_lsn(
        struct xlog             *log,
-       struct xlog_in_core     *iclog,
-       xfs_lsn_t               tail_lsn);
+       struct xlog_in_core     *iclog);
 #else
 #define xlog_verify_dest_ptr(a,b)
 #define xlog_verify_grant_tail(a)
 #define xlog_verify_iclog(a,b,c)
-#define xlog_verify_tail_lsn(a,b,c)
+#define xlog_verify_tail_lsn(a,b)
 #endif
 
 STATIC int
@@ -487,51 +486,80 @@ out_error:
        return error;
 }
 
-static bool
-__xlog_state_release_iclog(
-       struct xlog             *log,
-       struct xlog_in_core     *iclog)
-{
-       lockdep_assert_held(&log->l_icloglock);
-
-       if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
-               /* update tail before writing to iclog */
-               xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
-
-               iclog->ic_state = XLOG_STATE_SYNCING;
-               iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
-               xlog_verify_tail_lsn(log, iclog, tail_lsn);
-               /* cycle incremented when incrementing curr_block */
-               trace_xlog_iclog_syncing(iclog, _RET_IP_);
-               return true;
-       }
-
-       ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
-       return false;
-}
-
 /*
  * Flush iclog to disk if this is the last reference to the given iclog and
  * it is in the WANT_SYNC state.
+ *
+ * If the caller passes in a non-zero @old_tail_lsn and the current log tail
+ * does not match, there may be metadata on disk that must be persisted before
+ * this iclog is written.  To satisfy that requirement, set the
+ * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new
+ * log tail value.
+ *
+ * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the
+ * log tail is updated correctly. NEED_FUA indicates that the iclog will be
+ * written to stable storage, and implies that a commit record is contained
+ * within the iclog. We need to ensure that the log tail does not move beyond
+ * the tail that the first commit record in the iclog ordered against, otherwise
+ * correct recovery of that checkpoint becomes dependent on future operations
+ * performed on this iclog.
+ *
+ * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the
+ * current tail into the iclog. Once the iclog tail is set, future operations must
+ * not modify it, otherwise they potentially violate ordering constraints for
+ * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in
+ * the iclog will get zeroed on activation of the iclog after sync, so we
+ * always capture the tail lsn on the iclog on the first NEED_FUA release
+ * regardless of the number of active reference counts on this iclog.
  */
 int
 xlog_state_release_iclog(
        struct xlog             *log,
-       struct xlog_in_core     *iclog)
+       struct xlog_in_core     *iclog,
+       xfs_lsn_t               old_tail_lsn)
 {
+       xfs_lsn_t               tail_lsn;
+
        lockdep_assert_held(&log->l_icloglock);
 
        trace_xlog_iclog_release(iclog, _RET_IP_);
        if (iclog->ic_state == XLOG_STATE_IOERROR)
                return -EIO;
 
-       if (atomic_dec_and_test(&iclog->ic_refcnt) &&
-           __xlog_state_release_iclog(log, iclog)) {
-               spin_unlock(&log->l_icloglock);
-               xlog_sync(log, iclog);
-               spin_lock(&log->l_icloglock);
+       /*
+        * Grabbing the current log tail needs to be atomic w.r.t. the writing
+        * of the tail LSN into the iclog so we guarantee that the log tail does
+        * not move between deciding if a cache flush is required and writing
+        * the LSN into the iclog below.
+        */
+       if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+               tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+               if (old_tail_lsn && tail_lsn != old_tail_lsn)
+                       iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+
+               if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) &&
+                   !iclog->ic_header.h_tail_lsn)
+                       iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
        }
 
+       if (!atomic_dec_and_test(&iclog->ic_refcnt))
+               return 0;
+
+       if (iclog->ic_state != XLOG_STATE_WANT_SYNC) {
+               ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+               return 0;
+       }
+
+       iclog->ic_state = XLOG_STATE_SYNCING;
+       if (!iclog->ic_header.h_tail_lsn)
+               iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+       xlog_verify_tail_lsn(log, iclog);
+       trace_xlog_iclog_syncing(iclog, _RET_IP_);
+
+       spin_unlock(&log->l_icloglock);
+       xlog_sync(log, iclog);
+       spin_lock(&log->l_icloglock);
        return 0;
 }
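The flag and stamping decisions in the release path above reduce to a few lines; a stand-alone model of just that logic (hypothetical names; the locking, reference counting and state transitions are omitted):

    #include <stdint.h>

    #define NEED_FLUSH      (1u << 0)
    #define NEED_FUA        (1u << 1)

    struct iclog_model {
            unsigned int    flags;
            uint64_t        tail_lsn;       /* 0 == not yet stamped */
    };

    /*
     * If the tail moved since @old_tail was sampled, metadata covered by the
     * new tail may not be on stable storage yet, so demand a cache flush.
     * Stamp the tail into the iclog at most once, on the first NEED_FUA
     * release, so later releases cannot move it and break the ordering of
     * the first commit record in the iclog.
     */
    static void release_tail_logic(struct iclog_model *ic, uint64_t old_tail,
                                   uint64_t cur_tail)
    {
            if (old_tail && cur_tail != old_tail)
                    ic->flags |= NEED_FLUSH;
            if ((ic->flags & NEED_FUA) && !ic->tail_lsn)
                    ic->tail_lsn = cur_tail;
    }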
 
@@ -773,6 +801,21 @@ xfs_log_mount_cancel(
        xfs_log_unmount(mp);
 }
 
+/*
+ * Flush out the iclog to disk ensuring that device caches are flushed and
+ * the iclog hits stable storage before any completion waiters are woken.
+ */
+static inline int
+xlog_force_iclog(
+       struct xlog_in_core     *iclog)
+{
+       atomic_inc(&iclog->ic_refcnt);
+       iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+       if (iclog->ic_state == XLOG_STATE_ACTIVE)
+               xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
+       return xlog_state_release_iclog(iclog->ic_log, iclog, 0);
+}
+
 /*
  * Wait for the iclog and all prior iclogs to be written to disk as required by
  * log force state machine. Waiting on ic_force_wait ensures iclog completions
@@ -827,13 +870,6 @@ xlog_write_unmount_record(
        /* account for space used by record data */
        ticket->t_curr_res -= sizeof(ulf);
 
-       /*
-        * For external log devices, we need to flush the data device cache
-        * first to ensure all metadata writeback is on stable storage before we
-        * stamp the tail LSN into the unmount record.
-        */
-       if (log->l_targ != log->l_mp->m_ddev_targp)
-               blkdev_issue_flush(log->l_targ->bt_bdev);
        return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS);
 }
 
@@ -865,18 +901,7 @@ out_err:
 
        spin_lock(&log->l_icloglock);
        iclog = log->l_iclog;
-       atomic_inc(&iclog->ic_refcnt);
-       if (iclog->ic_state == XLOG_STATE_ACTIVE)
-               xlog_state_switch_iclogs(log, iclog, 0);
-       else
-               ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
-                      iclog->ic_state == XLOG_STATE_IOERROR);
-       /*
-        * Ensure the journal is fully flushed and on stable storage once the
-        * iclog containing the unmount record is written.
-        */
-       iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
-       error = xlog_state_release_iclog(log, iclog);
+       error = xlog_force_iclog(iclog);
        xlog_wait_on_iclog(iclog);
 
        if (tic) {
@@ -1796,10 +1821,20 @@ xlog_write_iclog(
         * metadata writeback and causing priority inversions.
         */
        iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
-       if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH)
+       if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
                iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+               /*
+                * For external log devices, we also need to flush the data
+                * device cache first to ensure all metadata writeback covered
+                * by the LSN in this iclog is on stable storage. This is slow,
+                * but it *must* complete before we issue the external log IO.
+                */
+               if (log->l_targ != log->l_mp->m_ddev_targp)
+                       blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
+       }
        if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
                iclog->ic_bio.bi_opf |= REQ_FUA;
+
        iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
        if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
@@ -2310,7 +2345,7 @@ xlog_write_copy_finish(
        return 0;
 
 release_iclog:
-       error = xlog_state_release_iclog(log, iclog);
+       error = xlog_state_release_iclog(log, iclog, 0);
        spin_unlock(&log->l_icloglock);
        return error;
 }
@@ -2529,7 +2564,7 @@ next_lv:
                ASSERT(optype & XLOG_COMMIT_TRANS);
                *commit_iclog = iclog;
        } else {
-               error = xlog_state_release_iclog(log, iclog);
+               error = xlog_state_release_iclog(log, iclog, 0);
        }
        spin_unlock(&log->l_icloglock);
 
@@ -2567,6 +2602,7 @@ xlog_state_activate_iclog(
        memset(iclog->ic_header.h_cycle_data, 0,
                sizeof(iclog->ic_header.h_cycle_data));
        iclog->ic_header.h_lsn = 0;
+       iclog->ic_header.h_tail_lsn = 0;
 }
 
 /*
@@ -2967,7 +3003,7 @@ restart:
                 * reference to the iclog.
                 */
                if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
-                       error = xlog_state_release_iclog(log, iclog);
+                       error = xlog_state_release_iclog(log, iclog, 0);
                spin_unlock(&log->l_icloglock);
                if (error)
                        return error;
@@ -3131,6 +3167,35 @@ xlog_state_switch_iclogs(
        log->l_iclog = iclog->ic_next;
 }
 
+/*
+ * Force the iclog to disk and check if the iclog has been completed before
+ * xlog_force_iclog() returns. This can happen on synchronous (e.g.
+ * pmem) or fast async storage because we drop the icloglock to issue the IO.
+ * If completion has already occurred, tell the caller so that it can avoid an
+ * unnecessary wait on the iclog.
+ */
+static int
+xlog_force_and_check_iclog(
+       struct xlog_in_core     *iclog,
+       bool                    *completed)
+{
+       xfs_lsn_t               lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+       int                     error;
+
+       *completed = false;
+       error = xlog_force_iclog(iclog);
+       if (error)
+               return error;
+
+       /*
+        * If the iclog has already been completed and reused, the header LSN
+        * will have been rewritten by completion.
+        */
+       if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+               *completed = true;
+       return 0;
+}
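Both forcing paths below consume this helper the same way; for reference, the caller pattern (as it appears in the xfs_log_force() hunk further down) is:

    bool    completed;

    if (xlog_force_and_check_iclog(iclog, &completed))
            goto out_error;
    if (completed)
            goto out_unlock;        /* already on disk, no need to wait */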
+
 /*
  * Write out all data in the in-core log as of this exact moment in time.
  *
@@ -3165,7 +3230,6 @@ xfs_log_force(
 {
        struct xlog             *log = mp->m_log;
        struct xlog_in_core     *iclog;
-       xfs_lsn_t               lsn;
 
        XFS_STATS_INC(mp, xs_log_force);
        trace_xfs_log_force(mp, 0, _RET_IP_);
@@ -3193,39 +3257,33 @@ xfs_log_force(
                iclog = iclog->ic_prev;
        } else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
                if (atomic_read(&iclog->ic_refcnt) == 0) {
-                       /*
-                        * We are the only one with access to this iclog.
-                        *
-                        * Flush it out now.  There should be a roundoff of zero
-                        * to show that someone has already taken care of the
-                        * roundoff from the previous sync.
-                        */
-                       atomic_inc(&iclog->ic_refcnt);
-                       lsn = be64_to_cpu(iclog->ic_header.h_lsn);
-                       xlog_state_switch_iclogs(log, iclog, 0);
-                       if (xlog_state_release_iclog(log, iclog))
+                       /* We have exclusive access to this iclog. */
+                       bool    completed;
+
+                       if (xlog_force_and_check_iclog(iclog, &completed))
                                goto out_error;
 
-                       if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+                       if (completed)
                                goto out_unlock;
                } else {
                        /*
-                        * Someone else is writing to this iclog.
-                        *
-                        * Use its call to flush out the data.  However, the
-                        * other thread may not force out this LR, so we mark
-                        * it WANT_SYNC.
+                        * Someone else is still writing to this iclog, so we
+                        * need to ensure that when they release the iclog it
+                        * gets synced immediately as we may be waiting on it.
                         */
                        xlog_state_switch_iclogs(log, iclog, 0);
                }
-       } else {
-               /*
-                * If the head iclog is not active nor dirty, we just attach
-                * ourselves to the head and go to sleep if necessary.
-                */
-               ;
        }
 
+       /*
+        * The iclog we are about to wait on may contain the checkpoint pushed
+        * by the above xlog_cil_force() call, but it may not have been pushed
+        * to disk yet. Like the ACTIVE case above, we need to make sure caches
+        * are flushed when this iclog is written.
+        */
+       if (iclog->ic_state == XLOG_STATE_WANT_SYNC)
+               iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+
        if (flags & XFS_LOG_SYNC)
                return xlog_wait_on_iclog(iclog);
 out_unlock:
@@ -3245,6 +3303,7 @@ xlog_force_lsn(
        bool                    already_slept)
 {
        struct xlog_in_core     *iclog;
+       bool                    completed;
 
        spin_lock(&log->l_icloglock);
        iclog = log->l_iclog;
@@ -3258,7 +3317,8 @@ xlog_force_lsn(
                        goto out_unlock;
        }
 
-       if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+       switch (iclog->ic_state) {
+       case XLOG_STATE_ACTIVE:
                /*
                 * We sleep here if we haven't already slept (e.g. this is the
                 * first time we've looked at the correct iclog buf) and the
@@ -3281,12 +3341,31 @@ xlog_force_lsn(
                                        &log->l_icloglock);
                        return -EAGAIN;
                }
-               atomic_inc(&iclog->ic_refcnt);
-               xlog_state_switch_iclogs(log, iclog, 0);
-               if (xlog_state_release_iclog(log, iclog))
+               if (xlog_force_and_check_iclog(iclog, &completed))
                        goto out_error;
                if (log_flushed)
                        *log_flushed = 1;
+               if (completed)
+                       goto out_unlock;
+               break;
+       case XLOG_STATE_WANT_SYNC:
+               /*
+                * This iclog may contain the checkpoint pushed by the
+                * xlog_cil_force_seq() call, but there are other writers still
+                * accessing it so it hasn't been pushed to disk yet. Like the
+                * ACTIVE case above, we need to make sure caches are flushed
+                * when this iclog is written.
+                */
+               iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+               break;
+       default:
+               /*
+                * The entire checkpoint was written by the CIL force and is on
+                * its way to disk already. It will be stable when it
+                * completes, so we don't need to manipulate caches here at all.
+                * We just need to wait for completion if necessary.
+                */
+               break;
        }
 
        if (flags & XFS_LOG_SYNC)
@@ -3559,10 +3638,10 @@ xlog_verify_grant_tail(
 STATIC void
 xlog_verify_tail_lsn(
        struct xlog             *log,
-       struct xlog_in_core     *iclog,
-       xfs_lsn_t               tail_lsn)
+       struct xlog_in_core     *iclog)
 {
-    int blocks;
+       xfs_lsn_t       tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn);
+       int             blocks;
 
     if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
        blocks =
index b128aaa..4c44bc3 100644 (file)
@@ -654,8 +654,9 @@ xlog_cil_push_work(
        struct xfs_trans_header thdr;
        struct xfs_log_iovec    lhdr;
        struct xfs_log_vec      lvhdr = { NULL };
+       xfs_lsn_t               preflush_tail_lsn;
        xfs_lsn_t               commit_lsn;
-       xfs_lsn_t               push_seq;
+       xfs_csn_t               push_seq;
        struct bio              bio;
        DECLARE_COMPLETION_ONSTACK(bdev_flush);
 
@@ -730,7 +731,15 @@ xlog_cil_push_work(
         * because we hold the flush lock exclusively. Hence we can now issue
         * a cache flush to ensure all the completed metadata in the journal we
         * are about to overwrite is on stable storage.
+        *
+        * Because we are issuing this cache flush before we've written the
+        * tail lsn to the iclog, we can have metadata IO completions move the
+        * tail forwards between the completion of this flush and the iclog
+        * being written. In this case, we need to re-issue the cache flush
+        * before the iclog write. To detect whether the log tail moves, sample
+        * the tail LSN *before* we issue the flush.
         */
+       preflush_tail_lsn = atomic64_read(&log->l_tail_lsn);
        xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev,
                                &bdev_flush);
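Laid out as a sequence, the window the comment above describes looks like this (a sketch with hypothetical helper names, not the real call chain):

    preflush_tail_lsn = read_log_tail();    /* sample BEFORE flushing */
    flush_data_device();                    /* the async cache flush above */
    /* ... metadata IO completions may move the log tail here ... */
    write_checkpoint_to_iclog();
    release_iclog(iclog, preflush_tail_lsn);
    /* the release path re-samples the tail; if it moved, it sets
       NEED_FLUSH so the iclog write issues a fresh cache flush */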
 
@@ -941,7 +950,7 @@ restart:
         * storage.
         */
        commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
-       xlog_state_release_iclog(log, commit_iclog);
+       xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn);
        spin_unlock(&log->l_icloglock);
        return;
 
index 4c41bbf..f3e79a4 100644 (file)
@@ -59,6 +59,16 @@ enum xlog_iclog_state {
        { XLOG_STATE_DIRTY,     "XLOG_STATE_DIRTY" }, \
        { XLOG_STATE_IOERROR,   "XLOG_STATE_IOERROR" }
 
+/*
+ * In core log flags
+ */
+#define XLOG_ICL_NEED_FLUSH    (1 << 0)        /* iclog needs REQ_PREFLUSH */
+#define XLOG_ICL_NEED_FUA      (1 << 1)        /* iclog needs REQ_FUA */
+
+#define XLOG_ICL_STRINGS \
+       { XLOG_ICL_NEED_FLUSH,  "XLOG_ICL_NEED_FLUSH" }, \
+       { XLOG_ICL_NEED_FUA,    "XLOG_ICL_NEED_FUA" }
+
 
 /*
  * Log ticket flags
@@ -143,9 +153,6 @@ enum xlog_iclog_state {
 
 #define XLOG_COVER_OPS         5
 
-#define XLOG_ICL_NEED_FLUSH    (1 << 0)        /* iclog needs REQ_PREFLUSH */
-#define XLOG_ICL_NEED_FUA      (1 << 1)        /* iclog needs REQ_FUA */
-
 /* Ticket reservation region accounting */ 
 #define XLOG_TIC_LEN_MAX       15
 
@@ -497,7 +504,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
 void   xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
 void   xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
 
-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog);
+int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
+               xfs_lsn_t log_tail_lsn);
 
 /*
  * When we crack an atomic LSN, we sample it first so that the value will not
index f9d8d60..1926029 100644 (file)
@@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class,
                __field(uint32_t, state)
                __field(int32_t, refcount)
                __field(uint32_t, offset)
+               __field(uint32_t, flags)
                __field(unsigned long long, lsn)
                __field(unsigned long, caller_ip)
        ),
@@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class,
                __entry->state = iclog->ic_state;
                __entry->refcount = atomic_read(&iclog->ic_refcnt);
                __entry->offset = iclog->ic_offset;
+               __entry->flags = iclog->ic_flags;
                __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn);
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS",
+       TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __print_symbolic(__entry->state, XLOG_STATE_STRINGS),
                  __entry->refcount,
                  __entry->offset,
                  __entry->lsn,
+                 __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS),
                  (char *)__entry->caller_ip)
 
 );
index 1ae993f..13d9337 100644 (file)
@@ -707,11 +707,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv);
  * @hrv: Hardware Revision of the device, pass -1 to not check _HRV
  *
  * The caller is responsible for invoking acpi_dev_put() on the returned device.
- *
- * FIXME: Due to above requirement there is a window that may invalidate @adev
- * and next iteration will use a dangling pointer, e.g. in the case of a
- * hotplug event. That said, the caller should ensure that this will never
- * happen.
  */
 #define for_each_acpi_dev_match(adev, hid, uid, hrv)                   \
        for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv);        \
@@ -725,7 +720,8 @@ static inline struct acpi_device *acpi_dev_get(struct acpi_device *adev)
 
 static inline void acpi_dev_put(struct acpi_device *adev)
 {
-       put_device(&adev->dev);
+       if (adev)
+               put_device(&adev->dev);
 }
 
 struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle);
index 10100a4..afb27cb 100644 (file)
@@ -68,6 +68,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd,
                               unsigned long arg);
 
 #define DRM_IOCTL_NR(n)                _IOC_NR(n)
+#define DRM_IOCTL_TYPE(n)              _IOC_TYPE(n)
 #define DRM_MAJOR       226
 
 /**
index 3177181..d3afea4 100644 (file)
@@ -57,7 +57,7 @@ struct blk_keyslot_manager;
  * Maximum number of blkcg policies allowed to be registered concurrently.
  * Defined here to simplify include dependency.
  */
-#define BLKCG_MAX_POLS         5
+#define BLKCG_MAX_POLS         6
 
 typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
index 8b77d08..a74cd1c 100644 (file)
@@ -27,19 +27,6 @@ struct task_struct;
 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 
-#define BPF_CGROUP_STORAGE_NEST_MAX    8
-
-struct bpf_cgroup_storage_info {
-       struct task_struct *task;
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
-};
-
-/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
- * to use bpf cgroup storage simultaneously.
- */
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
-               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
 #define for_each_cgroup_storage_type(stype) \
        for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 
@@ -172,44 +159,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
        return BPF_CGROUP_STORAGE_SHARED;
 }
 
-static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
-                                        *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
-       enum bpf_cgroup_storage_type stype;
-       int i, err = 0;
-
-       preempt_disable();
-       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
-               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
-                       continue;
-
-               this_cpu_write(bpf_cgroup_storage_info[i].task, current);
-               for_each_cgroup_storage_type(stype)
-                       this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
-                                      storage[stype]);
-               goto out;
-       }
-       err = -EBUSY;
-       WARN_ON_ONCE(1);
-
-out:
-       preempt_enable();
-       return err;
-}
-
-static inline void bpf_cgroup_storage_unset(void)
-{
-       int i;
-
-       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
-               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
-                       continue;
-
-               this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
-               return;
-       }
-}
-
 struct bpf_cgroup_storage *
 cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
                      void *key, bool locked);
@@ -487,9 +436,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
        return -EINVAL;
 }
 
-static inline int bpf_cgroup_storage_set(
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
-static inline void bpf_cgroup_storage_unset(void) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
                                            struct bpf_map *map) { return 0; }
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
index 0edff8f..c8cc090 100644 (file)
@@ -1142,38 +1142,40 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        struct bpf_prog *include_prog,
                        struct bpf_prog_array **new_array);
 
+struct bpf_run_ctx {};
+
+struct bpf_cg_run_ctx {
+       struct bpf_run_ctx run_ctx;
+       struct bpf_prog_array_item *prog_item;
+};
+
 /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
 #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE                   (1 << 0)
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN                                         (1 << 0)
 
-/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
- * if bpf_cgroup_storage_set() failed, the rest of programs
- * will not execute. This should be a really rare scenario
- * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
- * preemptions all between bpf_cgroup_storage_set() and
- * bpf_cgroup_storage_unset() on the same cpu.
- */
 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)          \
        ({                                                              \
                struct bpf_prog_array_item *_item;                      \
                struct bpf_prog *_prog;                                 \
                struct bpf_prog_array *_array;                          \
+               struct bpf_run_ctx *old_run_ctx;                        \
+               struct bpf_cg_run_ctx run_ctx;                          \
                u32 _ret = 1;                                           \
                u32 func_ret;                                           \
                migrate_disable();                                      \
                rcu_read_lock();                                        \
                _array = rcu_dereference(array);                        \
                _item = &_array->items[0];                              \
+               old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);        \
                while ((_prog = READ_ONCE(_item->prog))) {              \
-                       if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
-                               break;                                  \
+                       run_ctx.prog_item = _item;                      \
                        func_ret = func(_prog, ctx);                    \
                        _ret &= (func_ret & 1);                         \
-                       *(ret_flags) |= (func_ret >> 1);                        \
-                       bpf_cgroup_storage_unset();                     \
+                       *(ret_flags) |= (func_ret >> 1);                \
                        _item++;                                        \
                }                                                       \
+               bpf_reset_run_ctx(old_run_ctx);                         \
                rcu_read_unlock();                                      \
                migrate_enable();                                       \
                _ret;                                                   \
@@ -1184,6 +1186,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                struct bpf_prog_array_item *_item;      \
                struct bpf_prog *_prog;                 \
                struct bpf_prog_array *_array;          \
+               struct bpf_run_ctx *old_run_ctx;        \
+               struct bpf_cg_run_ctx run_ctx;          \
                u32 _ret = 1;                           \
                migrate_disable();                      \
                rcu_read_lock();                        \
@@ -1191,17 +1195,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                if (unlikely(check_non_null && !_array))\
                        goto _out;                      \
                _item = &_array->items[0];              \
-               while ((_prog = READ_ONCE(_item->prog))) {              \
-                       if (!set_cg_storage) {                  \
-                               _ret &= func(_prog, ctx);       \
-                       } else {                                \
-                               if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
-                                       break;                  \
-                               _ret &= func(_prog, ctx);       \
-                               bpf_cgroup_storage_unset();     \
-                       }                               \
+               old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
+               while ((_prog = READ_ONCE(_item->prog))) {      \
+                       run_ctx.prog_item = _item;      \
+                       _ret &= func(_prog, ctx);       \
                        _item++;                        \
                }                                       \
+               bpf_reset_run_ctx(old_run_ctx);         \
 _out:                                                  \
                rcu_read_unlock();                      \
                migrate_enable();                       \
@@ -1284,6 +1284,20 @@ static inline void bpf_enable_instrumentation(void)
        migrate_enable();
 }
 
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+       struct bpf_run_ctx *old_ctx;
+
+       old_ctx = current->bpf_ctx;
+       current->bpf_ctx = new_ctx;
+       return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+       current->bpf_ctx = old_ctx;
+}
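The pair implements a plain save/restore discipline on current->bpf_ctx, which the run-array macros above rely on to publish the current prog_item to helpers. A stand-alone model (hypothetical types and helpers; the kernel versions hang the pointer off the current task rather than a thread-local):

    struct run_ctx { const void *prog_item; };

    static __thread struct run_ctx *cur_ctx;  /* stands in for current->bpf_ctx */

    static struct run_ctx *set_run_ctx(struct run_ctx *new_ctx)
    {
            struct run_ctx *old = cur_ctx;

            cur_ctx = new_ctx;
            return old;
    }

    static void reset_run_ctx(struct run_ctx *old)
    {
            cur_ctx = old;
    }

    static void run_array(const void **items, int n)
    {
            struct run_ctx ctx;
            struct run_ctx *old = set_run_ctx(&ctx);  /* save caller's ctx */
            int i;

            for (i = 0; i < n; i++) {
                    ctx.prog_item = items[i];  /* helpers locate the item here */
                    /* run_prog(items[i]); */
            }
            reset_run_ctx(old);                /* restore on the way out */
    }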
+
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 extern const struct file_operations bpf_iter_fops;
@@ -1428,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
                                        struct seq_file *seq);
 typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
                                         struct bpf_link_info *info);
+typedef const struct bpf_func_proto *
+(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
+                            const struct bpf_prog *prog);
 
 enum bpf_iter_feature {
        BPF_ITER_RESCHED        = BIT(0),
@@ -1440,6 +1457,7 @@ struct bpf_iter_reg {
        bpf_iter_detach_target_t detach_target;
        bpf_iter_show_fdinfo_t show_fdinfo;
        bpf_iter_fill_link_info_t fill_link_info;
+       bpf_iter_get_func_proto_t get_func_proto;
        u32 ctx_arg_info_size;
        u32 feature;
        struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
@@ -1462,6 +1480,8 @@ struct bpf_iter__bpf_map_elem {
 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
@@ -2036,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
 extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
+extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
+extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
        enum bpf_func_id func_id, const struct bpf_prog *prog);
index a9db1ea..ae3ac3a 100644 (file)
@@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
 BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
 #ifdef CONFIG_NET
 BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
+BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
 #endif
index b847e1c..5424124 100644 (file)
@@ -354,8 +354,8 @@ struct bpf_insn_aux_data {
        };
        u64 map_key_state; /* constant (32 bit) key tracking for maps */
        int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
-       int sanitize_stack_off; /* stack slot to be cleared */
        u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
+       bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
        bool zext_dst; /* this insn zero extends dst reg */
        u8 alu_state; /* used in combination with alu_limit */
 
@@ -429,6 +429,7 @@ struct bpf_verifier_env {
        u32 used_map_cnt;               /* number of used maps */
        u32 used_btf_cnt;               /* number of used BTF objects */
        u32 id_gen;                     /* used to generate unique reg IDs */
+       bool explore_alu_limits;
        bool allow_ptr_leaks;
        bool allow_uninit_stack;
        bool allow_ptr_to_map_access;
index ba36989..1797e85 100644 (file)
@@ -73,6 +73,11 @@ struct ctl_table_header;
 /* unused opcode to mark call to interpreter with arguments */
 #define BPF_CALL_ARGS  0xe0
 
+/* unused opcode to mark speculation barrier for mitigating
+ * Speculative Store Bypass
+ */
+#define BPF_NOSPEC     0xc0
+
 /* As per nm, we expose JITed images as text (code) section for
  * kallsyms. That way, tools like perf can find it to match
  * addresses.
@@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
                .off   = 0,                                     \
                .imm   = 0 })
 
+/* Speculation barrier */
+
+#define BPF_ST_NOSPEC()                                                \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ST | BPF_NOSPEC,                   \
+               .dst_reg = 0,                                   \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = 0 })
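One hypothetical use, shown only as a sketch: a rewrite pass could prepend the barrier to a stack store it wants fenced, using BPF_STX_MEM, the existing store macro from this header (the surrounding instruction-buffer management is omitted):

    struct bpf_insn patch[] = {
            BPF_ST_NOSPEC(),        /* speculation barrier before the store */
            BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
    };

The interpreter and JITs presumably map BPF_ST | BPF_NOSPEC to an architecture-appropriate barrier, or to nothing on architectures where Speculative Store Bypass is not a concern.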
+
 /* Internal classic blocks for direct assignment */
 
 #define __BPF_STMT(CODE, K)                                    \
@@ -761,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 
 DECLARE_BPF_DISPATCHER(xdp)
 
+DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp);
+
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
                                            struct xdp_buff *xdp)
 {
@@ -768,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
         * under local_bh_disable(), which provides the needed RCU protection
         * for accessing map entries.
         */
-       return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+       u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+       if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+               if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+                       act = xdp_master_redirect(xdp);
+       }
+
+       return act;
 }
 
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
index e2bc163..6b54982 100644 (file)
@@ -141,6 +141,7 @@ extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
 extern int vfs_parse_fs_param_source(struct fs_context *fc,
                                     struct fs_parameter *param);
+extern void fc_drop_locked(struct fs_context *fc);
 
 /*
  * sget() wrappers to be called from the ->get_tree() op.
index 63b56ab..30ece3a 100644 (file)
@@ -423,7 +423,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev);
 
 void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
 
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev);
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+                                         u16 if_id);
 
 extern struct bus_type fsl_mc_bus_type;
 
index 8c6e8e9..d9a606a 100644 (file)
@@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset,
 
        VM_BUG_ON(offset + len > PAGE_SIZE);
        memcpy(to + offset, from, len);
+       flush_dcache_page(page);
        kunmap_local(to);
 }
 
 static inline void memzero_page(struct page *page, size_t offset, size_t len)
 {
-       char *addr = kmap_atomic(page);
+       char *addr = kmap_local_page(page);
        memset(addr + offset, 0, len);
-       kunmap_atomic(addr);
+       flush_dcache_page(page);
+       kunmap_local(addr);
 }
 
 #endif /* _LINUX_HIGHMEM_H */
index 21daed1..509e18c 100644 (file)
@@ -190,39 +190,4 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_NET_SWITCHDEV)
-
-int switchdev_bridge_port_offload(struct net_device *brport_dev,
-                                 struct net_device *dev, const void *ctx,
-                                 struct notifier_block *atomic_nb,
-                                 struct notifier_block *blocking_nb,
-                                 bool tx_fwd_offload,
-                                 struct netlink_ext_ack *extack);
-void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
-                                    const void *ctx,
-                                    struct notifier_block *atomic_nb,
-                                    struct notifier_block *blocking_nb);
-
-#else
-
-static inline int
-switchdev_bridge_port_offload(struct net_device *brport_dev,
-                             struct net_device *dev, const void *ctx,
-                             struct notifier_block *atomic_nb,
-                             struct notifier_block *blocking_nb,
-                             bool tx_fwd_offload,
-                             struct netlink_ext_ack *extack)
-{
-       return -EINVAL;
-}
-
-static inline void
-switchdev_bridge_port_unoffload(struct net_device *brport_dev,
-                               const void *ctx,
-                               struct notifier_block *atomic_nb,
-                               struct notifier_block *blocking_nb)
-{
-}
-#endif
-
 #endif
index 64ce8cd..93c262e 100644 (file)
@@ -41,9 +41,6 @@ struct ip_sf_socklist {
        __be32                  sl_addr[];
 };
 
-#define IP_SFLSIZE(count)      (sizeof(struct ip_sf_socklist) + \
-       (count) * sizeof(__be32))
-
 #define IP_SFBLOCK     10      /* allocate this many at once */
 
 /* ip_mc_socklist is real list now. Speed is not argument;
index 25e2b4e..aee8ff4 100644 (file)
@@ -81,6 +81,8 @@ int ishtp_register_event_cb(struct ishtp_cl_device *device,
 
 /* Get the device * from ishtp device instance */
 struct device *ishtp_device(struct ishtp_cl_device *cl_device);
+/* wait for IPC resume */
+bool ishtp_wait_resume(struct ishtp_device *dev);
 /* Trace interface for clients */
 ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device);
 /* Get device pointer of PCI device for DMA access */
index cbf46f5..4a53c3c 100644 (file)
@@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range(i, p_start, p_end) \
        __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,   \
-                            MEMBLOCK_NONE, p_start, p_end, NULL)
+                            MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
 
 /**
  * for_each_mem_range_rev - reverse iterate through memblock areas from
@@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range_rev(i, p_start, p_end)                      \
        __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
-                                MEMBLOCK_NONE, p_start, p_end, NULL)
+                                MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
 
 /**
  * for_each_reserved_mem_range - iterate over all reserved memblock areas
index beb9183..c493a80 100644 (file)
@@ -721,8 +721,13 @@ void mhi_device_put(struct mhi_device *mhi_dev);
  *                            host and device execution environments match and
  *                            channels are in a DISABLED state.
  * @mhi_dev: Device associated with the channels
+ * @flags: MHI channel flags
  */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev,
+                            unsigned int flags);
+
+/* Automatically allocate and queue inbound buffers */
+#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
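Existing callers gain a flags argument: clients that manage their own inbound buffers pass 0, while clients that want the core to pre-allocate and queue RX buffers pass the new flag (a sketch; ret and mhi_dev as in any client driver):

    ret = mhi_prepare_for_transfer(mhi_dev, 0);
    /* or */
    ret = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);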
 
 /**
  * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.
index 1efe374..af4dd6e 100644 (file)
@@ -1138,6 +1138,8 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
                           struct net_device *slave);
@@ -1145,6 +1147,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets);
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
index bc7db2e..4ab5c1f 100644 (file)
@@ -29,11 +29,20 @@ enum {
        REP_LOADED,
 };
 
+enum mlx5_switchdev_event {
+       MLX5_SWITCHDEV_EVENT_PAIR,
+       MLX5_SWITCHDEV_EVENT_UNPAIR,
+};
+
 struct mlx5_eswitch_rep;
 struct mlx5_eswitch_rep_ops {
        int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep);
        void (*unload)(struct mlx5_eswitch_rep *rep);
        void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+       int (*event)(struct mlx5_eswitch *esw,
+                    struct mlx5_eswitch_rep *rep,
+                    enum mlx5_switchdev_event event,
+                    void *data);
 };
 
 struct mlx5_eswitch_rep_data {
@@ -63,6 +72,7 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+                                   struct mlx5_eswitch *from_esw,
                                    struct mlx5_eswitch_rep *rep, u32 sqn);
 
 #ifdef CONFIG_MLX5_ESWITCH
@@ -128,6 +138,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 
 u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
@@ -171,6 +182,11 @@ static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
        return 0;
 }
 
+static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_MLX5_ESWITCH */
 
 static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
index 77746f7..0106c67 100644 (file)
@@ -38,6 +38,8 @@
 
 #define MLX5_FS_DEFAULT_FLOW_TAG 0x0
 
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
 enum {
        MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO  = 1 << 16,
        MLX5_FLOW_CONTEXT_ACTION_ENCRYPT        = 1 << 17,
index 6bbae0c..fce3cba 100644 (file)
@@ -1652,7 +1652,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         max_geneve_tlv_option_data_len[0x5];
        u8         reserved_at_570[0x10];
 
-       u8         reserved_at_580[0x33];
+       u8         reserved_at_580[0xb];
+       u8         log_max_dci_stream_channels[0x5];
+       u8         reserved_at_590[0x3];
+       u8         log_max_dci_errored_streams[0x5];
+       u8         reserved_at_598[0x8];
+
+       u8         reserved_at_5a0[0x13];
        u8         log_max_dek[0x5];
        u8         reserved_at_5b8[0x4];
        u8         mini_cqe_resp_stride_index[0x1];
@@ -3021,10 +3027,12 @@ struct mlx5_ifc_qpc_bits {
        u8         reserved_at_3c0[0x8];
        u8         next_send_psn[0x18];
 
-       u8         reserved_at_3e0[0x8];
+       u8         reserved_at_3e0[0x3];
+       u8         log_num_dci_stream_channels[0x5];
        u8         cqn_snd[0x18];
 
-       u8         reserved_at_400[0x8];
+       u8         reserved_at_400[0x3];
+       u8         log_num_dci_errored_streams[0x5];
        u8         deth_sqpn[0x18];
 
        u8         reserved_at_420[0x20];
@@ -3912,7 +3920,7 @@ struct mlx5_ifc_cqc_bits {
        u8         status[0x4];
        u8         reserved_at_4[0x2];
        u8         dbr_umem_valid[0x1];
-       u8         apu_thread_cq[0x1];
+       u8         apu_cq[0x1];
        u8         cqe_sz[0x3];
        u8         cc[0x1];
        u8         reserved_at_c[0x1];
@@ -3938,8 +3946,7 @@ struct mlx5_ifc_cqc_bits {
        u8         cq_period[0xc];
        u8         cq_max_count[0x10];
 
-       u8         reserved_at_a0[0x18];
-       u8         c_eqn[0x8];
+       u8         c_eqn_or_apu_element[0x20];
 
        u8         reserved_at_c0[0x3];
        u8         log_page_size[0x5];
index 52bbd2b..7f8ee09 100644 (file)
@@ -103,11 +103,19 @@ struct page {
                        unsigned long pp_magic;
                        struct page_pool *pp;
                        unsigned long _pp_mapping_pad;
-                       /**
-                        * @dma_addr: might require a 64-bit value on
-                        * 32-bit architectures.
-                        */
-                       unsigned long dma_addr[2];
+                       unsigned long dma_addr;
+                       union {
+                               /**
+                                * dma_addr_upper: might require a 64-bit
+                                * value on 32-bit architectures.
+                                */
+                               unsigned long dma_addr_upper;
+                               /**
+                                * For frag page support; not supported on
+                                * 32-bit architectures with 64-bit DMA.
+                                */
+                               atomic_long_t pp_frag_count;
+                       };
                };
                struct {        /* slab, slob and slub */
                        union {
index d63a94e..bd8d5b8 100644 (file)
@@ -295,18 +295,6 @@ enum netdev_state_t {
 };
 
 
-/*
- * This structure holds boot-time configured netdevice settings. They
- * are then used in the device probing.
- */
-struct netdev_boot_setup {
-       char name[IFNAMSIZ];
-       struct ifmap map;
-};
-#define NETDEV_BOOT_SETUP_MAX 8
-
-int __init netdev_boot_setup(char *str);
-
 struct gro_list {
        struct list_head        list;
        int                     count;
@@ -1330,6 +1318,9 @@ struct netdev_net_notifier {
  *     that got dropped are freed/returned via xdp_return_frame().
  *     Returns negative number, means general error invoking ndo, meaning
  *     no frames were xmit'ed and core-caller will free all frames.
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ *                                             struct xdp_buff *xdp);
+ *      Get the xmit slave of the master device based on the xdp_buff.
  * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
  *      This function is used to wake up the softirq, ksoftirqd or kthread
  *     responsible for sending and/or receiving packets on a specific
@@ -1557,6 +1548,8 @@ struct net_device_ops {
        int                     (*ndo_xdp_xmit)(struct net_device *dev, int n,
                                                struct xdp_frame **xdp,
                                                u32 flags);
+       struct net_device *     (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+                                                         struct xdp_buff *xdp);
        int                     (*ndo_xsk_wakeup)(struct net_device *dev,
                                                  u32 queue_id, u32 flags);
        struct devlink_port *   (*ndo_get_devlink_port)(struct net_device *dev);
@@ -2939,7 +2932,6 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
 }
 
 int netdev_boot_setup_check(struct net_device *dev);
-unsigned long netdev_boot_base(const char *prefix, int unit);
 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
                                       const char *hwaddr);
 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@ -3929,6 +3921,8 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev,
        return 0;
 }
 #endif
+int netif_set_real_num_queues(struct net_device *dev,
+                             unsigned int txq, unsigned int rxq);
 
 static inline struct netdev_rx_queue *
 __netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
@@ -4087,6 +4081,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                      int fd, int expected_fd, u32 flags);
 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u8 dev_xdp_prog_count(struct net_device *dev);
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
@@ -4154,11 +4149,13 @@ void netdev_run_todo(void);
  */
 static inline void dev_put(struct net_device *dev)
 {
+       if (dev) {
 #ifdef CONFIG_PCPU_DEV_REFCNT
-       this_cpu_dec(*dev->pcpu_refcnt);
+               this_cpu_dec(*dev->pcpu_refcnt);
 #else
-       refcount_dec(&dev->dev_refcnt);
+               refcount_dec(&dev->dev_refcnt);
 #endif
+       }
 }
 
 /**
@@ -4169,11 +4166,13 @@ static inline void dev_put(struct net_device *dev)
  */
 static inline void dev_hold(struct net_device *dev)
 {
+       if (dev) {
 #ifdef CONFIG_PCPU_DEV_REFCNT
-       this_cpu_inc(*dev->pcpu_refcnt);
+               this_cpu_inc(*dev->pcpu_refcnt);
 #else
-       refcount_inc(&dev->dev_refcnt);
+               refcount_inc(&dev->dev_refcnt);
 #endif
+       }
 }
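With the NULL checks folded into dev_put() and dev_hold(), callers can drop their own guards; the simplification this enables looks like:

    /* before: callers had to guard the refcount ops themselves */
    if (dev)
            dev_hold(dev);
    /* ... use dev ... */
    if (dev)
            dev_put(dev);

    /* after: both are safe no-ops on a NULL device */
    dev_hold(dev);
    /* ... use dev ... */
    dev_put(dev);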
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
index ec8d07d..c64119a 100644 (file)
@@ -42,6 +42,7 @@ struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
 struct bpf_local_storage;
+struct bpf_run_ctx;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1379,6 +1380,8 @@ struct task_struct {
 #ifdef CONFIG_BPF_SYSCALL
        /* Used by BPF task local storage */
        struct bpf_local_storage __rcu  *bpf_storage;
+       /* Used for BPF run context */
+       struct bpf_run_ctx              *bpf_ctx;
 #endif
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
index 2bcdc8c..6bdb0db 100644 (file)
@@ -1183,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
                                     unsigned int headroom);
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
                                int newtailroom, gfp_t priority);
 int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
@@ -4711,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
 }
 
 #ifdef CONFIG_PAGE_POOL
-static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
-                                       struct page_pool *pp)
+static inline void skb_mark_for_recycle(struct sk_buff *skb)
 {
        skb->pp_recycle = 1;
-       page_pool_store_mem_info(page, pp);
 }
 #endif
 
index 96f3190..14ab0c0 100644 (file)
@@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
        return rcu_dereference_sk_user_data(sk);
 }
 
+static inline void sk_psock_set_state(struct sk_psock *psock,
+                                     enum sk_psock_state_bits bit)
+{
+       set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+                                       enum sk_psock_state_bits bit)
+{
+       clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+                                      enum sk_psock_state_bits bit)
+{
+       return test_bit(bit, &psock->state);
+}
+
+static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+{
+       sk_drops_add(sk, skb);
+       kfree_skb(skb);
+}
+
+static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
+{
+       if (msg->skb)
+               sock_drop(psock->sk, msg->skb);
+       kfree(msg);
+}
+
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
                                      struct sk_msg *msg)
 {
        spin_lock_bh(&psock->ingress_lock);
-       list_add_tail(&msg->list, &psock->ingress_msg);
+       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+               list_add_tail(&msg->list, &psock->ingress_msg);
+       else
+               drop_sk_msg(psock, msg);
        spin_unlock_bh(&psock->ingress_lock);
 }
 
@@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk,
                psock->psock_update_sk_prot(sk, psock, true);
 }
 
-static inline void sk_psock_set_state(struct sk_psock *psock,
-                                     enum sk_psock_state_bits bit)
-{
-       set_bit(bit, &psock->state);
-}
-
-static inline void sk_psock_clear_state(struct sk_psock *psock,
-                                       enum sk_psock_state_bits bit)
-{
-       clear_bit(bit, &psock->state);
-}
-
-static inline bool sk_psock_test_state(const struct sk_psock *psock,
-                                      enum sk_psock_state_bits bit)
-{
-       return test_bit(bit, &psock->state);
-}
-
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
        struct sk_psock *psock;
index 9cce0d8..08ca9ce 100644 (file)
@@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit);
 struct net_device *wd_probe(int unit);
 struct net_device *ne_probe(int unit);
 struct net_device *fmv18x_probe(int unit);
-struct net_device *i82596_probe(int unit);
 struct net_device *ni65_probe(int unit);
 struct net_device *sonic_probe(int unit);
 struct net_device *smc_init(int unit);
-struct net_device *atarilance_probe(int unit);
-struct net_device *sun3lance_probe(int unit);
-struct net_device *sun3_82586_probe(int unit);
-struct net_device *apne_probe(int unit);
 struct net_device *cs89x0_probe(int unit);
-struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
 struct net_device *cops_probe(int unit);
-struct net_device *ltpc_probe(void);
 
 /* Fibre Channel adapters */
 int iph5526_probe(struct net_device *dev);
-
-/* SBNI adapters */
-int sbni_probe(int unit);
index 086b291..f19f7f4 100644 (file)
@@ -58,6 +58,14 @@ struct tc_action {
 #define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \
                              TCA_ACT_HW_STATS_DELAYED)
 
+/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */
+#define TCA_ACT_FLAGS_USER_BITS 16
+#define TCA_ACT_FLAGS_USER_MASK 0xffff
+#define TCA_ACT_FLAGS_POLICE   (1U << TCA_ACT_FLAGS_USER_BITS)
+#define TCA_ACT_FLAGS_BIND     (1U << (TCA_ACT_FLAGS_USER_BITS + 1))
+#define TCA_ACT_FLAGS_REPLACE  (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
+#define TCA_ACT_FLAGS_NO_RTNL  (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
+
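User-supplied flags stay confined to the low 16 bits while kernel-internal state is packed above them, so both can travel in a single word; a sketch of the combination (variable names hypothetical):

    u32 flags = userspace_flags & TCA_ACT_FLAGS_USER_MASK; /* low 16 bits */

    if (bind)
            flags |= TCA_ACT_FLAGS_BIND;    /* kernel-internal bits above */
    if (replace)
            flags |= TCA_ACT_FLAGS_REPLACE;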
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.
  */
@@ -99,8 +107,8 @@ struct tc_action_ops {
        void    (*cleanup)(struct tc_action *);
        int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
        int     (*init)(struct net *net, struct nlattr *nla,
-                       struct nlattr *est, struct tc_action **act, int ovr,
-                       int bind, bool rtnl_held, struct tcf_proto *tp,
+                       struct nlattr *est, struct tc_action **act,
+                       struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack);
        int     (*walk)(struct net *, struct sk_buff *,
                        struct netlink_callback *, int,
@@ -179,18 +187,16 @@ int tcf_action_destroy(struct tc_action *actions[], int bind);
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
                    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
-                   struct nlattr *est, char *name, int ovr, int bind,
+                   struct nlattr *est,
                    struct tc_action *actions[], int init_res[], size_t *attr_size,
-                   bool rtnl_held, struct netlink_ext_ack *extack);
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+                   u32 flags, struct netlink_ext_ack *extack);
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
                                         bool rtnl_held,
                                         struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind,
                                    struct tc_action_ops *a_o, int *init_res,
-                                   bool rtnl_held,
-                                   struct netlink_ext_ack *extack);
+                                   u32 flags, struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
                    int ref, bool terse);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
index 435a2c3..4757d7f 100644 (file)
@@ -70,6 +70,9 @@ struct unix_sock {
        struct socket_wq        peer_wq;
        wait_queue_entry_t      peer_wake;
        struct scm_stat         scm_stat;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       struct sk_buff          *oob_skb;
+#endif
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
index aa52b2e..2ed23a3 100644 (file)
@@ -38,4 +38,7 @@ struct ax_plat_data {
        int (*check_irq)(struct platform_device *pdev);
 };
 
+/* exported from ax88796.c for xsurf100.c */
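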
+extern void ax_NS8390_reinit(struct net_device *dev);
+
 #endif /* __NET_AX88796_PLAT_H */
index a53e944..db4312e 100644 (file)
@@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void);
 void hci_free_dev(struct hci_dev *hdev);
 int hci_register_dev(struct hci_dev *hdev);
 void hci_unregister_dev(struct hci_dev *hdev);
+void hci_cleanup_dev(struct hci_dev *hdev);
 int hci_suspend_dev(struct hci_dev *hdev);
 int hci_resume_dev(struct hci_dev *hdev);
 int hci_reset_dev(struct hci_dev *hdev);
index c8696a2..38785d4 100644 (file)
@@ -303,6 +303,7 @@ int  __bond_3ad_get_active_agg_info(struct bonding *bond,
 int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
                         struct slave *slave);
 int bond_3ad_set_carrier(struct bonding *bond);
+void bond_3ad_update_lacp_active(struct bonding *bond);
 void bond_3ad_update_lacp_rate(struct bonding *bond);
 void bond_3ad_update_ad_actor_settings(struct bonding *bond);
 int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
index 9d382f2..e64833a 100644 (file)
@@ -64,6 +64,7 @@ enum {
        BOND_OPT_AD_USER_PORT_KEY,
        BOND_OPT_NUM_PEER_NOTIF_ALIAS,
        BOND_OPT_PEER_NOTIF_DELAY,
+       BOND_OPT_LACP_ACTIVE,
        BOND_OPT_LAST
 };
 
index 625d9c7..9f3fdc1 100644 (file)
@@ -129,6 +129,7 @@ struct bond_params {
        int updelay;
        int downdelay;
        int peer_notif_delay;
+       int lacp_active;
        int lacp_fast;
        unsigned int min_links;
        int ad_select;
@@ -258,6 +259,7 @@ struct bonding {
        /* protecting ipsec_list */
        spinlock_t ipsec_lock;
 #endif /* CONFIG_XFRM_OFFLOAD */
+       struct bpf_prog *xdp_prog;
 };
 
 #define bond_slave_get_rcu(dev) \
index 84805bd..595fee0 100644 (file)
@@ -71,13 +71,26 @@ struct compat_group_source_req {
 } __packed;
 
 struct compat_group_filter {
-       __u32                            gf_interface;
-       struct __kernel_sockaddr_storage gf_group
-               __aligned(4);
-       __u32                            gf_fmode;
-       __u32                            gf_numsrc;
-       struct __kernel_sockaddr_storage gf_slist[1]
-               __aligned(4);
+       union {
+               struct {
+                       __u32                            gf_interface_aux;
+                       struct __kernel_sockaddr_storage gf_group_aux
+                               __aligned(4);
+                       __u32                            gf_fmode_aux;
+                       __u32                            gf_numsrc_aux;
+                       struct __kernel_sockaddr_storage gf_slist[1]
+                               __aligned(4);
+               } __packed;
+               struct {
+                       __u32                            gf_interface;
+                       struct __kernel_sockaddr_storage gf_group
+                               __aligned(4);
+                       __u32                            gf_fmode;
+                       __u32                            gf_numsrc;
+                       struct __kernel_sockaddr_storage gf_slist_flex[]
+                               __aligned(4);
+               } __packed;
+       };
 } __packed;
 
 #endif /* NET_COMPAT_H */
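
The union above keeps the legacy gf_slist[1] layout (and therefore sizeof and every offset) bit-identical for existing binaries while adding a proper flexible-array view for new code. Sizing through the flex member then replaces the old "array of one" arithmetic; the helper below is an illustration, not part of the patch:

    /* Illustrative: size a filter for numsrc sources via the flex view. */
    static inline size_t example_compat_gf_size(unsigned int numsrc)
    {
            return offsetof(struct compat_group_filter, gf_slist_flex) +
                   numsrc * sizeof(struct __kernel_sockaddr_storage);
    }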
index 08f4c61..0236c77 100644 (file)
@@ -1396,8 +1396,8 @@ struct devlink_ops {
         *
         * Note: @extack can be NULL when port notifier queries the port function.
         */
-       int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port,
-                                        u8 *hw_addr, int *hw_addr_len,
+       int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+                                        int *hw_addr_len,
                                         struct netlink_ext_ack *extack);
        /**
         * @port_function_hw_addr_set: Port function's hardware address set function.
@@ -1406,7 +1406,7 @@ struct devlink_ops {
         * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
         * function handling for a particular port.
         */
-       int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port,
+       int (*port_function_hw_addr_set)(struct devlink_port *port,
                                         const u8 *hw_addr, int hw_addr_len,
                                         struct netlink_ext_ack *extack);
        /**
@@ -1462,8 +1462,7 @@ struct devlink_ops {
         *
         * Return: 0 on success, negative value otherwise.
         */
-       int (*port_fn_state_get)(struct devlink *devlink,
-                                struct devlink_port *port,
+       int (*port_fn_state_get)(struct devlink_port *port,
                                 enum devlink_port_fn_state *state,
                                 enum devlink_port_fn_opstate *opstate,
                                 struct netlink_ext_ack *extack);
@@ -1478,8 +1477,7 @@ struct devlink_ops {
         *
         * Return: 0 on success, negative value otherwise.
         */
-       int (*port_fn_state_set)(struct devlink *devlink,
-                                struct devlink_port *port,
+       int (*port_fn_state_set)(struct devlink_port *port,
                                 enum devlink_port_fn_state state,
                                 struct netlink_ext_ack *extack);
 
@@ -1546,13 +1544,15 @@ struct net *devlink_net(const struct devlink *devlink);
  * Drivers that operate on real HW must use devlink_alloc() instead.
  */
 struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
-                                size_t priv_size, struct net *net);
+                                size_t priv_size, struct net *net,
+                                struct device *dev);
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
-                                           size_t priv_size)
+                                           size_t priv_size,
+                                           struct device *dev)
 {
-       return devlink_alloc_ns(ops, priv_size, &init_net);
+       return devlink_alloc_ns(ops, priv_size, &init_net, dev);
 }
-int devlink_register(struct devlink *devlink, struct device *dev);
+int devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_reload_enable(struct devlink *devlink);
 void devlink_reload_disable(struct devlink *devlink);
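
With the struct device now bound at allocation time, a driver's probe path under the new API looks roughly as below; the ops, priv and function names are hypothetical:

    /* Hypothetical probe: the dev argument moves from devlink_register()
     * to devlink_alloc(); registration takes only the devlink instance.
     */
    static int example_probe(struct pci_dev *pdev)
    {
            struct devlink *devlink;
            int err;

            devlink = devlink_alloc(&example_devlink_ops,
                                    sizeof(struct example_priv), &pdev->dev);
            if (!devlink)
                    return -ENOMEM;

            err = devlink_register(devlink);
            if (err) {
                    devlink_free(devlink);
                    return err;
            }
            return 0;
    }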
index 2af6ee2..0c2cba4 100644 (file)
@@ -79,13 +79,11 @@ enum dsa_tag_protocol {
        DSA_TAG_PROTO_SJA1110           = DSA_TAG_PROTO_SJA1110_VALUE,
 };
 
-struct packet_type;
 struct dsa_switch;
 
 struct dsa_device_ops {
        struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
-       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt);
+       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
        void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
                             int *offset);
        unsigned int needed_headroom;
@@ -239,8 +237,7 @@ struct dsa_port {
 
        /* Copies for faster access in master receive hot path */
        struct dsa_switch_tree *dst;
-       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt);
+       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
 
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
@@ -257,6 +254,8 @@ struct dsa_port {
        struct device_node      *dn;
        unsigned int            ageing_time;
        bool                    vlan_filtering;
+       /* Managed by DSA on user ports and by drivers on CPU and DSA ports */
+       bool                    learning;
        u8                      stp_state;
        struct net_device       *bridge_dev;
        int                     bridge_num;
@@ -717,8 +716,6 @@ struct dsa_switch_ops {
        int     (*port_bridge_flags)(struct dsa_switch *ds, int port,
                                     struct switchdev_brport_flags flags,
                                     struct netlink_ext_ack *extack);
-       int     (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter,
-                                   struct netlink_ext_ack *extack);
 
        /*
         * VLAN support
index 69c9eab..f3c2841 100644 (file)
@@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
 }
 
 /**
- * flow_action_has_one_action() - check if exactly one action is present
+ * flow_offload_has_one_action() - check if exactly one action is present
  * @action: tc filter flow offload action
  *
  * Returns true if exactly one action is present.
index 71bb4cc..42235c1 100644 (file)
@@ -82,9 +82,6 @@ struct ip6_sf_socklist {
        struct in6_addr         sl_addr[];
 };
 
-#define IP6_SFLSIZE(count)     (sizeof(struct ip6_sf_socklist) + \
-       (count) * sizeof(struct in6_addr))
-
 #define IP6_SFBLOCK    10      /* allocate this many at once */
 
 struct ipv6_mc_socklist {
index ca6a3ea..f72ec11 100644 (file)
@@ -160,6 +160,12 @@ struct inet_hashinfo {
                                        ____cacheline_aligned_in_smp;
 };
 
+#define inet_lhash2_for_each_icsk_continue(__icsk) \
+       hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)
+
+#define inet_lhash2_for_each_icsk(__icsk, list) \
+       hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)
+
 #define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
        hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
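
The two new non-RCU variants complement the existing _rcu iterator for callers that already hold the bucket lock; usage mirrors the RCU form. A sketch with a hypothetical callback:

    /* Illustrative: walk one lhash2 bucket under its lock. */
    static void example_walk_bucket(struct inet_listen_hashbucket *ilb2)
    {
            struct inet_connection_sock *icsk;

            inet_lhash2_for_each_icsk(icsk, &ilb2->head)
                    example_visit_listener((struct sock *)icsk);
    }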
 
index 820eae3..5efd0b7 100644 (file)
@@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb)
 {
-       int mtu;
+       unsigned int mtu;
 
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
index c0f0a13..49aa79c 100644 (file)
 #include <linux/if_ether.h>
 
 /* Lengths of frame formats */
-#define LLC_PDU_LEN_I  4       /* header and 2 control bytes */
-#define LLC_PDU_LEN_S  4
-#define LLC_PDU_LEN_U  3       /* header and 1 control byte */
+#define LLC_PDU_LEN_I          4       /* header and 2 control bytes */
+#define LLC_PDU_LEN_S          4
+#define LLC_PDU_LEN_U          3       /* header and 1 control byte */
+/* header and 1 control byte and XID info */
+#define LLC_PDU_LEN_U_XID      (LLC_PDU_LEN_U + sizeof(struct llc_xid_info))
 /* Known SAP addresses */
 #define LLC_GLOBAL_SAP 0xFF
 #define LLC_NULL_SAP   0x00    /* not network-layer visible */
 #define LLC_PDU_TYPE_U_MASK    0x03    /* 8-bit control field */
 #define LLC_PDU_TYPE_MASK      0x03
 
-#define LLC_PDU_TYPE_I 0       /* first bit */
-#define LLC_PDU_TYPE_S 1       /* first two bits */
-#define LLC_PDU_TYPE_U 3       /* first two bits */
+#define LLC_PDU_TYPE_I         0       /* first bit */
+#define LLC_PDU_TYPE_S         1       /* first two bits */
+#define LLC_PDU_TYPE_U         3       /* first two bits */
+#define LLC_PDU_TYPE_U_XID     4       /* private type for detecting XID commands */
 
 #define LLC_PDU_TYPE_IS_I(pdu) \
        ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0)
@@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
 static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
                                       u8 ssap, u8 dsap, u8 cr)
 {
-       const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
+       int hlen = 4; /* default value for I and S types */
        struct llc_pdu_un *pdu;
 
+       switch (type) {
+       case LLC_PDU_TYPE_U:
+               hlen = 3;
+               break;
+       case LLC_PDU_TYPE_U_XID:
+               hlen = 6;
+               break;
+       }
+
        skb_push(skb, hlen);
        skb_reset_network_header(skb);
        pdu = llc_pdu_un_hdr(skb);
@@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb,
        xid_info->fmt_id = LLC_XID_FMT_ID;      /* 0x81 */
        xid_info->type   = svcs_supported;
        xid_info->rw     = rx_window << 1;      /* size of receive window */
-       skb_put(skb, sizeof(struct llc_xid_info));
+
+       /* no need to push/put since llc_pdu_header_init() has already
+        * pushed 3 + 3 bytes
+        */
 }
 
 /**
index e946366..1f4e181 100644 (file)
@@ -75,6 +75,7 @@ struct netns_xfrm {
 #endif
        spinlock_t              xfrm_state_lock;
        seqcount_spinlock_t     xfrm_state_hash_generation;
+       seqcount_spinlock_t     xfrm_policy_hash_generation;
 
        spinlock_t xfrm_policy_lock;
        struct mutex xfrm_cfg_mutex;
index 4770a81..a964dae 100644 (file)
@@ -276,8 +276,8 @@ int nci_register_device(struct nci_dev *ndev);
 void nci_unregister_device(struct nci_dev *ndev);
 int nci_request(struct nci_dev *ndev,
                void (*req)(struct nci_dev *ndev,
-                           unsigned long opt),
-               unsigned long opt, __u32 timeout);
+                           const void *opt),
+               const void *opt, __u32 timeout);
 int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len,
                 const __u8 *payload);
 int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
index 3dd62dd..a408240 100644 (file)
                                        * Please note DMA-sync-for-CPU is still
                                        * device driver responsibility
                                        */
-#define PP_FLAG_ALL            (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG      BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL            (PP_FLAG_DMA_MAP |\
+                                PP_FLAG_DMA_SYNC_DEV |\
+                                PP_FLAG_PAGE_FRAG)
 
 /*
  * Fast allocation side cache array/stack
@@ -88,6 +91,9 @@ struct page_pool {
        unsigned long defer_warn;
 
        u32 pages_state_hold_cnt;
+       unsigned int frag_offset;
+       struct page *frag_page;
+       long frag_users;
 
        /*
         * Data structure for allocation side
@@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
        return page_pool_alloc_pages(pool, gfp);
 }
 
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+                                 unsigned int size, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+                                                   unsigned int *offset,
+                                                   unsigned int size)
+{
+       gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+       return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
 /* get the stored dma direction. A driver might decide to treat this locally and
  * avoid the extra cache line from page_pool to determine the direction
  */
@@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
        page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT        \
+               (sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-       dma_addr_t ret = page->dma_addr[0];
-       if (sizeof(dma_addr_t) > sizeof(unsigned long))
-               ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+       dma_addr_t ret = page->dma_addr;
+
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
        return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
-       page->dma_addr[0] = addr;
-       if (sizeof(dma_addr_t) > sizeof(unsigned long))
-               page->dma_addr[1] = upper_32_bits(addr);
+       page->dma_addr = addr;
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline void page_pool_set_frag_count(struct page *page, long nr)
+{
+       atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
+                                                         long nr)
+{
+       long ret;
+
+       /* As suggested by Alexander, atomic_long_read() may cover up the
+        * reference count errors, so avoid calling atomic_long_read() in
+        * the cases of freeing or draining the page_frags, where we would
+        * not expect it to match or that are slowpath anyway.
+        */
+       if (__builtin_constant_p(nr) &&
+           atomic_long_read(&page->pp_frag_count) == nr)
+               return 0;
+
+       ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+       WARN_ON(ret < 0);
+       return ret;
 }
 
 static inline bool is_page_pool_compiled_in(void)
@@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool)
                spin_unlock_bh(&pool->ring.producer_lock);
 }
 
-/* Store mem_info on struct page and use it while recycling skb frags */
-static inline
-void page_pool_store_mem_info(struct page *page, struct page_pool *pp)
-{
-       page->pp = pp;
-}
-
 #endif /* _NET_PAGE_POOL_H */
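
Taken together, PP_FLAG_PAGE_FRAG plus the frag allocator let a driver carve several RX buffers out of a single pooled page instead of spending a full page per buffer. A hedged driver-side sketch (names hypothetical; the pool must be created with PP_FLAG_PAGE_FRAG set in page_pool_params.flags):

    /* Illustrative RX-buffer refill using the new frag API. */
    static void *example_rx_buf_alloc(struct page_pool *pool,
                                      unsigned int len, unsigned int *offset)
    {
            struct page *page;

            page = page_pool_dev_alloc_frag(pool, offset, len);
            if (!page)
                    return NULL;

            return page_address(page) + *offset;
    }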
index dc28fcb..8fb47fc 100644 (file)
@@ -319,7 +319,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
                      struct nlattr **tb, struct nlattr *rate_tlv,
-                     struct tcf_exts *exts, bool ovr, bool rtnl_held,
+                     struct tcf_exts *exts, u32 flags,
                      struct netlink_ext_ack *extack);
 void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
@@ -329,6 +329,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 
 /**
  * struct tcf_pkt_info - packet information
+ *
+ * @ptr: start of the pkt data
+ * @nexthdr: offset of the next header
  */
 struct tcf_pkt_info {
        unsigned char *         ptr;
@@ -347,6 +350,7 @@ struct tcf_ematch_ops;
  * @ops: the operations lookup table of the corresponding ematch module
  * @datalen: length of the ematch specific configuration data
  * @data: ematch specific data
+ * @net: the network namespace
  */
 struct tcf_ematch {
        struct tcf_ematch_ops * ops;
index 384e800..9f48733 100644 (file)
@@ -153,7 +153,8 @@ struct rtnl_af_ops {
                                                    u32 ext_filter_mask);
 
        int                     (*validate_link_af)(const struct net_device *dev,
-                                                   const struct nlattr *attr);
+                                                   const struct nlattr *attr,
+                                                   struct netlink_ext_ack *extack);
        int                     (*set_link_af)(struct net_device *dev,
                                               const struct nlattr *attr,
                                               struct netlink_ext_ack *extack);
index 9ed33e6..c0069ac 100644 (file)
@@ -357,7 +357,7 @@ struct tcf_proto_ops {
        int                     (*change)(struct net *net, struct sk_buff *,
                                        struct tcf_proto*, unsigned long,
                                        u32 handle, struct nlattr **,
-                                       void **, bool, bool,
+                                       void **, u32,
                                        struct netlink_ext_ack *);
        int                     (*delete)(struct tcf_proto *tp, void *arg,
                                          bool *last, bool rtnl_held,
index 32fc4a3..651bba6 100644 (file)
@@ -984,6 +984,7 @@ struct sctp_transport {
        } cacc;
 
        struct {
+               __u32 last_rtx_chunks;
                __u16 pmtu;
                __u16 probe_size;
                __u16 probe_high;
@@ -1024,8 +1025,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
-void sctp_transport_pl_send(struct sctp_transport *t);
-void sctp_transport_pl_recv(struct sctp_transport *t);
+bool sctp_transport_pl_send(struct sctp_transport *t);
+bool sctp_transport_pl_recv(struct sctp_transport *t);
 
 
 /* This is the structure we use to queue packets as they come into
index ff1be7e..6e76145 100644 (file)
@@ -68,6 +68,7 @@
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
 #include <net/l3mdev.h>
+#include <uapi/linux/socket.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk)
 #define RCV_SHUTDOWN   1
 #define SEND_SHUTDOWN  2
 
-#define SOCK_SNDBUF_LOCK       1
-#define SOCK_RCVBUF_LOCK       2
 #define SOCK_BINDADDR_LOCK     4
 #define SOCK_BINDPORT_LOCK     8
 
index 66468ff..60d806b 100644 (file)
@@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp {
 
 typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
 
+struct switchdev_brport {
+       struct net_device *dev;
+       const void *ctx;
+       struct notifier_block *atomic_nb;
+       struct notifier_block *blocking_nb;
+       bool tx_fwd_offload;
+};
+
 enum switchdev_notifier_type {
        SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
        SWITCHDEV_FDB_DEL_TO_BRIDGE,
@@ -197,6 +205,9 @@ enum switchdev_notifier_type {
        SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
        SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
        SWITCHDEV_VXLAN_FDB_OFFLOADED,
+
+       SWITCHDEV_BRPORT_OFFLOADED,
+       SWITCHDEV_BRPORT_UNOFFLOADED,
 };
 
 struct switchdev_notifier_info {
@@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info {
        bool handled;
 };
 
+struct switchdev_notifier_brport_info {
+       struct switchdev_notifier_info info; /* must be first */
+       const struct switchdev_brport brport;
+};
+
 static inline struct net_device *
 switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
 {
@@ -246,6 +262,17 @@ switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *f
 
 #ifdef CONFIG_NET_SWITCHDEV
 
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+                                 struct net_device *dev, const void *ctx,
+                                 struct notifier_block *atomic_nb,
+                                 struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
+                                 struct netlink_ext_ack *extack);
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                                    const void *ctx,
+                                    struct notifier_block *atomic_nb,
+                                    struct notifier_block *blocking_nb);
+
 void switchdev_deferred_process(void);
 int switchdev_port_attr_set(struct net_device *dev,
                            const struct switchdev_attr *attr,
@@ -316,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
                                      struct netlink_ext_ack *extack));
 #else
 
+static inline int
+switchdev_bridge_port_offload(struct net_device *brport_dev,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void
+switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                               const void *ctx,
+                               struct notifier_block *atomic_nb,
+                               struct notifier_block *blocking_nb)
+{
+}
+
 static inline void switchdev_deferred_process(void)
 {
 }
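
A switchdev driver is expected to announce offload from its bridge-join path and tear it down again on leave; a sketch of the join side, with the driver objects and notifier blocks as hypothetical names:

    /* Illustrative: announce that this port's FDB/attrs are offloaded. */
    static int example_port_bridge_join(struct example_port *port,
                                        struct netlink_ext_ack *extack)
    {
            return switchdev_bridge_port_offload(port->netdev, port->netdev,
                                                 port,
                                                 &example_switchdev_nb,
                                                 &example_switchdev_blocking_nb,
                                                 false, extack);
    }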
index 784d5c3..3166dc1 100644 (file)
@@ -1958,7 +1958,6 @@ struct tcp_iter_state {
        struct seq_net_private  p;
        enum tcp_seq_states     state;
        struct sock             *syn_wait_sk;
-       struct tcp_seq_afinfo   *bpf_seq_afinfo;
        int                     bucket, offset, sbucket, num;
        loff_t                  last_pos;
 };
index 675849d..8e6dd8a 100644 (file)
@@ -712,6 +712,12 @@ struct snd_soc_dai_link {
        /* Do not create a PCM for this DAI link (Backend link) */
        unsigned int ignore:1;
 
+       /* This flag reorders the stop sequence. When enabled, the DMA
+        * controller stop sequence is invoked first, followed by the
+        * CPU DAI driver stop sequence.
+        */
+       unsigned int stop_dma_first:1;
+
 #ifdef CONFIG_SND_SOC_TOPOLOGY
        struct snd_soc_dobj dobj; /* For topology */
 #endif
index d588c24..1f0a2b4 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
index df6e821..3893646 100644 (file)
@@ -78,11 +78,20 @@ enum {
 enum {
        J1939_NLA_PAD,
        J1939_NLA_BYTES_ACKED,
+       J1939_NLA_TOTAL_SIZE,
+       J1939_NLA_PGN,
+       J1939_NLA_SRC_NAME,
+       J1939_NLA_DEST_NAME,
+       J1939_NLA_SRC_ADDR,
+       J1939_NLA_DEST_ADDR,
 };
 
 enum {
        J1939_EE_INFO_NONE,
        J1939_EE_INFO_TX_ABORT,
+       J1939_EE_INFO_RX_RTS,
+       J1939_EE_INFO_RX_DPO,
+       J1939_EE_INFO_RX_ABORT,
 };
 
 struct j1939_filter {
index e33997b..edc346a 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */
 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
 #ifndef _USR_IDXD_H_
 #define _USR_IDXD_H_
index 49b22af..5310003 100644 (file)
@@ -855,6 +855,7 @@ enum {
        IFLA_BOND_AD_ACTOR_SYSTEM,
        IFLA_BOND_TLB_DYNAMIC_LB,
        IFLA_BOND_PEER_NOTIF_DELAY,
+       IFLA_BOND_AD_LACP_ACTIVE,
        __IFLA_BOND_MAX,
 };
 
index d1b3270..1416822 100644 (file)
@@ -188,11 +188,22 @@ struct ip_mreq_source {
 };
 
 struct ip_msfilter {
-       __be32          imsf_multiaddr;
-       __be32          imsf_interface;
-       __u32           imsf_fmode;
-       __u32           imsf_numsrc;
-       __be32          imsf_slist[1];
+       union {
+               struct {
+                       __be32          imsf_multiaddr_aux;
+                       __be32          imsf_interface_aux;
+                       __u32           imsf_fmode_aux;
+                       __u32           imsf_numsrc_aux;
+                       __be32          imsf_slist[1];
+               };
+               struct {
+                       __be32          imsf_multiaddr;
+                       __be32          imsf_interface;
+                       __u32           imsf_fmode;
+                       __u32           imsf_numsrc;
+                       __be32          imsf_slist_flex[];
+               };
+       };
 };
 
 #define IP_MSFILTER_SIZE(numsrc) \
@@ -211,11 +222,22 @@ struct group_source_req {
 };
 
 struct group_filter {
-       __u32                            gf_interface;  /* interface index */
-       struct __kernel_sockaddr_storage gf_group;      /* multicast address */
-       __u32                            gf_fmode;      /* filter mode */
-       __u32                            gf_numsrc;     /* number of sources */
-       struct __kernel_sockaddr_storage gf_slist[1];   /* interface index */
+       union {
+               struct {
+                       __u32                            gf_interface_aux; /* interface index */
+                       struct __kernel_sockaddr_storage gf_group_aux;     /* multicast address */
+                       __u32                            gf_fmode_aux;     /* filter mode */
+                       __u32                            gf_numsrc_aux;    /* number of sources */
+                       struct __kernel_sockaddr_storage gf_slist[1];      /* interface index */
+               };
+               struct {
+                       __u32                            gf_interface;    /* interface index */
+                       struct __kernel_sockaddr_storage gf_group;        /* multicast address */
+                       __u32                            gf_fmode;        /* filter mode */
+                       __u32                            gf_numsrc;       /* number of sources */
+                       struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */
+               };
+       };
 };
 
 #define GROUP_FILTER_SIZE(numsrc) \
index 025c40f..6836ccb 100644 (file)
@@ -22,6 +22,7 @@ enum {
        __TCA_ACT_MAX
 };
 
+/* See other TCA_ACT_FLAGS_* flags in include/net/act_api.h. */
 #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
                                         * actions stats.
                                         */
index c3409c8..eb0a9a5 100644 (file)
@@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage {
        };
 };
 
+#define SOCK_SNDBUF_LOCK       1
+#define SOCK_RCVBUF_LOCK       2
+
+#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+
 #endif /* _UAPI_LINUX_SOCKET_H */
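
Moving SOCK_SNDBUF_LOCK/SOCK_RCVBUF_LOCK into UAPI pairs with the new SO_BUF_LOCK socket option (see the asm/socket.h hunk above). A user-space sketch, assuming the option takes the lock mask as an int and that the system headers already carry these definitions:

    #include <sys/socket.h>
    #include <linux/socket.h>

    /* Pin both buffer sizes against kernel auto-tuning. */
    static int example_lock_bufs(int fd)
    {
            int lock = SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;

            return setsockopt(fd, SOL_SOCKET, SO_BUF_LOCK,
                              &lock, sizeof(lock));
    }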
index 26b638a..a7085e0 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB */
 /*
  * Copyright (c) 2006 - 2021 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
index f5b8246..11cbbec 100644 (file)
@@ -1221,7 +1221,7 @@ trace_initcall_start_cb(void *data, initcall_t fn)
 {
        ktime_t *calltime = (ktime_t *)data;
 
-       printk(KERN_DEBUG "calling  %pS @ %i\n", fn, task_pid_nr(current));
+       printk(KERN_DEBUG "calling  %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled());
        *calltime = ktime_get();
 }
 
@@ -1235,8 +1235,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
        rettime = ktime_get();
        delta = ktime_sub(rettime, *calltime);
        duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-       printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
-                fn, ret, duration);
+       printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n",
+                fn, ret, duration, irqs_disabled());
 }
 
 static ktime_t initcall_calltime;
index 2d4fbdb..2e9d47b 100644 (file)
@@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
        return supported;
 }
 
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+       const struct bpf_iter_target_info *tinfo;
+       const struct bpf_func_proto *fn = NULL;
+
+       mutex_lock(&targets_mutex);
+       list_for_each_entry(tinfo, &targets, list) {
+               if (tinfo->btf_id == prog->aux->attach_btf_id) {
+                       const struct bpf_iter_reg *reg_info;
+
+                       reg_info = tinfo->reg_info;
+                       if (reg_info->get_func_proto)
+                               fn = reg_info->get_func_proto(func_id, prog);
+                       break;
+               }
+       }
+       mutex_unlock(&targets_mutex);
+
+       return fn;
+}
+
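On the registration side, an iterator target opts in by filling the new get_func_proto member of its bpf_iter_reg; anything it does not handle falls back to the default proto lookup. A sketch with hypothetical names:

    /* Illustrative: expose one extra helper to programs attached to
     * this target only; example_setsockopt_proto is hypothetical.
     */
    static const struct bpf_func_proto *
    example_iter_get_func_proto(enum bpf_func_id func_id,
                                const struct bpf_prog *prog)
    {
            switch (func_id) {
            case BPF_FUNC_setsockopt:
                    return &example_setsockopt_proto;
            default:
                    return NULL;
            }
    }

    static const struct bpf_iter_reg example_iter_reg = {
            .target         = "example",
            .get_func_proto = example_iter_get_func_proto,
    };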
 static void bpf_iter_link_release(struct bpf_link *link)
 {
        struct bpf_iter_link *iter_link =
index 7780131..c395024 100644 (file)
@@ -4825,6 +4825,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
 
                if (ctx_arg_info->offset == off) {
+                       if (!ctx_arg_info->btf_id) {
+                               bpf_log(log, "invalid btf_id for context argument offset %u\n", off);
+                               return false;
+                       }
+
                        info->reg_type = ctx_arg_info->reg_type;
                        info->btf = btf_vmlinux;
                        info->btf_id = ctx_arg_info->btf_id;
index 9b15774..fe807b2 100644 (file)
@@ -32,6 +32,8 @@
 #include <linux/perf_event.h>
 #include <linux/extable.h>
 #include <linux/log2.h>
+
+#include <asm/barrier.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
                /* Non-UAPI available opcodes. */
                [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
                [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+               [BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
                [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
                [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
                [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
@@ -1559,7 +1562,7 @@ select_insn:
 
                if (unlikely(index >= array->map.max_entries))
                        goto out;
-               if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
+               if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
                        goto out;
 
                tail_call_cnt++;
@@ -1621,7 +1624,21 @@ out:
        COND_JMP(s, JSGE, >=)
        COND_JMP(s, JSLE, <=)
 #undef COND_JMP
-       /* STX and ST and LDX*/
+       /* ST, STX and LDX */
+       ST_NOSPEC:
+               /* Speculation barrier for mitigating Speculative Store Bypass.
+                * In case of arm64, we rely on the firmware mitigation as
+                * controlled via the ssbd kernel parameter. Whenever the
+                * mitigation is enabled, it works for all of the kernel code
+                * with no need to provide any additional instructions here.
+                * In case of x86, we use 'lfence' insn for mitigation. We
+                * reuse preexisting logic from Spectre v1 mitigation that
+                * happens to produce the required code on x86 for v4 as well.
+                */
+#ifdef CONFIG_X86
+               barrier_nospec();
+#endif
+               CONT;
 #define LDST(SIZEOP, SIZE)                                             \
        STX_MEM_##SIZEOP:                                               \
                *(SIZE *)(unsigned long) (DST + insn->off) = SRC;       \
index 542e94f..f02d045 100644 (file)
@@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
 }
 
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
-                        int exclude_ifindex)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
 {
-       if (!obj || obj->dev->ifindex == exclude_ifindex ||
+       if (!obj ||
            !obj->dev->netdev_ops->ndo_xdp_xmit)
                return false;
 
@@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
        return 0;
 }
 
+static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
+{
+       while (num_excluded--) {
+               if (ifindex == excluded[num_excluded])
+                       return true;
+       }
+       return false;
+}
+
+/* Get ifindex of each upper device. 'indexes' must be able to hold at
+ * least MAX_NEST_DEV elements.
+ * Returns the number of ifindexes added.
+ */
+static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+{
+       struct net_device *upper;
+       struct list_head *iter;
+       int n = 0;
+
+       netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+               indexes[n++] = upper->ifindex;
+       }
+       return n;
+}
+
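On the BPF program side nothing changes: exclusion is still requested with BPF_F_EXCLUDE_INGRESS, which after this patch also filters the ingress device's upper devices (e.g. a bond master). A minimal XDP sketch:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
            __uint(max_entries, 32);
            __uint(key_size, sizeof(int));
            __uint(value_size, sizeof(int));
    } tx_ports SEC(".maps");

    SEC("xdp")
    int xdp_broadcast(struct xdp_md *ctx)
    {
            /* Broadcast to every devmap entry except the ingress device
             * and, with this change, its uppers.
             */
            return bpf_redirect_map(&tx_ports, 0,
                                    BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
    }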
 int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                          struct bpf_map *map, bool exclude_ingress)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
        struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       int excluded_devices[1 + MAX_NEST_DEV];
        struct hlist_head *head;
        struct xdp_frame *xdpf;
+       int num_excluded = 0;
        unsigned int i;
        int err;
 
+       if (exclude_ingress) {
+               num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+               excluded_devices[num_excluded++] = dev_rx->ifindex;
+       }
+
        xdpf = xdp_convert_buff_to_frame(xdp);
        if (unlikely(!xdpf))
                return -EOVERFLOW;
@@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                for (i = 0; i < map->max_entries; i++) {
                        dst = rcu_dereference_check(dtab->netdev_map[i],
                                                    rcu_read_lock_bh_held());
-                       if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                       if (!is_valid_dst(dst, xdp))
+                               continue;
+
+                       if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
                                continue;
 
                        /* we only need n-1 clones; last_dst enqueued below */
@@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                        head = dev_map_index_hash(dtab, i);
                        hlist_for_each_entry_rcu(dst, head, index_hlist,
                                                 lockdep_is_held(&dtab->index_lock)) {
-                               if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                               if (!is_valid_dst(dst, xdp))
+                                       continue;
+
+                               if (is_ifindex_excluded(excluded_devices, num_excluded,
+                                                       dst->dev->ifindex))
                                        continue;
 
                                /* we only need n-1 clones; last_dst enqueued below */
@@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                           bool exclude_ingress)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
        struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       int excluded_devices[1 + MAX_NEST_DEV];
        struct hlist_head *head;
        struct hlist_node *next;
+       int num_excluded = 0;
        unsigned int i;
        int err;
 
+       if (exclude_ingress) {
+               num_excluded = get_upper_ifindexes(dev, excluded_devices);
+               excluded_devices[num_excluded++] = dev->ifindex;
+       }
+
        if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
                for (i = 0; i < map->max_entries; i++) {
                        dst = rcu_dereference_check(dtab->netdev_map[i],
                                                    rcu_read_lock_bh_held());
-                       if (!dst || dst->dev->ifindex == exclude_ifindex)
+                       if (!dst)
+                               continue;
+
+                       if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
                                continue;
 
                        /* we only need n-1 clones; last_dst enqueued below */
@@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                                return err;
 
                        last_dst = dst;
+
                }
        } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
                for (i = 0; i < dtab->n_buckets; i++) {
                        head = dev_map_index_hash(dtab, i);
                        hlist_for_each_entry_safe(dst, next, head, index_hlist) {
-                               if (!dst || dst->dev->ifindex == exclude_ifindex)
+                               if (!dst)
+                                       continue;
+
+                               if (is_ifindex_excluded(excluded_devices, num_excluded,
+                                                       dst->dev->ifindex))
                                        continue;
 
                                /* we only need n-1 clones; last_dst enqueued below */
index bbfc6bb..ca3cd9a 100644 (file)
@@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
                        verbose(cbs->private_data, "BUG_%02x\n", insn->code);
                }
        } else if (class == BPF_ST) {
-               if (BPF_MODE(insn->code) != BPF_MEM) {
+               if (BPF_MODE(insn->code) == BPF_MEM) {
+                       verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
+                               insn->code,
+                               bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+                               insn->dst_reg,
+                               insn->off, insn->imm);
+               } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) {
+                       verbose(cbs->private_data, "(%02x) nospec\n", insn->code);
+               } else {
                        verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
-                       return;
                }
-               verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
-                       insn->code,
-                       bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
-                       insn->dst_reg,
-                       insn->off, insn->imm);
        } else if (class == BPF_LDX) {
                if (BPF_MODE(insn->code) != BPF_MEM) {
                        verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
index 9fe846e..15746f7 100644 (file)
@@ -393,8 +393,6 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 };
 
 #ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
-               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
@@ -403,17 +401,13 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
         * verifier checks that its value is correct.
         */
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
-       struct bpf_cgroup_storage *storage = NULL;
+       struct bpf_cgroup_storage *storage;
+       struct bpf_cg_run_ctx *ctx;
        void *ptr;
-       int i;
 
-       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
-               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
-                       continue;
-
-               storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
-               break;
-       }
+       /* get current cgroup storage from BPF run context */
+       ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+       storage = ctx->prog_item->cgroup_storage[stype];
 
        if (stype == BPF_CGROUP_STORAGE_SHARED)
                ptr = &READ_ONCE(storage->buf)->data[0];
index 95d70a0..035e9e3 100644 (file)
@@ -1,6 +1,7 @@
 //SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf-cgroup.h>
 #include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
 #include <linux/btf.h>
 #include <linux/bug.h>
 #include <linux/filter.h>
@@ -11,9 +12,6 @@
 
 #ifdef CONFIG_CGROUP_BPF
 
-DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
-              bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
 #include "../cgroup/cgroup-internal.h"
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK                                 \
@@ -286,9 +284,17 @@ enoent:
 
 static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 {
+       __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE;
        int numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_cgroup_storage_map *map;
 
+       /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percpu
+        * is the same as other local storages.
+        */
+       if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+               max_value_size = min_t(__u32, max_value_size,
+                                      PCPU_MIN_UNIT_SIZE);
+
        if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
            attr->key_size != sizeof(__u64))
                return ERR_PTR(-EINVAL);
@@ -296,7 +302,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
        if (attr->value_size == 0)
                return ERR_PTR(-EINVAL);
 
-       if (attr->value_size > PAGE_SIZE)
+       if (attr->value_size > max_value_size)
                return ERR_PTR(-E2BIG);
 
        if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
@@ -409,7 +415,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
 static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
                                         struct seq_file *m)
 {
-       enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+       enum bpf_cgroup_storage_type stype;
        struct bpf_cgroup_storage *storage;
        int cpu;
 
index 475c28e..5ea2238 100644 (file)
@@ -2667,6 +2667,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
        cur = env->cur_state->frame[env->cur_state->curframe];
        if (value_regno >= 0)
                reg = &cur->regs[value_regno];
+       if (!env->bypass_spec_v4) {
+               bool sanitize = reg && is_spillable_regtype(reg->type);
+
+               for (i = 0; i < size; i++) {
+                       if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+                               sanitize = true;
+                               break;
+                       }
+               }
+
+               if (sanitize)
+                       env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+       }
 
        if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
            !register_is_null(reg) && env->bpf_capable) {
@@ -2689,47 +2702,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
                        verbose(env, "invalid size of register spill\n");
                        return -EACCES;
                }
-
                if (state != cur && reg->type == PTR_TO_STACK) {
                        verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
                        return -EINVAL;
                }
-
-               if (!env->bypass_spec_v4) {
-                       bool sanitize = false;
-
-                       if (state->stack[spi].slot_type[0] == STACK_SPILL &&
-                           register_is_const(&state->stack[spi].spilled_ptr))
-                               sanitize = true;
-                       for (i = 0; i < BPF_REG_SIZE; i++)
-                               if (state->stack[spi].slot_type[i] == STACK_MISC) {
-                                       sanitize = true;
-                                       break;
-                               }
-                       if (sanitize) {
-                               int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
-                               int soff = (-spi - 1) * BPF_REG_SIZE;
-
-                               /* detected reuse of integer stack slot with a pointer
-                                * which means either llvm is reusing stack slot or
-                                * an attacker is trying to exploit CVE-2018-3639
-                                * (speculative store bypass)
-                                * Have to sanitize that slot with preemptive
-                                * store of zero.
-                                */
-                               if (*poff && *poff != soff) {
-                                       /* disallow programs where single insn stores
-                                        * into two different stack slots, since verifier
-                                        * cannot sanitize them
-                                        */
-                                       verbose(env,
-                                               "insn %d cannot access two stack slots fp%d and fp%d",
-                                               insn_idx, *poff, soff);
-                                       return -EINVAL;
-                               }
-                               *poff = soff;
-                       }
-               }
                save_register_state(state, spi, reg);
        } else {
                u8 type = STACK_MISC;
@@ -6804,6 +6780,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
                alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
                alu_state |= ptr_is_dst_reg ?
                             BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+               /* Limit pruning on unknown scalars to enable deep search for
+                * potential masking differences from other program paths.
+                */
+               if (!off_is_imm)
+                       env->explore_alu_limits = true;
        }
 
        err = update_alu_sanitation_state(aux, alu_state, alu_limit);
@@ -10207,8 +10189,8 @@ next:
 }
 
 /* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
-                   struct bpf_id_pair *idmap)
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+                   struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
 {
        bool equal;
 
@@ -10234,6 +10216,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
                return false;
        switch (rold->type) {
        case SCALAR_VALUE:
+               if (env->explore_alu_limits)
+                       return false;
                if (rcur->type == SCALAR_VALUE) {
                        if (!rold->precise && !rcur->precise)
                                return true;
@@ -10324,9 +10308,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
        return false;
 }
 
-static bool stacksafe(struct bpf_func_state *old,
-                     struct bpf_func_state *cur,
-                     struct bpf_id_pair *idmap)
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+                     struct bpf_func_state *cur, struct bpf_id_pair *idmap)
 {
        int i, spi;
 
@@ -10371,9 +10354,8 @@ static bool stacksafe(struct bpf_func_state *old,
                        continue;
                if (old->stack[spi].slot_type[0] != STACK_SPILL)
                        continue;
-               if (!regsafe(&old->stack[spi].spilled_ptr,
-                            &cur->stack[spi].spilled_ptr,
-                            idmap))
+               if (!regsafe(env, &old->stack[spi].spilled_ptr,
+                            &cur->stack[spi].spilled_ptr, idmap))
                        /* when explored and current stack slot are both storing
                         * spilled registers, check that stored pointers types
                         * are the same as well.
@@ -10430,10 +10412,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 
        memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
        for (i = 0; i < MAX_BPF_REG; i++)
-               if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch))
+               if (!regsafe(env, &old->regs[i], &cur->regs[i],
+                            env->idmap_scratch))
                        return false;
 
-       if (!stacksafe(old, cur, env->idmap_scratch))
+       if (!stacksafe(env, old, cur, env->idmap_scratch))
                return false;
 
        if (!refsafe(old, cur))
@@ -12198,35 +12181,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 
        for (i = 0; i < insn_cnt; i++, insn++) {
                bpf_convert_ctx_access_t convert_ctx_access;
+               bool ctx_access;
 
                if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
                    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
                    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
-                   insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
+                   insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
                        type = BPF_READ;
-               else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
-                        insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
-                        insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
-                        insn->code == (BPF_STX | BPF_MEM | BPF_DW))
+                       ctx_access = true;
+               } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+                          insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+                          insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+                          insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
                        type = BPF_WRITE;
-               else
+                       ctx_access = BPF_CLASS(insn->code) == BPF_STX;
+               } else {
                        continue;
+               }
 
                if (type == BPF_WRITE &&
-                   env->insn_aux_data[i + delta].sanitize_stack_off) {
+                   env->insn_aux_data[i + delta].sanitize_stack_spill) {
                        struct bpf_insn patch[] = {
-                               /* Sanitize suspicious stack slot with zero.
-                                * There are no memory dependencies for this store,
-                                * since it's only using frame pointer and immediate
-                                * constant of zero
-                                */
-                               BPF_ST_MEM(BPF_DW, BPF_REG_FP,
-                                          env->insn_aux_data[i + delta].sanitize_stack_off,
-                                          0),
-                               /* the original STX instruction will immediately
-                                * overwrite the same stack slot with appropriate value
-                                */
                                *insn,
+                               BPF_ST_NOSPEC(),
                        };
 
                        cnt = ARRAY_SIZE(patch);
@@ -12240,6 +12221,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                        continue;
                }
 
+               if (!ctx_access)
+                       continue;
+
                switch (env->insn_aux_data[i + delta].ptr_type) {
                case PTR_TO_CTX:
                        if (!ops->convert_ctx_access)
@@ -13093,37 +13077,6 @@ static void free_states(struct bpf_verifier_env *env)
        }
 }
 
-/* The verifier is using insn_aux_data[] to store temporary data during
- * verification and to store information for passes that run after the
- * verification like dead code sanitization. do_check_common() for subprogram N
- * may analyze many other subprograms. sanitize_insn_aux_data() clears all
- * temporary data after do_check_common() finds that subprogram N cannot be
- * verified independently. pass_cnt counts the number of times
- * do_check_common() was run and insn->aux->seen tells the pass number
- * insn_aux_data was touched. These variables are compared to clear temporary
- * data from failed pass. For testing and experiments do_check_common() can be
- * run multiple times even when prior attempt to verify is unsuccessful.
- *
- * Note that special handling is needed on !env->bypass_spec_v1 if this is
- * ever called outside of error path with subsequent program rejection.
- */
-static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
-{
-       struct bpf_insn *insn = env->prog->insnsi;
-       struct bpf_insn_aux_data *aux;
-       int i, class;
-
-       for (i = 0; i < env->prog->len; i++) {
-               class = BPF_CLASS(insn[i].code);
-               if (class != BPF_LDX && class != BPF_STX)
-                       continue;
-               aux = &env->insn_aux_data[i];
-               if (aux->seen != env->pass_cnt)
-                       continue;
-               memset(aux, 0, offsetof(typeof(*aux), orig_idx));
-       }
-}
-
 static int do_check_common(struct bpf_verifier_env *env, int subprog)
 {
        bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
@@ -13200,9 +13153,6 @@ out:
        if (!ret && pop_log)
                bpf_vlog_reset(&env->log, 0);
        free_states(env);
-       if (ret)
-               /* clean aux data in case subprog was rejected */
-               sanitize_insn_aux_data(env);
        return ret;
 }
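
The verifier change above swaps the Spectre v4 mitigation strategy: instead of pre-zeroing a suspicious stack slot and letting the original store overwrite it, the store is kept as-is and followed by the new BPF_ST_NOSPEC barrier instruction. A minimal sketch of how a JIT might lower that opcode, assuming a hypothetical emit_lfence() helper standing in for the architecture's speculation barrier (the real per-architecture emitters differ):

        /* Hedged sketch: lowering BPF_ST | BPF_NOSPEC inside a JIT's
         * instruction-translation switch; emit_lfence() is assumed,
         * not an actual kernel helper.
         */
        case BPF_ST | BPF_NOSPEC:
                /* Fence speculation so a speculative-store-bypass
                 * gadget cannot observe stale slot contents from
                 * before the preceding store.
                 */
                emit_lfence(&prog);
                break;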
 
index 8d6bf56..de2c432 100644
@@ -1221,9 +1221,7 @@ int cgroup1_get_tree(struct fs_context *fc)
                ret = cgroup_do_get_tree(fc);
 
        if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
-               struct super_block *sb = fc->root->d_sb;
-               dput(fc->root);
-               deactivate_locked_super(sb);
+               fc_drop_locked(fc);
                ret = 1;
        }
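
The cgroup1 hunk above folds an open-coded root drop into the fc_drop_locked() helper. Reconstructed purely from the two removed lines, such a helper would look roughly like this sketch (the real definition lives in the VFS; clearing fc->root here is an assumption):

        /* Hedged sketch of fc_drop_locked(), inferred from the lines
         * it replaces above.
         */
        void fc_drop_locked(struct fs_context *fc)
        {
                struct super_block *sb = fc->root->d_sb;

                dput(fc->root);
                fc->root = NULL;   /* assumed: helper clears the root */
                deactivate_locked_super(sb);
        }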
 
index 910ae69..af4a6ef 100644
@@ -5,6 +5,13 @@
  */
 #include <linux/dma-map-ops.h>
 
+static struct page *dma_common_vaddr_to_page(void *cpu_addr)
+{
+       if (is_vmalloc_addr(cpu_addr))
+               return vmalloc_to_page(cpu_addr);
+       return virt_to_page(cpu_addr);
+}
+
 /*
  * Create scatter-list for the already allocated DMA buffer.
  */
@@ -12,7 +19,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
                 void *cpu_addr, dma_addr_t dma_addr, size_t size,
                 unsigned long attrs)
 {
-       struct page *page = virt_to_page(cpu_addr);
+       struct page *page = dma_common_vaddr_to_page(cpu_addr);
        int ret;
 
        ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
@@ -32,6 +39,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
        unsigned long user_count = vma_pages(vma);
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        unsigned long off = vma->vm_pgoff;
+       struct page *page = dma_common_vaddr_to_page(cpu_addr);
        int ret = -ENXIO;
 
        vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
@@ -43,7 +51,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
                return -ENXIO;
 
        return remap_pfn_range(vma, vma->vm_start,
-                       page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff,
+                       page_to_pfn(page) + vma->vm_pgoff,
                        user_count << PAGE_SHIFT, vma->vm_page_prot);
 #else
        return -ENXIO;
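
virt_to_page() is plain address arithmetic on the kernel linear map, so handing it a vmalloc-area address (which some dma_alloc_* paths return) produces a bogus struct page. The new helper dispatches on the address range first; a hedged restatement with the reasoning spelled out:

        struct page *page;

        if (is_vmalloc_addr(cpu_addr))
                page = vmalloc_to_page(cpu_addr);  /* walks the page tables */
        else
                page = virt_to_page(cpu_addr);     /* linear-map arithmetic */
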
index bc94b2c..e8b41e2 100644
@@ -2083,6 +2083,7 @@ static __latent_entropy struct task_struct *copy_process(
 #endif
 #ifdef CONFIG_BPF_SYSCALL
        RCU_INIT_POINTER(p->bpf_storage, NULL);
+       p->bpf_ctx = NULL;
 #endif
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
index e416304..cf6acab 100644
@@ -47,7 +47,7 @@ void __init idle_thread_set_boot_cpu(void)
  *
  * Creates the thread if it does not exist.
  */
-static inline void idle_init(unsigned int cpu)
+static __always_inline void idle_init(unsigned int cpu)
 {
        struct task_struct *tsk = per_cpu(idle_threads, cpu);
 
index 29a5e54..517be7f 100644
@@ -991,6 +991,11 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
        if (!p)
                goto out;
 
+       /* Protect timer list r/w in arm_timer() */
+       sighand = lock_task_sighand(p, &flags);
+       if (unlikely(sighand == NULL))
+               goto out;
+
        /*
         * Fetch the current sample and update the timer's expiry time.
         */
@@ -1001,11 +1006,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
 
        bump_cpu_timer(timer, now);
 
-       /* Protect timer list r/w in arm_timer() */
-       sighand = lock_task_sighand(p, &flags);
-       if (unlikely(sighand == NULL))
-               goto out;
-
        /*
         * Now re-arm for the new expiry time.
         */
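
The rearm fix above is purely an ordering change: the expiry sample and the re-arm must both happen under sighand->siglock, otherwise the timer can be sampled against state that a concurrent exit or timer deletion is tearing down. A hedged sketch of the resulting shape of posix_cpu_timer_rearm(), where fetch_sample() is a hypothetical stand-in for the clock-specific sampling:

        sighand = lock_task_sighand(p, &flags);   /* take the lock first */
        if (unlikely(sighand == NULL))
                goto out;

        now = fetch_sample(timer, p);             /* hypothetical stand-in */
        bump_cpu_timer(timer, now);               /* sampled under the lock */
        arm_timer(timer, p);                      /* timer list write is safe */

        unlock_task_sighand(p, &flags);
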
index 3fadb58..9eb11c2 100644
@@ -207,6 +207,7 @@ struct timer_base {
        unsigned int            cpu;
        bool                    next_expiry_recalc;
        bool                    is_idle;
+       bool                    timers_pending;
        DECLARE_BITMAP(pending_map, WHEEL_SIZE);
        struct hlist_head       vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -595,6 +596,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
                 * can reevaluate the wheel:
                 */
                base->next_expiry = bucket_expiry;
+               base->timers_pending = true;
                base->next_expiry_recalc = false;
                trigger_dyntick_cpu(base, timer);
        }
@@ -1582,6 +1584,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
        }
 
        base->next_expiry_recalc = false;
+       base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
 
        return next;
 }
@@ -1633,7 +1636,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
        u64 expires = KTIME_MAX;
        unsigned long nextevt;
-       bool is_max_delta;
 
        /*
         * Pretend that there is no timer pending if the cpu is offline.
@@ -1646,7 +1648,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
        if (base->next_expiry_recalc)
                base->next_expiry = __next_timer_interrupt(base);
        nextevt = base->next_expiry;
-       is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
 
        /*
         * We have a fresh next event. Check whether we can forward the
@@ -1664,7 +1665,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
                expires = basem;
                base->is_idle = false;
        } else {
-               if (!is_max_delta)
+               if (base->timers_pending)
                        expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
                /*
                 * If we expect to sleep more than a tick, mark the base idle.
@@ -1947,6 +1948,7 @@ int timers_prepare_cpu(unsigned int cpu)
                base = per_cpu_ptr(&timer_bases[b], cpu);
                base->clk = jiffies;
                base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+               base->timers_pending = false;
                base->is_idle = false;
        }
        return 0;
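
The new timers_pending flag replaces the is_max_delta test: deriving "nothing pending" from nextevt == base->clk + NEXT_TIMER_MAX_DELTA is fragile because base->clk can be stale on an idle base, whereas the flag is maintained explicitly at every enqueue and every recalculation. The consumer side then reduces to this hedged sketch of the logic in get_next_timer_interrupt():

        if (time_before_eq(nextevt, basej)) {
                expires = basem;        /* a timer has already expired */
        } else if (base->timers_pending) {
                expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
        }
        /* else: nothing is queued; expires stays KTIME_MAX and the
         * CPU may sleep indefinitely.
         */
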
index 0890600..c5e0b6a 100644
@@ -965,7 +965,7 @@ BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
 {
        struct kprobe *kp = kprobe_running();
 
-       return kp ? (u64) kp->addr : 0;
+       return kp ? (uintptr_t)kp->addr : 0;
 }
 
 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
@@ -1461,6 +1461,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 const struct bpf_func_proto *
 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
+       const struct bpf_func_proto *fn;
+
        switch (func_id) {
 #ifdef CONFIG_NET
        case BPF_FUNC_skb_output:
@@ -1501,7 +1503,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_d_path:
                return &bpf_d_path_proto;
        default:
-               return raw_tp_prog_func_proto(func_id, prog);
+               fn = raw_tp_prog_func_proto(func_id, prog);
+               if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
+                       fn = bpf_iter_get_func_proto(func_id, prog);
+               return fn;
        }
 }
 
index e6fb3e6..7b180f6 100644
@@ -5985,7 +5985,8 @@ ftrace_graph_release(struct inode *inode, struct file *file)
                 * infrastructure to do the synchronization, thus we must do it
                 * ourselves.
                 */
-               synchronize_rcu_tasks_rude();
+               if (old_hash != EMPTY_HASH)
+                       synchronize_rcu_tasks_rude();
 
                free_ftrace_hash(old_hash);
        }
@@ -7544,7 +7545,7 @@ int ftrace_is_dead(void)
  */
 int register_ftrace_function(struct ftrace_ops *ops)
 {
-       int ret = -1;
+       int ret;
 
        ftrace_ops_init(ops);
 
index d1463ea..e592d1d 100644
@@ -3880,10 +3880,30 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
        if (unlikely(!head))
                return true;
 
-       return reader->read == rb_page_commit(reader) &&
-               (commit == reader ||
-                (commit == head &&
-                 head->read == rb_page_commit(commit)));
+       /* Reader should exhaust content in reader page */
+       if (reader->read != rb_page_commit(reader))
+               return false;
+
+       /*
+        * If writers are committing on the reader page and all committed
+        * content has already been read, the ring buffer is empty.
+        */
+       if (commit == reader)
+               return true;
+
+       /*
+        * If writers are committing on a page other than the reader
+        * page and the head page, there should always be content to read.
+        */
+       if (commit != head)
+               return false;
+
+       /*
+        * Writers are committing on the head page; we just need to
+        * check whether any data has been committed there. The reader
+        * will swap the reader page with the head page when it goes
+        * to read.
+        */
+       return rb_page_commit(commit) == 0;
 }
 
 /**
index f8b80b5..33899a7 100644
@@ -5609,6 +5609,10 @@ static const char readme_msg[] =
        "\t            [:name=histname1]\n"
        "\t            [:<handler>.<action>]\n"
        "\t            [if <filter>]\n\n"
+       "\t    Note, special fields can be used as well:\n"
+       "\t            common_timestamp - to record current timestamp\n"
+       "\t            common_cpu - to record the CPU the event happened on\n"
+       "\n"
        "\t    When a matching event is hit, an entry is added to a hash\n"
        "\t    table using the key(s) and value(s) named, and the value of a\n"
        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
@@ -9131,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr)
                return -EINVAL;
 
        ret = event_trace_add_tracer(tr->dir, tr);
-       if (ret)
+       if (ret) {
                tracefs_remove(tr->dir);
+               return ret;
+       }
 
        init_tracer_tracefs(tr, tr->dir);
        __update_tracer_options(tr);
index 16a9dfc..949ef09 100644
@@ -65,7 +65,8 @@
        C(INVALID_SORT_MODIFIER,"Invalid sort modifier"),               \
        C(EMPTY_SORT_FIELD,     "Empty sort field"),                    \
        C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"),      \
-       C(INVALID_SORT_FIELD,   "Sort field must be a key or a val"),
+       C(INVALID_SORT_FIELD,   "Sort field must be a key or a val"),   \
+       C(INVALID_STR_OPERAND,  "String type cannot be an operand in an expression"),
 
 #undef C
 #define C(a, b)                HIST_ERR_##a
@@ -1111,7 +1112,7 @@ static const char *hist_field_name(struct hist_field *field,
                 field->flags & HIST_FIELD_FL_ALIAS)
                field_name = hist_field_name(field->operands[0], ++level);
        else if (field->flags & HIST_FIELD_FL_CPU)
-               field_name = "cpu";
+               field_name = "common_cpu";
        else if (field->flags & HIST_FIELD_FL_EXPR ||
                 field->flags & HIST_FIELD_FL_VAR_REF) {
                if (field->system) {
@@ -1991,14 +1992,24 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
                hist_data->enable_timestamps = true;
                if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
                        hist_data->attrs->ts_in_usecs = true;
-       } else if (strcmp(field_name, "cpu") == 0)
+       } else if (strcmp(field_name, "common_cpu") == 0)
                *flags |= HIST_FIELD_FL_CPU;
        else {
                field = trace_find_event_field(file->event_call, field_name);
                if (!field || !field->size) {
-                       hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name));
-                       field = ERR_PTR(-EINVAL);
-                       goto out;
+                       /*
+                        * For backward compatibility, if field_name
+                        * was "cpu", then we treat this the same as
+                        * common_cpu.
+                        */
+                       if (strcmp(field_name, "cpu") == 0) {
+                               *flags |= HIST_FIELD_FL_CPU;
+                       } else {
+                               hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
+                                        errpos(field_name));
+                               field = ERR_PTR(-EINVAL);
+                               goto out;
+                       }
                }
        }
  out:
@@ -2146,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
                ret = PTR_ERR(operand1);
                goto free;
        }
+       if (operand1->flags & HIST_FIELD_FL_STRING) {
+               /* A string cannot be the operand of a unary operator. */
+               hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+               destroy_hist_field(operand1, 0);
+               ret = -EINVAL;
+               goto free;
+       }
 
        expr->flags |= operand1->flags &
                (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
@@ -2247,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
                operand1 = NULL;
                goto free;
        }
+       if (operand1->flags & HIST_FIELD_FL_STRING) {
+               hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str));
+               ret = -EINVAL;
+               goto free;
+       }
 
        /* rest of string could be another expression e.g. b+c in a+b+c */
        operand_flags = 0;
@@ -2256,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
                operand2 = NULL;
                goto free;
        }
+       if (operand2->flags & HIST_FIELD_FL_STRING) {
+               hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+               ret = -EINVAL;
+               goto free;
+       }
 
        ret = check_expr_operands(file->tr, operand1, operand2);
        if (ret)
@@ -2277,6 +2305,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 
        expr->operands[0] = operand1;
        expr->operands[1] = operand2;
+
+       /* The operand sizes should be the same, so just pick one */
+       expr->size = operand1->size;
+
        expr->operator = field_op;
        expr->name = expr_str(expr, 0);
        expr->type = kstrdup(operand1->type, GFP_KERNEL);
@@ -5085,7 +5117,7 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
                seq_printf(m, "%s=", hist_field->var.name);
 
        if (hist_field->flags & HIST_FIELD_FL_CPU)
-               seq_puts(m, "cpu");
+               seq_puts(m, "common_cpu");
        else if (field_name) {
                if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
                    hist_field->flags & HIST_FIELD_FL_ALIAS)
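
With this rename, histogram triggers refer to the CPU with the same common_ prefix as the other synthetic fields, while a plain "cpu" keyword is still accepted when the event defines no field of that name. A hypothetical trigger (event path illustrative, relative to the tracefs events directory):

        echo 'hist:keys=common_cpu:vals=hitcount' > sched/sched_switch/trigger
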
index 2ac75eb..9315fc0 100644
@@ -893,15 +893,13 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields,
        dyn_event_init(&event->devent, &synth_event_ops);
 
        for (i = 0, j = 0; i < n_fields; i++) {
+               fields[i]->field_pos = i;
                event->fields[i] = fields[i];
 
-               if (fields[i]->is_dynamic) {
-                       event->dynamic_fields[j] = fields[i];
-                       event->dynamic_fields[j]->field_pos = i;
+               if (fields[i]->is_dynamic)
                        event->dynamic_fields[j++] = fields[i];
-                       event->n_dynamic_fields++;
-               }
        }
+       event->n_dynamic_fields = j;
        event->n_fields = n_fields;
  out:
        return event;
index a6c0cda..14f46aa 100644
@@ -327,7 +327,7 @@ static void move_to_next_cpu(void)
 
        get_online_cpus();
        cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
-       next_cpu = cpumask_next(smp_processor_id(), current_mask);
+       next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
        put_online_cpus();
 
        if (next_cpu >= nr_cpu_ids)
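
smp_processor_id() carries a debug check that fires when the caller is preemptible, since the returned CPU may be stale by the time it is used. move_to_next_cpu() runs preemptible here (get_online_cpus() does not disable preemption), and the value only seeds a cpumask search, so a momentarily stale CPU id is harmless:

        /* raw_smp_processor_id() reads the same value without the
         * CONFIG_DEBUG_PREEMPT check; acceptable whenever a stale
         * answer cannot cause harm, as for a search starting point.
         */
        next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
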
index 6e146b9..4007fe9 100644
@@ -14,10 +14,10 @@ struct synth_field {
        char *name;
        size_t size;
        unsigned int offset;
+       unsigned int field_pos;
        bool is_signed;
        bool is_string;
        bool is_dynamic;
-       bool field_pos;
 };
 
 struct synth_event {
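
The header change is the substantive fix: field_pos was declared bool, so every nonzero position collapsed to 1 and all dynamic fields past the first appeared to sit at position 1. A small self-contained illustration of that truncation (standalone C, not kernel code):

        #include <stdbool.h>
        #include <stdio.h>

        int main(void)
        {
                bool pos_as_bool = 3;   /* any nonzero value becomes 1 */
                unsigned int pos = 3;   /* keeps the actual index */

                printf("%d %u\n", pos_as_bool, pos);   /* prints "1 3" */
                return 0;
        }
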
index 976bf8c..fc32821 100644
@@ -299,8 +299,8 @@ static int tracepoint_add_func(struct tracepoint *tp,
         * a pointer to it.  This array is referenced by __DO_TRACE from
         * include/linux/tracepoint.h using rcu_dereference_sched().
         */
-       rcu_assign_pointer(tp->funcs, tp_funcs);
        tracepoint_update_call(tp, tp_funcs, false);
+       rcu_assign_pointer(tp->funcs, tp_funcs);
        static_key_enable(&tp->key);
 
        release_probes(old);
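
The two swapped lines implement the classic RCU publish rule: make all state a reader may dereference fully consistent before publishing the pointer, because rcu_assign_pointer() only orders stores issued before it. In generic form (a hedged sketch, not the tracepoint code):

        struct cfg {
                int threshold;
                int limit;
        };

        static struct cfg __rcu *live_cfg;

        static void publish_cfg(struct cfg *c)
        {
                c->threshold = 10;                 /* initialize everything... */
                c->limit = 20;
                rcu_assign_pointer(live_cfg, c);   /* ...then publish last */
        }

        /* A reader doing rcu_dereference(live_cfg) under rcu_read_lock()
         * that sees the new pointer is guaranteed to see fully
         * initialized fields, never a half-built object.
         */
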
index 87799e2..77be3bb 100644
@@ -160,6 +160,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
        struct hlist_head *hashent = ucounts_hashentry(ns, uid);
        struct ucounts *ucounts, *new;
+       long overflow;
 
        spin_lock_irq(&ucounts_lock);
        ucounts = find_ucounts(ns, uid, hashent);
@@ -184,8 +185,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
                        return new;
                }
        }
+       overflow = atomic_add_negative(1, &ucounts->count);
        spin_unlock_irq(&ucounts_lock);
-       ucounts = get_ucounts(ucounts);
+       if (overflow) {
+               put_ucounts(ucounts);
+               return NULL;
+       }
        return ucounts;
 }
 
@@ -193,8 +198,7 @@ void put_ucounts(struct ucounts *ucounts)
 {
        unsigned long flags;
 
-       if (atomic_dec_and_test(&ucounts->count)) {
-               spin_lock_irqsave(&ucounts_lock, flags);
+       if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
                hlist_del_init(&ucounts->node);
                spin_unlock_irqrestore(&ucounts_lock, flags);
                kfree(ucounts);
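
Two idioms are at work in this file: on the get side, atomic_add_negative() both takes the new reference and detects that the counter wrapped past INT_MAX; on the put side, atomic_dec_and_lock_irqsave() takes the spinlock only when the count actually reaches zero, so lookup and teardown cannot race. A hedged sketch of the put-side pattern for a generic hashed object (obj, table_lock and the list node are hypothetical):

        static void put_obj(struct obj *o)
        {
                unsigned long flags;

                /* Cheap atomic decrement; the lock is taken only when
                 * this was the last reference.
                 */
                if (!atomic_dec_and_lock_irqsave(&o->count, &table_lock, flags))
                        return;

                hlist_del_init(&o->node);  /* unpublish under the lock */
                spin_unlock_irqrestore(&table_lock, flags);
                kfree(o);
        }
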
index 50142fc..f148eac 100644
@@ -3676,15 +3676,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
                                                  unbound_release_work);
        struct workqueue_struct *wq = pwq->wq;
        struct worker_pool *pool = pwq->pool;
-       bool is_last;
+       bool is_last = false;
 
-       if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
-               return;
+       /*
+        * When @pwq is not linked, it holds no reference to @wq, and
+        * @wq must not be accessed.
+        */
+       if (!list_empty(&pwq->pwqs_node)) {
+               if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+                       return;
 
-       mutex_lock(&wq->mutex);
-       list_del_rcu(&pwq->pwqs_node);
-       is_last = list_empty(&wq->pwqs);
-       mutex_unlock(&wq->mutex);
+               mutex_lock(&wq->mutex);
+               list_del_rcu(&pwq->pwqs_node);
+               is_last = list_empty(&wq->pwqs);
+               mutex_unlock(&wq->mutex);
+       }
 
        mutex_lock(&wq_pool_mutex);
        put_unbound_pool(pool);
index d241fe4..5c9c068 100644
@@ -683,9 +683,6 @@ config PARMAN
 config OBJAGG
        tristate "objagg" if COMPILE_TEST
 
-config STRING_SELFTEST
-       tristate "Test string functions"
-
 endmenu
 
 config GENERIC_IOREMAP
index 8312127..5ddd575 100644
@@ -2180,6 +2180,9 @@ config ASYNC_RAID6_TEST
 config TEST_HEXDUMP
        tristate "Test functions located in the hexdump module at runtime"
 
+config STRING_SELFTEST
+       tristate "Test string functions at runtime"
+
 config TEST_STRING_HELPERS
        tristate "Test functions located in the string_helpers module at runtime"
 
index d500320..44d8197 100644
@@ -461,6 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self)
        return __bpf_fill_stxdw(self, BPF_DW);
 }
 
+static int bpf_fill_long_jmp(struct bpf_test *self)
+{
+       unsigned int len = BPF_MAXINSNS;
+       struct bpf_insn *insn;
+       int i;
+
+       insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return -ENOMEM;
+
+       insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1);
+       insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1);
+
+       /*
+        * Fill with a complex 64-bit operation that expands to a lot of
+        * instructions on 32-bit JITs. The large jump offset can then
+        * overflow the conditional branch field size, triggering a branch
+        * conversion mechanism in some JITs.
+        *
+        * Note: BPF_MAXINSNS ALU64 MUL instructions are enough to trigger
+        * such branch conversion on the 32-bit MIPS JIT. For other JITs, the
+        * count and/or operation may need to be modified to trigger the
+        * branch conversion.
+        */
+       for (i = 2; i < len - 1; i++)
+               insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i);
+
+       insn[len - 1] = BPF_EXIT_INSN();
+
+       self->u.ptr.insns = insn;
+       self->u.ptr.len = len;
+
+       return 0;
+}
+
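
As a rough, illustrative calculation of why this works (the figures are not measured): with BPF_MAXINSNS = 4096 and an ALU64 multiply expanding to on the order of ten native instructions on a 32-bit JIT, the conditional branch at the top must skip roughly 40k native instructions, about 160 KB of code at 4 bytes each. That exceeds, for example, the +/-128 KB reach of a MIPS conditional branch's 16-bit word-shifted displacement, forcing the JIT to convert the branch into its long-jump form.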
 static struct bpf_test tests[] = {
        {
                "TAX",
@@ -1916,6 +1951,163 @@ static struct bpf_test tests[] = {
                { },
                { { 0, -1 } }
        },
+       {
+               /*
+                * Register (non-)clobbering test, in the case where a 32-bit
+                * JIT implements complex ALU64 operations via function calls.
+                * If so, the function call must be invisible in the eBPF
+                * registers. The JIT must then save and restore relevant
+                * registers during the call. The following tests check that
+                * the eBPF registers retain their values after such a call.
+                */
+               "INT: Register clobbering, R1 updated",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 123456789),
+                       BPF_ALU32_IMM(BPF_MOV, R2, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 3),
+                       BPF_ALU32_IMM(BPF_MOV, R4, 4),
+                       BPF_ALU32_IMM(BPF_MOV, R5, 5),
+                       BPF_ALU32_IMM(BPF_MOV, R6, 6),
+                       BPF_ALU32_IMM(BPF_MOV, R7, 7),
+                       BPF_ALU32_IMM(BPF_MOV, R8, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R9, 9),
+                       BPF_ALU64_IMM(BPF_DIV, R1, 123456789),
+                       BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+                       BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+                       BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+                       BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+                       BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+                       BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+                       BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+                       BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+                       BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+                       BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "INT: Register clobbering, R2 updated",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 3),
+                       BPF_ALU32_IMM(BPF_MOV, R4, 4),
+                       BPF_ALU32_IMM(BPF_MOV, R5, 5),
+                       BPF_ALU32_IMM(BPF_MOV, R6, 6),
+                       BPF_ALU32_IMM(BPF_MOV, R7, 7),
+                       BPF_ALU32_IMM(BPF_MOV, R8, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R9, 9),
+                       BPF_ALU64_IMM(BPF_DIV, R2, 123456789),
+                       BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+                       BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+                       BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+                       BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+                       BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+                       BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+                       BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+                       BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+                       BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+                       BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               /*
+                * Test 32-bit JITs that implement complex ALU64 operations as
+                * function calls R0 = f(R1, R2) and must therefore rearrange
+                * the operands to fit that calling convention.
+                */
+#define NUMER 0xfedcba9876543210ULL
+#define DENOM 0x0123456789abcdefULL
+               "ALU64_DIV X: Operand register permutations",
+               .u.insns_int = {
+                       /* R0 / R2 */
+                       BPF_LD_IMM64(R0, NUMER),
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R0, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R0 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_LD_IMM64(R0, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R1, R0),
+                       BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R0 / R1 */
+                       BPF_LD_IMM64(R0, NUMER),
+                       BPF_LD_IMM64(R1, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R0, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R0 */
+                       BPF_LD_IMM64(R2, NUMER),
+                       BPF_LD_IMM64(R0, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R0),
+                       BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R1 */
+                       BPF_LD_IMM64(R2, NUMER),
+                       BPF_LD_IMM64(R1, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R2 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R1, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R1 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_ALU64_REG(BPF_DIV, R1, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R1, 1, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R2 */
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R2, 1, 1),
+                       BPF_EXIT_INSN(),
+                       /* R3 / R4 */
+                       BPF_LD_IMM64(R3, NUMER),
+                       BPF_LD_IMM64(R4, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R3, R4),
+                       BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* Successful return */
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+#undef NUMER
+#undef DENOM
+       },
+#ifdef CONFIG_32BIT
+       {
+               "INT: 32-bit context pointer word order and zero-extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_JMP32_IMM(BPF_JNE, R1, 0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+#endif
        {
                "check: missing ret",
                .u.insns = {
@@ -2360,6 +2552,48 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU_MOV_K: small negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       {
+               "ALU_MOV_K: small negative zero extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU_MOV_K: large negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123456789 } }
+       },
+       {
+               "ALU_MOV_K: large negative zero extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
        {
                "ALU64_MOV_K: dst = 2",
                .u.insns_int = {
@@ -2412,6 +2646,48 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_MOV_K: small negative",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       {
+               "ALU64_MOV_K: small negative sign extension",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } }
+       },
+       {
+               "ALU64_MOV_K: large negative",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123456789 } }
+       },
+       {
+               "ALU64_MOV_K: large negative sign extension",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } }
+       },
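
Taken together, the four pairs of MOV tests pin down the eBPF immediate-extension rules: a 32-bit ALU MOV zero-extends its result into the upper register half, while a 64-bit ALU MOV sign-extends its 32-bit immediate. Worked out for -123, which is 0xffffff85 as a 32-bit two's-complement value:

        BPF_ALU32_IMM(BPF_MOV, R0, -123)   /* R0 = 0x00000000ffffff85 */
        BPF_ALU64_IMM(BPF_MOV, R0, -123)   /* R0 = 0xffffffffffffff85 */

        /* Hence the expectations above: shifting right by 32 yields 0
         * in the first case and 0xffffffff in the second.
         */
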
        /* BPF_ALU | BPF_ADD | BPF_X */
        {
                "ALU_ADD_X: 1 + 2 = 3",
@@ -2967,6 +3243,31 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 2147483647 } },
        },
+       {
+               "ALU64_MUL_X: 64x64 multiply, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+                       BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+                       BPF_ALU64_REG(BPF_MUL, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xe5618cf0 } }
+       },
+       {
+               "ALU64_MUL_X: 64x64 multiply, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+                       BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+                       BPF_ALU64_REG(BPF_MUL, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x2236d88f } }
+       },
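
These two tests exercise exactly the case where a 32-bit JIT must assemble a 64x64 multiply from 32x32 partial products. A standalone sketch of that decomposition (plain C; the expected value is simply the concatenation of the high and low words checked by the two tests above):

        #include <stdint.h>

        /* Low 64 bits of a 64x64 multiply, built the way a 32-bit JIT
         * would: one widening low product plus two truncated cross
         * products feeding the high word.
         */
        static uint64_t mul64(uint64_t a, uint64_t b)
        {
                uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
                uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
                uint64_t lo = (uint64_t)a_lo * b_lo;
                uint32_t hi = (uint32_t)(lo >> 32) + a_hi * b_lo + a_lo * b_hi;

                return ((uint64_t)hi << 32) | (uint32_t)lo;
        }

        int main(void)
        {
                return mul64(0x0fedcba987654321ULL, 0x123456789abcdef0ULL)
                        == 0x2236d88fe5618cf0ULL ? 0 : 1;
        }
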
        /* BPF_ALU | BPF_MUL | BPF_K */
        {
                "ALU_MUL_K: 2 * 3 = 6",
@@ -3077,6 +3378,29 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_MUL_K: 64x32 multiply, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xe242d208 } }
+       },
+       {
+               "ALU64_MUL_K: 64x32 multiply, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xc28f5c28 } }
+       },
        /* BPF_ALU | BPF_DIV | BPF_X */
        {
                "ALU_DIV_X: 6 / 2 = 3",
@@ -3430,6 +3754,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xffffffff } },
        },
+       {
+               "ALU_AND_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_AND, R0, 15),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 4 } }
+       },
+       {
+               "ALU_AND_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+                       BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xa1b2c3d4 } }
+       },
+       {
+               "ALU_AND_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL),
+                       BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_AND_K: 3 & 2 = 2",
                .u.insns_int = {
@@ -3453,7 +3815,7 @@ static struct bpf_test tests[] = {
                { { 0, 0xffffffff } },
        },
        {
-               "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000",
+               "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000000000000000LL),
@@ -3469,7 +3831,7 @@ static struct bpf_test tests[] = {
                { { 0, 0x1 } },
        },
        {
-               "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff",
+               "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3500,6 +3862,38 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_AND_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000090b0d0fLL),
+                       BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_AND_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL),
+                       BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        /* BPF_ALU | BPF_OR | BPF_X */
        {
                "ALU_OR_X: 1 | 2 = 3",
@@ -3572,6 +3966,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xffffffff } },
        },
+       {
+               "ALU_OR_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_OR, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01020305 } }
+       },
+       {
+               "ALU_OR_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xa1b2c3d4 } }
+       },
+       {
+               "ALU_OR_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL),
+                       BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_OR_K: 1 | 2 = 3",
                .u.insns_int = {
@@ -3595,7 +4027,7 @@ static struct bpf_test tests[] = {
                { { 0, 0xffffffff } },
        },
        {
-               "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000",
+               "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3642,9 +4074,41 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
-       /* BPF_ALU | BPF_XOR | BPF_X */
        {
-               "ALU_XOR_X: 5 ^ 6 = 3",
+               "ALU64_OR_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x012345678fafcfefLL),
+                       BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_OR_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL),
+                       BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       /* BPF_ALU | BPF_XOR | BPF_X */
+       {
+               "ALU_XOR_X: 5 ^ 6 = 3",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 5),
                        BPF_ALU32_IMM(BPF_MOV, R1, 6),
@@ -3714,6 +4178,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xfffffffe } },
        },
+       {
+               "ALU_XOR_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 15),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x0102030b } }
+       },
+       {
+               "ALU_XOR_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x5e4d3c2b } }
+       },
+       {
+               "ALU_XOR_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000795b3d1fLL),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_XOR_K: 5 ^ 6 = 3",
                .u.insns_int = {
@@ -3726,7 +4228,7 @@ static struct bpf_test tests[] = {
                { { 0, 3 } },
        },
        {
-               "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe",
+               "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 1),
                        BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff),
@@ -3784,6 +4286,38 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_XOR_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL),
+                       BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_XOR_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL),
+                       BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        /* BPF_ALU | BPF_LSH | BPF_X */
        {
                "ALU_LSH_X: 1 << 1 = 2",
@@ -3809,6 +4343,18 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x80000000 } },
        },
+       {
+               "ALU_LSH_X: 0x12345678 << 12 = 0x45678000",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU32_REG(BPF_LSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x45678000 } }
+       },
        {
                "ALU64_LSH_X: 1 << 1 = 2",
                .u.insns_int = {
@@ -3833,570 +4379,1993 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x80000000 } },
        },
-       /* BPF_ALU | BPF_LSH | BPF_K */
        {
-               "ALU_LSH_K: 1 << 1 = 2",
+               "ALU64_LSH_X: Shift < 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU32_IMM(BPF_LSH, R0, 1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 2 } },
+               { { 0, 0xbcdef000 } }
        },
        {
-               "ALU_LSH_K: 1 << 31 = 0x80000000",
+               "ALU64_LSH_X: Shift < 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU32_IMM(BPF_LSH, R0, 31),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x80000000 } },
+               { { 0, 0x3456789a } }
        },
        {
-               "ALU64_LSH_K: 1 << 1 = 2",
+               "ALU64_LSH_X: Shift > 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU64_IMM(BPF_LSH, R0, 1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 2 } },
+               { { 0, 0 } }
        },
        {
-               "ALU64_LSH_K: 1 << 31 = 0x80000000",
+               "ALU64_LSH_X: Shift > 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU64_IMM(BPF_LSH, R0, 31),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x80000000 } },
+               { { 0, 0x9abcdef0 } }
        },
-       /* BPF_ALU | BPF_RSH | BPF_X */
        {
-               "ALU_RSH_X: 2 >> 1 = 1",
+               "ALU64_LSH_X: Shift == 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
-                       BPF_ALU32_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0 } }
        },
        {
-               "ALU_RSH_X: 0x80000000 >> 31 = 1",
+               "ALU64_LSH_X: Shift == 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
-                       BPF_ALU32_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU64_RSH_X: 2 >> 1 = 1",
+               "ALU64_LSH_X: Zero shift, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
-                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+               "ALU64_LSH_X: Zero shift, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
-                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x01234567 } }
        },
-       /* BPF_ALU | BPF_RSH | BPF_K */
+       /* BPF_ALU | BPF_LSH | BPF_K */
        {
-               "ALU_RSH_K: 2 >> 1 = 1",
+               "ALU_LSH_K: 1 << 1 = 2",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_RSH, R0, 1),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 2 } },
        },
        {
-               "ALU_RSH_K: 0x80000000 >> 31 = 1",
+               "ALU_LSH_K: 1 << 31 = 0x80000000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_RSH, R0, 31),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 31),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x80000000 } },
        },
        {
-               "ALU64_RSH_K: 2 >> 1 = 1",
+               "ALU_LSH_K: 0x12345678 << 12 = 0x45678000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU64_IMM(BPF_RSH, R0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 12),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x45678000 } }
        },
        {
-               "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+               "ALU_LSH_K: 0x12345678 << 0 = 0x12345678",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU64_IMM(BPF_RSH, R0, 31),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x12345678 } }
        },
-       /* BPF_ALU | BPF_ARSH | BPF_X */
        {
-               "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               "ALU64_LSH_K: 1 << 1 = 2",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 40),
-                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff00ff } },
+               { { 0, 2 } },
        },
-       /* BPF_ALU | BPF_ARSH | BPF_K */
        {
-               "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               "ALU64_LSH_K: 1 << 31 = 0x80000000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
-                       BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 31),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff00ff } },
+               { { 0, 0x80000000 } },
        },
-       /* BPF_ALU | BPF_NEG */
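+       /*
+        * The "low word"/"high word" test pairs below check each half
+        * of a 64-bit result separately: the high-word variant shifts
+        * the result right by 32 so that the upper half ends up in the
+        * 32-bit value verified by the test harness.
+        */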
        {
-               "ALU_NEG: -(3) = -3",
+               "ALU64_LSH_K: Shift < 32, low word",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 3),
-                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 12),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, -3 } },
+               { { 0, 0xbcdef000 } }
        },
        {
-               "ALU_NEG: -(-3) = 3",
+               "ALU64_LSH_K: Shift < 32, high word",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, -3),
-                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 3 } },
+               { { 0, 0x3456789a } }
        },
        {
-               "ALU64_NEG: -(3) = -3",
+               "ALU64_LSH_K: Shift > 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 3),
-                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 36),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, -3 } },
+               { { 0, 0 } }
        },
        {
-               "ALU64_NEG: -(-3) = 3",
+               "ALU64_LSH_K: Shift > 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, -3),
-                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 3 } },
+               { { 0, 0x9abcdef0 } }
        },
-       /* BPF_ALU | BPF_END | BPF_FROM_BE */
        {
-               "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+               "ALU64_LSH_K: Shift == 32, low word",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0,  cpu_to_be16(0xcdef) } },
+               { { 0, 0 } }
        },
        {
-               "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+               "ALU64_LSH_K: Shift == 32, high word",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 32),
-                       BPF_ALU64_REG(BPF_MOV, R1, R0),
-                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
-                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_ALU64_IMM(BPF_LSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_be32(0x89abcdef) } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+               "ALU64_LSH_K: Zero shift",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+               { { 0, 0x89abcdef } }
        },
-       /* BPF_ALU | BPF_END | BPF_FROM_LE */
+       /* BPF_ALU | BPF_RSH | BPF_X */
        {
-               "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+               "ALU_RSH_X: 2 >> 1 = 1",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_le16(0xcdef) } },
+               { { 0, 1 } },
        },
        {
-               "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+               "ALU_RSH_X: 0x80000000 >> 31 = 1",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 32),
-                       BPF_ALU64_REG(BPF_MOV, R1, R0),
-                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
-                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_le32(0x89abcdef) } },
+               { { 0, 1 } },
        },
        {
-               "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+               "ALU_RSH_X: 0x12345678 >> 20 = 0x123",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 20),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+               { { 0, 0x123 } }
        },
-       /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
        {
-               "ST_MEM_B: Store/Load byte: max negative",
+               "ALU64_RSH_X: 2 >> 1 = 1",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_B, R10, -40, 0xff),
-                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_X: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_RSH_X: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x00081234 } }
+       },
+       {
+               "ALU64_RSH_X: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x08123456 } }
+       },
+       {
+               "ALU64_RSH_X: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_X: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_RSH_X: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_X: Zero shift, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       {
+               "ALU64_RSH_X: Zero shift, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       /* BPF_ALU | BPF_RSH | BPF_K */
+       {
+               "ALU_RSH_K: 2 >> 1 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU_RSH_K: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 31),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU_RSH_K: 0x12345678 >> 20 = 0x123",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 20),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x123 } }
+       },
+       {
+               "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x12345678 } }
+       },
+       {
+               "ALU64_RSH_K: 2 >> 1 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 31),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_K: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 12),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_RSH_K: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x00081234 } }
+       },
+       {
+               "ALU64_RSH_K: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x08123456 } }
+       },
+       {
+               "ALU64_RSH_K: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_K: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_RSH_K: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_K: Zero shift",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       /* BPF_ALU | BPF_ARSH | BPF_X */
+       {
+               "ALU32_ARSH_X: -1234 >> 7 = -10",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 7),
+                       BPF_ALU32_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -10 } }
+       },
+       {
+               "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 40),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff00ff } },
+       },
+       {
+               "ALU64_ARSH_X: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_ARSH_X: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfff81234 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xf8123456 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_X: Zero shift, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       {
+               "ALU64_ARSH_X: Zero shift, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       /* BPF_ALU | BPF_ARSH | BPF_K */
+       {
+               "ALU32_ARSH_K: -1234 >> 7 = -10",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_ARSH, R0, 7),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -10 } }
+       },
+       {
+               "ALU32_ARSH_K: -1234 >> 0 = -1234",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_ARSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1234 } }
+       },
+       {
+               "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff00ff } },
+       },
+       {
+               "ALU64_ARSH_K: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_ARSH_K: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfff81234 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xf8123456 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xf123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_K: Zero shoft",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       /* BPF_ALU | BPF_NEG */
+       {
+               "ALU_NEG: -(3) = -3",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 3),
+                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -3 } },
+       },
+       {
+               "ALU_NEG: -(-3) = 3",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -3),
+                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 3 } },
+       },
+       {
+               "ALU64_NEG: -(3) = -3",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 3),
+                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -3 } },
+       },
+       {
+               "ALU64_NEG: -(-3) = 3",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, -3),
+                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 3 } },
+       },
+       /* BPF_ALU | BPF_END | BPF_FROM_BE */
+       {
+               "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_be16(0xcdef) } },
+       },
+       {
+               "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 if upper half was cleared */
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_be32(0x89abcdef) } },
+       },
+       {
+               "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+       },
+       /* BPF_ALU | BPF_END | BPF_FROM_LE */
+       {
+               "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_le16(0xcdef) } },
+       },
+       {
+               "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 if upper half was cleared */
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_le32(0x89abcdef) } },
+       },
+       {
+               "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+       },
+       /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
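+       /*
+        * ST_MEM stores an immediate, while STX_MEM stores a register.
+        * Each test writes a value to the stack through R10 and loads
+        * it back to verify the store.
+        */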
+       {
+               "ST_MEM_B: Store/Load byte: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_B, R10, -40, 0xff),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_B: Store/Load byte: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_B, R10, -40, 0x7f),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7f } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_B: Store/Load byte: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffLL),
+                       BPF_STX_MEM(BPF_B, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_H: Store/Load half word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_H: Store/Load half word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_H: Store/Load half word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffLL),
+                       BPF_STX_MEM(BPF_H, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_W: Store/Load word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_W: Store/Load word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_W: Store/Load word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffffffLL),
+                       BPF_STX_MEM(BPF_W, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max negative 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
+                       BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_DW, R2, R10, -40),
+                       BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x1 } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_DW: Store/Load double word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
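+       /*
+        * The order of the two words within a stored double word
+        * depends on host endianness, hence the #ifdef'ed expectations
+        * in the two tests below.
+        */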
+       {
+               "STX_MEM_DW: Store double word: first word in memory",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+#ifdef __BIG_ENDIAN
+               { { 0, 0x01234567 } },
+#else
+               { { 0, 0x89abcdef } },
+#endif
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_DW: Store double word: second word in memory",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+#ifdef __BIG_ENDIAN
+               { { 0, 0x89abcdef } },
+#else
+               { { 0, 0x01234567 } },
+#endif
+               .stack_depth = 40,
+       },
+       /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
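+       /*
+        * These two programs are generated at run time by the
+        * bpf_fill_stxw/bpf_fill_stxdw helpers, which are expected to
+        * emit a long sequence of atomic adds to a single stack slot.
+        */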
+       {
+               "STX_XADD_W: X + 1 + 1 + 1 + ...",
+               { },
+               INTERNAL,
+               { },
+               { { 0, 4134 } },
+               .fill_helper = bpf_fill_stxw,
+       },
+       {
+               "STX_XADD_DW: X + 1 + 1 + 1 + ...",
+               { },
+               INTERNAL,
+               { },
+               { { 0, 4134 } },
+               .fill_helper = bpf_fill_stxdw,
+       },
+       /*
+        * Exhaustive tests of atomic operation variants.
+        * Individual tests are expanded from template macros for all
+        * combinations of ALU operation, word size and fetching.
+        */
+#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test: "                      \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU32_IMM(BPF_MOV, R5, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R5, -40),                 \
+               BPF_LDX_MEM(width, R0, R10, -40),                       \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, result } },                                              \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: "    \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU64_REG(BPF_MOV, R1, R10),                        \
+               BPF_ALU32_IMM(BPF_MOV, R0, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R0, -40),                 \
+               BPF_ALU64_REG(BPF_MOV, R0, R10),                        \
+               BPF_ALU64_REG(BPF_SUB, R0, R1),                         \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, 0 } },                                                   \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: "     \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU64_REG(BPF_MOV, R0, R10),                        \
+               BPF_ALU32_IMM(BPF_MOV, R1, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R1, -40),                 \
+               BPF_ALU64_REG(BPF_SUB, R0, R10),                        \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, 0 } },                                                   \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test fetch: "                \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU32_IMM(BPF_MOV, R3, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R3, -40),                 \
+               BPF_ALU64_REG(BPF_MOV, R0, R3),                         \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, (op) & BPF_FETCH ? old : update } },                     \
+       .stack_depth = 40,                                              \
+}
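+       /*
+        * For example, BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd)
+        * expands to a test named
+        * "BPF_ATOMIC | BPF_W, BPF_ADD: Test: 0x12 + 0xab = 0xbd".
+        * BPF_ATOMIC_OP_TEST4 verifies the fetch semantics: with
+        * BPF_FETCH the source register receives the old memory value,
+        * otherwise it is left holding the update operand.
+        */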
+       /* BPF_ATOMIC | BPF_W: BPF_ADD */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_DW: BPF_ADD */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_W: BPF_AND */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_DW: BPF_AND */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_W: BPF_OR */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_DW: BPF_OR */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_W: BPF_XOR */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_DW: BPF_XOR */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_W: BPF_XCHG */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       /* BPF_ATOMIC | BPF_DW: BPF_XCHG */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+#undef BPF_ATOMIC_OP_TEST1
+#undef BPF_ATOMIC_OP_TEST2
+#undef BPF_ATOMIC_OP_TEST3
+#undef BPF_ATOMIC_OP_TEST4
+       /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */
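+       /*
+        * BPF_CMPXCHG compares R0 against the memory operand and, on a
+        * match, stores the source register there. The old memory value
+        * is loaded into R0 in either case, which is what the return
+        * value tests below rely on.
+        */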
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_ALU32_REG(BPF_MOV, R0, R3),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } },
+               .stack_depth = 40,
+       },
+       /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R0, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R2),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LD_IMM64(R0, 0xfecdba9876543210ULL),
+                       BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R2),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
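+       /*
+        * The BPF_JMP32 variants compare only the low 32 bits of their
+        * operands. Each test takes one branch that must not be taken
+        * and one that must, leaving the original value in R0.
+        */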
+       /* BPF_JMP32 | BPF_JEQ | BPF_K */
+       {
+               "JMP32_JEQ_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JEQ_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 12345678 } }
+       },
+       {
+               "JMP32_JEQ_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JEQ, R0,  123, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JEQ | BPF_X */
+       {
+               "JMP32_JEQ_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+                       BPF_JMP32_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+                       BPF_JMP32_REG(BPF_JEQ, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1234 } }
+       },
+       /* BPF_JMP32 | BPF_JNE | BPF_K */
+       {
+               "JMP32_JNE_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 321, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JNE_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 12345678 } }
+       },
+       {
+               "JMP32_JNE_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JNE, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0,  123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JNE | BPF_X */
+       {
+               "JMP32_JNE_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1234 } }
+       },
+       /* BPF_JMP32 | BPF_JSET | BPF_K */
+       {
+               "JMP32_JSET_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 2, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 3, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "JMP32_JSET_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x40000000 } }
+       },
+       {
+               "JMP32_JSET_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSET, R0, -1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JSET | BPF_X */
+       {
+               "JMP32_JSET_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 7),
+                       BPF_JMP32_REG(BPF_JSET, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 8 } }
+       },
+       /* BPF_JMP32 | BPF_JGT | BPF_K */
+       {
+               "JMP32_JGT_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 122, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JGT_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfffffffe } }
+       },
+       /* BPF_JMP32 | BPF_JGT | BPF_X */
+       {
+               "JMP32_JGT_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JGT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JGT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfffffffe } }
+       },
+       /* BPF_JMP32 | BPF_JGE | BPF_K */
+       {
+               "JMP32_JGE_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 124, 1),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_B: Store/Load byte: max positive",
+               "JMP32_JGE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0x7f),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7f } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JGE | BPF_X */
        {
-               "STX_MEM_B: Store/Load byte: max negative",
+               "JMP32_JGE_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffLL),
-                       BPF_STX_MEM(BPF_B, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JGE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+                       BPF_JMP32_REG(BPF_JGE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLT | BPF_K */
        {
-               "ST_MEM_H: Store/Load half word: max negative",
+               "JMP32_JLT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 124, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_H: Store/Load half word: max positive",
+               "JMP32_JLT_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLT | BPF_X */
        {
-               "STX_MEM_H: Store/Load half word: max negative",
+               "JMP32_JLT_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffLL),
-                       BPF_STX_MEM(BPF_H, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JLT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JLT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLE | BPF_K */
        {
-               "ST_MEM_W: Store/Load word: max negative",
+               "JMP32_JLE_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 122, 1),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_W: Store/Load word: max positive",
+               "JMP32_JLE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fffffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLE | BPF_X */
        {
-               "STX_MEM_W: Store/Load word: max negative",
+               "JMP32_JLE_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffffffLL),
-                       BPF_STX_MEM(BPF_W, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JLE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+                       BPF_JMP32_REG(BPF_JLE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JSGT | BPF_K */
        {
-               "ST_MEM_DW: Store/Load double word: max negative",
+               "JMP32_JSGT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "ST_MEM_DW: Store/Load double word: max negative 2",
+               "JMP32_JSGT_K: Large immediate",
                .u.insns_int = {
-                       BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
-                       BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_DW, R2, R10, -40),
-                       BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
-                       BPF_MOV32_IMM(R0, 2),
-                       BPF_EXIT_INSN(),
-                       BPF_MOV32_IMM(R0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x1 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGT | BPF_X */
        {
-               "ST_MEM_DW: Store/Load double word: max positive",
+               "JMP32_JSGT_X",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSGT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+                       BPF_JMP32_REG(BPF_JSGT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fffffff } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGE | BPF_K */
        {
-               "STX_MEM_DW: Store/Load double word: max negative",
+               "JMP32_JSGE_K: Small immediate",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
-                       BPF_STX_MEM(BPF_W, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
-       /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
        {
-               "STX_XADD_W: Test: 0x12 + 0x10 = 0x22",
+               "JMP32_JSGE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x22 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGE | BPF_X */
        {
-               "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+               "JMP32_JSGE_X",
                .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R1, R10),
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
-                       BPF_ALU64_REG(BPF_MOV, R0, R10),
-                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+                       BPF_JMP32_REG(BPF_JSGE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSGE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLT | BPF_K */
        {
-               "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x12 } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "STX_XADD_W: X + 1 + 1 + 1 + ...",
-               { },
+               "JMP32_JSLT_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
                INTERNAL,
                { },
-               { { 0, 4134 } },
-               .fill_helper = bpf_fill_stxw,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLT | BPF_X */
        {
-               "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLT_X",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSLT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+                       BPF_JMP32_REG(BPF_JSLT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x22 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLE | BPF_K */
        {
-               "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLE_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R1, R10),
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
-                       BPF_ALU64_REG(BPF_MOV, R0, R10),
-                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0 } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x12 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLE | BPF_X */
        {
-               "STX_XADD_DW: X + 1 + 1 + 1 + ...",
-               { },
+               "JMP32_JSLE_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+                       BPF_JMP32_REG(BPF_JSLE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSLE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
                INTERNAL,
                { },
-               { { 0, 4134 } },
-               .fill_helper = bpf_fill_stxdw,
+               { { 0, -12345678 } }
        },
        /* BPF_JMP | BPF_EXIT */
        {
@@ -5223,6 +7192,14 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 1 } },
        },
+       {       /* Mainly checking JIT here. */
+               "BPF_MAXINSNS: Very long conditional jump",
+               { },
+               INTERNAL | FLAG_NO_DATA,
+               { },
+               { { 0, 1 } },
+               .fill_helper = bpf_fill_long_jmp,
+       },
        {
                "JMP_JA: Jump, gap, jump, ...",
                { },
@@ -6659,7 +8636,14 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
                u64 duration;
                u32 ret;
 
-               if (test->test[i].data_size == 0 &&
+               /*
+                * NOTE: Several sub-tests may be present, in which case
+                * a zero {data_size, result} tuple indicates the end of
+                * the sub-test array. The first test is always run,
+                * even if both data_size and result happen to be zero.
+                */
+               if (i > 0 &&
+                   test->test[i].data_size == 0 &&
                    test->test[i].result == 0)
                        break;
 
@@ -7005,8 +8989,248 @@ static __init int test_bpf(void)
        return err_cnt ? -EINVAL : 0;
 }
 
+struct tail_call_test {
+       const char *descr;
+       struct bpf_insn insns[MAX_INSNS];
+       int result;
+       int stack_depth;
+};
+
+/*
+ * Magic marker used in test snippets for tail calls below.
+ * The BPF_LD to R2 and the BPF_MOV to R3 carrying this immediate value
+ * are replaced with the proper values by the test runner.
+ */
+#define TAIL_CALL_MARKER 0x7a11ca11
+
+/* Special offset to indicate a NULL call target */
+#define TAIL_CALL_NULL 0x7fff
+
+/* Special offset to indicate an out-of-range index */
+#define TAIL_CALL_INVALID 0x7ffe
+
+#define TAIL_CALL(offset)                             \
+       BPF_LD_IMM64(R2, TAIL_CALL_MARKER),            \
+       BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \
+                    offset, TAIL_CALL_MARKER),        \
+       BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0)
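+
+/*
+ * For illustration: TAIL_CALL(-1) expands to a 64-bit load of
+ * TAIL_CALL_MARKER into R2, a 32-bit move into R3 that stashes the
+ * relative offset -1 in the instruction's off field (with the marker
+ * as its immediate), and a BPF_TAIL_CALL instruction. The runner in
+ * prepare_tail_call_tests() below rewrites R2 with the address of the
+ * program array and R3 with the absolute index of the target program.
+ */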
+
+/*
+ * Tail call tests. Each test case may call any other test in the table,
+ * including itself; the callee is given as a relative index offset from
+ * the calling test. The index TAIL_CALL_NULL can be used for a NULL target
+ * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID
+ * results in a target index that is out of range.
+ */
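+
+/*
+ * For illustration, one worked expected result: programs here run with a
+ * NULL context, so R1 enters as 0. "Tail call 2" adds 2 to R1 and
+ * tail-calls the preceding entry, "Tail call leaf", which returns R1 + 1,
+ * hence .result = 3. "Tail call 4" chains through tests 3 and 2 to the
+ * leaf: 4 + 3 + 2 + 1 = 10.
+ */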
+static struct tail_call_test tail_call_tests[] = {
+       {
+               "Tail call leaf",
+               .insns = {
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+       {
+               "Tail call 2",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 2),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 3,
+       },
+       {
+               "Tail call 3",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 3),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 6,
+       },
+       {
+               "Tail call 4",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 4),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 10,
+       },
+       {
+               "Tail call error path, max count reached",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 1),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       TAIL_CALL(0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = MAX_TAIL_CALL_CNT + 1,
+       },
+       {
+               "Tail call error path, NULL target",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       TAIL_CALL(TAIL_CALL_NULL),
+                       BPF_ALU64_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+       {
+               "Tail call error path, index out of range",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       TAIL_CALL(TAIL_CALL_INVALID),
+                       BPF_ALU64_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+};
+
+static void __init destroy_tail_call_tests(struct bpf_array *progs)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++)
+               if (progs->ptrs[i])
+                       bpf_prog_free(progs->ptrs[i]);
+       kfree(progs);
+}
+
+static __init int prepare_tail_call_tests(struct bpf_array **pprogs)
+{
+       int ntests = ARRAY_SIZE(tail_call_tests);
+       struct bpf_array *progs;
+       int which, err;
+
+       /* Allocate the table of programs to be used for tail calls */
+       progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]),
+                       GFP_KERNEL);
+       if (!progs)
+               goto out_nomem;
+
+       /* Create all eBPF programs and populate the table */
+       for (which = 0; which < ntests; which++) {
+               struct tail_call_test *test = &tail_call_tests[which];
+               struct bpf_prog *fp;
+               int len, i;
+
+               /* Compute the number of program instructions */
+               for (len = 0; len < MAX_INSNS; len++) {
+                       struct bpf_insn *insn = &test->insns[len];
+
+                       if (len < MAX_INSNS - 1 &&
+                           insn->code == (BPF_LD | BPF_DW | BPF_IMM))
+                               len++;
+                       if (insn->code == 0)
+                               break;
+               }
+
+               /* Allocate and initialize the program */
+               fp = bpf_prog_alloc(bpf_prog_size(len), 0);
+               if (!fp)
+                       goto out_nomem;
+
+               fp->len = len;
+               fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
+               fp->aux->stack_depth = test->stack_depth;
+               memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn));
+
+               /* Relocate runtime tail call offsets and addresses */
+               for (i = 0; i < len; i++) {
+                       struct bpf_insn *insn = &fp->insnsi[i];
+
+                       if (insn->imm != TAIL_CALL_MARKER)
+                               continue;
+
+                       switch (insn->code) {
+                       case BPF_LD | BPF_DW | BPF_IMM:
+                               insn[0].imm = (u32)(long)progs;
+                               insn[1].imm = ((u64)(long)progs) >> 32;
+                               break;
+
+                       case BPF_ALU | BPF_MOV | BPF_K:
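+                               /*
+                                * Slot ntests is the trailing NULL entry in
+                                * the program array; index ntests + 1 equals
+                                * map.max_entries and is thus out of range.
+                                */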
+                               if (insn->off == TAIL_CALL_NULL)
+                                       insn->imm = ntests;
+                               else if (insn->off == TAIL_CALL_INVALID)
+                                       insn->imm = ntests + 1;
+                               else
+                                       insn->imm = which + insn->off;
+                               insn->off = 0;
+                       }
+               }
+
+               fp = bpf_prog_select_runtime(fp, &err);
+               if (err)
+                       goto out_err;
+
+               progs->ptrs[which] = fp;
+       }
+
+       /* The last entry contains a NULL program pointer */
+       progs->map.max_entries = ntests + 1;
+       *pprogs = progs;
+       return 0;
+
+out_nomem:
+       err = -ENOMEM;
+
+out_err:
+       if (progs)
+               destroy_tail_call_tests(progs);
+       return err;
+}
+
+static __init int test_tail_calls(struct bpf_array *progs)
+{
+       int i, err_cnt = 0, pass_cnt = 0;
+       int jit_cnt = 0, run_cnt = 0;
+
+       for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) {
+               struct tail_call_test *test = &tail_call_tests[i];
+               struct bpf_prog *fp = progs->ptrs[i];
+               u64 duration;
+               int ret;
+
+               cond_resched();
+
+               pr_info("#%d %s ", i, test->descr);
+               if (!fp) {
+                       err_cnt++;
+                       continue;
+               }
+               pr_cont("jited:%u ", fp->jited);
+
+               run_cnt++;
+               if (fp->jited)
+                       jit_cnt++;
+
+               ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration);
+               if (ret == test->result) {
+                       pr_cont("%lld PASS", duration);
+                       pass_cnt++;
+               } else {
+                       pr_cont("ret %d != %d FAIL", ret, test->result);
+                       err_cnt++;
+               }
+       }
+
+       pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n",
+               __func__, pass_cnt, err_cnt, jit_cnt, run_cnt);
+
+       return err_cnt ? -EINVAL : 0;
+}
+
 static int __init test_bpf_init(void)
 {
+       struct bpf_array *progs = NULL;
        int ret;
 
        ret = prepare_bpf_tests();
@@ -7018,6 +9242,14 @@ static int __init test_bpf_init(void)
        if (ret)
                return ret;
 
+       ret = prepare_tail_call_tests(&progs);
+       if (ret)
+               return ret;
+       ret = test_tail_calls(progs);
+       destroy_tail_call_tests(progs);
+       if (ret)
+               return ret;
+
        return test_skb_segment();
 }
 
index 271f2ca..f5561ea 100644 (file)
@@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work)
        blkcg_unpin_online(blkcg);
 
        fprop_local_destroy_percpu(&wb->memcg_completions);
-       percpu_ref_exit(&wb->refcnt);
 
        spin_lock_irq(&cgwb_lock);
        list_del(&wb->offline_node);
        spin_unlock_irq(&cgwb_lock);
 
+       percpu_ref_exit(&wb->refcnt);
        wb_exit(wb);
        WARN_ON_ONCE(!list_empty(&wb->b_attached));
        kfree_rcu(wb, rcu);
index d7666ac..575c685 100644 (file)
@@ -733,6 +733,22 @@ void kfence_shutdown_cache(struct kmem_cache *s)
 
 void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 {
+       /*
+        * Perform size check before switching kfence_allocation_gate, so that
+        * we don't disable KFENCE without making an allocation.
+        */
+       if (size > PAGE_SIZE)
+               return NULL;
+
+       /*
+        * Skip allocations from non-default zones, including DMA. We cannot
+        * guarantee that pages in the KFENCE pool will have the requested
+        * properties (e.g. reside in DMAable memory).
+        */
+       if ((flags & GFP_ZONEMASK) ||
+           (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
+               return NULL;
+
        /*
         * allocation_gate only needs to become non-zero, so it doesn't make
         * sense to continue writing to it and pay the associated contention
@@ -757,9 +773,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
        if (!READ_ONCE(kfence_enabled))
                return NULL;
 
-       if (size > PAGE_SIZE)
-               return NULL;
-
        return kfence_guarded_alloc(s, size, flags);
 }
 
index 7f24b9b..942cbc1 100644 (file)
@@ -852,7 +852,7 @@ static void kfence_test_exit(void)
        tracepoint_synchronize_unregister();
 }
 
-late_initcall(kfence_test_init);
+late_initcall_sync(kfence_test_init);
 module_exit(kfence_test_exit);
 
 MODULE_LICENSE("GPL v2");
index 0041ff6..de7b553 100644 (file)
@@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type,
                return true;
 
        /* skip hotpluggable memory regions if needed */
-       if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+       if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
+           !(flags & MEMBLOCK_HOTPLUG))
                return true;
 
        /* if we want mirror memory skip non-mirror memory regions */
index 1bbf239..8ef06f9 100644 (file)
@@ -3574,7 +3574,8 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
        unsigned long val;
 
        if (mem_cgroup_is_root(memcg)) {
-               cgroup_rstat_flush(memcg->css.cgroup);
+               /* mem_cgroup_threshold() can get here from irqsafe context */
+               cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
                val = memcg_page_state(memcg, NR_FILE_PAGES) +
                        memcg_page_state(memcg, NR_ANON_MAPPED);
                if (swap)
index 747a01d..25fc46e 100644 (file)
@@ -4026,8 +4026,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
                                return ret;
                }
 
-               if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
+               if (vmf->prealloc_pte) {
+                       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+                       if (likely(pmd_none(*vmf->pmd))) {
+                               mm_inc_nr_ptes(vma->vm_mm);
+                               pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
+                               vmf->prealloc_pte = NULL;
+                       }
+                       spin_unlock(vmf->ptl);
+               } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
                        return VM_FAULT_OOM;
+               }
        }
 
        /* See comment in handle_pte_fault() */
index 34a9ad3..7e24043 100644 (file)
@@ -2068,7 +2068,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        LIST_HEAD(migratepages);
        new_page_t *new;
        bool compound;
-       unsigned int nr_pages = thp_nr_pages(page);
+       int nr_pages = thp_nr_pages(page);
 
        /*
         * PTE mapped THP or HugeTLB page can't reach here so the page could
index f5852a0..1854850 100644 (file)
@@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void)
 #define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
        do {                                                                   \
                const char *memcg_path;                                        \
-               preempt_disable();                                             \
+               local_lock(&memcg_paths.lock);                                 \
                memcg_path = get_mm_memcg_path(mm);                            \
                trace_mmap_lock_##type(mm,                                     \
                                       memcg_path != NULL ? memcg_path : "",   \
                                       ##__VA_ARGS__);                         \
                if (likely(memcg_path != NULL))                                \
                        put_memcg_path_buf();                                  \
-               preempt_enable();                                              \
+               local_unlock(&memcg_paths.lock);                               \
        } while (0)
 
 #else /* !CONFIG_MEMCG */
index 3e97e68..856b175 100644 (file)
@@ -840,21 +840,24 @@ void init_mem_debugging_and_hardening(void)
        }
 #endif
 
-       if (_init_on_alloc_enabled_early) {
-               if (page_poisoning_requested)
-                       pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
-                               "will take precedence over init_on_alloc\n");
-               else
-                       static_branch_enable(&init_on_alloc);
-       }
-       if (_init_on_free_enabled_early) {
-               if (page_poisoning_requested)
-                       pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
-                               "will take precedence over init_on_free\n");
-               else
-                       static_branch_enable(&init_on_free);
+       if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
+           page_poisoning_requested) {
+               pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
+                       "will take precedence over init_on_alloc and init_on_free\n");
+               _init_on_alloc_enabled_early = false;
+               _init_on_free_enabled_early = false;
        }
 
+       if (_init_on_alloc_enabled_early)
+               static_branch_enable(&init_on_alloc);
+       else
+               static_branch_disable(&init_on_alloc);
+
+       if (_init_on_free_enabled_early)
+               static_branch_enable(&init_on_free);
+       else
+               static_branch_disable(&init_on_free);
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
        if (!debug_pagealloc_enabled())
                return;
index f77d254..030f02d 100644 (file)
@@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page)
 }
 
 const struct address_space_operations secretmem_aops = {
+       .set_page_dirty = __set_page_dirty_no_writeback,
        .freepage       = secretmem_freepage,
        .migratepage    = secretmem_migratepage,
        .isolate_page   = secretmem_isolate_page,
index f997fd5..58c01a3 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -346,7 +346,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
                        continue;
 
                page = virt_to_head_page(p[i]);
-               objcgs = page_objcgs(page);
+               objcgs = page_objcgs_check(page);
                if (!objcgs)
                        continue;
 
index 090fa14..af984e4 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3236,6 +3236,16 @@ struct detached_freelist {
        struct kmem_cache *s;
 };
 
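+/* Free a compound page backing a large (non-slab) kmalloc allocation */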
+static inline void free_nonslab_page(struct page *page)
+{
+       unsigned int order = compound_order(page);
+
+       VM_BUG_ON_PAGE(!PageCompound(page), page);
+       kfree_hook(page_address(page));
+       mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
+       __free_pages(page, order);
+}
+
 /*
  * This function progressively scans the array with free objects (with
  * a limited look ahead) and extract objects belonging to the same
@@ -3272,9 +3282,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
        if (!s) {
                /* Handle kalloc'ed objects */
                if (unlikely(!PageSlab(page))) {
-                       BUG_ON(!PageCompound(page));
-                       kfree_hook(object);
-                       __free_pages(page, compound_order(page));
+                       free_nonslab_page(page);
                        p[size] = NULL; /* mark object processed */
                        return size;
                }
@@ -4250,13 +4258,7 @@ void kfree(const void *x)
 
        page = virt_to_head_page(x);
        if (unlikely(!PageSlab(page))) {
-               unsigned int order = compound_order(page);
-
-               BUG_ON(!PageCompound(page));
-               kfree_hook(object);
-               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
-                                     -(PAGE_SIZE << order));
-               __free_pages(page, order);
+               free_nonslab_page(page);
                return;
        }
        slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
index e4f63dd..3624977 100644 (file)
@@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
        skb_pull(skb, AX25_KISS_HEADER_LEN);
 
        if (digipeat != NULL) {
-               if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
-                       kfree_skb(skb);
+               if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
                        goto put;
-               }
 
                skb = ourskb;
        }
index f53751b..22f2f66 100644 (file)
@@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25)
 
 void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
 {
-       struct sk_buff *skbn;
        unsigned char *ptr;
        int headroom;
 
@@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
 
        headroom = ax25_addr_size(ax25->digipeat);
 
-       if (skb_headroom(skb) < headroom) {
-               if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
+       if (unlikely(skb_headroom(skb) < headroom)) {
+               skb = skb_expand_head(skb, headroom);
+               if (!skb) {
                        printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
-                       kfree_skb(skb);
                        return;
                }
-
-               if (skb->sk != NULL)
-                       skb_set_owner_w(skbn, skb->sk);
-
-               consume_skb(skb);
-               skb = skbn;
        }
 
        ptr = skb_push(skb, headroom);
index b40e0bc..d0b2e09 100644 (file)
@@ -441,24 +441,17 @@ put:
 struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
        ax25_address *dest, ax25_digi *digi)
 {
-       struct sk_buff *skbn;
        unsigned char *bp;
        int len;
 
        len = digi->ndigi * AX25_ADDR_LEN;
 
-       if (skb_headroom(skb) < len) {
-               if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
+       if (unlikely(skb_headroom(skb) < len)) {
+               skb = skb_expand_head(skb, len);
+               if (!skb) {
                        printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
                        return NULL;
                }
-
-               if (skb->sk != NULL)
-                       skb_set_owner_w(skbn, skb->sk);
-
-               consume_skb(skb);
-
-               skb = skbn;
        }
 
        bp = skb_push(skb, len);
index 63d42dc..2b639c8 100644 (file)
@@ -2274,8 +2274,7 @@ out:
        if (primary_if)
                batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -2446,8 +2445,7 @@ out:
        if (primary_if)
                batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 8c95a11..7976a04 100644 (file)
@@ -984,8 +984,7 @@ out:
        if (primary_if)
                batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 007f282..36a98d3 100644 (file)
@@ -557,8 +557,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
 out:
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 923e219..0158f26 100644 (file)
@@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
                upper = netdev_master_upper_dev_get_rcu(upper);
        } while (upper && !(upper->priv_flags & IFF_EBRIDGE));
 
-       if (upper)
-               dev_hold(upper);
+       dev_hold(upper);
        rcu_read_unlock();
 
        return upper;
@@ -509,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev,
        }
 
 out:
-       if (bridge)
-               dev_put(bridge);
+       dev_put(bridge);
 
        return ret4 + ret6;
 }
@@ -2239,8 +2237,7 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
        }
 
 out:
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        if (!ret && primary_if)
                *primary_if = hard_iface;
index da72494..6a4d3f4 100644 (file)
@@ -799,12 +799,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (hardif)
                batadv_hardif_put(hardif);
-       if (hard_iface)
-               dev_put(hard_iface);
+       dev_put(hard_iface);
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -1412,12 +1410,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (hardif)
                batadv_hardif_put(hardif);
-       if (hard_iface)
-               dev_put(hard_iface);
+       dev_put(hard_iface);
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 434b4f0..711fe5a 100644 (file)
@@ -820,8 +820,7 @@ check_roaming:
 out:
        if (in_hardif)
                batadv_hardif_put(in_hardif);
-       if (in_dev)
-               dev_put(in_dev);
+       dev_put(in_dev);
        if (tt_local)
                batadv_tt_local_entry_put(tt_local);
        if (tt_global)
@@ -1217,8 +1216,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        cb->args[0] = bucket;
        cb->args[1] = idx;
@@ -2005,8 +2003,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        cb->args[0] = bucket;
        cb->args[1] = idx;
index 2560ed2..e1a545c 100644 (file)
@@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev);
 /* Unregister HCI device */
 void hci_unregister_dev(struct hci_dev *hdev)
 {
-       int id;
-
        BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
 
        hci_dev_set_flag(hdev, HCI_UNREGISTER);
 
-       id = hdev->id;
-
        write_lock(&hci_dev_list_lock);
        list_del(&hdev->list);
        write_unlock(&hci_dev_list_lock);
@@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev)
        }
 
        device_del(&hdev->dev);
+       /* Actual cleanup is deferred until hci_cleanup_dev(). */
+       hci_dev_put(hdev);
+}
+EXPORT_SYMBOL(hci_unregister_dev);
 
+/* Cleanup HCI device */
+void hci_cleanup_dev(struct hci_dev *hdev)
+{
        debugfs_remove_recursive(hdev->debugfs);
        kfree_const(hdev->hw_info);
        kfree_const(hdev->fw_info);
@@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
        hci_blocked_keys_clear(hdev);
        hci_dev_unlock(hdev);
 
-       hci_dev_put(hdev);
-
-       ida_simple_remove(&hci_index_ida, id);
+       ida_simple_remove(&hci_index_ida, hdev->id);
 }
-EXPORT_SYMBOL(hci_unregister_dev);
 
 /* Suspend HCI device */
 int hci_suspend_dev(struct hci_dev *hdev)
index b04a5a0..f1128c2 100644 (file)
@@ -59,6 +59,17 @@ struct hci_pinfo {
        char              comm[TASK_COMM_LEN];
 };
 
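+/* Resolve the socket's bound hdev, rejecting a device being unregistered */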
+static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
+{
+       struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+       if (!hdev)
+               return ERR_PTR(-EBADFD);
+       if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+               return ERR_PTR(-EPIPE);
+       return hdev;
+}
+
 void hci_sock_set_flag(struct sock *sk, int nr)
 {
        set_bit(nr, &hci_pi(sk)->flags);
@@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
        if (event == HCI_DEV_UNREG) {
                struct sock *sk;
 
-               /* Detach sockets from device */
+               /* Wake up sockets using this dead device */
                read_lock(&hci_sk_list.lock);
                sk_for_each(sk, &hci_sk_list.head) {
-                       lock_sock(sk);
                        if (hci_pi(sk)->hdev == hdev) {
-                               hci_pi(sk)->hdev = NULL;
                                sk->sk_err = EPIPE;
-                               sk->sk_state = BT_OPEN;
                                sk->sk_state_change(sk);
-
-                               hci_dev_put(hdev);
                        }
-                       release_sock(sk);
                }
                read_unlock(&hci_sk_list.lock);
        }
@@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg)
 static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
                                unsigned long arg)
 {
-       struct hci_dev *hdev = hci_pi(sk)->hdev;
+       struct hci_dev *hdev = hci_hdev_from_sock(sk);
 
-       if (!hdev)
-               return -EBADFD;
+       if (IS_ERR(hdev))
+               return PTR_ERR(hdev);
 
        if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
                return -EBUSY;
@@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 
        lock_sock(sk);
 
+       /* Allow detaching from a dead device and attaching to a live device,
+        * if the caller wants to re-bind (instead of close) this socket in
+        * response to the hci_sock_dev_event(HCI_DEV_UNREG) notification.
+        */
+       hdev = hci_pi(sk)->hdev;
+       if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+               hci_pi(sk)->hdev = NULL;
+               sk->sk_state = BT_OPEN;
+               hci_dev_put(hdev);
+       }
+       hdev = NULL;
+
        if (sk->sk_state == BT_BOUND) {
                err = -EALREADY;
                goto done;
@@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
 
        lock_sock(sk);
 
-       hdev = hci_pi(sk)->hdev;
-       if (!hdev) {
-               err = -EBADFD;
+       hdev = hci_hdev_from_sock(sk);
+       if (IS_ERR(hdev)) {
+               err = PTR_ERR(hdev);
                goto done;
        }
 
@@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
                goto done;
        }
 
-       hdev = hci_pi(sk)->hdev;
-       if (!hdev) {
-               err = -EBADFD;
+       hdev = hci_hdev_from_sock(sk);
+       if (IS_ERR(hdev)) {
+               err = PTR_ERR(hdev);
                goto done;
        }
 
index 9874844..b69d88b 100644 (file)
@@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 static void bt_host_release(struct device *dev)
 {
        struct hci_dev *hdev = to_hci_dev(dev);
+
+       if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+               hci_cleanup_dev(hdev);
        kfree(hdev);
        module_put(THIS_MODULE);
 }
index b488e27..335e1d8 100644 (file)
@@ -88,17 +88,19 @@ reset:
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
                        u32 *retval, u32 *time, bool xdp)
 {
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+       struct bpf_prog_array_item item = {.prog = prog};
+       struct bpf_run_ctx *old_ctx;
+       struct bpf_cg_run_ctx run_ctx;
        struct bpf_test_timer t = { NO_MIGRATE };
        enum bpf_cgroup_storage_type stype;
        int ret;
 
        for_each_cgroup_storage_type(stype) {
-               storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
-               if (IS_ERR(storage[stype])) {
-                       storage[stype] = NULL;
+               item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+               if (IS_ERR(item.cgroup_storage[stype])) {
+                       item.cgroup_storage[stype] = NULL;
                        for_each_cgroup_storage_type(stype)
-                               bpf_cgroup_storage_free(storage[stype]);
+                               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
                        return -ENOMEM;
                }
        }
@@ -107,22 +109,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
                repeat = 1;
 
        bpf_test_timer_enter(&t);
+       old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        do {
-               ret = bpf_cgroup_storage_set(storage);
-               if (ret)
-                       break;
-
+               run_ctx.prog_item = &item;
                if (xdp)
                        *retval = bpf_prog_run_xdp(prog, ctx);
                else
                        *retval = BPF_PROG_RUN(prog, ctx);
-
-               bpf_cgroup_storage_unset();
        } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+       bpf_reset_run_ctx(old_ctx);
        bpf_test_timer_leave(&t);
 
        for_each_cgroup_storage_type(stype)
-               bpf_cgroup_storage_free(storage[stype]);
+               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
 
        return ret;
 }
@@ -764,8 +763,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
            prog->expected_attach_type == BPF_XDP_CPUMAP)
                return -EINVAL;
-       if (kattr->test.ctx_in || kattr->test.ctx_out)
-               return -EINVAL;
+
        ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
index 8fb5dca..c8ae823 100644 (file)
@@ -166,7 +166,8 @@ static int br_switchdev_event(struct notifier_block *unused,
        case SWITCHDEV_FDB_ADD_TO_BRIDGE:
                fdb_info = ptr;
                err = br_fdb_external_learn_add(br, p, fdb_info->addr,
-                                               fdb_info->vid, false);
+                                               fdb_info->vid,
+                                               fdb_info->is_local, false);
                if (err) {
                        err = notifier_from_errno(err);
                        break;
@@ -201,6 +202,48 @@ static struct notifier_block br_switchdev_notifier = {
        .notifier_call = br_switchdev_event,
 };
 
+/* called under rtnl_mutex */
+static int br_switchdev_blocking_event(struct notifier_block *nb,
+                                      unsigned long event, void *ptr)
+{
+       struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+       struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+       struct switchdev_notifier_brport_info *brport_info;
+       const struct switchdev_brport *b;
+       struct net_bridge_port *p;
+       int err = NOTIFY_DONE;
+
+       p = br_port_get_rtnl(dev);
+       if (!p)
+               goto out;
+
+       switch (event) {
+       case SWITCHDEV_BRPORT_OFFLOADED:
+               brport_info = ptr;
+               b = &brport_info->brport;
+
+               err = br_switchdev_port_offload(p, b->dev, b->ctx,
+                                               b->atomic_nb, b->blocking_nb,
+                                               b->tx_fwd_offload, extack);
+               err = notifier_from_errno(err);
+               break;
+       case SWITCHDEV_BRPORT_UNOFFLOADED:
+               brport_info = ptr;
+               b = &brport_info->brport;
+
+               br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
+                                           b->blocking_nb);
+               break;
+       }
+
+out:
+       return err;
+}
+
+static struct notifier_block br_switchdev_blocking_notifier = {
+       .notifier_call = br_switchdev_blocking_event,
+};
+
 /* br_boolopt_toggle - change user-controlled boolean option
  *
  * @br: bridge device
@@ -355,10 +398,14 @@ static int __init br_init(void)
        if (err)
                goto err_out4;
 
-       err = br_netlink_init();
+       err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
        if (err)
                goto err_out5;
 
+       err = br_netlink_init();
+       if (err)
+               goto err_out6;
+
        brioctl_set(br_ioctl_stub);
 
 #if IS_ENABLED(CONFIG_ATM_LANE)
@@ -373,6 +420,8 @@ static int __init br_init(void)
 
        return 0;
 
+err_out6:
+       unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
 err_out5:
        unregister_switchdev_notifier(&br_switchdev_notifier);
 err_out4:
@@ -392,6 +441,7 @@ static void __exit br_deinit(void)
 {
        stp_proto_unregister(&br_stp_proto);
        br_netlink_fini();
+       unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
        unregister_switchdev_notifier(&br_switchdev_notifier);
        unregister_netdevice_notifier(&br_device_notifier);
        brioctl_set(NULL);
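
With the blocking notifier in place, switchdev drivers stop calling into the bridge directly: they go through the net/switchdev wrapper, which packs the arguments into a switchdev_notifier_brport_info and raises SWITCHDEV_BRPORT_OFFLOADED, handled above by br_switchdev_blocking_event(). A hedged driver-side sketch; the foo_* names are hypothetical and the two notifier blocks are assumed to be defined elsewhere in the driver:

static int foo_port_bridge_join(struct net_device *brport_dev,
                                struct net_device *dev,
                                struct foo_priv *priv,
                                struct netlink_ext_ack *extack)
{
        /* Fires SWITCHDEV_BRPORT_OFFLOADED on the blocking chain; the
         * bridge handler turns it into br_switchdev_port_offload().
         */
        return switchdev_bridge_port_offload(brport_dev, dev, priv,
                                             &foo_switchdev_nb,
                                             &foo_switchdev_blocking_nb,
                                             false, extack);
}
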
index 4ff8c67..ddd09f5 100644 (file)
@@ -745,7 +745,7 @@ static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
        item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
        item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
        item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
-       item.info.dev = item.is_local ? br->dev : p->dev;
+       item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
        item.info.ctx = ctx;
 
        err = nb->notifier_call(nb, action, &item);
@@ -1011,7 +1011,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 
 static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
                        struct net_bridge_port *p, const unsigned char *addr,
-                       u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[])
+                       u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
+                       struct netlink_ext_ack *extack)
 {
        int err = 0;
 
@@ -1030,7 +1031,15 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
                rcu_read_unlock();
                local_bh_enable();
        } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
-               err = br_fdb_external_learn_add(br, p, addr, vid, true);
+               if (!p && !(ndm->ndm_state & NUD_PERMANENT)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "FDB entry towards bridge must be permanent");
+                       return -EINVAL;
+               }
+
+               err = br_fdb_external_learn_add(br, p, addr, vid,
+                                               ndm->ndm_state & NUD_PERMANENT,
+                                               true);
        } else {
                spin_lock_bh(&br->hash_lock);
                err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
@@ -1102,9 +1111,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                }
 
                /* VID was specified, so use it. */
-               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
+                                  extack);
        } else {
-               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
+                                  extack);
                if (err || !vg || !vg->num_vlans)
                        goto out;
 
@@ -1116,7 +1127,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                        if (!br_vlan_should_use(v))
                                continue;
                        err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
-                                          nfea_tb);
+                                          nfea_tb, extack);
                        if (err)
                                goto out;
                }
@@ -1256,7 +1267,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
 }
 
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid,
+                             const unsigned char *addr, u16 vid, bool is_local,
                              bool swdev_notify)
 {
        struct net_bridge_fdb_entry *fdb;
@@ -1273,6 +1284,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 
                if (swdev_notify)
                        flags |= BIT(BR_FDB_ADDED_BY_USER);
+
+               if (is_local)
+                       flags |= BIT(BR_FDB_LOCAL);
+
                fdb = fdb_create(br, p, addr, vid, flags);
                if (!fdb) {
                        err = -ENOMEM;
@@ -1299,6 +1314,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
                if (swdev_notify)
                        set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
 
+               if (is_local)
+                       set_bit(BR_FDB_LOCAL, &fdb->flags);
+
                if (modified)
                        fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
        }
index 86f6d7e..67c6024 100644 (file)
@@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name)
        dev_net_set(dev, net);
        dev->rtnl_link_ops = &br_link_ops;
 
-       res = register_netdev(dev);
+       res = register_netdevice(dev);
        if (res)
                free_netdev(dev);
        return res;
@@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name)
        struct net_device *dev;
        int ret = 0;
 
-       rtnl_lock();
        dev = __dev_get_by_name(net, name);
        if (dev == NULL)
                ret =  -ENXIO;  /* Could not find device */
@@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name)
        else
                br_dev_delete(dev, NULL);
 
-       rtnl_unlock();
        return ret;
 }
 
index 46a24c2..793b0db 100644 (file)
@@ -351,7 +351,7 @@ static int old_deviceless(struct net *net, void __user *uarg)
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
 
-               if (copy_from_user(buf, uarg, IFNAMSIZ))
+               if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ))
                        return -EFAULT;
 
                buf[IFNAMSIZ-1] = 0;
@@ -369,33 +369,44 @@ static int old_deviceless(struct net *net, void __user *uarg)
 int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
                  struct ifreq *ifr, void __user *uarg)
 {
+       int ret = -EOPNOTSUPP;
+
+       rtnl_lock();
+
        switch (cmd) {
        case SIOCGIFBR:
        case SIOCSIFBR:
-               return old_deviceless(net, uarg);
-
+               ret = old_deviceless(net, uarg);
+               break;
        case SIOCBRADDBR:
        case SIOCBRDELBR:
        {
                char buf[IFNAMSIZ];
 
-               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-                       return -EPERM;
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+                       ret = -EPERM;
+                       break;
+               }
 
-               if (copy_from_user(buf, uarg, IFNAMSIZ))
-                       return -EFAULT;
+               if (copy_from_user(buf, uarg, IFNAMSIZ)) {
+                       ret = -EFAULT;
+                       break;
+               }
 
                buf[IFNAMSIZ-1] = 0;
                if (cmd == SIOCBRADDBR)
-                       return br_add_bridge(net, buf);
-
-               return br_del_bridge(net, buf);
+                       ret = br_add_bridge(net, buf);
+               else
+                       ret = br_del_bridge(net, buf);
        }
-
+               break;
        case SIOCBRADDIF:
        case SIOCBRDELIF:
-               return add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
-
+               ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
+               break;
        }
-       return -EOPNOTSUPP;
+
+       rtnl_unlock();
+
+       return ret;
 }
index c939631..51991f1 100644 (file)
@@ -770,7 +770,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid,
+                             const unsigned char *addr, u16 vid, bool is_local,
                              bool swdev_notify);
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              const unsigned char *addr, u16 vid,
@@ -1880,6 +1880,17 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; }
 
 /* br_switchdev.c */
 #ifdef CONFIG_NET_SWITCHDEV
+int br_switchdev_port_offload(struct net_bridge_port *p,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack);
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                                struct notifier_block *atomic_nb,
+                                struct notifier_block *blocking_nb);
+
 bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
 
 void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
@@ -1908,6 +1919,24 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
        skb->offload_fwd_mark = 0;
 }
 #else
+static inline int
+br_switchdev_port_offload(struct net_bridge_port *p,
+                         struct net_device *dev, const void *ctx,
+                         struct notifier_block *atomic_nb,
+                         struct notifier_block *blocking_nb,
+                         bool tx_fwd_offload,
+                         struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void
+br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                           struct notifier_block *atomic_nb,
+                           struct notifier_block *blocking_nb)
+{
+}
+
 static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
 {
        return false;
index 023de0e..6bf518d 100644 (file)
@@ -134,7 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br,
                .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
                .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
        };
-       struct net_device *dev = info.is_local ? br->dev : dst->dev;
+       struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
 
        switch (type) {
        case RTM_DELNEIGH:
@@ -312,23 +312,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
 /* Let the bridge know that this port is offloaded, so that it can assign a
  * switchdev hardware domain to it.
  */
-int switchdev_bridge_port_offload(struct net_device *brport_dev,
-                                 struct net_device *dev, const void *ctx,
-                                 struct notifier_block *atomic_nb,
-                                 struct notifier_block *blocking_nb,
-                                 bool tx_fwd_offload,
-                                 struct netlink_ext_ack *extack)
+int br_switchdev_port_offload(struct net_bridge_port *p,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack)
 {
        struct netdev_phys_item_id ppid;
-       struct net_bridge_port *p;
        int err;
 
-       ASSERT_RTNL();
-
-       p = br_port_get_rtnl(brport_dev);
-       if (!p)
-               return -ENODEV;
-
        err = dev_get_port_parent_id(dev, &ppid, false);
        if (err)
                return err;
@@ -348,23 +341,12 @@ out_switchdev_del:
 
        return err;
 }
-EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
 
-void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
-                                    const void *ctx,
-                                    struct notifier_block *atomic_nb,
-                                    struct notifier_block *blocking_nb)
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                                struct notifier_block *atomic_nb,
+                                struct notifier_block *blocking_nb)
 {
-       struct net_bridge_port *p;
-
-       ASSERT_RTNL();
-
-       p = br_port_get_rtnl(brport_dev);
-       if (!p)
-               return;
-
        nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
 
        nbp_switchdev_del(p);
 }
-EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
index 12369b6..f6df208 100644 (file)
 
 struct j1939_session;
 enum j1939_sk_errqueue_type {
-       J1939_ERRQUEUE_ACK,
-       J1939_ERRQUEUE_SCHED,
-       J1939_ERRQUEUE_ABORT,
+       J1939_ERRQUEUE_TX_ACK,
+       J1939_ERRQUEUE_TX_SCHED,
+       J1939_ERRQUEUE_TX_ABORT,
+       J1939_ERRQUEUE_RX_RTS,
+       J1939_ERRQUEUE_RX_DPO,
+       J1939_ERRQUEUE_RX_ABORT,
 };
 
 /* j1939 devices */
@@ -87,6 +90,7 @@ struct j1939_priv {
        struct list_head j1939_socks;
 
        struct kref rx_kref;
+       u32 rx_tskey;
 };
 
 void j1939_ecu_put(struct j1939_ecu *ecu);
index b904c06..6dff451 100644 (file)
@@ -905,20 +905,33 @@ failure:
        return NULL;
 }
 
-static size_t j1939_sk_opt_stats_get_size(void)
+static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type)
 {
-       return
-               nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
-               0;
+       switch (type) {
+       case J1939_ERRQUEUE_RX_RTS:
+               return
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */
+                       nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */
+                       nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */
+                       nla_total_size(sizeof(u8)) +  /* J1939_NLA_SRC_ADDR */
+                       nla_total_size(sizeof(u8)) +  /* J1939_NLA_DEST_ADDR */
+                       0;
+       default:
+               return
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+                       0;
+       }
 }
 
 static struct sk_buff *
-j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session,
+                                   enum j1939_sk_errqueue_type type)
 {
        struct sk_buff *stats;
        u32 size;
 
-       stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+       stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC);
        if (!stats)
                return NULL;
 
@@ -928,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
                size = min(session->pkt.tx_acked * 7,
                           session->total_message_size);
 
-       nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+       switch (type) {
+       case J1939_ERRQUEUE_RX_RTS:
+               nla_put_u32(stats, J1939_NLA_TOTAL_SIZE,
+                           session->total_message_size);
+               nla_put_u32(stats, J1939_NLA_PGN,
+                           session->skcb.addr.pgn);
+               nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME,
+                                 session->skcb.addr.src_name, J1939_NLA_PAD);
+               nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME,
+                                 session->skcb.addr.dst_name, J1939_NLA_PAD);
+               nla_put_u8(stats, J1939_NLA_SRC_ADDR,
+                          session->skcb.addr.sa);
+               nla_put_u8(stats, J1939_NLA_DEST_ADDR,
+                          session->skcb.addr.da);
+               break;
+       default:
+               nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+       }
 
        return stats;
 }
 
-void j1939_sk_errqueue(struct j1939_session *session,
-                      enum j1939_sk_errqueue_type type)
+static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+                               enum j1939_sk_errqueue_type type)
 {
        struct j1939_priv *priv = session->priv;
-       struct sock *sk = session->sk;
        struct j1939_sock *jsk;
        struct sock_exterr_skb *serr;
        struct sk_buff *skb;
        char *state = "UNK";
        int err;
 
-       /* currently we have no sk for the RX session */
-       if (!sk)
-               return;
-
        jsk = j1939_sk(sk);
 
        if (!(jsk->state & J1939_SOCK_ERRQUEUE))
                return;
 
-       skb = j1939_sk_get_timestamping_opt_stats(session);
+       switch (type) {
+       case J1939_ERRQUEUE_TX_ACK:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_SCHED:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_ABORT:
+               break;
+       case J1939_ERRQUEUE_RX_RTS:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_DPO:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_ABORT:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+                       return;
+               break;
+       default:
+               netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+       }
+
+       skb = j1939_sk_get_timestamping_opt_stats(session, type);
        if (!skb)
                return;
 
@@ -964,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session,
        serr = SKB_EXT_ERR(skb);
        memset(serr, 0, sizeof(*serr));
        switch (type) {
-       case J1939_ERRQUEUE_ACK:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
-                       kfree_skb(skb);
-                       return;
-               }
-
+       case J1939_ERRQUEUE_TX_ACK:
                serr->ee.ee_errno = ENOMSG;
                serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
                serr->ee.ee_info = SCM_TSTAMP_ACK;
-               state = "ACK";
+               state = "TX ACK";
                break;
-       case J1939_ERRQUEUE_SCHED:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
-                       kfree_skb(skb);
-                       return;
-               }
-
+       case J1939_ERRQUEUE_TX_SCHED:
                serr->ee.ee_errno = ENOMSG;
                serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
                serr->ee.ee_info = SCM_TSTAMP_SCHED;
-               state = "SCH";
+               state = "TX SCH";
                break;
-       case J1939_ERRQUEUE_ABORT:
+       case J1939_ERRQUEUE_TX_ABORT:
                serr->ee.ee_errno = session->err;
                serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
                serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
-               state = "ABT";
+               state = "TX ABT";
+               break;
+       case J1939_ERRQUEUE_RX_RTS:
+               serr->ee.ee_errno = ENOMSG;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_RTS;
+               state = "RX RTS";
+               break;
+       case J1939_ERRQUEUE_RX_DPO:
+               serr->ee.ee_errno = ENOMSG;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_DPO;
+               state = "RX DPO";
+               break;
+       case J1939_ERRQUEUE_RX_ABORT:
+               serr->ee.ee_errno = session->err;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_ABORT;
+               state = "RX ABT";
                break;
-       default:
-               netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
        }
 
        serr->opt_stats = true;
@@ -1008,6 +1062,27 @@ void j1939_sk_errqueue(struct j1939_session *session,
                kfree_skb(skb);
 };
 
+void j1939_sk_errqueue(struct j1939_session *session,
+                      enum j1939_sk_errqueue_type type)
+{
+       struct j1939_priv *priv = session->priv;
+       struct j1939_sock *jsk;
+
+       if (session->sk) {
+               /* send TX notifications to the socket of origin  */
+               __j1939_sk_errqueue(session, session->sk, type);
+               return;
+       }
+
+       /* spread RX notifications to all sockets subscribed to this session */
+       spin_lock_bh(&priv->j1939_socks_lock);
+       list_for_each_entry(jsk, &priv->j1939_socks, list) {
+               if (j1939_sk_recv_match_one(jsk, &session->skcb))
+                       __j1939_sk_errqueue(session, &jsk->sk, type);
+       }
+       spin_unlock_bh(&priv->j1939_socks_lock);
+};
+
 void j1939_sk_send_loop_abort(struct sock *sk, int err)
 {
        sk->sk_err = err;
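
Because RX sessions have no socket of their own, the new RX_RTS/RX_DPO/RX_ABORT events are fanned out to every socket whose filters match the session. A minimal receiver-side sketch, assuming a bound SOL_CAN_J1939 socket that has enabled SO_J1939_ERRQUEUE and SOF_TIMESTAMPING_RX_SOFTWARE:

#include <linux/can/j1939.h>
#include <linux/errqueue.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void drain_one_notification(int fd)
{
        char buf[64], cbuf[256];
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        struct msghdr msg = {
                .msg_iov = &iov, .msg_iovlen = 1,
                .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cm;

        if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
                return;

        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                struct sock_extended_err *err;

                if (cm->cmsg_level != SOL_CAN_J1939 ||
                    cm->cmsg_type != SCM_J1939_ERRQUEUE)
                        continue;
                err = (struct sock_extended_err *)CMSG_DATA(cm);
                if (err->ee_info == J1939_EE_INFO_RX_RTS)
                        /* a new incoming session was announced; the RTS
                         * opt_stats carry PGN, total size and addresses
                         */
                        ;
        }
}
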
index efdf79d..bb5c4b8 100644 (file)
@@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session)
 
 static void j1939_session_destroy(struct j1939_session *session)
 {
-       if (session->err)
-               j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
-       else
-               j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+       if (session->transmission) {
+               if (session->err)
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
+               else
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK);
+       } else if (session->err) {
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+       }
 
        netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
 
@@ -822,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
                memcpy(&dat[1], &tpdat[offset], len);
                ret = j1939_tp_tx_dat(session, dat, len + 1);
                if (ret < 0) {
-                       /* ENOBUS == CAN interface TX queue is full */
+                       /* ENOBUFS == CAN interface TX queue is full */
                        if (ret != -ENOBUFS)
                                netdev_alert(priv->ndev,
                                             "%s: 0x%p: queue data error: %i\n",
@@ -1044,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session)
        if (ret)
                goto out_free;
 
-       j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED);
        j1939_sk_queue_activate_next(session);
 
  out_free:
@@ -1076,11 +1080,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session)
 
 static bool j1939_session_deactivate(struct j1939_session *session)
 {
+       struct j1939_priv *priv = session->priv;
        bool active;
 
-       j1939_session_list_lock(session->priv);
+       j1939_session_list_lock(priv);
+       /* This function should be called with a session ref-count of at
+        * least 2.
+        */
+       WARN_ON_ONCE(kref_read(&session->kref) < 2);
        active = j1939_session_deactivate_locked(session);
-       j1939_session_list_unlock(session->priv);
+       j1939_session_list_unlock(priv);
 
        return active;
 }
@@ -1111,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session,
 
        if (session->sk)
                j1939_sk_send_loop_abort(session->sk, session->err);
+       else
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
 }
 
 static void j1939_session_cancel(struct j1939_session *session,
@@ -1325,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
        session->err = j1939_xtp_abort_to_errno(priv, abort);
        if (session->sk)
                j1939_sk_send_loop_abort(session->sk, session->err);
+       else
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
        j1939_session_deactivate_activate_next(session);
 
 abort_put:
@@ -1433,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
                if (session->transmission) {
                        if (session->pkt.tx_acked)
                                j1939_sk_errqueue(session,
-                                                 J1939_ERRQUEUE_SCHED);
+                                                 J1939_ERRQUEUE_TX_SCHED);
                        j1939_session_txtimer_cancel(session);
                        j1939_tp_schedule_txtimer(session, 0);
                }
@@ -1625,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
        session->pkt.rx = 0;
        session->pkt.tx = 0;
 
+       session->tskey = priv->rx_tskey++;
+       j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
+
        WARN_ON_ONCE(j1939_session_activate(session));
 
        return session;
@@ -1747,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
        session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
        session->last_cmd = dat[0];
        j1939_tp_set_rxtimeout(session, 750);
+
+       if (!session->transmission)
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO);
 }
 
 static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
@@ -1874,7 +1893,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                if (!session->transmission)
                        j1939_tp_schedule_txtimer(session, 0);
        } else {
-               j1939_tp_set_rxtimeout(session, 250);
+               j1939_tp_set_rxtimeout(session, 750);
        }
        session->last_cmd = 0xff;
        consume_skb(se_skb);
index ed4fcb7..7105fa4 100644 (file)
@@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                                return -EFAULT;
                }
 
+               rtnl_lock();
                lock_sock(sk);
 
-               if (ro->bound && ro->ifindex)
+               if (ro->bound && ro->ifindex) {
                        dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+                       if (!dev) {
+                               if (count > 1)
+                                       kfree(filter);
+                               err = -ENODEV;
+                               goto out_fil;
+                       }
+               }
 
                if (ro->bound) {
                        /* (try to) register the new filters */
@@ -584,10 +592,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->count  = count;
 
  out_fil:
-               if (dev)
-                       dev_put(dev);
-
+               dev_put(dev);
                release_sock(sk);
+               rtnl_unlock();
 
                break;
 
@@ -600,10 +607,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
                err_mask &= CAN_ERR_MASK;
 
+               rtnl_lock();
                lock_sock(sk);
 
-               if (ro->bound && ro->ifindex)
+               if (ro->bound && ro->ifindex) {
                        dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+                       if (!dev) {
+                               err = -ENODEV;
+                               goto out_err;
+                       }
+               }
 
                /* remove current error mask */
                if (ro->bound) {
@@ -623,10 +636,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->err_mask = err_mask;
 
  out_err:
-               if (dev)
-                       dev_put(dev);
-
+               dev_put(dev);
                release_sock(sk);
+               rtnl_unlock();
 
                break;
 
index 64e1a5f..8865079 100644 (file)
@@ -676,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po)
 }
 EXPORT_SYMBOL(dev_remove_offload);
 
-/******************************************************************************
- *
- *                   Device Boot-time Settings Routines
- *
- ******************************************************************************/
-
-/* Boot time configuration table */
-static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
-
-/**
- *     netdev_boot_setup_add   - add new setup entry
- *     @name: name of the device
- *     @map: configured settings for the device
- *
- *     Adds new setup entry to the dev_boot_setup list.  The function
- *     returns 0 on error and 1 on success.  This is a generic routine to
- *     all netdevices.
- */
-static int netdev_boot_setup_add(char *name, struct ifmap *map)
-{
-       struct netdev_boot_setup *s;
-       int i;
-
-       s = dev_boot_setup;
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
-               if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
-                       memset(s[i].name, 0, sizeof(s[i].name));
-                       strlcpy(s[i].name, name, IFNAMSIZ);
-                       memcpy(&s[i].map, map, sizeof(s[i].map));
-                       break;
-               }
-       }
-
-       return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
-}
-
-/**
- * netdev_boot_setup_check     - check boot time settings
- * @dev: the netdevice
- *
- * Check boot time settings for the device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found, 1 if they are.
- */
-int netdev_boot_setup_check(struct net_device *dev)
-{
-       struct netdev_boot_setup *s = dev_boot_setup;
-       int i;
-
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
-               if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
-                   !strcmp(dev->name, s[i].name)) {
-                       dev->irq = s[i].map.irq;
-                       dev->base_addr = s[i].map.base_addr;
-                       dev->mem_start = s[i].map.mem_start;
-                       dev->mem_end = s[i].map.mem_end;
-                       return 1;
-               }
-       }
-       return 0;
-}
-EXPORT_SYMBOL(netdev_boot_setup_check);
-
-
-/**
- * netdev_boot_base    - get address from boot time settings
- * @prefix: prefix for network device
- * @unit: id for network device
- *
- * Check boot time settings for the base address of device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found.
- */
-unsigned long netdev_boot_base(const char *prefix, int unit)
-{
-       const struct netdev_boot_setup *s = dev_boot_setup;
-       char name[IFNAMSIZ];
-       int i;
-
-       sprintf(name, "%s%d", prefix, unit);
-
-       /*
-        * If device already registered then return base of 1
-        * to indicate not to probe for this interface
-        */
-       if (__dev_get_by_name(&init_net, name))
-               return 1;
-
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
-               if (!strcmp(name, s[i].name))
-                       return s[i].map.base_addr;
-       return 0;
-}
-
-/*
- * Saves at boot time configured settings for any netdevice.
- */
-int __init netdev_boot_setup(char *str)
-{
-       int ints[5];
-       struct ifmap map;
-
-       str = get_options(str, ARRAY_SIZE(ints), ints);
-       if (!str || !*str)
-               return 0;
-
-       /* Save settings */
-       memset(&map, 0, sizeof(map));
-       if (ints[0] > 0)
-               map.irq = ints[1];
-       if (ints[0] > 1)
-               map.base_addr = ints[2];
-       if (ints[0] > 2)
-               map.mem_start = ints[3];
-       if (ints[0] > 3)
-               map.mem_end = ints[4];
-
-       /* Add new entry to the list */
-       return netdev_boot_setup_add(str, &map);
-}
-
-__setup("netdev=", netdev_boot_setup);
-
 /*******************************************************************************
  *
  *                         Device Interface Subroutines
@@ -956,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 
        rcu_read_lock();
        dev = dev_get_by_name_rcu(net, name);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -1030,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 
        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, ifindex);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -3098,6 +2971,50 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
 #endif
 
+/**
+ *     netif_set_real_num_queues - set actual number of RX and TX queues used
+ *     @dev: Network device
+ *     @txq: Actual number of TX queues
+ *     @rxq: Actual number of RX queues
+ *
+ *     Set the real number of both TX and RX queues.
+ *     Does nothing if the number of queues is already correct.
+ */
+int netif_set_real_num_queues(struct net_device *dev,
+                             unsigned int txq, unsigned int rxq)
+{
+       unsigned int old_rxq = dev->real_num_rx_queues;
+       int err;
+
+       if (txq < 1 || txq > dev->num_tx_queues ||
+           rxq < 1 || rxq > dev->num_rx_queues)
+               return -EINVAL;
+
+       /* Start from increases, so the error path only does decreases -
+        * decreases can't fail.
+        */
+       if (rxq > dev->real_num_rx_queues) {
+               err = netif_set_real_num_rx_queues(dev, rxq);
+               if (err)
+                       return err;
+       }
+       if (txq > dev->real_num_tx_queues) {
+               err = netif_set_real_num_tx_queues(dev, txq);
+               if (err)
+                       goto undo_rx;
+       }
+       if (rxq < dev->real_num_rx_queues)
+               WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
+       if (txq < dev->real_num_tx_queues)
+               WARN_ON(netif_set_real_num_tx_queues(dev, txq));
+
+       return 0;
+undo_rx:
+       WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
+       return err;
+}
+EXPORT_SYMBOL(netif_set_real_num_queues);
+
 /**
  * netif_get_num_default_rss_queues - default number of RSS queues
  *
@@ -5878,7 +5795,7 @@ static void flush_all_backlogs(void)
         */
        ASSERT_RTNL();
 
-       get_online_cpus();
+       cpus_read_lock();
 
        cpumask_clear(&flush_cpus);
        for_each_online_cpu(cpu) {
@@ -5896,7 +5813,7 @@ static void flush_all_backlogs(void)
        for_each_cpu(cpu, &flush_cpus)
                flush_work(per_cpu_ptr(&flush_works, cpu));
 
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
@@ -7615,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 {
        struct netdev_adjacent *lower;
 
-       WARN_ON_ONCE(!rcu_read_lock_held());
+       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 
        lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
 
@@ -9380,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
        return dev->xdp_state[mode].prog;
 }
 
-static u8 dev_xdp_prog_count(struct net_device *dev)
+u8 dev_xdp_prog_count(struct net_device *dev)
 {
        u8 count = 0;
        int i;
@@ -9390,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
                        count++;
        return count;
 }
+EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
 
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
 {
@@ -9483,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
 {
        unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
        struct bpf_prog *cur_prog;
+       struct net_device *upper;
+       struct list_head *iter;
        enum bpf_xdp_mode mode;
        bpf_op_t bpf_op;
        int err;
@@ -9521,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
                return -EBUSY;
        }
 
+       /* don't allow if an upper device already has a program */
+       netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+               if (dev_xdp_prog_count(upper) > 0) {
+                       NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+                       return -EEXIST;
+               }
+       }
+
        cur_prog = dev_xdp_prog(dev, mode);
        /* can't replace attached prog with link */
        if (link && cur_prog) {
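
netif_set_real_num_queues() saves drivers from hand-ordering the TX and RX resize calls and from unwinding a partial failure themselves. A hedged sketch of how a driver's open path might use it; the foo_* names are hypothetical:

static int foo_open(struct net_device *dev)
{
        struct foo_priv *priv = netdev_priv(dev);

        /* Applies whichever counts increase first, shrinks second, and
         * rolls back the RX increase if the TX resize fails.
         */
        return netif_set_real_num_queues(dev, priv->num_tx_rings,
                                         priv->num_rx_rings);
}
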
index 4035bce..0e87237 100644 (file)
@@ -379,7 +379,14 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
        case SIOCBRDELIF:
                if (!netif_device_present(dev))
                        return -ENODEV;
-               return br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+               if (!netif_is_bridge_master(dev))
+                       return -EOPNOTSUPP;
+               dev_hold(dev);
+               rtnl_unlock();
+               err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+               dev_put(dev);
+               rtnl_lock();
+               return err;
 
        case SIOCSHWTSTAMP:
                err = net_hwtstamp_validate(ifr);
index fd2fc2b..b02d54a 100644 (file)
@@ -804,10 +804,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
        return 0;
 }
 
-static int
-devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops,
-                            struct devlink_port *port, struct sk_buff *msg,
-                            struct netlink_ext_ack *extack, bool *msg_updated)
+static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
+                                       struct devlink_port *port,
+                                       struct sk_buff *msg,
+                                       struct netlink_ext_ack *extack,
+                                       bool *msg_updated)
 {
        u8 hw_addr[MAX_ADDR_LEN];
        int hw_addr_len;
@@ -816,7 +817,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
        if (!ops->port_function_hw_addr_get)
                return 0;
 
-       err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+       err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+                                            extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -893,12 +895,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
               opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
 }
 
-static int
-devlink_port_fn_state_fill(struct devlink *devlink,
-                          const struct devlink_ops *ops,
-                          struct devlink_port *port, struct sk_buff *msg,
-                          struct netlink_ext_ack *extack,
-                          bool *msg_updated)
+static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
+                                     struct devlink_port *port,
+                                     struct sk_buff *msg,
+                                     struct netlink_ext_ack *extack,
+                                     bool *msg_updated)
 {
        enum devlink_port_fn_opstate opstate;
        enum devlink_port_fn_state state;
@@ -907,7 +908,7 @@ devlink_port_fn_state_fill(struct devlink *devlink,
        if (!ops->port_fn_state_get)
                return 0;
 
-       err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack);
+       err = ops->port_fn_state_get(port, &state, &opstate, extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -935,7 +936,6 @@ static int
 devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
                                   struct netlink_ext_ack *extack)
 {
-       struct devlink *devlink = port->devlink;
        const struct devlink_ops *ops;
        struct nlattr *function_attr;
        bool msg_updated = false;
@@ -945,13 +945,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
        if (!function_attr)
                return -EMSGSIZE;
 
-       ops = devlink->ops;
-       err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg,
-                                          extack, &msg_updated);
+       ops = port->devlink->ops;
+       err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
+                                          &msg_updated);
        if (err)
                goto out;
-       err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack,
-                                        &msg_updated);
+       err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
 out:
        if (err || !msg_updated)
                nla_nest_cancel(msg, function_attr);
@@ -1269,31 +1268,33 @@ out:
        return msg->len;
 }
 
-static int devlink_port_type_set(struct devlink *devlink,
-                                struct devlink_port *devlink_port,
+static int devlink_port_type_set(struct devlink_port *devlink_port,
                                 enum devlink_port_type port_type)
 
 {
        int err;
 
-       if (devlink->ops->port_type_set) {
-               if (port_type == devlink_port->type)
-                       return 0;
-               err = devlink->ops->port_type_set(devlink_port, port_type);
-               if (err)
-                       return err;
-               devlink_port->desired_type = port_type;
-               devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+       if (!devlink_port->devlink->ops->port_type_set)
+               return -EOPNOTSUPP;
+
+       if (port_type == devlink_port->type)
                return 0;
-       }
-       return -EOPNOTSUPP;
+
+       err = devlink_port->devlink->ops->port_type_set(devlink_port,
+                                                       port_type);
+       if (err)
+               return err;
+
+       devlink_port->desired_type = port_type;
+       devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+       return 0;
 }
 
-static int
-devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
-                                 const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+                                            const struct nlattr *attr,
+                                            struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops;
+       const struct devlink_ops *ops = port->devlink->ops;
        const u8 *hw_addr;
        int hw_addr_len;
 
@@ -1314,17 +1315,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *
                }
        }
 
-       ops = devlink->ops;
        if (!ops->port_function_hw_addr_set) {
                NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
                return -EOPNOTSUPP;
        }
 
-       return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+       return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+                                             extack);
 }
 
-static int devlink_port_fn_state_set(struct devlink *devlink,
-                                    struct devlink_port *port,
+static int devlink_port_fn_state_set(struct devlink_port *port,
                                     const struct nlattr *attr,
                                     struct netlink_ext_ack *extack)
 {
@@ -1332,18 +1332,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink,
        const struct devlink_ops *ops;
 
        state = nla_get_u8(attr);
-       ops = devlink->ops;
+       ops = port->devlink->ops;
        if (!ops->port_fn_state_set) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Function does not support state setting");
                return -EOPNOTSUPP;
        }
-       return ops->port_fn_state_set(devlink, port, state, extack);
+       return ops->port_fn_state_set(port, state, extack);
 }
 
-static int
-devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
-                         const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_set(struct devlink_port *port,
+                                    const struct nlattr *attr,
+                                    struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
        int err;
@@ -1357,7 +1357,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
 
        attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
        if (attr) {
-               err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+               err = devlink_port_function_hw_addr_set(port, attr, extack);
                if (err)
                        return err;
        }
@@ -1367,7 +1367,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
         */
        attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
        if (attr)
-               err = devlink_port_fn_state_set(devlink, port, attr, extack);
+               err = devlink_port_fn_state_set(port, attr, extack);
 
        if (!err)
                devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
@@ -1378,14 +1378,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
                                        struct genl_info *info)
 {
        struct devlink_port *devlink_port = info->user_ptr[1];
-       struct devlink *devlink = devlink_port->devlink;
        int err;
 
        if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
                enum devlink_port_type port_type;
 
                port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
-               err = devlink_port_type_set(devlink, devlink_port, port_type);
+               err = devlink_port_type_set(devlink_port, port_type);
                if (err)
                        return err;
        }
@@ -1394,7 +1393,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
                struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
                struct netlink_ext_ack *extack = info->extack;
 
-               err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+               err = devlink_port_function_set(devlink_port, attr, extack);
                if (err)
                        return err;
        }
@@ -8769,24 +8768,26 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
  *     @ops: ops
  *     @priv_size: size of user private data
  *     @net: net namespace
+ *     @dev: parent device
  *
  *     Allocate new devlink instance resources, including devlink index
  *     and name.
  */
 struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
-                                size_t priv_size, struct net *net)
+                                size_t priv_size, struct net *net,
+                                struct device *dev)
 {
        struct devlink *devlink;
 
-       if (WARN_ON(!ops))
-               return NULL;
-
+       WARN_ON(!ops || !dev);
        if (!devlink_reload_actions_valid(ops))
                return NULL;
 
        devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
        if (!devlink)
                return NULL;
+
+       devlink->dev = dev;
        devlink->ops = ops;
        xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
        write_pnet(&devlink->_net, net);
@@ -8811,12 +8812,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns);
  *     devlink_register - Register devlink instance
  *
  *     @devlink: devlink
- *     @dev: parent device
  */
-int devlink_register(struct devlink *devlink, struct device *dev)
+int devlink_register(struct devlink *devlink)
 {
-       WARN_ON(devlink->dev);
-       devlink->dev = dev;
        mutex_lock(&devlink_mutex);
        list_add_tail(&devlink->list, &devlink_list);
        devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -9327,18 +9325,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 
        switch (attrs->flavour) {
        case DEVLINK_PORT_FLAVOUR_PHYSICAL:
-       case DEVLINK_PORT_FLAVOUR_VIRTUAL:
                n = snprintf(name, len, "p%u", attrs->phys.port_number);
                if (n < len && attrs->split)
                        n += snprintf(name + n, len - n, "s%u",
                                      attrs->phys.split_subport_number);
-               if (!attrs->split)
-                       n = snprintf(name, len, "p%u", attrs->phys.port_number);
-               else
-                       n = snprintf(name, len, "p%us%u",
-                                    attrs->phys.port_number,
-                                    attrs->phys.split_subport_number);
-
                break;
        case DEVLINK_PORT_FLAVOUR_CPU:
        case DEVLINK_PORT_FLAVOUR_DSA:
@@ -9380,6 +9370,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
                n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
                             attrs->pci_sf.sf);
                break;
+       case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+               return -EOPNOTSUPP;
        }
 
        if (n >= len)
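
The parent device now has to be supplied when the devlink instance is allocated, and devlink_register() loses its dev argument. A sketch of the updated probe sequence, with hypothetical foo_* names; devlink_alloc() here is assumed to be the init_net convenience wrapper around the devlink_alloc_ns() shown above:

static int foo_probe(struct pci_dev *pdev)
{
        struct devlink *devlink;

        devlink = devlink_alloc(&foo_devlink_ops,
                                sizeof(struct foo_priv), &pdev->dev);
        if (!devlink)
                return -ENOMEM;

        return devlink_register(devlink);
}
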
index ead2a8a..49442ca 100644 (file)
@@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
        }
 
        hw_metadata->input_dev = metadata->input_dev;
-       if (hw_metadata->input_dev)
-               dev_hold(hw_metadata->input_dev);
+       dev_hold(hw_metadata->input_dev);
 
        return hw_metadata;
 
@@ -867,8 +866,7 @@ free_hw_metadata:
 static void
 net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata)
 {
-       if (hw_metadata->input_dev)
-               dev_put(hw_metadata->input_dev);
+       dev_put(hw_metadata->input_dev);
        kfree(hw_metadata->fa_cookie);
        kfree(hw_metadata->trap_name);
        kfree(hw_metadata->trap_group_name);
index fb3bcba..497ef9b 100644 (file)
@@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
              unsigned short flags)
 {
        dst->dev = dev;
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        dst->ops = ops;
        dst_init_metrics(dst, dst_default_metrics.metrics, true);
        dst->expires = 0UL;
@@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
 
        if (dst->ops->destroy)
                dst->ops->destroy(dst);
-       if (dst->dev)
-               dev_put(dst->dev);
+       dev_put(dst->dev);
 
        lwtstate_put(dst->lwtstate);
 
index 3b4986e..3aca07c 100644 (file)
@@ -2180,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
        skb->tstamp = 0;
 
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, hh_len);
-               if (unlikely(!skb2)) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
@@ -2214,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
        }
        rcu_read_unlock_bh();
        if (dst)
-               IP6_INC_STATS(dev_net(dst->dev),
-                             ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+               IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 out_drop:
        kfree_skb(skb);
        return -ENETDOWN;
@@ -2287,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
        skb->tstamp = 0;
 
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, hh_len);
-               if (unlikely(!skb2)) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
@@ -3950,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map)
        }
 }
 
+DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp)
+{
+       struct net_device *master, *slave;
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+       master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+       slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+       if (slave && slave != xdp->rxq->dev) {
+       if (slave && slave != xdp->rxq->dev) {
+               /* The target device differs from the receiving device, so
+                * redirect the packet to the new device.
+                * Using XDP_REDIRECT gets the correct behaviour from
+                * XDP-enabled drivers, which then unmap the packet from
+                * their rx ring.
+                */
+               ri->tgt_index = slave->ifindex;
+               ri->map_id = INT_MAX;
+               ri->map_type = BPF_MAP_TYPE_UNSPEC;
+               return XDP_REDIRECT;
+       }
+       return XDP_TX;
+}
+EXPORT_SYMBOL_GPL(xdp_master_redirect);
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
                    struct bpf_prog *xdp_prog)
 {
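
For context: drivers do not call xdp_master_redirect() directly; the core invokes it when a bonding master runs the XDP program on behalf of a slave. A hedged reconstruction of the glue in bpf_prog_run_xdp() (the exact body lives in include/linux/filter.h and may differ in detail):

	u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));

	/* Only pay for the check once bonding has enabled the static key. */
	if (static_branch_unlikely(&bpf_master_redirect_enabled_key) &&
	    act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
		act = xdp_master_redirect(xdp);

	return act;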
@@ -5016,6 +5024,40 @@ err_clear:
        return -EINVAL;
 }
 
+BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_proto = {
+       .func           = bpf_sk_setsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       return _bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_getsockopt_proto = {
+       .func           = bpf_sk_getsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
           int, level, int, optname, char *, optval, int, optlen)
 {
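
These two protos expose the existing _bpf_setsockopt()/_bpf_getsockopt() machinery to program types that hold a sock-common pointer, such as TCP socket iterators. A hedged sketch of a consumer in the selftest style; the section name, helper availability and constant values are assumptions, not part of this hunk:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* uapi values, repeated here because vmlinux.h carries no macros */
#define SOL_TCP		6
#define TCP_CONGESTION	13

char _license[] SEC("license") = "GPL";

SEC("iter/tcp")
int set_cubic(struct bpf_iter__tcp *ctx)
{
	struct sock_common *skc = ctx->sk_common;
	struct tcp_sock *tp;
	char cc[] = "cubic";

	if (!skc)
		return 0;

	/* Assumed: the cast helper yields a pointer acceptable to the
	 * ARG_PTR_TO_BTF_ID_SOCK_COMMON argument type above.
	 */
	tp = bpf_skc_to_tcp_sock(skc);
	if (!tp)
		return 0;

	/* From the program side this is the familiar bpf_setsockopt(). */
	bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, cc, sizeof(cc));
	return 0;
}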
index 39d7be0..bac0184 100644 (file)
@@ -1508,7 +1508,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
 }
 EXPORT_SYMBOL(flow_get_u32_dst);
 
-/* Sort the source and destination IP (and the ports if the IP are the same),
+/* Sort the source and destination IPs and, independently, the ports,
  * to have consistent hash within the two directions
  */
 static inline void __flow_hash_consistentify(struct flow_keys *keys)
@@ -1519,11 +1519,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
        case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
                addr_diff = (__force u32)keys->addrs.v4addrs.dst -
                            (__force u32)keys->addrs.v4addrs.src;
-               if ((addr_diff < 0) ||
-                   (addr_diff == 0 &&
-                    ((__force u16)keys->ports.dst <
-                     (__force u16)keys->ports.src))) {
+               if (addr_diff < 0)
                        swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+               if ((__force u16)keys->ports.dst <
+                   (__force u16)keys->ports.src) {
                        swap(keys->ports.src, keys->ports.dst);
                }
                break;
@@ -1531,13 +1531,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
                addr_diff = memcmp(&keys->addrs.v6addrs.dst,
                                   &keys->addrs.v6addrs.src,
                                   sizeof(keys->addrs.v6addrs.dst));
-               if ((addr_diff < 0) ||
-                   (addr_diff == 0 &&
-                    ((__force u16)keys->ports.dst <
-                     (__force u16)keys->ports.src))) {
+               if (addr_diff < 0) {
                        for (i = 0; i < 4; i++)
                                swap(keys->addrs.v6addrs.src.s6_addr32[i],
                                     keys->addrs.v6addrs.dst.s6_addr32[i]);
+               }
+               if ((__force u16)keys->ports.dst <
+                   (__force u16)keys->ports.src) {
                        swap(keys->ports.src, keys->ports.dst);
                }
                break;
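
Net effect of the rewrite: addresses and ports are now canonicalized independently (previously the port swap was tied to the address comparison), so both directions of a flow always map to the same canonical tuple. A hedged standalone illustration of the v4 logic in plain C:

#include <stdint.h>
#include <stdio.h>

static void consistentify(uint32_t *saddr, uint32_t *daddr,
			  uint16_t *sport, uint16_t *dport)
{
	uint32_t t32;
	uint16_t t16;

	/* Canonical order for the addresses... */
	if ((int32_t)(*daddr - *saddr) < 0) {
		t32 = *saddr; *saddr = *daddr; *daddr = t32;
	}
	/* ...and, independently, for the ports. */
	if (*dport < *sport) {
		t16 = *sport; *sport = *dport; *dport = t16;
	}
}

int main(void)
{
	uint32_t s = 42, d = 42;	/* same address both ways */
	uint16_t sp = 8080, dp = 443;

	consistentify(&s, &d, &sp, &dp);
	printf("%u %u\n", sp, dp);	/* 443 8080 in both directions */
	return 0;
}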
index 53e85c7..2d5bc3a 100644 (file)
@@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
        write_pnet(&n->net, net);
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
 
        if (tbl->pconstructor && tbl->pconstructor(n)) {
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                kfree(n);
                n = NULL;
                goto out;
@@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
-                       if (n->dev)
-                               dev_put(n->dev);
+                       dev_put(n->dev);
                        kfree(n);
                        return 0;
                }
@@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                n->next = NULL;
                if (tbl->pdestructor)
                        tbl->pdestructor(n);
-               if (n->dev)
-                       dev_put(n->dev);
+               dev_put(n->dev);
                kfree(n);
        }
        return -ENOENT;
@@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
        list_del(&parms->list);
        parms->dead = 1;
        write_unlock_bh(&tbl->lock);
-       if (parms->dev)
-               dev_put(parms->dev);
+       dev_put(parms->dev);
        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 }
 EXPORT_SYMBOL(neigh_parms_release);
@@ -2533,6 +2528,13 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
                return false;
 
        master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+
+       /* 0 is already used to denote that NDA_MASTER wasn't passed, so we
+        * need another invalid ifindex value to denote "no master".
+        */
+       if (master_idx == -1)
+               return !!master;
+
        if (!master || master->ifindex != master_idx)
                return true;
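
The link dump filter in rtnetlink (below) gains the same special case. Restated as a hedged standalone predicate, where "keep" means the entry survives filtering:

/* master_idx == 0  : NDA_MASTER/IFLA_MASTER absent, no filtering
 * master_idx == -1 : keep only entries without a master
 * otherwise        : keep only entries whose master ifindex matches
 */
static bool keep_entry(int master_idx, const struct net_device *master)
{
	if (!master_idx)
		return true;
	if (master_idx == -1)
		return !master;
	return master && master->ifindex == master_idx;
}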
 
@@ -3315,12 +3317,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
        struct neigh_statistics *st = v;
 
        if (v == SEQ_START_TOKEN) {
-               seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
+               seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
                return 0;
        }
 
-       seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
-                       "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
+       seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
+                       "%08lx         %08lx         %08lx         "
+                       "%08lx       %08lx            %08lx\n",
                   atomic_read(&tbl->entries),
 
                   st->allocs,
index 5e4eb45..ac11604 100644 (file)
@@ -24,6 +24,8 @@
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
+#define BIAS_MAX       LONG_MAX
+
 static int page_pool_init(struct page_pool *pool,
                          const struct page_pool_params *params)
 {
@@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
                 */
        }
 
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+           pool->p.flags & PP_FLAG_PAGE_FRAG)
+               return -EINVAL;
+
        if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
                return -ENOMEM;
 
@@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
        return true;
 }
 
+static void page_pool_set_pp_info(struct page_pool *pool,
+                                 struct page *page)
+{
+       page->pp = pool;
+       page->pp_magic |= PP_SIGNATURE;
+}
+
+static void page_pool_clear_pp_info(struct page *page)
+{
+       page->pp_magic = 0;
+       page->pp = NULL;
+}
+
 static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
                                                 gfp_t gfp)
 {
@@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
                return NULL;
        }
 
-       page->pp_magic |= PP_SIGNATURE;
+       page_pool_set_pp_info(pool, page);
 
        /* Track how many pages are held 'in-flight' */
        pool->pages_state_hold_cnt++;
@@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
                        put_page(page);
                        continue;
                }
-               page->pp_magic |= PP_SIGNATURE;
+
+               page_pool_set_pp_info(pool, page);
                pool->alloc.cache[pool->alloc.count++] = page;
                /* Track how many pages are held 'in-flight' */
                pool->pages_state_hold_cnt++;
@@ -345,7 +365,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
                             DMA_ATTR_SKIP_CPU_SYNC);
        page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
-       page->pp_magic = 0;
+       page_pool_clear_pp_info(page);
 
        /* This may be the last page returned, releasing the pool, so
         * it is not safe to reference pool afterwards.
@@ -405,6 +425,11 @@ static __always_inline struct page *
 __page_pool_put_page(struct page_pool *pool, struct page *page,
                     unsigned int dma_sync_size, bool allow_direct)
 {
+       /* In the page frag case, bail out if this is not the last user of the page */
+       if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
+           page_pool_atomic_sub_frag_count_return(page, 1))
+               return NULL;
+
        /* This allocator is optimized for the XDP mode that uses
         * one-frame-per-page, but have fallbacks that act like the
         * regular page allocator APIs.
@@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 }
 EXPORT_SYMBOL(page_pool_put_page_bulk);
 
+static struct page *page_pool_drain_frag(struct page_pool *pool,
+                                        struct page *page)
+{
+       long drain_count = BIAS_MAX - pool->frag_users;
+
+       /* Some user is still using the page frag */
+       if (likely(page_pool_atomic_sub_frag_count_return(page,
+                                                         drain_count)))
+               return NULL;
+
+       if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+               if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+                       page_pool_dma_sync_for_device(pool, page, -1);
+
+               return page;
+       }
+
+       page_pool_return_page(pool, page);
+       return NULL;
+}
+
+static void page_pool_free_frag(struct page_pool *pool)
+{
+       long drain_count = BIAS_MAX - pool->frag_users;
+       struct page *page = pool->frag_page;
+
+       pool->frag_page = NULL;
+
+       if (!page ||
+           page_pool_atomic_sub_frag_count_return(page, drain_count))
+               return;
+
+       page_pool_return_page(pool, page);
+}
+
+struct page *page_pool_alloc_frag(struct page_pool *pool,
+                                 unsigned int *offset,
+                                 unsigned int size, gfp_t gfp)
+{
+       unsigned int max_size = PAGE_SIZE << pool->p.order;
+       struct page *page = pool->frag_page;
+
+       if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+                   size > max_size))
+               return NULL;
+
+       size = ALIGN(size, dma_get_cache_alignment());
+       *offset = pool->frag_offset;
+
+       if (page && *offset + size > max_size) {
+               page = page_pool_drain_frag(pool, page);
+               if (page)
+                       goto frag_reset;
+       }
+
+       if (!page) {
+               page = page_pool_alloc_pages(pool, gfp);
+               if (unlikely(!page)) {
+                       pool->frag_page = NULL;
+                       return NULL;
+               }
+
+               pool->frag_page = page;
+
+frag_reset:
+               pool->frag_users = 1;
+               *offset = 0;
+               pool->frag_offset = size;
+               page_pool_set_frag_count(page, BIAS_MAX);
+               return page;
+       }
+
+       pool->frag_users++;
+       pool->frag_offset = *offset + size;
+       return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag);
+
 static void page_pool_empty_ring(struct page_pool *pool)
 {
        struct page *page;
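
A hedged sketch of how an Rx driver might consume the new frag allocator; the pool must be created with PP_FLAG_PAGE_FRAG set (and, per the init check above, PAGE_POOL_DMA_USE_PP_FRAG_COUNT builds reject that flag). The function name is hypothetical:

/* Hypothetical Rx refill helper: sub-allocate 'size' bytes from a
 * (possibly shared) pool page.
 */
static void *rx_refill_one(struct page_pool *pool, unsigned int size,
			   dma_addr_t *dma)
{
	unsigned int offset;
	struct page *page;

	page = page_pool_alloc_frag(pool, &offset, size, GFP_ATOMIC);
	if (!page)
		return NULL;

	/* 'offset' locates our slice within the page. */
	*dma = page_pool_get_dma_addr(page) + offset;
	return page_address(page) + offset;
}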
@@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool)
        if (!page_pool_put(pool))
                return;
 
+       page_pool_free_frag(pool);
+
        if (!page_pool_release(pool))
                return;
 
@@ -644,7 +749,6 @@ bool page_pool_return_skb_page(struct page *page)
         * The page will be returned to the pool here regardless of the
         * 'flipped' fragment being in use or not.
         */
-       page->pp = NULL;
        page_pool_put_full_page(pp, page, false);
 
        return true;
index 7e258d2..314f97a 100644 (file)
@@ -1190,11 +1190,6 @@ static ssize_t pktgen_if_write(struct file *file,
                         * pktgen_xmit() is called
                         */
                        pkt_dev->last_ok = 1;
-
-                       /* override clone_skb if user passed default value
-                        * at module loading time
-                        */
-                       pkt_dev->clone_skb = 0;
                } else if (strcmp(f, "queue_xmit") == 0) {
                        pkt_dev->xmit_mode = M_QUEUE_XMIT;
                        pkt_dev->last_ok = 1;
index e79aaf1..2dcf1c0 100644 (file)
@@ -1959,6 +1959,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx)
                return false;
 
        master = netdev_master_upper_dev_get(dev);
+
+       /* 0 is already used to denote that IFLA_MASTER wasn't passed, so we
+        * need another invalid ifindex value to denote "no master".
+        */
+       if (master_idx == -1)
+               return !!master;
+
        if (!master || master->ifindex != master_idx)
                return true;
 
@@ -2257,7 +2264,8 @@ invalid_attr:
        return -EINVAL;
 }
 
-static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
+                           struct netlink_ext_ack *extack)
 {
        if (dev) {
                if (tb[IFLA_ADDRESS] &&
@@ -2284,7 +2292,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
                                return -EOPNOTSUPP;
 
                        if (af_ops->validate_link_af) {
-                               err = af_ops->validate_link_af(dev, af);
+                               err = af_ops->validate_link_af(dev, af, extack);
                                if (err < 0)
                                        return err;
                        }
@@ -2592,7 +2600,7 @@ static int do_setlink(const struct sk_buff *skb,
        const struct net_device_ops *ops = dev->netdev_ops;
        int err;
 
-       err = validate_linkmsg(dev, tb);
+       err = validate_linkmsg(dev, tb, extack);
        if (err < 0)
                return err;
 
@@ -3290,7 +3298,7 @@ replay:
                        m_ops = master_dev->rtnl_link_ops;
        }
 
-       err = validate_linkmsg(dev, tb);
+       err = validate_linkmsg(dev, tb, extack);
        if (err < 0)
                return err;
 
index fcbd977..9240af2 100644 (file)
@@ -1789,6 +1789,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 }
 EXPORT_SYMBOL(skb_realloc_headroom);
 
+/**
+ *     skb_expand_head - reallocate header of &sk_buff
+ *     @skb: buffer to reallocate
+ *     @headroom: needed headroom
+ *
+ *     Unlike skb_realloc_headroom, this one does not allocate a new skb
+ *     if possible; it copies skb->sk to the new skb when one has to be
+ *     allocated, and frees the original skb on failure.
+ *
+ *     It expects an increased headroom and generates a warning otherwise.
+ */
+
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
+{
+       int delta = headroom - skb_headroom(skb);
+
+       if (WARN_ONCE(delta <= 0,
+                     "%s is expecting an increase in the headroom", __func__))
+               return skb;
+
+       /* pskb_expand_head() might crash, if skb is shared */
+       if (skb_shared(skb)) {
+               struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+               if (likely(nskb)) {
+                       if (skb->sk)
+                               skb_set_owner_w(nskb, skb->sk);
+                       consume_skb(skb);
+               } else {
+                       kfree_skb(skb);
+               }
+               skb = nskb;
+       }
+       if (skb &&
+           pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+               kfree_skb(skb);
+               skb = NULL;
+       }
+       return skb;
+}
+EXPORT_SYMBOL(skb_expand_head);
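
The bpf_out_neigh_v{4,6} hunks earlier in this series are the intended first users. A hedged sketch of the caller pattern the helper enables; the key point is that on failure the original skb has already been freed:

	if (unlikely(skb_headroom(skb) < hh_len)) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb)
			return -ENOMEM;	/* old skb already freed */
	}
	/* skb now has at least hh_len bytes of headroom */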
+
 /**
  *     skb_copy_expand -       copy and expand sk_buff
  *     @skb: buffer to copy
@@ -4327,7 +4369,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
                memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
                /* We dont need to clear skbinfo->nr_frags here */
 
-               new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff));
+               new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
                delta_truesize = skb->truesize - new_truesize;
                skb->truesize = new_truesize;
                NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
index 15d7128..2d6249b 100644 (file)
@@ -584,29 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
        return sk_psock_skb_ingress(psock, skb);
 }
 
-static void sock_drop(struct sock *sk, struct sk_buff *skb)
+static void sk_psock_skb_state(struct sk_psock *psock,
+                              struct sk_psock_work_state *state,
+                              struct sk_buff *skb,
+                              int len, int off)
 {
-       sk_drops_add(sk, skb);
-       kfree_skb(skb);
+       spin_lock_bh(&psock->ingress_lock);
+       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+               state->skb = skb;
+               state->len = len;
+               state->off = off;
+       } else {
+               sock_drop(psock->sk, skb);
+       }
+       spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_backlog(struct work_struct *work)
 {
        struct sk_psock *psock = container_of(work, struct sk_psock, work);
        struct sk_psock_work_state *state = &psock->work_state;
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
        bool ingress;
        u32 len, off;
        int ret;
 
        mutex_lock(&psock->work_mutex);
-       if (state->skb) {
+       if (unlikely(state->skb)) {
+               spin_lock_bh(&psock->ingress_lock);
                skb = state->skb;
                len = state->len;
                off = state->off;
                state->skb = NULL;
-               goto start;
+               spin_unlock_bh(&psock->ingress_lock);
        }
+       if (skb)
+               goto start;
 
        while ((skb = skb_dequeue(&psock->ingress_skb))) {
                len = skb->len;
@@ -621,9 +634,8 @@ start:
                                                          len, ingress);
                        if (ret <= 0) {
                                if (ret == -EAGAIN) {
-                                       state->skb = skb;
-                                       state->len = len;
-                                       state->off = off;
+                                       sk_psock_skb_state(psock, state, skb,
+                                                          len, off);
                                        goto end;
                                }
                                /* Hard errors break pipe and stop xmit. */
@@ -722,6 +734,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
                skb_bpf_redirect_clear(skb);
                sock_drop(psock->sk, skb);
        }
+       kfree_skb(psock->work_state.skb);
+       /* We null the skb here to ensure that calls to sk_psock_backlog
+        * do not pick up the freed skb.
+        */
+       psock->work_state.skb = NULL;
        __sk_psock_purge_ingress_msg(psock);
 }
 
@@ -773,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work)
 
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 {
-       sk_psock_stop(psock, false);
-
        write_lock_bh(&sk->sk_callback_lock);
        sk_psock_restore_proto(sk, psock);
        rcu_assign_sk_user_data(sk, NULL);
@@ -784,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
                sk_psock_stop_verdict(sk, psock);
        write_unlock_bh(&sk->sk_callback_lock);
 
+       sk_psock_stop(psock, false);
+
        INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
        queue_rcu_work(system_wq, &psock->rwork);
 }
index 9671c32..aada649 100644 (file)
@@ -1358,6 +1358,15 @@ set_sndbuf:
                ret = sock_bindtoindex_locked(sk, val);
                break;
 
+       case SO_BUF_LOCK:
+               if (val & ~SOCK_BUF_LOCK_MASK) {
+                       ret = -EINVAL;
+                       break;
+               }
+               sk->sk_userlocks = val | (sk->sk_userlocks &
+                                         ~SOCK_BUF_LOCK_MASK);
+               break;
+
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -1720,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val64 = sock_net(sk)->net_cookie;
                break;
 
+       case SO_BUF_LOCK:
+               v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
+               break;
+
        default:
                /* We implement the SO_SNDLOWAT etc to not be settable
                 * (1003.1g 7).
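
A hedged userspace sketch of the new socket option; the constants mirror the uapi additions from this cycle (SO_BUF_LOCK = 72, SOCK_SNDBUF_LOCK = 1, SOCK_RCVBUF_LOCK = 2) but are repeated behind an #ifndef guard in case the toolchain headers predate them:

#include <sys/socket.h>

#ifndef SO_BUF_LOCK
#define SO_BUF_LOCK		72	/* assumed uapi value */
#define SOCK_SNDBUF_LOCK	1
#define SOCK_RCVBUF_LOCK	2
#endif

/* Pin both buffer sizes so kernel autotuning leaves them alone. */
static int lock_bufs(int fd)
{
	int val = SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;
	socklen_t len = sizeof(val);

	if (setsockopt(fd, SOL_SOCKET, SO_BUF_LOCK, &val, sizeof(val)))
		return -1;

	/* Read back the active lock mask. */
	if (getsockopt(fd, SOL_SOCKET, SO_BUF_LOCK, &val, &len))
		return -1;
	return val;
}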
index d1c50a4..0ee7d4c 100644 (file)
@@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
        }
        spin_unlock(&dndev_lock);
 
-       if (old)
-               dev_put(old);
+       dev_put(old);
        return rv;
 }
 
@@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev)
        }
        spin_unlock(&dndev_lock);
 
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 }
 
 /*
index 387a7e8..269c029 100644 (file)
@@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
        }
 
        change_nexthops(fi) {
-               if (nh->nh_dev)
-                       dev_put(nh->nh_dev);
+               dev_put(nh->nh_dev);
                nh->nh_dev = NULL;
        } endfor_nexthops(fi);
        kfree(fi);
@@ -389,7 +388,7 @@ link_it:
                return ofi;
        }
 
-       refcount_inc(&fi->fib_treeref);
+       refcount_set(&fi->fib_treeref, 1);
        refcount_set(&fi->fib_clntref, 1);
        spin_lock(&dn_fib_info_lock);
        fi->fib_next = dn_fib_info_list;
index 729d3de..7e85f2a 100644 (file)
@@ -1026,8 +1026,7 @@ source_ok:
        if (!fld.daddr) {
                fld.daddr = fld.saddr;
 
-               if (dev_out)
-                       dev_put(dev_out);
+               dev_put(dev_out);
                err = -EINVAL;
                dev_out = init_net.loopback_dev;
                if (!dev_out->dn_ptr)
@@ -1084,8 +1083,7 @@ source_ok:
                                        neigh_release(neigh);
                                        neigh = NULL;
                                } else {
-                                       if (dev_out)
-                                               dev_put(dev_out);
+                                       dev_put(dev_out);
                                        if (dn_dev_islocal(neigh->dev, fld.daddr)) {
                                                dev_out = init_net.loopback_dev;
                                                res.type = RTN_LOCAL;
@@ -1144,8 +1142,7 @@ select_source:
        if (res.type == RTN_LOCAL) {
                if (!fld.saddr)
                        fld.saddr = fld.daddr;
-               if (dev_out)
-                       dev_put(dev_out);
+               dev_put(dev_out);
                dev_out = init_net.loopback_dev;
                dev_hold(dev_out);
                if (!dev_out->dn_ptr)
@@ -1168,8 +1165,7 @@ select_source:
        if (!fld.saddr)
                fld.saddr = DN_FIB_RES_PREFSRC(res);
 
-       if (dev_out)
-               dev_put(dev_out);
+       dev_put(dev_out);
        dev_out = DN_FIB_RES_DEV(res);
        dev_hold(dev_out);
        fld.flowidn_oif = dev_out->ifindex;
@@ -1222,8 +1218,7 @@ done:
                neigh_release(neigh);
        if (free_res)
                dn_fib_res_put(&res);
-       if (dev_out)
-               dev_put(dev_out);
+       dev_put(dev_out);
 out:
        return err;
 
@@ -1503,8 +1498,7 @@ done:
        if (free_res)
                dn_fib_res_put(&res);
        dev_put(in_dev);
-       if (out_dev)
-               dev_put(out_dev);
+       dev_put(out_dev);
 out:
        return err;
 
index bca1b5d..970906e 100644 (file)
@@ -138,6 +138,7 @@ config NET_DSA_TAG_LAN9303
 
 config NET_DSA_TAG_SJA1105
        tristate "Tag driver for NXP SJA1105 switches"
+       depends on (NET_DSA_SJA1105 && NET_DSA_SJA1105_PTP) || !NET_DSA_SJA1105 || !NET_DSA_SJA1105_PTP
        select PACKING
        help
          Say Y or M if you want to enable support for tagging frames with the
index 84cad1b..1dc45e4 100644 (file)
@@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
        if (!skb)
                return 0;
 
-       nskb = cpu_dp->rcv(skb, dev, pt);
+       nskb = cpu_dp->rcv(skb, dev);
        if (!nskb) {
                kfree_skb(skb);
                return 0;
index c7fa85f..8150e16 100644 (file)
@@ -311,6 +311,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
        return NULL;
 }
 
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
+ */
 static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
 {
        struct dsa_port *cpu_dp, *dp;
@@ -321,15 +324,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
                return -EINVAL;
        }
 
-       /* Assign the default CPU port to all ports of the fabric */
-       list_for_each_entry(dp, &dst->ports, list)
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dp->cpu_dp)
+                       continue;
+
                if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
                        dp->cpu_dp = cpu_dp;
+       }
 
        return 0;
 }
 
-static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
+/* Perform initial assignment of CPU ports to user ports and DSA links in the
+ * fabric, giving preference to CPU ports local to each switch. Default to
+ * using the first CPU port in the switch tree if the port does not have a CPU
+ * port local to this switch.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+       struct dsa_port *cpu_dp, *dp;
+
+       list_for_each_entry(cpu_dp, &dst->ports, list) {
+               if (!dsa_port_is_cpu(cpu_dp))
+                       continue;
+
+               list_for_each_entry(dp, &dst->ports, list) {
+                       /* Prefer a local CPU port */
+                       if (dp->ds != cpu_dp->ds)
+                               continue;
+
+                       /* Prefer the first local CPU port found */
+                       if (dp->cpu_dp)
+                               continue;
+
+                       if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+                               dp->cpu_dp = cpu_dp;
+               }
+       }
+
+       return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
 {
        struct dsa_port *dp;
 
@@ -710,13 +746,14 @@ static int dsa_switch_setup(struct dsa_switch *ds)
        /* Add the switch to devlink before calling setup, so that setup can
         * add dpipe tables
         */
-       ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
+       ds->devlink =
+               devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
        if (!ds->devlink)
                return -ENOMEM;
        dl_priv = devlink_priv(ds->devlink);
        dl_priv->ds = ds;
 
-       err = devlink_register(ds->devlink, ds->dev);
+       err = devlink_register(ds->devlink);
        if (err)
                goto free_devlink;
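
The devlink API change visible here, with the backing device supplied at allocation time and devlink_register() taking no arguments, was applied tree-wide in this cycle. A hedged sketch of the new driver-side sequence; the ops, priv and pdev names are hypothetical:

	devlink = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv),
				&pdev->dev);
	if (!devlink)
		return -ENOMEM;

	priv = devlink_priv(devlink);
	/* ... initialize priv ... */

	err = devlink_register(devlink);	/* no dev argument anymore */
	if (err)
		devlink_free(devlink);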
 
@@ -921,13 +958,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
        if (!complete)
                return 0;
 
-       err = dsa_tree_setup_default_cpu(dst);
+       err = dsa_tree_setup_cpu_ports(dst);
        if (err)
                return err;
 
        err = dsa_tree_setup_switches(dst);
        if (err)
-               goto teardown_default_cpu;
+               goto teardown_cpu_ports;
 
        err = dsa_tree_setup_master(dst);
        if (err)
@@ -947,8 +984,8 @@ teardown_master:
        dsa_tree_teardown_master(dst);
 teardown_switches:
        dsa_tree_teardown_switches(dst);
-teardown_default_cpu:
-       dsa_tree_teardown_default_cpu(dst);
+teardown_cpu_ports:
+       dsa_tree_teardown_cpu_ports(dst);
 
        return err;
 }
@@ -966,7 +1003,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 
        dsa_tree_teardown_switches(dst);
 
-       dsa_tree_teardown_default_cpu(dst);
+       dsa_tree_teardown_cpu_ports(dst);
 
        list_for_each_entry_safe(dl, next, &dst->rtable, list) {
                list_del(&dl->list);
index e43c5dc..9575cab 100644 (file)
@@ -199,7 +199,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 /* port.c */
 void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
                               const struct dsa_device_ops *tag_ops);
-int dsa_port_set_state(struct dsa_port *dp, u8 state);
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
 int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
 void dsa_port_disable_rt(struct dsa_port *dp);
@@ -241,11 +241,9 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp,
 int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
                              struct switchdev_brport_flags flags,
                              struct netlink_ext_ack *extack);
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
                          struct switchdev_brport_flags flags,
                          struct netlink_ext_ack *extack);
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
-                    struct netlink_ext_ack *extack);
 int dsa_port_vlan_add(struct dsa_port *dp,
                      const struct switchdev_obj_port_vlan *vlan,
                      struct netlink_ext_ack *extack);
index b927d94..831d50d 100644 (file)
@@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
        return dsa_tree_notify(dp->ds->dst, e, v);
 }
 
-int dsa_port_set_state(struct dsa_port *dp, u8 state)
+static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp)
+{
+       struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+       struct switchdev_notifier_fdb_info info = {
+               /* flush all VLANs */
+               .vid = 0,
+       };
+
+       /* When the port becomes standalone it has already left the bridge.
+        * Don't notify the bridge in that case.
+        */
+       if (!brport_dev)
+               return;
+
+       call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
+                                brport_dev, &info.info, NULL);
+}
+
+static void dsa_port_fast_age(const struct dsa_port *dp)
+{
+       struct dsa_switch *ds = dp->ds;
+
+       if (!ds->ops->port_fast_age)
+               return;
+
+       ds->ops->port_fast_age(ds, dp->index);
+
+       dsa_port_notify_bridge_fdb_flush(dp);
+}
+
+static bool dsa_port_can_configure_learning(struct dsa_port *dp)
+{
+       struct switchdev_brport_flags flags = {
+               .mask = BR_LEARNING,
+       };
+       struct dsa_switch *ds = dp->ds;
+       int err;
+
+       if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags)
+               return false;
+
+       err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL);
+       return !err;
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
 {
        struct dsa_switch *ds = dp->ds;
        int port = dp->index;
@@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
 
        ds->ops->port_stp_state_set(ds, port, state);
 
-       if (ds->ops->port_fast_age) {
+       if (!dsa_port_can_configure_learning(dp) ||
+           (do_fast_age && dp->learning)) {
                /* Fast age FDB entries or flush appropriate forwarding database
                 * for the given port, if we are moving it from Learning or
                 * Forwarding state, to Disabled or Blocking or Listening state.
+                * Ports that were standalone before the STP state change don't
+                * need to fast age the FDB, since address learning is off in
+                * standalone mode.
                 */
 
                if ((dp->stp_state == BR_STATE_LEARNING ||
@@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
                    (state == BR_STATE_DISABLED ||
                     state == BR_STATE_BLOCKING ||
                     state == BR_STATE_LISTENING))
-                       ds->ops->port_fast_age(ds, port);
+                       dsa_port_fast_age(dp);
        }
 
        dp->stp_state = state;
@@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
        return 0;
 }
 
-static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
+                                  bool do_fast_age)
 {
        int err;
 
-       err = dsa_port_set_state(dp, state);
+       err = dsa_port_set_state(dp, state, do_fast_age);
        if (err)
                pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
 }
@@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
        }
 
        if (!dp->bridge_dev)
-               dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+               dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false);
 
        if (dp->pl)
                phylink_start(dp->pl);
@@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp)
                phylink_stop(dp->pl);
 
        if (!dp->bridge_dev)
-               dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+               dsa_port_set_state_now(dp, BR_STATE_DISABLED, false);
 
        if (ds->ops->port_disable)
                ds->ops->port_disable(ds, port);
@@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
        if (err)
                return err;
 
-       err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+       err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false);
        if (err && err != -EOPNOTSUPP)
                return err;
 
@@ -186,10 +236,6 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
        if (err && err != -EOPNOTSUPP)
                return err;
 
-       err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
        err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
        if (err && err != -EOPNOTSUPP)
                return err;
@@ -215,16 +261,10 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
        /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
         * so allow it to be in BR_STATE_FORWARDING to be kept functional
         */
-       dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+       dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
 
        /* VLAN filtering is handled by dsa_switch_bridge_leave */
 
-       /* Some drivers treat the notification for having a local multicast
-        * router by allowing multicast to be flooded to the CPU, so we should
-        * allow this in standalone mode too.
-        */
-       dsa_port_mrouter(dp->cpu_dp, true, NULL);
-
        /* Ageing time may be global to the switch chip, so don't change it
         * here because we have no good reason (or value) to change it to.
         */
@@ -639,27 +679,35 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
        return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
 }
 
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
                          struct switchdev_brport_flags flags,
                          struct netlink_ext_ack *extack)
 {
        struct dsa_switch *ds = dp->ds;
+       int err;
 
        if (!ds->ops->port_bridge_flags)
                return -EOPNOTSUPP;
 
-       return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
-}
+       err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
+       if (err)
+               return err;
 
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
-                    struct netlink_ext_ack *extack)
-{
-       struct dsa_switch *ds = dp->ds;
+       if (flags.mask & BR_LEARNING) {
+               bool learning = flags.val & BR_LEARNING;
 
-       if (!ds->ops->port_set_mrouter)
-               return -EOPNOTSUPP;
+               if (learning == dp->learning)
+                       return 0;
+
+               if ((dp->learning && !learning) &&
+                   (dp->stp_state == BR_STATE_LEARNING ||
+                    dp->stp_state == BR_STATE_FORWARDING))
+                       dsa_port_fast_age(dp);
 
-       return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
+               dp->learning = learning;
+       }
+
+       return 0;
 }
 
 int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
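
Because dp->learning is only tracked when dsa_port_can_configure_learning() says so, a switch driver must provide both port_pre_bridge_flags and port_bridge_flags to opt in. A hedged minimal driver-side pair; all foo_* names are hypothetical:

static int foo_port_pre_bridge_flags(struct dsa_switch *ds, int port,
				     struct switchdev_brport_flags flags,
				     struct netlink_ext_ack *extack)
{
	/* Advertise only what the hardware can actually toggle. */
	if (flags.mask & ~BR_LEARNING)
		return -EINVAL;
	return 0;
}

static int foo_port_bridge_flags(struct dsa_switch *ds, int port,
				 struct switchdev_brport_flags flags,
				 struct netlink_ext_ack *extack)
{
	if (flags.mask & BR_LEARNING) {
		/* foo_set_learning() stands in for the hardware accessor. */
		foo_set_learning(ds, port, !!(flags.val & BR_LEARNING));
	}
	return 0;
}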
index 6e1135d..acf73db 100644 (file)
@@ -286,7 +286,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
                if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
                        return -EOPNOTSUPP;
 
-               ret = dsa_port_set_state(dp, attr->u.stp_state);
+               ret = dsa_port_set_state(dp, attr->u.stp_state, true);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
                if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
@@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
 
                ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
                break;
-       case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
-               if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
-                       return -EOPNOTSUPP;
-
-               ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
-               break;
        default:
                ret = -EOPNOTSUPP;
                break;
index 0efae1a..8a02ac4 100644 (file)
@@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
-                                     struct net_device *ndev,
-                                     struct packet_type *pt)
+                                     struct net_device *ndev)
 {
        u8 ver, port;
        u16 hdr;
index a27f509..96e93b5 100644 (file)
@@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
  */
 static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
                                       struct net_device *dev,
-                                      struct packet_type *pt,
                                       unsigned int offset)
 {
        int source_port;
@@ -182,13 +181,12 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
 }
 
 
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                   struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        struct sk_buff *nskb;
 
        /* skb->data points to the EtherType, the tag is right before it */
-       nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+       nskb = brcm_tag_rcv_ll(skb, dev, 2);
        if (!nskb)
                return nskb;
 
@@ -251,8 +249,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
-                                       struct net_device *dev,
-                                       struct packet_type *pt)
+                                       struct net_device *dev)
 {
        int source_port;
        u8 *brcm_tag;
@@ -302,11 +299,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
 }
 
 static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
-                                           struct net_device *dev,
-                                           struct packet_type *pt)
+                                           struct net_device *dev)
 {
        /* tag is prepended to the packet */
-       return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+       return brcm_tag_rcv_ll(skb, dev, ETH_HLEN);
 }
 
 static const struct dsa_device_ops brcm_prepend_netdev_ops = {
index 3607499..e32f816 100644 (file)
@@ -332,8 +332,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
        return dsa_xmit_ll(skb, dev, 0);
 }
 
-static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
                return NULL;
@@ -373,8 +372,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
-                               struct packet_type *pt)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
                return NULL;
index 5985dab..df71409 100644 (file)
@@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        int port;
        u8 *gswip_tag;
index c41208c..f64b805 100644 (file)
@@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        /* Tag decoding */
        u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN;
index 1c2dfa8..fa1d60d 100644 (file)
@@ -67,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
-                                 struct packet_type *pt)
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
 
@@ -134,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
        return skb;
 }
 
-static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        /* Tag decoding */
        u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
index cf7cf2f..58d3a0e 100644 (file)
@@ -74,8 +74,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        __be16 *lan9303_tag;
        u16 lan9303_tag1;
index 3fb80e4..bbf37c0 100644 (file)
@@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
        return skb;
 }
 
-static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u16 hdr;
        int port;
index 3252634..d37ab98 100644 (file)
@@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
-                                 struct net_device *netdev,
-                                 struct packet_type *pt)
+                                 struct net_device *netdev)
 {
        u64 src_port, qos_class;
        u64 vlan_tci, tag_type;
index c95de71..3038a25 100644 (file)
@@ -38,8 +38,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
-                                 struct net_device *netdev,
-                                 struct packet_type *pt)
+                                 struct net_device *netdev)
 {
        int src_port, switch_id;
 
index 693bda0..6e31369 100644 (file)
@@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 ver;
        u16  hdr;
index f6b63aa..aaddca3 100644 (file)
@@ -64,8 +64,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        u16 protport;
        __be16 *p;
index 664cb80..38b2792 100644 (file)
@@ -391,8 +391,7 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
 }
 
 static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
-                                  struct net_device *netdev,
-                                  struct packet_type *pt)
+                                  struct net_device *netdev)
 {
        int source_port = -1, switch_id = -1;
        struct sja1105_meta meta = {0};
@@ -546,12 +545,11 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
 }
 
 static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
-                                  struct net_device *netdev,
-                                  struct packet_type *pt)
+                                  struct net_device *netdev)
 {
        int source_port = -1, switch_id = -1;
        bool host_only = false;
-       u16 vid;
+       u16 vid = 0;
 
        if (sja1110_skb_has_inband_control_extension(skb)) {
                skb = sja1110_rcv_inband_control_extension(skb, &source_port,
index ba73804..5749ba8 100644 (file)
@@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 *trailer;
        int source_port;
index da231c1..ff442b8 100644 (file)
@@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        int source_port;
        u8 *trailer;
index 171ba75..73fce94 100644 (file)
@@ -62,8 +62,6 @@
 #include <linux/uaccess.h>
 #include <net/pkt_sched.h>
 
-__setup("ether=", netdev_boot_setup);
-
 /**
  * eth_header - create the Ethernet header
  * @skb:       buffer to alter
index b0fa2b0..81fa36a 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/sched/signal.h>
 #include <linux/net.h>
+#include <linux/pm_runtime.h>
 #include <net/devlink.h>
 #include <net/xdp_sock_drv.h>
 #include <net/flow_offload.h>
@@ -2692,7 +2693,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
        int rc;
        netdev_features_t old_features;
 
-       if (!dev || !netif_device_present(dev))
+       if (!dev)
                return -ENODEV;
 
        if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
@@ -2748,10 +2749,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
                        return -EPERM;
        }
 
+       if (dev->dev.parent)
+               pm_runtime_get_sync(dev->dev.parent);
+
+       if (!netif_device_present(dev)) {
+               rc = -ENODEV;
+               goto out;
+       }
+
        if (dev->ethtool_ops->begin) {
                rc = dev->ethtool_ops->begin(dev);
-               if (rc  < 0)
-                       return rc;
+               if (rc < 0)
+                       goto out;
        }
        old_features = dev->features;
 
@@ -2970,6 +2979,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
 
        if (old_features != dev->features)
                netdev_features_change(dev);
+out:
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
 
        return rc;
 }
index 73e0f5b..1797a0a 100644 (file)
@@ -2,6 +2,7 @@
 
 #include <net/sock.h>
 #include <linux/ethtool_netlink.h>
+#include <linux/pm_runtime.h>
 #include "netlink.h"
 
 static struct genl_family ethtool_genl_family;
@@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = {
                                                          ETHTOOL_FLAGS_STATS),
 };
 
+int ethnl_ops_begin(struct net_device *dev)
+{
+       int ret;
+
+       if (!dev)
+               return -ENODEV;
+
+       if (dev->dev.parent)
+               pm_runtime_get_sync(dev->dev.parent);
+
+       if (!netif_device_present(dev)) {
+               ret = -ENODEV;
+               goto err;
+       }
+
+       if (dev->ethtool_ops->begin) {
+               ret = dev->ethtool_ops->begin(dev);
+               if (ret)
+                       goto err;
+       }
+
+       return 0;
+err:
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
+
+       return ret;
+}
+
+void ethnl_ops_complete(struct net_device *dev)
+{
+       if (dev->ethtool_ops->complete)
+               dev->ethtool_ops->complete(dev);
+
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
+}
+
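
Every ethtool-netlink request handler is expected to bracket its work with this pair; the runtime-PM get/put guarantees the device cannot suspend while its ethtool_ops run, which is why the netif_device_present() check moves in here from the header parser below. A hedged sketch of the calling convention:

	ret = ethnl_ops_begin(dev);	/* resumes the device if needed */
	if (ret < 0)
		return ret;

	/* ... dev->ethtool_ops handlers may be called safely here ... */

	ethnl_ops_complete(dev);	/* drops the runtime-PM reference */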
 /**
  * ethnl_parse_header_dev_get() - parse request header
  * @req_info:    structure to put results into
@@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
                return -EINVAL;
        }
 
-       if (dev && !netif_device_present(dev)) {
-               dev_put(dev);
-               NL_SET_ERR_MSG(extack, "device not present");
-               return -ENODEV;
-       }
-
        req_info->dev = dev;
        req_info->flags = flags;
        return 0;
@@ -365,8 +398,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
                ops->cleanup_data(reply_data);
 
        genlmsg_end(rskb, reply_payload);
-       if (req_info->dev)
-               dev_put(req_info->dev);
+       dev_put(req_info->dev);
        kfree(reply_data);
        kfree(req_info);
        return genlmsg_reply(rskb, info);
@@ -378,8 +410,7 @@ err_cleanup:
        if (ops->cleanup_data)
                ops->cleanup_data(reply_data);
 err_dev:
-       if (req_info->dev)
-               dev_put(req_info->dev);
+       dev_put(req_info->dev);
        kfree(reply_data);
        kfree(req_info);
        return ret;
index 3fc395c..077aac3 100644 (file)
@@ -247,19 +247,8 @@ struct ethnl_reply_data {
        struct net_device               *dev;
 };
 
-static inline int ethnl_ops_begin(struct net_device *dev)
-{
-       if (dev && dev->ethtool_ops->begin)
-               return dev->ethtool_ops->begin(dev);
-       else
-               return 0;
-}
-
-static inline void ethnl_ops_complete(struct net_device *dev)
-{
-       if (dev && dev->ethtool_ops->complete)
-               dev->ethtool_ops->complete(dev);
-}
+int ethnl_ops_begin(struct net_device *dev);
+void ethnl_ops_complete(struct net_device *dev);
 
 /**
  * struct ethnl_request_ops - unified handling of GET requests
index 88215b5..dd5a45f 100644 (file)
@@ -340,8 +340,7 @@ nla_put_failure:
 out_dev:
        wpan_phy_put(phy);
 out:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 
        return rc;
 }
index 0cf2374..277124f 100644 (file)
@@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
                if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) {
                        struct wpan_dev *wpan_dev = info->user_ptr[1];
 
-                       if (wpan_dev->netdev)
-                               dev_put(wpan_dev->netdev);
+                       dev_put(wpan_dev->netdev);
                } else {
                        dev_put(info->user_ptr[1]);
                }
index f5077de..90233ef 100644 (file)
@@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
                ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
                rcu_read_lock();
                dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
-               if (dev)
-                       dev_hold(dev);
+               dev_hold(dev);
                rcu_read_unlock();
                break;
        case IEEE802154_ADDR_SHORT:
index c82aded..f446898 100644 (file)
@@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
 };
 
 static int inet_validate_link_af(const struct net_device *dev,
-                                const struct nlattr *nla)
+                                const struct nlattr *nla,
+                                struct netlink_ext_ack *extack)
 {
        struct nlattr *a, *tb[IFLA_INET_MAX+1];
        int err, rem;
@@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev,
                return -EAFNOSUPPORT;
 
        err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
-                                         inet_af_policy, NULL);
+                                         inet_af_policy, extack);
        if (err < 0)
                return err;
 
index fa19f4c..b42c429 100644 (file)
@@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
 
 void fib_nh_common_release(struct fib_nh_common *nhc)
 {
-       if (nhc->nhc_dev)
-               dev_put(nhc->nhc_dev);
-
+       dev_put(nhc->nhc_dev);
        lwtstate_put(nhc->nhc_lwtstate);
        rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
        rt_fibinfo_free(&nhc->nhc_rth_input);
@@ -1551,7 +1549,7 @@ link_it:
                return ofi;
        }
 
-       refcount_inc(&fi->fib_treeref);
+       refcount_set(&fi->fib_treeref, 1);
        refcount_set(&fi->fib_clntref, 1);
        spin_lock_bh(&fib_info_lock);
        hlist_add_head(&fi->fib_hash,
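
The fib_treeref line uses refcount_set(..., 1) rather than refcount_inc() because this is where the first reference comes into existence: refcount_t deliberately WARNs when incremented from zero, since that pattern usually signals a use-after-free. The general rule:

	refcount_t ref;		/* zero after a zeroing allocation */

	refcount_set(&ref, 1);	/* create the first reference */
	refcount_inc(&ref);	/* take additional references */
	/* refcount_inc() from 0 WARNs and saturates the counter */
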
index c695d29..8b30cad 100644 (file)
@@ -1095,8 +1095,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
                                         sizeof(struct in6_addr))
                                goto send_mal_query;
                        dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
-                       if (dev)
-                               dev_hold(dev);
+                       dev_hold(dev);
                        break;
 #endif
                default:
index 03589a0..7e50727 100644 (file)
@@ -2233,7 +2233,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
                        iml->sfmode, psf->sl_count, psf->sl_addr, 0);
        RCU_INIT_POINTER(iml->sflist, NULL);
        /* decrease mem now to avoid the memleak warning */
-       atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
+       atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc);
        kfree_rcu(psf, rcu);
        return err;
 }
@@ -2382,7 +2382,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 
                if (psl)
                        count += psl->sl_max;
-               newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -2393,7 +2394,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
                        for (i = 0; i < psl->sl_count; i++)
                                newpsl->sl_addr[i] = psl->sl_addr[i];
                        /* decrease mem now to avoid the memleak warning */
-                       atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+                       atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                                  &sk->sk_omem_alloc);
                        kfree_rcu(psl, rcu);
                }
                rcu_assign_pointer(pmc->sflist, newpsl);
@@ -2468,19 +2470,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
                goto done;
        }
        if (msf->imsf_numsrc) {
-               newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
-                                                          GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+                                                     msf->imsf_numsrc),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
                }
                newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
-               memcpy(newpsl->sl_addr, msf->imsf_slist,
-                       msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+               memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+                      flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
                err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
                        msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
                if (err) {
-                       sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
+                       sock_kfree_s(sk, newpsl,
+                                    struct_size(newpsl, sl_addr,
+                                                newpsl->sl_max));
                        goto done;
                }
        } else {
@@ -2493,7 +2498,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
                (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
                        psl->sl_count, psl->sl_addr, 0);
                /* decrease mem now to avoid the memleak warning */
-               atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        } else
                (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
@@ -2551,14 +2557,14 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
                count = psl->sl_count;
        }
        copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
-       len = copycount * sizeof(psl->sl_addr[0]);
+       len = flex_array_size(psl, sl_addr, copycount);
        msf->imsf_numsrc = count;
        if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
            copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
                return -EFAULT;
        }
        if (len &&
-           copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+           copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len))
                return -EFAULT;
        return 0;
 done:
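
The igmp.c hunks above replace the open-coded IP_SFLSIZE() arithmetic with the overflow-checked helpers from <linux/overflow.h>. For a structure ending in a flexible array member (layout abbreviated, field names as in the diff):

	struct ip_sf_socklist {
		unsigned int	sl_max;
		unsigned int	sl_count;
		struct rcu_head	rcu;
		__be32		sl_addr[];	/* flexible array member */
	};

	/* struct plus n sl_addr entries, saturating instead of wrapping */
	size_t total = struct_size(psl, sl_addr, n);

	/* just the n array entries */
	size_t array = flex_array_size(psl, sl_addr, n);
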
index a202dce..6b04a88 100644 (file)
@@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
        } else if (rt->rt_type == RTN_BROADCAST)
                IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
 
-       /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
-               if (!skb2) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
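
The ip_finish_output2() hunk collapses the old reallocate-and-reown sequence into skb_expand_head(), which grows the headroom, preserves socket ownership, and frees the original skb on failure. Before and after, side by side (sketch):

	/* before: manual reallocation */
	skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
	if (!skb2) {
		kfree_skb(skb);
		return -ENOMEM;
	}
	if (skb->sk)
		skb_set_owner_w(skb2, skb->sk);
	consume_skb(skb);
	skb = skb2;

	/* after: one helper; NULL on failure, old skb already freed */
	skb = skb_expand_head(skb, hh_len);
	if (!skb)
		return -ENOMEM;
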
index ec60367..b297bb2 100644 (file)
@@ -663,12 +663,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
                              struct sockaddr_storage *group,
                              struct sockaddr_storage *list)
 {
-       int msize = IP_MSFILTER_SIZE(numsrc);
        struct ip_msfilter *msf;
        struct sockaddr_in *psin;
        int err, i;
 
-       msf = kmalloc(msize, GFP_KERNEL);
+       msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
        if (!msf)
                return -ENOBUFS;
 
@@ -684,7 +683,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
 
                if (psin->sin_family != AF_INET)
                        goto Eaddrnotavail;
-               msf->imsf_slist[i] = psin->sin_addr.s_addr;
+               msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
        }
        err = ip_mc_msfilter(sk, msf, ifindex);
        kfree(msf);
@@ -791,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
                goto out_free_gsf;
 
        err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
-                                gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+                                gsf->gf_fmode, &gsf->gf_group,
+                                gsf->gf_slist_flex);
 out_free_gsf:
        kfree(gsf);
        return err;
@@ -800,7 +800,7 @@ out_free_gsf:
 static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                int optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter *gf32;
        unsigned int n;
        void *p;
@@ -814,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        p = kmalloc(optlen + 4, GFP_KERNEL);
        if (!p)
                return -ENOMEM;
-       gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+       gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
 
        err = -EFAULT;
        if (copy_from_sockptr(gf32, optval, optlen))
@@ -827,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                goto out_free_gsf;
 
        err = -EINVAL;
-       if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+       if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
                goto out_free_gsf;
 
        /* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -835,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
                goto out_free_gsf;
        err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
-                                &gf32->gf_group, gf32->gf_slist);
+                                &gf32->gf_group, gf32->gf_slist_flex);
 out_free_gsf:
        kfree(p);
        return err;
@@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname)
 static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct group_filter, gf_slist);
+       const int size0 = offsetof(struct group_filter, gf_slist_flex);
        struct group_filter __user *p = optval;
        struct group_filter gsf;
        int num;
@@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                return -EFAULT;
 
        num = gsf.gf_numsrc;
-       err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+       err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
        if (err)
                return err;
        if (gsf.gf_numsrc < num)
@@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
 static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter __user *p = optval;
        struct compat_group_filter gf32;
        struct group_filter gf;
@@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
        num = gf.gf_numsrc = gf32.gf_numsrc;
        gf.gf_group = gf32.gf_group;
 
-       err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+       err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
        if (err)
                return err;
        if (gf.gf_numsrc < num)
index 7f0e810..fe9101d 100644 (file)
@@ -390,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }
 
-       skb_reset_network_header(skb);
+       skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
 
        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
index 04754d5..b181773 100644 (file)
@@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
        struct rt_cache_stat *st = v;
 
        if (v == SEQ_START_TOKEN) {
-               seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
+               seq_puts(seq, "entries  in_hit   in_slow_tot in_slow_mc in_no_route in_brd   in_martian_dst in_martian_src out_hit  out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
                return 0;
        }
 
-       seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
-                  " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
+       seq_printf(seq, "%08x %08x %08x    %08x   %08x    %08x %08x       "
+                       "%08x       %08x %08x     %08x    %08x %08x   "
+                       "%08x     %08x        %08x        %08x\n",
                   dst_entries_get_slow(&ipv4_dst_ops),
                   0, /* st->in_hit */
                   st->in_slow_tot,
@@ -2812,8 +2813,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                new->output = dst_discard_out;
 
                new->dev = net->loopback_dev;
-               if (new->dev)
-                       dev_hold(new->dev);
+               dev_hold(new->dev);
 
                rt->rt_is_input = ort->rt_is_input;
                rt->rt_iif = ort->rt_iif;
index 84db1c9..2e62e0d 100644 (file)
@@ -2277,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-/*
- * Get next listener socket follow cur.  If cur is NULL, get first socket
- * starting from bucket given in st->bucket; when st->bucket is zero the
- * very first socket in the hash table is returned.
+static unsigned short seq_file_family(const struct seq_file *seq);
+
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+       unsigned short family = seq_file_family(seq);
+
+       /* AF_UNSPEC is used as a match all */
+       return ((family == AF_UNSPEC || family == sk->sk_family) &&
+               net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+/* Find a non-empty bucket (starting from st->bucket)
+ * and return the first sk from it.
  */
-static void *listening_get_next(struct seq_file *seq, void *cur)
+static void *listening_get_first(struct seq_file *seq)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct inet_listen_hashbucket *ilb;
-       struct hlist_nulls_node *node;
-       struct sock *sk = cur;
 
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
+       st->offset = 0;
+       for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+               struct inet_listen_hashbucket *ilb2;
+               struct inet_connection_sock *icsk;
+               struct sock *sk;
 
-       if (!sk) {
-get_head:
-               ilb = &tcp_hashinfo.listening_hash[st->bucket];
-               spin_lock(&ilb->lock);
-               sk = sk_nulls_head(&ilb->nulls_head);
-               st->offset = 0;
-               goto get_sk;
+               ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+               if (hlist_empty(&ilb2->head))
+                       continue;
+
+               spin_lock(&ilb2->lock);
+               inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
+                       sk = (struct sock *)icsk;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
+               }
+               spin_unlock(&ilb2->lock);
        }
-       ilb = &tcp_hashinfo.listening_hash[st->bucket];
+
+       return NULL;
+}
+
+/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
+ * If "cur" is the last one in the st->bucket,
+ * call listening_get_first() to return the first sk of the next
+ * non-empty bucket.
+ */
+static void *listening_get_next(struct seq_file *seq, void *cur)
+{
+       struct tcp_iter_state *st = seq->private;
+       struct inet_listen_hashbucket *ilb2;
+       struct inet_connection_sock *icsk;
+       struct sock *sk = cur;
+
        ++st->num;
        ++st->offset;
 
-       sk = sk_nulls_next(sk);
-get_sk:
-       sk_nulls_for_each_from(sk, node) {
-               if (!net_eq(sock_net(sk), net))
-                       continue;
-               if (afinfo->family == AF_UNSPEC ||
-                   sk->sk_family == afinfo->family)
+       icsk = inet_csk(sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
-       spin_unlock(&ilb->lock);
-       st->offset = 0;
-       if (++st->bucket < INET_LHTABLE_SIZE)
-               goto get_head;
-       return NULL;
+
+       ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+       spin_unlock(&ilb2->lock);
+       ++st->bucket;
+       return listening_get_first(seq);
 }
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
@@ -2331,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 
        st->bucket = 0;
        st->offset = 0;
-       rc = listening_get_next(seq, NULL);
+       rc = listening_get_first(seq);
 
        while (rc && *pos) {
                rc = listening_get_next(seq, rc);
@@ -2351,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
  */
 static void *established_get_first(struct seq_file *seq)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       void *rc = NULL;
-
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
 
        st->offset = 0;
        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
@@ -2373,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq)
 
                spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-                       if ((afinfo->family != AF_UNSPEC &&
-                            sk->sk_family != afinfo->family) ||
-                           !net_eq(sock_net(sk), net)) {
-                               continue;
-                       }
-                       rc = sk;
-                       goto out;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
                }
                spin_unlock_bh(lock);
        }
-out:
-       return rc;
+
+       return NULL;
 }
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct sock *sk = cur;
        struct hlist_nulls_node *node;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
 
        ++st->num;
        ++st->offset;
@@ -2406,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
        sk = sk_nulls_next(sk);
 
        sk_nulls_for_each_from(sk, node) {
-               if ((afinfo->family == AF_UNSPEC ||
-                    sk->sk_family == afinfo->family) &&
-                   net_eq(sock_net(sk), net))
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
 
@@ -2451,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 static void *tcp_seek_last_pos(struct seq_file *seq)
 {
        struct tcp_iter_state *st = seq->private;
+       int bucket = st->bucket;
        int offset = st->offset;
        int orig_num = st->num;
        void *rc = NULL;
 
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
-               if (st->bucket >= INET_LHTABLE_SIZE)
+               if (st->bucket > tcp_hashinfo.lhash2_mask)
                        break;
                st->state = TCP_SEQ_STATE_LISTENING;
-               rc = listening_get_next(seq, NULL);
-               while (offset-- && rc)
+               rc = listening_get_first(seq);
+               while (offset-- && rc && bucket == st->bucket)
                        rc = listening_get_next(seq, rc);
                if (rc)
                        break;
@@ -2472,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
                if (st->bucket > tcp_hashinfo.ehash_mask)
                        break;
                rc = established_get_first(seq);
-               while (offset-- && rc)
+               while (offset-- && rc && bucket == st->bucket)
                        rc = established_get_next(seq, rc);
        }
 
@@ -2542,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v)
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
-                       spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
+                       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
                break;
        case TCP_SEQ_STATE_ESTABLISHED:
                if (v)
@@ -2687,6 +2687,15 @@ out:
 }
 
 #ifdef CONFIG_BPF_SYSCALL
+struct bpf_tcp_iter_state {
+       struct tcp_iter_state state;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
 struct bpf_iter__tcp {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct sock_common *, sk_common);
@@ -2705,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
        return bpf_iter_run_prog(prog, &ctx);
 }
 
+static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+                                     unsigned int new_batch_sz)
+{
+       struct sock **new_batch;
+
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+
+       bpf_iter_tcp_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+                                                struct sock *start_sk)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct inet_connection_sock *icsk;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       icsk = inet_csk(start_sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+
+       return expected;
+}
+
+static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
+                                                  struct sock *start_sk)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct hlist_nulls_node *node;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       sk = sk_nulls_next(start_sk);
+       sk_nulls_for_each_from(sk, node) {
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+
+       return expected;
+}
+
+static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+
+       /* The st->bucket is done.  Directly advance to the next
+        * bucket instead of having tcp_seek_last_pos() skip through
+        * the current bucket one by one only to find out it has
+        * to advance to the next bucket.
+        */
+       if (iter->st_bucket_done) {
+               st->offset = 0;
+               st->bucket++;
+               if (st->state == TCP_SEQ_STATE_LISTENING &&
+                   st->bucket > tcp_hashinfo.lhash2_mask) {
+                       st->state = TCP_SEQ_STATE_ESTABLISHED;
+                       st->bucket = 0;
+               }
+       }
+
+again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+       iter->st_bucket_done = false;
+
+       sk = tcp_seek_last_pos(seq);
+       if (!sk)
+               return NULL; /* Done */
+
+       if (st->state == TCP_SEQ_STATE_LISTENING)
+               expected = bpf_iter_tcp_listening_batch(seq, sk);
+       else
+               expected = bpf_iter_tcp_established_batch(seq, sk);
+
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+
+       if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+
+       return sk;
+}
+
+static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       /* bpf iter does not support lseek, so it always
+        * continues from where it was stop()-ped.
+        */
+       if (*pos)
+               return bpf_iter_tcp_batch(seq);
+
+       return SEQ_START_TOKEN;
+}
+
+static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk) {
+               /* Keeping st->num consistent in tcp_iter_state.
+                * bpf_iter_tcp does not use st->num.
+                * meta.seq_num is used instead.
+                */
+               st->num++;
+               /* Move st->offset to the next sk in the bucket such that
+                * the future start() will resume at st->offset in
+                * st->bucket.  See tcp_seek_last_pos().
+                */
+               st->offset++;
+               sock_put(iter->batch[iter->cur_sk++]);
+       }
+
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_tcp_batch(seq);
+
+       ++*pos;
+       /* Keeping st->last_pos consistent in tcp_iter_state.
+        * bpf iter does not do lseek, so st->last_pos always equals *pos.
+        */
+       st->last_pos = *pos;
+       return sk;
+}
+
 static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
 {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
+       bool slow;
        uid_t uid;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       if (sk_fullsock(sk))
+               slow = lock_sock_fast(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        if (sk->sk_state == TCP_TIME_WAIT) {
                uid = 0;
        } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
@@ -2728,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
 
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return tcp_prog_seq_show(prog, &meta, v, uid);
+       ret = tcp_prog_seq_show(prog, &meta, v, uid);
+
+unlock:
+       if (sk_fullsock(sk))
+               unlock_sock_fast(sk, slow);
+       return ret;
 }
 
 static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_tcp_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -2743,17 +2947,34 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
                        (void)tcp_prog_seq_show(prog, &meta, v, 0);
        }
 
-       tcp_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk) {
+               bpf_iter_tcp_put_batch(iter);
+               iter->st_bucket_done = false;
+       }
 }
 
 static const struct seq_operations bpf_iter_tcp_seq_ops = {
        .show           = bpf_iter_tcp_seq_show,
-       .start          = tcp_seq_start,
-       .next           = tcp_seq_next,
+       .start          = bpf_iter_tcp_seq_start,
+       .next           = bpf_iter_tcp_seq_next,
        .stop           = bpf_iter_tcp_seq_stop,
 };
+#endif
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+       const struct tcp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+       /* Iterated from bpf_iter.  Let the bpf prog filter instead. */
+       if (seq->op == &bpf_iter_tcp_seq_ops)
+               return AF_UNSPEC;
 #endif
 
+       /* Iterated from proc fs */
+       afinfo = PDE_DATA(file_inode(seq->file));
+       return afinfo->family;
+}
+
 static const struct seq_operations tcp4_seq_ops = {
        .show           = tcp4_seq_show,
        .start          = tcp_seq_start,
@@ -3002,39 +3223,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
 DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
                     struct sock_common *sk_common, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
 static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
 {
-       struct tcp_iter_state *st = priv_data;
-       struct tcp_seq_afinfo *afinfo;
-       int ret;
+       struct bpf_tcp_iter_state *iter = priv_data;
+       int err;
 
-       afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
-       if (!afinfo)
-               return -ENOMEM;
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
 
-       afinfo->family = AF_UNSPEC;
-       st->bpf_seq_afinfo = afinfo;
-       ret = bpf_iter_init_seq_net(priv_data, aux);
-       if (ret)
-               kfree(afinfo);
-       return ret;
+       err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+
+       return 0;
 }
 
 static void bpf_iter_fini_tcp(void *priv_data)
 {
-       struct tcp_iter_state *st = priv_data;
+       struct bpf_tcp_iter_state *iter = priv_data;
 
-       kfree(st->bpf_seq_afinfo);
        bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
 }
 
 static const struct bpf_iter_seq_info tcp_seq_info = {
        .seq_ops                = &bpf_iter_tcp_seq_ops,
        .init_seq_private       = bpf_iter_init_tcp,
        .fini_seq_private       = bpf_iter_fini_tcp,
-       .seq_priv_size          = sizeof(struct tcp_iter_state),
+       .seq_priv_size          = sizeof(struct bpf_tcp_iter_state),
 };
 
+static const struct bpf_func_proto *
+bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
+                           const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+}
+
 static struct bpf_iter_reg tcp_reg_info = {
        .target                 = "tcp",
        .ctx_arg_info_size      = 1,
@@ -3042,6 +3279,7 @@ static struct bpf_iter_reg tcp_reg_info = {
                { offsetof(struct bpf_iter__tcp, sk_common),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_tcp_get_func_proto,
        .seq_info               = &tcp_seq_info,
 };
 
index e09147a..fc61cd3 100644 (file)
@@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb)
        if (th->cwr)
                skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
+       if (skb->encapsulation)
+               skb->inner_transport_header = skb->transport_header;
+
        return 0;
 }
 EXPORT_SYMBOL(tcp_gro_complete);
index 9dde1e5..1380a6b 100644 (file)
@@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb)
 
        skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
        skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+
+       if (skb->encapsulation)
+               skb->inner_transport_header = skb->transport_header;
+
        return 0;
 }
 
index db0a898..8381288 100644 (file)
@@ -701,8 +701,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 errout:
        if (in6_dev)
                in6_dev_put(in6_dev);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        return err;
 }
 
@@ -5417,8 +5416,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 errout_ifa:
        in6_ifa_put(ifa);
 errout:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        if (fillargs.netnsid >= 0)
                put_net(tgt_net);
 
@@ -5792,7 +5790,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net,
 }
 
 static int inet6_validate_link_af(const struct net_device *dev,
-                                 const struct nlattr *nla)
+                                 const struct nlattr *nla,
+                                 struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[IFLA_INET6_MAX + 1];
        struct inet6_dev *idev = NULL;
@@ -5805,7 +5804,7 @@ static int inet6_validate_link_af(const struct net_device *dev,
        }
 
        err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
-                                         inet6_af_policy, NULL);
+                                         inet6_af_policy, extack);
        if (err)
                return err;
 
index d897faa..3a871a0 100644 (file)
 
 #include <linux/uaccess.h>
 
-/*
- *     Parsing tlv encoded headers.
- *
- *     Parsing function "func" returns true, if parsing succeed
- *     and false, if it failed.
- *     It MUST NOT touch skb->h.
- */
-
-struct tlvtype_proc {
-       int     type;
-       bool    (*func)(struct sk_buff *skb, int offset);
-};
-
 /*********************
   Generic functions
  *********************/
@@ -112,16 +99,23 @@ drop:
        return false;
 }
 
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff);
+#endif
+
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+static bool ip6_parse_tlv(bool hopbyhop,
                          struct sk_buff *skb,
                          int max_count)
 {
        int len = (skb_transport_header(skb)[1] + 1) << 3;
        const unsigned char *nh = skb_network_header(skb);
        int off = skb_network_header_len(skb);
-       const struct tlvtype_proc *curr;
        bool disallow_unknowns = false;
        int tlv_count = 0;
        int padlen = 0;
@@ -176,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
                        if (tlv_count > max_count)
                                goto bad;
 
-                       for (curr = procs; curr->type >= 0; curr++) {
-                               if (curr->type == nh[off]) {
-                                       /* type specific length/alignment
-                                          checks will be performed in the
-                                          func(). */
-                                       if (curr->func(skb, off) == false)
+                       if (hopbyhop) {
+                               switch (nh[off]) {
+                               case IPV6_TLV_ROUTERALERT:
+                                       if (!ipv6_hop_ra(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_IOAM:
+                                       if (!ipv6_hop_ioam(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_JUMBO:
+                                       if (!ipv6_hop_jumbo(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_CALIPSO:
+                                       if (!ipv6_hop_calipso(skb, off))
+                                               return false;
+                                       break;
+                               default:
+                                       if (!ip6_tlvopt_unknown(skb, off,
+                                                               disallow_unknowns))
+                                               return false;
+                                       break;
+                               }
+                       } else {
+                               switch (nh[off]) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+                               case IPV6_TLV_HAO:
+                                       if (!ipv6_dest_hao(skb, off))
+                                               return false;
+                                       break;
+#endif
+                               default:
+                                       if (!ip6_tlvopt_unknown(skb, off,
+                                                               disallow_unknowns))
                                                return false;
                                        break;
                                }
                        }
-                       if (curr->type < 0 &&
-                           !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
-                               return false;
-
                        padlen = 0;
                }
                off += optlen;
@@ -267,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 }
 #endif
 
-static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-       {
-               .type   = IPV6_TLV_HAO,
-               .func   = ipv6_dest_hao,
-       },
-#endif
-       {-1,                    NULL}
-};
-
 static int ipv6_destopt_rcv(struct sk_buff *skb)
 {
        struct inet6_dev *idev = __in6_dev_get(skb->dev);
@@ -307,8 +316,7 @@ fail_and_free:
        dstbuf = opt->dst1;
 #endif
 
-       if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
-                         net->ipv6.sysctl.max_dst_opts_cnt)) {
+       if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) {
                skb->transport_header += extlen;
                opt = IP6CB(skb);
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -1051,26 +1059,6 @@ drop:
        return false;
 }
 
-static const struct tlvtype_proc tlvprochopopt_lst[] = {
-       {
-               .type   = IPV6_TLV_ROUTERALERT,
-               .func   = ipv6_hop_ra,
-       },
-       {
-               .type   = IPV6_TLV_IOAM,
-               .func   = ipv6_hop_ioam,
-       },
-       {
-               .type   = IPV6_TLV_JUMBO,
-               .func   = ipv6_hop_jumbo,
-       },
-       {
-               .type   = IPV6_TLV_CALIPSO,
-               .func   = ipv6_hop_calipso,
-       },
-       { -1, }
-};
-
 int ipv6_parse_hopopts(struct sk_buff *skb)
 {
        struct inet6_skb_parm *opt = IP6CB(skb);
@@ -1096,8 +1084,7 @@ fail_and_free:
                goto fail_and_free;
 
        opt->flags |= IP6SKB_HOPBYHOP;
-       if (ip6_parse_tlv(tlvprochopopt_lst, skb,
-                         net->ipv6.sysctl.max_hbh_opts_cnt)) {
+       if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) {
                skb->transport_header += extlen;
                opt = IP6CB(skb);
                opt->nhoff = sizeof(struct ipv6hdr);
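
The exthdrs.c rewrite drops the tlvtype_proc tables in favour of direct switch dispatch: with CONFIG_RETPOLINE each indirect curr->func() call is comparatively expensive in this per-option hot path. Schematically (the real code breaks out of a larger parsing loop rather than returning):

	/* before: table walk, one indirect call per matched option */
	for (curr = procs; curr->type >= 0; curr++)
		if (curr->type == nh[off])
			return curr->func(skb, off);

	/* after: direct calls the compiler can inline and predict */
	switch (nh[off]) {
	case IPV6_TLV_ROUTERALERT:
		return ipv6_hop_ra(skb, off);
	/* ... */
	default:
		return ip6_tlvopt_unknown(skb, off, disallow_unknowns);
	}
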
index b7b27d9..12f985f 100644 (file)
@@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
 {
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
+       struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
-       int delta = hh_len - skb_headroom(skb);
-       const struct in6_addr *nexthop;
+       const struct in6_addr *daddr, *nexthop;
+       struct ipv6hdr *hdr;
        struct neighbour *neigh;
        int ret;
 
        /* Be paranoid, rather than too clever. */
-       if (unlikely(delta > 0) && dev->header_ops) {
-               /* pskb_expand_head() might crash, if skb is shared */
-               if (skb_shared(skb)) {
-                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-                       if (likely(nskb)) {
-                               if (skb->sk)
-                                       skb_set_owner_w(nskb, skb->sk);
-                               consume_skb(skb);
-                       } else {
-                               kfree_skb(skb);
-                       }
-                       skb = nskb;
-               }
-               if (skb &&
-                   pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
-                       kfree_skb(skb);
-                       skb = NULL;
-               }
+       if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+               skb = skb_expand_head(skb, hh_len);
                if (!skb) {
-                       IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+                       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOMEM;
                }
        }
 
-       if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
-               struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
-
+       hdr = ipv6_hdr(skb);
+       daddr = &hdr->daddr;
+       if (ipv6_addr_is_multicast(daddr)) {
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
-                    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
-                                        &ipv6_hdr(skb)->saddr))) {
+                    ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 
                        /* Do not check for IFF_ALLMULTI; multicast routing
@@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);
 
-                       if (ipv6_hdr(skb)->hop_limit == 0) {
+                       if (hdr->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
@@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                }
 
                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
-
-               if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
-                   IPV6_ADDR_SCOPE_NODELOCAL &&
+               if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
@@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
        }
 
        rcu_read_lock_bh();
-       nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
-       neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+       nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+       neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
        if (unlikely(!neigh))
-               neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+               neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
@@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
        }
        rcu_read_unlock_bh();
 
-       IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
 }
@@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
+       struct net_device *dev = dst->dev;
+       struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
@@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        int hlimit = -1;
        u32 mtu;
 
-       head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+       head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;
 
-       if (unlikely(skb_headroom(skb) < head_room)) {
-               struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
-               if (!skb2) {
-                       IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                                     IPSTATS_MIB_OUTDISCARDS);
-                       kfree_skb(skb);
+       if (unlikely(head_room > skb_headroom(skb))) {
+               skb = skb_expand_head(skb, head_room);
+               if (!skb) {
+                       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOBUFS;
                }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        if (opt) {
@@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 
        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
-               IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                             IPSTATS_MIB_OUT, skb->len);
+               IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
@@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
-                              net, (struct sock *)sk, skb, NULL, dst->dev,
+                              net, (struct sock *)sk, skb, NULL, dev,
                               dst_output);
        }
 
-       skb->dev = dst->dev;
+       skb->dev = dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 
-       IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+       IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
 }
@@ -549,9 +525,10 @@ int ip6_forward(struct sk_buff *skb)
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
-               if (proxied > 0)
+               if (proxied > 0) {
+                       hdr->hop_limit--;
                        return ip6_input(skb);
-               else if (proxied < 0) {
+               else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
index 06b0d2c..36ed9ef 100644 (file)
@@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb)
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = mrt->vif_table[reg_vif_num].dev;
-       if (reg_dev)
-               dev_hold(reg_dev);
+       dev_hold(reg_dev);
        read_unlock(&mrt_lock);
 
        if (!reg_dev)
index a6804a7..e4bdb09 100644 (file)
@@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
                goto out_free_gsf;
 
-       ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+       ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
 out_free_gsf:
        kfree(gsf);
        return ret;
@@ -234,7 +234,7 @@ out_free_gsf:
 static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                int optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter *gf32;
        void *p;
        int ret;
@@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (!p)
                return -ENOMEM;
 
-       gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+       gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
        ret = -EFAULT;
        if (copy_from_sockptr(gf32, optval, optlen))
                goto out_free_p;
@@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                goto out_free_p;
 
        ret = -EINVAL;
-       if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+       if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
                goto out_free_p;
 
        ret = ip6_mc_msfilter(sk, &(struct group_filter){
                        .gf_interface = gf32->gf_interface,
                        .gf_group = gf32->gf_group,
                        .gf_fmode = gf32->gf_fmode,
-                       .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+                       .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
 
 out_free_p:
        kfree(p);
@@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct group_filter, gf_slist);
+       const int size0 = offsetof(struct group_filter, gf_slist_flex);
        struct group_filter __user *p = optval;
        struct group_filter gsf;
        int num;
@@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
                return -EADDRNOTAVAIL;
        num = gsf.gf_numsrc;
        lock_sock(sk);
-       err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+       err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
        if (!err) {
                if (num > gsf.gf_numsrc)
                        num = gsf.gf_numsrc;
@@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
 static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter __user *p = optval;
        struct compat_group_filter gf32;
        struct group_filter gf;
@@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
                return -EADDRNOTAVAIL;
 
        lock_sock(sk);
-       err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+       err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
        release_sock(sk);
        if (err)
                return err;
index 54ec163..cd951fa 100644 (file)
@@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
                if (psl)
                        count += psl->sl_max;
-               newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
                if (psl) {
                        for (i = 0; i < psl->sl_count; i++)
                                newpsl->sl_addr[i] = psl->sl_addr[i];
-                       atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+                       atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                                  &sk->sk_omem_alloc);
                        kfree_rcu(psl, rcu);
                }
                psl = newpsl;
@@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
                goto done;
        }
        if (gsf->gf_numsrc) {
-               newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
-                                                         GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+                                                     gsf->gf_numsrc),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
                                     newpsl->sl_count, newpsl->sl_addr, 0);
                if (err) {
                        mutex_unlock(&idev->mc_lock);
-                       sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+                       sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
+                                                            newpsl->sl_max));
                        goto done;
                }
                mutex_unlock(&idev->mc_lock);
@@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
        if (psl) {
                ip6_mc_del_src(idev, group, pmc->sfmode,
                               psl->sl_count, psl->sl_addr, 0);
-               atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        } else {
                ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
@@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
                err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
                                     psl->sl_count, psl->sl_addr, 0);
                RCU_INIT_POINTER(iml->sflist, NULL);
-               atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        }
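
Every IP6_SFLSIZE() above becomes struct_size(): the helper computes the size
of the header plus n trailing sl_addr elements and saturates on arithmetic
overflow, so an absurd source count makes the allocation fail instead of
quietly returning an undersized buffer. A runnable userspace model of that
behaviour, with stand-in types and a simplified helper:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct in6_addr_stub { unsigned char s6_addr[16]; };

    struct ip6_sf_socklist_stub {
            unsigned int sl_max;
            unsigned int sl_count;
            struct in6_addr_stub sl_addr[];     /* flexible array member */
    };

    /* Simplified model of the kernel's struct_size(): the open-coded
     * IP6_SFLSIZE() multiplication could overflow silently; this
     * saturates to SIZE_MAX so the allocator rejects it. */
    static size_t struct_size_model(size_t hdr, size_t elem, size_t n)
    {
            if (n && elem > (SIZE_MAX - hdr) / n)
                    return SIZE_MAX;
            return hdr + elem * n;
    }

    int main(void)
    {
            size_t sz = struct_size_model(sizeof(struct ip6_sf_socklist_stub),
                                          sizeof(struct in6_addr_stub), 64);
            struct ip6_sf_socklist_stub *psl = malloc(sz);

            printf("allocated %zu bytes for 64 sources\n", sz);
            free(psl);
            return 0;
    }
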
 
index 6b80511..6cf4bb8 100644 (file)
@@ -3626,8 +3626,7 @@ out:
        if (err) {
                lwtstate_put(fib6_nh->fib_nh_lws);
                fib6_nh->fib_nh_lws = NULL;
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
        }
 
        return err;
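
This hunk, and the many similar ones further down, depend on dev_put() and
dev_hold() having been made NULL-tolerant, so callers can drop their
"if (dev)" guards the way kfree() callers once dropped theirs. A stub model
of the contract (not the kernel implementation):

    #include <stdatomic.h>
    #include <stddef.h>

    struct net_device_stub { _Atomic int refcnt; };

    /* NULL-safe put: "if (dev) dev_put(dev);" collapses to dev_put(dev). */
    static void dev_put_model(struct net_device_stub *dev)
    {
            if (dev)
                    atomic_fetch_sub(&dev->refcnt, 1);
    }

    static void dev_hold_model(struct net_device_stub *dev)
    {
            if (dev)
                    atomic_fetch_add(&dev->refcnt, 1);
    }

    int main(void)
    {
            struct net_device_stub dev = { .refcnt = 1 };

            dev_hold_model(&dev);
            dev_put_model(&dev);
            dev_put_model(NULL);        /* no-op, no caller-side check */
            return atomic_load(&dev.refcnt) - 1;    /* 0 on balance */
    }
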
index 44453b3..18316ee 100644 (file)
@@ -1044,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
                        if (err == 0) {
                                atomic_dec(&iucv->skbs_in_xmit);
                                skb_unlink(skb, &iucv->send_skb_q);
-                               kfree_skb(skb);
+                               consume_skb(skb);
                        }
 
                        /* this error should never happen since the     */
@@ -1293,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
                        }
                }
 
-               kfree_skb(skb);
+               consume_skb(skb);
                if (iucv->transport == AF_IUCV_TRANS_HIPER) {
                        atomic_inc(&iucv->msg_recv);
                        if (atomic_read(&iucv->msg_recv) > iucv->msglimit) {
@@ -1756,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
        spin_unlock_irqrestore(&list->lock, flags);
 
        if (this) {
-               kfree_skb(this);
+               consume_skb(this);
                /* wake up any process waiting for sending */
                iucv_sock_wake_msglim(sk);
        }
@@ -1903,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
 {
        struct iucv_sock *iucv = iucv_sk(sk);
 
-       if (!iucv)
-               goto out;
-       if (sk->sk_state != IUCV_BOUND)
-               goto out;
+       if (!iucv || sk->sk_state != IUCV_BOUND) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
        sk->sk_state = IUCV_CONNECTED;
        sk->sk_state_change(sk);
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -1924,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
 {
        struct iucv_sock *iucv = iucv_sk(sk);
 
-       if (!iucv)
-               goto out;
-       if (sk->sk_state != IUCV_BOUND)
-               goto out;
+       if (!iucv || sk->sk_state != IUCV_BOUND) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        sk->sk_state = IUCV_DISCONN;
        sk->sk_state_change(sk);
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -1945,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
        struct iucv_sock *iucv = iucv_sk(sk);
 
        /* other end of connection closed */
-       if (!iucv)
-               goto out;
+       if (!iucv) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        if (sk->sk_state == IUCV_CONNECTED) {
                sk->sk_state = IUCV_DISCONN;
                sk->sk_state_change(sk);
        }
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -2107,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
        case (AF_IUCV_FLAG_WIN):
                err = afiucv_hs_callback_win(sk, skb);
                if (skb->len == sizeof(struct af_iucv_trans_hdr)) {
-                       kfree_skb(skb);
+                       consume_skb(skb);
                        break;
                }
                fallthrough;    /* and receive non-zero length data */
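
The kfree_skb() to consume_skb() conversions in this file change accounting,
not behaviour: both free the buffer, but kfree_skb() is traced as a packet
drop while consume_skb() marks a normal end of life, so skbs that were
delivered or transmitted successfully no longer show up in drop monitors. A
stub model of the convention:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct sk_buff_stub { int len; };

    static unsigned long drop_count;    /* what a drop monitor would see */

    /* consume_skb(): normal lifecycle, invisible to drop tracking */
    static void consume_skb_model(struct sk_buff_stub *skb) { free(skb); }

    /* kfree_skb(): the packet is being discarded; record the drop */
    static void kfree_skb_model(struct sk_buff_stub *skb)
    {
            drop_count++;
            free(skb);
    }

    static void finish_skb(struct sk_buff_stub *skb, bool delivered)
    {
            if (delivered)
                    consume_skb_model(skb);     /* success path */
            else
                    kfree_skb_model(skb);       /* genuine drop */
    }

    int main(void)
    {
            finish_skb(malloc(sizeof(struct sk_buff_stub)), true);
            finish_skb(malloc(sizeof(struct sk_buff_stub)), false);
            printf("drops seen by monitor: %lu\n", drop_count);  /* 1 */
            return 0;
    }
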
@@ -2262,21 +2264,11 @@ static struct packet_type iucv_packet_type = {
        .func = afiucv_hs_rcv,
 };
 
-static int afiucv_iucv_init(void)
-{
-       return pr_iucv->iucv_register(&af_iucv_handler, 0);
-}
-
-static void afiucv_iucv_exit(void)
-{
-       pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-}
-
 static int __init afiucv_init(void)
 {
        int err;
 
-       if (MACHINE_IS_VM) {
+       if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) {
                cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
                if (unlikely(err)) {
                        WARN_ON(err);
@@ -2284,11 +2276,7 @@ static int __init afiucv_init(void)
                        goto out;
                }
 
-               pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
-               if (!pr_iucv) {
-                       printk(KERN_WARNING "iucv_if lookup failed\n");
-                       memset(&iucv_userid, 0, sizeof(iucv_userid));
-               }
+               pr_iucv = &iucv_if;
        } else {
                memset(&iucv_userid, 0, sizeof(iucv_userid));
                pr_iucv = NULL;
@@ -2302,7 +2290,7 @@ static int __init afiucv_init(void)
                goto out_proto;
 
        if (pr_iucv) {
-               err = afiucv_iucv_init();
+               err = pr_iucv->iucv_register(&af_iucv_handler, 0);
                if (err)
                        goto out_sock;
        }
@@ -2316,23 +2304,19 @@ static int __init afiucv_init(void)
 
 out_notifier:
        if (pr_iucv)
-               afiucv_iucv_exit();
+               pr_iucv->iucv_unregister(&af_iucv_handler, 0);
 out_sock:
        sock_unregister(PF_IUCV);
 out_proto:
        proto_unregister(&iucv_proto);
 out:
-       if (pr_iucv)
-               symbol_put(iucv_if);
        return err;
 }
 
 static void __exit afiucv_exit(void)
 {
-       if (pr_iucv) {
-               afiucv_iucv_exit();
-               symbol_put(iucv_if);
-       }
+       if (pr_iucv)
+               pr_iucv->iucv_unregister(&af_iucv_handler, 0);
 
        unregister_netdevice_notifier(&afiucv_netdev_notifier);
        dev_remove_pack(&iucv_packet_type);
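
The init/exit rework above drops the symbol_get()/symbol_put() dance and
references iucv_if directly, gated at compile time by IS_ENABLED(CONFIG_IUCV).
A stubbed userspace sketch of the shape of that simplification (all names
here are stand-ins):

    #include <stdio.h>

    struct iucv_if_stub {
            int  (*iucv_register)(void *handler, int smp);
            void (*iucv_unregister)(void *handler, int smp);
    };

    static int  reg_stub(void *h, int s)   { (void)h; (void)s; return 0; }
    static void unreg_stub(void *h, int s) { (void)h; (void)s; }

    /* resolved at link time, not via symbol_get() at runtime */
    static struct iucv_if_stub iucv_if_model = { reg_stub, unreg_stub };
    static struct iucv_if_stub *pr_iucv_model;

    int main(void)
    {
            int machine_is_vm = 1, config_iucv = 1;  /* IS_ENABLED() model */

            pr_iucv_model = (machine_is_vm && config_iucv) ? &iucv_if_model
                                                           : NULL;
            if (pr_iucv_model)
                    pr_iucv_model->iucv_register(NULL, 0);
            puts(pr_iucv_model ? "VM transport" : "HiperSockets only");
            return 0;
    }
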
index e6795d5..f3343a8 100644 (file)
@@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS];
  */
 static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
 {
-       register unsigned long reg0 asm ("0");
-       register unsigned long reg1 asm ("1");
-       int ccode;
+       int cc;
 
-       reg0 = command;
-       reg1 = (unsigned long)parm;
        asm volatile(
-               "       .long 0xb2f01000\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1)
-               :  "m" (*parm) : "cc");
-       return ccode;
+               "       lgr     0,%[reg0]\n"
+               "       lgr     1,%[reg1]\n"
+               "       .long   0xb2f01000\n"
+               "       ipm     %[cc]\n"
+               "       srl     %[cc],28\n"
+               : [cc] "=&d" (cc), "+m" (*parm)
+               : [reg0] "d" ((unsigned long)command),
+                 [reg1] "d" ((unsigned long)parm)
+               : "cc", "0", "1");
+       return cc;
 }
 
 static inline int iucv_call_b2f0(int command, union iucv_param *parm)
@@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
  */
 static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
 {
-       register unsigned long reg0 asm ("0");
-       register unsigned long reg1 asm ("1");
-       int ccode;
+       unsigned long reg1 = (unsigned long)param;
+       int cc;
 
-       reg0 = IUCV_QUERY;
-       reg1 = (unsigned long) param;
        asm volatile (
+               "       lghi    0,%[cmd]\n"
+               "       lgr     1,%[reg1]\n"
                "       .long   0xb2f01000\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+               "       ipm     %[cc]\n"
+               "       srl     %[cc],28\n"
+               "       lgr     %[reg1],1\n"
+               : [cc] "=&d" (cc), [reg1] "+&d" (reg1)
+               : [cmd] "K" (IUCV_QUERY)
+               : "cc", "0", "1");
        *max_pathid = reg1;
-       return ccode;
+       return cc;
 }
 
 static int iucv_query_maxconn(void)
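
Both asm rewrites above retire "register unsigned long reg0 asm("0")"
variables, which are fragile because the compiler does not guarantee the
value stays in the named register between its initialization and the asm
statement. The replacement loads the fixed registers inside the asm body and
declares them as clobbers. A condensed model of the new pattern (s390-only;
it mirrors the b2f0 hunk, with a blanket "memory" clobber where the real
code constrains *parm precisely):

    /* builds only with an s390x compiler; shown for the constraint shape */
    static inline int b2f0_model(int command, void *parm)
    {
            int cc;

            asm volatile(
                    "       lgr     0,%[reg0]\n"    /* load r0 in the asm */
                    "       lgr     1,%[reg1]\n"    /* load r1 in the asm */
                    "       .long   0xb2f01000\n"   /* IUCV instruction */
                    "       ipm     %[cc]\n"        /* fetch condition code */
                    "       srl     %[cc],28\n"
                    : [cc] "=&d" (cc)
                    : [reg0] "d" ((unsigned long)command),
                      [reg1] "d" ((unsigned long)parm)
                    : "cc", "0", "1", "memory");
            return cc;
    }
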
@@ -500,14 +502,14 @@ static void iucv_setmask_mp(void)
 {
        int cpu;
 
-       get_online_cpus();
+       cpus_read_lock();
        for_each_online_cpu(cpu)
                /* Enable all cpus with a declared buffer. */
                if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) &&
                    !cpumask_test_cpu(cpu, &iucv_irq_cpumask))
                        smp_call_function_single(cpu, iucv_allow_cpu,
                                                 NULL, 1);
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /**
@@ -540,7 +542,7 @@ static int iucv_enable(void)
        size_t alloc_size;
        int cpu, rc;
 
-       get_online_cpus();
+       cpus_read_lock();
        rc = -ENOMEM;
        alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
        iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
@@ -553,12 +555,12 @@ static int iucv_enable(void)
        if (cpumask_empty(&iucv_buffer_cpumask))
                /* No cpu could declare an iucv buffer. */
                goto out;
-       put_online_cpus();
+       cpus_read_unlock();
        return 0;
 out:
        kfree(iucv_path_table);
        iucv_path_table = NULL;
-       put_online_cpus();
+       cpus_read_unlock();
        return rc;
 }
 
@@ -571,11 +573,11 @@ out:
  */
 static void iucv_disable(void)
 {
-       get_online_cpus();
+       cpus_read_lock();
        on_each_cpu(iucv_retrieve_cpu, NULL, 1);
        kfree(iucv_path_table);
        iucv_path_table = NULL;
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 static int iucv_cpu_dead(unsigned int cpu)
@@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this,
        if (cpumask_empty(&iucv_irq_cpumask))
                return NOTIFY_DONE;
 
-       get_online_cpus();
+       cpus_read_lock();
        on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1);
        preempt_disable();
        for (i = 0; i < iucv_max_pathid; i++) {
@@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this,
                        iucv_sever_pathid(i, NULL);
        }
        preempt_enable();
-       put_online_cpus();
+       cpus_read_unlock();
        iucv_disable();
        return NOTIFY_DONE;
 }
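
The get_online_cpus()/put_online_cpus() calls in this file become
cpus_read_lock()/cpus_read_unlock(), a rename that makes the read-side-lock
semantics explicit; the guarantee is unchanged: no CPU comes or goes while
the section runs. A stubbed model:

    #include <stdio.h>

    /* stand-ins for the kernel's CPU-hotplug read lock */
    static void cpus_read_lock_model(void)   { /* block CPU hotplug */ }
    static void cpus_read_unlock_model(void) { /* allow CPU hotplug */ }

    static void walk_online_cpus(void (*fn)(int cpu), int ncpus)
    {
            cpus_read_lock_model();         /* was: get_online_cpus() */
            for (int cpu = 0; cpu < ncpus; cpu++)
                    fn(cpu);                /* online mask is stable here */
            cpus_read_unlock_model();       /* was: put_online_cpus() */
    }

    static void say(int cpu) { printf("cpu %d\n", cpu); }

    int main(void)
    {
            walk_online_cpus(say, 2);
            return 0;
    }
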
index 7180979..3086f4a 100644 (file)
@@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
 {
        u8 rc = LLC_PDU_LEN_U;
 
-       if (addr->sllc_test || addr->sllc_xid)
+       if (addr->sllc_test)
                rc = LLC_PDU_LEN_U;
+       else if (addr->sllc_xid)
+               /* We need to expand the header to sizeof(struct llc_xid_info),
+                * since llc_pdu_init_as_xid_cmd() writes bytes 4, 5 and 6 of the
+                * LLC header as the XID PDU. In llc_ui_sendmsg() we reserve the
+                * header size and fill the remaining space with user data; if we
+                * don't reserve these bytes, the XID info overwrites user data.
+                */
+               rc = LLC_PDU_LEN_U_XID;
        else if (sk->sk_type == SOCK_STREAM)
                rc = LLC_PDU_LEN_I;
        return rc;
@@ -216,8 +224,7 @@ static int llc_ui_release(struct socket *sock)
        } else {
                release_sock(sk);
        }
-       if (llc->dev)
-               dev_put(llc->dev);
+       dev_put(llc->dev);
        sock_put(sk);
        llc_sk_free(sk);
 out:
@@ -355,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
        } else
                llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
                                           addr->sllc_mac);
-       if (llc->dev)
-               dev_hold(llc->dev);
+       dev_hold(llc->dev);
        rcu_read_unlock();
        if (!llc->dev)
                goto out;
index b554f26..79d1cef 100644 (file)
@@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
        struct llc_sap_state_ev *ev = llc_sap_ev(skb);
        int rc;
 
-       llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+       llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
                            ev->daddr.lsap, LLC_PDU_CMD);
        llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
        rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
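
The two llc hunks above are one fix: a U-format LLC header is 3 bytes
(LLC_PDU_LEN_U), but llc_pdu_init_as_xid_cmd() additionally writes a 3-byte
struct llc_xid_info directly behind it, so an XID socket that reserved only
3 bytes of headroom had the start of its payload overwritten. The new
LLC_PDU_LEN_U_XID reserves room for both. A small model of the arithmetic
(field names abbreviated):

    #include <assert.h>

    struct llc_xid_info_model { unsigned char fmt_id, type, rw; };

    #define LLC_PDU_LEN_U_MODEL     3
    #define LLC_PDU_LEN_U_XID_MODEL (LLC_PDU_LEN_U_MODEL + \
                                     sizeof(struct llc_xid_info_model))

    int main(void)
    {
            /* reserving only LLC_PDU_LEN_U leaves bytes 4..6 inside the
             * user payload, which the XID init then clobbers */
            assert(LLC_PDU_LEN_U_XID_MODEL == 6);
            return 0;
    }
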
index 84cc773..4e6f11e 100644 (file)
@@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
                                  struct vif_params *params)
 {
        struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+       struct ieee80211_local *local = sdata->local;
+       struct sta_info *sta;
        int ret;
 
        ret = ieee80211_if_change_type(sdata, type);
@@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
                RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
                ieee80211_check_fast_rx_iface(sdata);
        } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) {
+               struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+               if (params->use_4addr == ifmgd->use_4addr)
+                       return 0;
+
                sdata->u.mgd.use_4addr = params->use_4addr;
+               if (!ifmgd->associated)
+                       return 0;
+
+               mutex_lock(&local->sta_mtx);
+               sta = sta_info_get(sdata, ifmgd->bssid);
+               if (sta)
+                       drv_sta_set_4addr(local, sdata, &sta->sta,
+                                         params->use_4addr);
+               mutex_unlock(&local->sta_mtx);
+
+               if (params->use_4addr)
+                       ieee80211_send_4addr_nullfunc(local, sdata);
        }
 
        if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
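
Before this change, flipping use_4addr on an already-associated station
interface only updated the flag; neither the driver nor the AP learned of it
until the next association. The hunk rejects no-op updates early and, when
associated, pushes the new mode to the driver and announces it with a
4-address nullfunc. A stubbed sketch of that control flow:

    #include <stdbool.h>
    #include <stdio.h>

    struct sta_model { bool associated; bool use_4addr; };

    static void drv_sta_set_4addr_model(struct sta_model *s, bool on)
    { s->use_4addr = on; }              /* driver offload hook */

    static void send_4addr_nullfunc_model(void)
    { puts("announce 4-addr mode to AP"); }

    static int change_4addr(struct sta_model *s, bool use_4addr)
    {
            if (use_4addr == s->use_4addr)
                    return 0;                   /* nothing to do */

            s->use_4addr = use_4addr;
            if (!s->associated)
                    return 0;                   /* applied on association */

            drv_sta_set_4addr_model(s, use_4addr);
            if (use_4addr)
                    send_4addr_nullfunc_model();
            return 0;
    }

    int main(void)
    {
            struct sta_model s = { .associated = true, .use_4addr = false };

            change_4addr(&s, true);             /* driver + AP notified */
            change_4addr(&s, true);             /* no-op, early return */
            return 0;
    }
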
index 22549b9..30ce6d2 100644 (file)
@@ -2201,6 +2201,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t);
 void ieee80211_send_nullfunc(struct ieee80211_local *local,
                             struct ieee80211_sub_if_data *sdata,
                             bool powersave);
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+                                  struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
                             struct ieee80211_hdr *hdr, bool ack, u16 tx_time);
 
index a00f11a..c0ea3b1 100644 (file)
@@ -1095,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
        ieee80211_tx_skb(sdata, skb);
 }
 
-static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
-                                         struct ieee80211_sub_if_data *sdata)
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+                                  struct ieee80211_sub_if_data *sdata)
 {
        struct sk_buff *skb;
        struct ieee80211_hdr *nullfunc;
index 771921c..2563473 100644 (file)
@@ -730,7 +730,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local,
                 * Need to make a copy and possibly remove radiotap header
                 * and FCS from the original.
                 */
-               skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC);
+               skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD,
+                                     0, GFP_ATOMIC);
 
                if (!skb)
                        return NULL;
index e969811..8509778 100644 (file)
@@ -1147,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
        return queued;
 }
 
+static void
+ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
+                    struct sta_info *sta,
+                    struct sk_buff *skb)
+{
+       struct rate_control_ref *ref = sdata->local->rate_ctrl;
+       u16 tid;
+
+       if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
+               return;
+
+       if (!sta || !sta->sta.ht_cap.ht_supported ||
+           !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
+           skb->protocol == sdata->control_port_protocol)
+               return;
+
+       tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+       if (likely(sta->ampdu_mlme.tid_tx[tid]))
+               return;
+
+       ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
+}
+
 /*
  * initialises @tx
  * pass %NULL for the station if unknown, a valid pointer if known
@@ -1160,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_hdr *hdr;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       bool aggr_check = false;
        int tid;
 
        memset(tx, 0, sizeof(*tx));
@@ -1188,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
                } else if (tx->sdata->control_port_protocol == tx->skb->protocol) {
                        tx->sta = sta_info_get_bss(sdata, hdr->addr1);
                }
-               if (!tx->sta && !is_multicast_ether_addr(hdr->addr1))
+               if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) {
                        tx->sta = sta_info_get(sdata, hdr->addr1);
+                       aggr_check = true;
+               }
        }
 
        if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
@@ -1199,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
                struct tid_ampdu_tx *tid_tx;
 
                tid = ieee80211_get_tid(hdr);
-
                tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+               if (!tid_tx && aggr_check) {
+                       ieee80211_aggr_check(sdata, tx->sta, skb);
+                       tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+               }
+
                if (tid_tx) {
                        bool queued;
 
@@ -4120,29 +4150,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
-static void
-ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
-                    struct sta_info *sta,
-                    struct sk_buff *skb)
-{
-       struct rate_control_ref *ref = sdata->local->rate_ctrl;
-       u16 tid;
-
-       if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
-               return;
-
-       if (!sta || !sta->sta.ht_cap.ht_supported ||
-           !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
-           skb->protocol == sdata->control_port_protocol)
-               return;
-
-       tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
-       if (likely(sta->ampdu_mlme.tid_tx[tid]))
-               return;
-
-       ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
-}
-
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
                                  struct net_device *dev,
                                  u32 info_flags,
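
ieee80211_aggr_check() moves up so that ieee80211_tx_prepare() can call it:
if a QoS data frame finds no aggregation session for its TID and the station
was only resolved here (aggr_check), a BA session is kicked off and tid_tx
re-read, so aggregation also starts on transmit paths that never pass
through __ieee80211_subif_start_xmit(). A stubbed model of the added
lookup-retry (session setup is asynchronous in the real code):

    #include <stddef.h>

    struct tid_tx_model { int active; };

    static struct tid_tx_model *tid_tx_table[16];   /* per-TID sessions */

    static void start_ba_session(int tid) { (void)tid; /* async setup */ }

    static struct tid_tx_model *prepare_agg(int tid, int aggr_check)
    {
            struct tid_tx_model *tid_tx = tid_tx_table[tid];

            if (!tid_tx && aggr_check) {
                    start_ba_session(tid);      /* may create tid_tx */
                    tid_tx = tid_tx_table[tid]; /* re-read; RCU in-kernel */
            }
            return tid_tx;
    }

    int main(void)
    {
            return prepare_agg(5, 1) ? 1 : 0;   /* 0: setup is async */
    }
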
index 84f722d..a9526ac 100644 (file)
@@ -170,7 +170,6 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                /* TODO: expand mctp_skb_cb for header fields? */
                struct mctp_hdr *hdr = mctp_hdr(skb);
 
-               hdr = mctp_hdr(skb);
                addr = msg->msg_name;
                addr->smctp_family = AF_MCTP;
                addr->smctp_network = cb->net;
index d2591eb..56263c2 100644 (file)
@@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry {
        struct mptcp_addr_info  addr;
        u8                      flags;
        int                     ifindex;
-       struct rcu_head         rcu;
        struct socket           *lsk;
 };
 
index 83c52df..5c03e51 100644 (file)
@@ -670,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
                return false;
 
        tstamp = nf_conn_tstamp_find(ct);
-       if (tstamp && tstamp->stop == 0)
+       if (tstamp) {
+               s32 timeout = ct->timeout - nfct_time_stamp;
+
                tstamp->stop = ktime_get_real_ns();
+               if (timeout < 0)
+                       tstamp->stop -= jiffies_to_nsecs(-timeout);
+       }
 
        if (nf_conntrack_event_report(IPCT_DESTROY, ct,
                                    portid, report) < 0) {
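
The conntrack hunk fixes the recorded stop time for entries reaped after
their timeout already lapsed: ct->timeout - nfct_time_stamp going negative
means the flow really ended that many jiffies ago, so tstamp->stop is
backdated by that amount instead of being set to "now". A runnable model of
the arithmetic (the HZ value is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int64_t now_ns       = 1000000000;  /* ktime_get_real_ns() */
            int32_t timeout_left = -250;        /* jiffies past expiry */
            int64_t ns_per_jiffy = 4000000;     /* HZ=250 */

            int64_t stop_ns = now_ns;
            if (timeout_left < 0)
                    stop_ns -= (int64_t)(-timeout_left) * ns_per_jiffy;

            printf("recorded stop %lld ns, now %lld ns\n",
                   (long long)stop_ns, (long long)now_ns);
            return 0;
    }
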
index ec3dd1c..a106721 100644 (file)
@@ -321,7 +321,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 void flow_offload_refresh(struct nf_flowtable *flow_table,
                          struct flow_offload *flow)
 {
-       flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+       u32 timeout;
+
+       timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+       if (READ_ONCE(flow->timeout) != timeout)
+               WRITE_ONCE(flow->timeout, timeout);
 
        if (likely(!nf_flowtable_hw_offload(flow_table)))
                return;
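
flow_offload_refresh() runs once per offloaded packet, so the refresh now
stores the timeout only when it actually changed: READ_ONCE/WRITE_ONCE
annotate the cross-CPU access, and skipping the redundant store keeps the
cache line clean instead of bouncing it between CPUs. A userspace model with
relaxed atomics standing in for the kernel macros:

    #include <stdatomic.h>

    static _Atomic unsigned int flow_timeout;

    static void refresh_timeout(unsigned int now, unsigned int delta)
    {
            unsigned int timeout = now + delta;

            if (atomic_load_explicit(&flow_timeout,
                                     memory_order_relaxed) != timeout)
                    atomic_store_explicit(&flow_timeout, timeout,
                                          memory_order_relaxed);
    }

    int main(void)
    {
            refresh_timeout(1000, 30);
            refresh_timeout(1000, 30);  /* unchanged: no store issued */
            return 0;
    }
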
index f92006c..2bfd9f1 100644 (file)
@@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net,
        flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
                            &val, &mask);
 
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 
        return 0;
 }
index 4903da8..6d12afa 100644 (file)
@@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
        struct nf_hook_state *state = &entry->state;
 
        /* Release those devices we held, or Alexey will kill me. */
-       if (state->in)
-               dev_put(state->in);
-       if (state->out)
-               dev_put(state->out);
+       dev_put(state->in);
+       dev_put(state->out);
        if (state->sk)
                sock_put(state->sk);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-       if (entry->physin)
-               dev_put(entry->physin);
-       if (entry->physout)
-               dev_put(entry->physout);
+       dev_put(entry->physin);
+       dev_put(entry->physout);
 #endif
 }
 
@@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 {
        struct nf_hook_state *state = &entry->state;
 
-       if (state->in)
-               dev_hold(state->in);
-       if (state->out)
-               dev_hold(state->out);
+       dev_hold(state->in);
+       dev_hold(state->out);
        if (state->sk)
                sock_hold(state->sk);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-       if (entry->physin)
-               dev_hold(entry->physin);
-       if (entry->physout)
-               dev_hold(entry->physout);
+       dev_hold(entry->physin);
+       dev_hold(entry->physout);
 #endif
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
index de182d1..081437d 100644 (file)
@@ -8445,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl,
        return 0;
 }
 
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+       struct nft_audit_data *adp, *adn;
+
+       list_for_each_entry_safe(adp, adn, adl, list) {
+               list_del(&adp->list);
+               kfree(adp);
+       }
+}
+
 static void nf_tables_commit_audit_collect(struct list_head *adl,
                                           struct nft_table *table, u32 op)
 {
@@ -8509,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
                if (ret) {
                        nf_tables_commit_chain_prepare_cancel(net);
+                       nf_tables_commit_audit_free(&adl);
                        return ret;
                }
                if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -8518,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                        ret = nf_tables_commit_chain_prepare(net, chain);
                        if (ret < 0) {
                                nf_tables_commit_chain_prepare_cancel(net);
+                               nf_tables_commit_audit_free(&adl);
                                return ret;
                        }
                }
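
nf_tables_commit_audit_alloc() accumulates one record per table on a local
list; the new nf_tables_commit_audit_free() drains that list on the two
early-return error paths above, where the records previously leaked. A
minimal model of the drain:

    #include <stdlib.h>

    struct audit_rec { struct audit_rec *next; };

    static void audit_list_free(struct audit_rec **head)
    {
            while (*head) {
                    struct audit_rec *rec = *head;

                    *head = rec->next;  /* list_del() equivalent */
                    free(rec);          /* kfree() equivalent */
            }
    }

    int main(void)
    {
            struct audit_rec *head = calloc(1, sizeof(*head));

            head->next = calloc(1, sizeof(*head));
            audit_list_free(&head);
            return head != NULL;        /* 0: fully drained */
    }
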
index 50b4e3c..202f57d 100644 (file)
@@ -174,7 +174,9 @@ static const struct nf_hook_entries *
 nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
 {
        const struct nf_hook_entries *hook_head = NULL;
+#ifdef CONFIG_NETFILTER_INGRESS
        struct net_device *netdev;
+#endif
 
        switch (pf) {
        case NFPROTO_IPV4:
index 8088b99..304e33c 100644 (file)
@@ -48,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr,
 {
        struct nft_last_priv *priv = nft_expr_priv(expr);
 
-       priv->last_jiffies = jiffies;
-       priv->last_set = 1;
+       if (READ_ONCE(priv->last_jiffies) != jiffies)
+               WRITE_ONCE(priv->last_jiffies, jiffies);
+       if (READ_ONCE(priv->last_set) == 0)
+               WRITE_ONCE(priv->last_set, 1);
 }
 
 static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        struct nft_last_priv *priv = nft_expr_priv(expr);
+       unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
+       u32 last_set = READ_ONCE(priv->last_set);
        __be64 msecs;
 
-       if (time_before(jiffies, priv->last_jiffies))
-               priv->last_set = 0;
+       if (time_before(jiffies, last_jiffies)) {
+               WRITE_ONCE(priv->last_set, 0);
+               last_set = 0;
+       }
 
-       if (priv->last_set)
-               msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+       if (last_set)
+               msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies);
        else
                msecs = 0;
 
-       if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+       if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) ||
            nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
                goto nla_put_failure;
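
nft_last's eval runs on the packet path while the dump runs from netlink, so
the dump now takes READ_ONCE snapshots of last_jiffies and last_set and
reasons only about the local copies, while eval avoids stores when nothing
changed. A userspace model of the snapshot-then-compute pattern, with
relaxed atomics in place of READ_ONCE/WRITE_ONCE:

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned long last_jiffies;
    static _Atomic unsigned int  last_set;

    static void dump_last(unsigned long now)
    {
            unsigned long lj = atomic_load_explicit(&last_jiffies,
                                                    memory_order_relaxed);
            unsigned int  ls = atomic_load_explicit(&last_set,
                                                    memory_order_relaxed);

            if (now < lj) {             /* clock went backwards: reset */
                    atomic_store_explicit(&last_set, 0,
                                          memory_order_relaxed);
                    ls = 0;
            }
            printf("set=%u msecs=%lu\n", ls, ls ? now - lj : 0);
    }

    int main(void)
    {
            atomic_store(&last_jiffies, 10);
            atomic_store(&last_set, 1);
            dump_last(5);               /* prints set=0 msecs=0 */
            return 0;
    }
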
 
index 0840c63..be1595d 100644 (file)
@@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
                alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
                break;
        default:
-               return -EAFNOSUPPORT;
+               if (tb[NFTA_NAT_REG_ADDR_MIN])
+                       return -EAFNOSUPPORT;
+               break;
        }
        priv->family = family;
 
index 2483df0..566ba43 100644 (file)
@@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
                netlbl_af4list_audit_addr(audit_buf, 1,
                                          (dev != NULL ? dev->name : NULL),
                                          addr->s_addr, mask->s_addr);
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
                if (entry != NULL &&
                    security_secid_to_secctx(entry->secid,
                                             &secctx, &secctx_len) == 0) {
@@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
                netlbl_af6list_audit_addr(audit_buf, 1,
                                          (dev != NULL ? dev->name : NULL),
                                          addr, mask);
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
                if (entry != NULL &&
                    security_secid_to_secctx(entry->secid,
                                             &secctx, &secctx_len) == 0) {
index a880dd3..511819f 100644 (file)
@@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused)
                if (dev == NULL || nr_rx_frame(skb, dev) == 0)
                        kfree_skb(skb);
 
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
 
                if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
                        mod_timer(&loopback_timer, jiffies + 10);
index de04560..ddd5cbd 100644 (file)
@@ -582,8 +582,7 @@ struct net_device *nr_dev_first(void)
                        if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
                                first = dev;
        }
-       if (first)
-               dev_hold(first);
+       dev_hold(first);
        rcu_read_unlock();
 
        return first;
index 80a5c2a..82ab39d 100644 (file)
@@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err)
 
 /* Execute request and wait for completion. */
 static int __nci_request(struct nci_dev *ndev,
-                        void (*req)(struct nci_dev *ndev, unsigned long opt),
-                        unsigned long opt, __u32 timeout)
+                        void (*req)(struct nci_dev *ndev, const void *opt),
+                        const void *opt, __u32 timeout)
 {
        int rc = 0;
        long completion_rc;
@@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev,
 
 inline int nci_request(struct nci_dev *ndev,
                       void (*req)(struct nci_dev *ndev,
-                                  unsigned long opt),
-                      unsigned long opt, __u32 timeout)
+                                  const void *opt),
+                      const void *opt, __u32 timeout)
 {
        int rc;
 
@@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev,
        return rc;
 }
 
-static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_reset_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_core_reset_cmd cmd;
 
@@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
        nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd);
 }
 
-static void nci_init_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_req(struct nci_dev *ndev, const void *opt)
 {
        u8 plen = 0;
 
        if (opt)
                plen = sizeof(struct nci_core_init_v2_cmd);
 
-       nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt);
+       nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt);
 }
 
-static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_complete_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_rf_disc_map_cmd cmd;
        struct disc_map_config *cfg = cmd.mapping_configs;
@@ -215,10 +215,9 @@ struct nci_set_config_param {
        const __u8      *val;
 };
 
-static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_set_config_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_set_config_param *param =
-               (struct nci_set_config_param *)opt;
+       const struct nci_set_config_param *param = opt;
        struct nci_core_set_config_cmd cmd;
 
        BUG_ON(param->len > NCI_MAX_PARAM_LEN);
@@ -236,10 +235,9 @@ struct nci_rf_discover_param {
        __u32   tm_protocols;
 };
 
-static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_rf_discover_param *param =
-               (struct nci_rf_discover_param *)opt;
+       const struct nci_rf_discover_param *param = opt;
        struct nci_rf_disc_cmd cmd;
 
        cmd.num_disc_configs = 0;
@@ -302,10 +300,9 @@ struct nci_rf_discover_select_param {
        __u8    rf_protocol;
 };
 
-static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_rf_discover_select_param *param =
-               (struct nci_rf_discover_select_param *)opt;
+       const struct nci_rf_discover_select_param *param = opt;
        struct nci_rf_discover_select_cmd cmd;
 
        cmd.rf_discovery_id = param->rf_discovery_id;
@@ -329,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
                     sizeof(struct nci_rf_discover_select_cmd), &cmd);
 }
 
-static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_rf_deactivate_cmd cmd;
 
-       cmd.type = opt;
+       cmd.type = (unsigned long)opt;
 
        nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD,
                     sizeof(struct nci_rf_deactivate_cmd), &cmd);
@@ -345,10 +342,9 @@ struct nci_cmd_param {
        const __u8 *payload;
 };
 
-static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_generic_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_cmd_param *param =
-               (struct nci_cmd_param *)opt;
+       const struct nci_cmd_param *param = opt;
 
        nci_send_cmd(ndev, param->opcode, param->len, param->payload);
 }
@@ -361,7 +357,7 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload
        param.len = len;
        param.payload = payload;
 
-       return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_generic_req, &param,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_prop_cmd);
@@ -375,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
        param.len = len;
        param.payload = payload;
 
-       return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_generic_req, &param,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_cmd);
 
 int nci_core_reset(struct nci_dev *ndev)
 {
-       return __nci_request(ndev, nci_reset_req, 0,
+       return __nci_request(ndev, nci_reset_req, (void *)0,
                             msecs_to_jiffies(NCI_RESET_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_reset);
 
 int nci_core_init(struct nci_dev *ndev)
 {
-       return __nci_request(ndev, nci_init_req, 0,
+       return __nci_request(ndev, nci_init_req, (void *)0,
                             msecs_to_jiffies(NCI_INIT_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_init);
@@ -399,9 +395,9 @@ struct nci_loopback_data {
        struct sk_buff *data;
 };
 
-static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_send_data_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
+       const struct nci_loopback_data *data = opt;
 
        nci_send_data(ndev, data->conn_id, data->data);
 }
@@ -462,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
        loopback_data.data = skb;
 
        ndev->cur_conn_id = conn_id;
-       r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
+       r = nci_request(ndev, nci_send_data_req, &loopback_data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK && resp)
                *resp = conn_info->rx_skb;
@@ -495,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev)
                rc = ndev->ops->init(ndev);
 
        if (!rc) {
-               rc = __nci_request(ndev, nci_reset_req, 0,
+               rc = __nci_request(ndev, nci_reset_req, (void *)0,
                                   msecs_to_jiffies(NCI_RESET_TIMEOUT));
        }
 
@@ -508,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev)
                        .feature1 = NCI_FEATURE_DISABLE,
                        .feature2 = NCI_FEATURE_DISABLE
                };
-               unsigned long opt = 0;
+               const void *opt = NULL;
 
                if (ndev->nci_ver & NCI_VER_2_MASK)
-                       opt = (unsigned long)&nci_init_v2_cmd;
+                       opt = &nci_init_v2_cmd;
 
                rc = __nci_request(ndev, nci_init_req, opt,
                                   msecs_to_jiffies(NCI_INIT_TIMEOUT));
@@ -521,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev)
                rc = ndev->ops->post_setup(ndev);
 
        if (!rc) {
-               rc = __nci_request(ndev, nci_init_complete_req, 0,
+               rc = __nci_request(ndev, nci_init_complete_req, (void *)0,
                                   msecs_to_jiffies(NCI_INIT_TIMEOUT));
        }
 
@@ -571,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev)
        atomic_set(&ndev->cmd_cnt, 1);
 
        set_bit(NCI_INIT, &ndev->flags);
-       __nci_request(ndev, nci_reset_req, 0,
+       __nci_request(ndev, nci_reset_req, (void *)0,
                      msecs_to_jiffies(NCI_RESET_TIMEOUT));
 
        /* After this point our queues are empty
@@ -637,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val)
        param.len = len;
        param.val = val;
 
-       return __nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_set_config_req, &param,
                             msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_set_config);
 
-static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_nfcee_discover_cmd cmd;
-       __u8 action = opt;
+       __u8 action = (unsigned long)opt;
 
        cmd.discovery_action = action;
 
@@ -654,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
 
 int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
 {
-       return __nci_request(ndev, nci_nfcee_discover_req, action,
+       unsigned long opt = action;
+
+       return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt,
                                msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_nfcee_discover);
 
-static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_nfcee_mode_set_cmd *cmd =
-                                       (struct nci_nfcee_mode_set_cmd *)opt;
+       const struct nci_nfcee_mode_set_cmd *cmd = opt;
 
        nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD,
                     sizeof(struct nci_nfcee_mode_set_cmd), cmd);
@@ -675,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode)
        cmd.nfcee_id = nfcee_id;
        cmd.nfcee_mode = nfcee_mode;
 
-       return __nci_request(ndev, nci_nfcee_mode_set_req,
-                            (unsigned long)&cmd,
+       return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_nfcee_mode_set);
 
-static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct core_conn_create_data *data =
-                                       (struct core_conn_create_data *)opt;
+       const struct core_conn_create_data *data = opt;
 
        nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd);
 }
@@ -721,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
        }
        ndev->cur_dest_type = destination_type;
 
-       r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
+       r = __nci_request(ndev, nci_core_conn_create_req, &data,
                          msecs_to_jiffies(NCI_CMD_TIMEOUT));
        kfree(cmd);
        return r;
 }
 EXPORT_SYMBOL(nci_core_conn_create);
 
-static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt)
 {
-       __u8 conn_id = opt;
+       __u8 conn_id = (unsigned long)opt;
 
        nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id);
 }
 
 int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
 {
+       unsigned long opt = conn_id;
+
        ndev->cur_conn_id = conn_id;
-       return __nci_request(ndev, nci_core_conn_close_req, conn_id,
+       return __nci_request(ndev, nci_core_conn_close_req, (void *)opt,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_conn_close);
@@ -758,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
 
        param.id = NCI_PN_ATR_REQ_GEN_BYTES;
 
-       rc = nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       rc = nci_request(ndev, nci_set_config_req, &param,
                         msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
        if (rc)
                return rc;
 
        param.id = NCI_LN_ATR_RES_GEN_BYTES;
 
-       return nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       return nci_request(ndev, nci_set_config_req, &param,
                           msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
 }
 
@@ -815,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
                pr_debug("target active or w4 select, implicitly deactivate\n");
 
                rc = nci_request(ndev, nci_rf_deactivate_req,
-                                NCI_DEACTIVATE_TYPE_IDLE_MODE,
+                                (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
                                 msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
                if (rc)
                        return -EBUSY;
@@ -837,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
 
        param.im_protocols = im_protocols;
        param.tm_protocols = tm_protocols;
-       rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)&param,
+       rc = nci_request(ndev, nci_rf_discover_req, &param,
                         msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
 
        if (!rc)
@@ -856,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev)
                return;
        }
 
-       nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE,
+       nci_request(ndev, nci_rf_deactivate_req,
+                   (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
                    msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
 }
 
@@ -915,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
                else
                        param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP;
 
-               rc = nci_request(ndev, nci_rf_discover_select_req,
-                                (unsigned long)&param,
+               rc = nci_request(ndev, nci_rf_discover_select_req, &param,
                                 msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
        }
 
@@ -931,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
                                  __u8 mode)
 {
        struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
-       u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
+       unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
 
        pr_debug("entry\n");
 
@@ -949,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
        }
 
        if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
-               nci_request(ndev, nci_rf_deactivate_req, nci_mode,
+               nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode,
                            msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
        }
 }
@@ -987,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
        } else {
                if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
                    atomic_read(&ndev->state) == NCI_DISCOVERY) {
-                       nci_request(ndev, nci_rf_deactivate_req, 0,
-                               msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
+                       nci_request(ndev, nci_rf_deactivate_req, (void *)0,
+                                   msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
                }
 
                rc = nfc_tm_deactivated(nfc_dev);
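
The whole nci core diff is one mechanical conversion: request callbacks take
"const void *opt" instead of "unsigned long opt", which drops a pile of
casts for the common pointer payloads, while the few scalar payloads
(deactivate type, conn id, NFCEE action) round-trip through unsigned long
and a (void *) cast. A runnable model of both directions:

    #include <stdio.h>

    static void deactivate_req(const void *opt)
    {
            unsigned char type = (unsigned long)opt;    /* scalar out */

            printf("deactivate type %u\n", type);
    }

    static void config_req(const void *opt)
    {
            const char *param = opt;                    /* plain pointer */

            printf("config param %s\n", param);
    }

    int main(void)
    {
            unsigned long type = 3;     /* e.g. an IDLE-mode constant */

            deactivate_req((void *)type);   /* scalar smuggled in */
            config_req("rf-discover");
            return 0;
    }
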
index a8ff794..e199912 100644 (file)
@@ -195,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
        return i;
 }
 
-static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_data *data = (struct nci_data *)opt;
+       const struct nci_data *data = opt;
 
        nci_hci_send_data(ndev, data->pipe, data->cmd,
                          data->data, data->data_len);
@@ -240,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
        data.data = param;
        data.data_len = param_len;
 
-       r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK) {
                message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -511,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
        data.data = NULL;
        data.data_len = 0;
 
-       return nci_request(ndev, nci_hci_send_data_req,
-                       (unsigned long)&data,
-                       msecs_to_jiffies(NCI_DATA_TIMEOUT));
+       return nci_request(ndev, nci_hci_send_data_req, &data,
+                          msecs_to_jiffies(NCI_DATA_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_hci_open_pipe);
 
@@ -587,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
        data.data = tmp;
        data.data_len = param_len + 1;
 
-       r = nci_request(ndev, nci_hci_send_data_req,
-                       (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK) {
                message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -627,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
        data.data = &idx;
        data.data_len = 1;
 
-       r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
 
        if (r == NCI_STATUS_OK) {
index 57a1971..543365f 100644 (file)
@@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po)
 
        rcu_read_lock();
        dev = rcu_dereference(po->cached_dev);
-       if (likely(dev))
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
 
        return dev;
@@ -3024,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 out_free:
        kfree_skb(skb);
 out_unlock:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 out:
        return err;
 }
@@ -3158,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                }
        }
 
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
 
        proto_curr = po->prot_hook.type;
        dev_curr = po->prot_hook.dev;
@@ -3196,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                        packet_cached_dev_assign(po, dev);
                }
        }
-       if (dev_curr)
-               dev_put(dev_curr);
+       dev_put(dev_curr);
 
        if (proto == 0 || !need_rehook)
                goto out_unlock;
@@ -4109,8 +4105,7 @@ static int packet_notifier(struct notifier_block *this,
                                if (msg == NETDEV_UNREGISTER) {
                                        packet_cached_dev_reset(po);
                                        WRITE_ONCE(po->ifindex, -1);
-                                       if (po->prot_hook.dev)
-                                               dev_put(po->prot_hook.dev);
+                                       dev_put(po->prot_hook.dev);
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
index ca6ae4c..65218b7 100644 (file)
@@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 
 drop:
        kfree_skb(skb);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        return err;
 }
 EXPORT_SYMBOL(pn_skb_send);
index 876d0ae..cde671d 100644 (file)
@@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net)
                        break;
                dev = NULL;
        }
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr)
        daddr >>= 2;
        rcu_read_lock();
        dev = rcu_dereference(routes->table[daddr]);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
 
        if (!dev)
index 2599235..71e2caf 100644 (file)
@@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
                        saddr = PN_NO_ADDR;
                release_sock(sk);
 
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                if (saddr == PN_NO_ADDR)
                        return -EHOSTUNREACH;
 
index fa61167..1dc955c 100644 (file)
@@ -15,6 +15,7 @@ struct qrtr_mhi_dev {
        struct qrtr_endpoint ep;
        struct mhi_device *mhi_dev;
        struct device *dev;
+       struct completion ready;
 };
 
 /* From MHI to QRTR */
@@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
        struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
        int rc;
 
+       rc = wait_for_completion_interruptible(&qdev->ready);
+       if (rc)
+               goto free_skb;
+
        if (skb->sk)
                sock_hold(skb->sk);
 
@@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
        int rc;
 
        /* start channels */
-       rc = mhi_prepare_for_transfer(mhi_dev);
+       rc = mhi_prepare_for_transfer(mhi_dev, 0);
        if (rc)
                return rc;
 
@@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
        if (rc)
                return rc;
 
+       /* start channels */
+       rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
+       if (rc) {
+               qrtr_endpoint_unregister(&qdev->ep);
+               dev_set_drvdata(&mhi_dev->dev, NULL);
+               return rc;
+       }
+
+       complete_all(&qdev->ready);
        dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
 
        return 0;
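
The qrtr/mhi change closes a window in which qcom_mhi_qrtr_send() could run
before the endpoint was usable: channel preparation is split into two steps
and senders now block on a completion that probe signals only once
qrtr_endpoint_register() has succeeded. A userspace model of the gate, with
a condition variable standing in for the kernel completion (link with
-lpthread):

    #include <pthread.h>
    #include <stdbool.h>

    struct ready_gate {
            pthread_mutex_t lock;
            pthread_cond_t  cond;
            bool            done;
    };

    static struct ready_gate gate = {
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
    };

    static void gate_wait(struct ready_gate *g)     /* send path */
    {
            pthread_mutex_lock(&g->lock);
            while (!g->done)
                    pthread_cond_wait(&g->cond, &g->lock);
            pthread_mutex_unlock(&g->lock);
    }

    static void gate_complete_all(struct ready_gate *g)  /* probe path */
    {
            pthread_mutex_lock(&g->lock);
            g->done = true;
            pthread_cond_broadcast(&g->cond);
            pthread_mutex_unlock(&g->lock);
    }

    int main(void)
    {
            gate_complete_all(&gate);   /* registration finished */
            gate_wait(&gate);           /* sender proceeds at once */
            return 0;
    }
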
index e718478..6c61b7b 100644 (file)
@@ -518,8 +518,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
                if (!ipc)
                        goto err;
 
-               if (sock_queue_rcv_skb(&ipc->sk, skb))
+               if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+                       qrtr_port_put(ipc);
                        goto err;
+               }
 
                qrtr_port_put(ipc);
        }
@@ -839,6 +841,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
 
        ipc = qrtr_port_lookup(to->sq_port);
        if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
+               if (ipc)
+                       qrtr_port_put(ipc);
                kfree_skb(skb);
                return -ENODEV;
        }
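
Both qrtr hunks above are refcount-leak fixes of the same shape: a
successful qrtr_port_lookup() returns a referenced port, so every exit taken
after the lookup, including the error branches added here, must drop exactly
one reference. A small model of the balanced paths:

    struct port_model { int refcnt; };

    static void port_put(struct port_model *p) { p->refcnt--; }

    static int enqueue(struct port_model *ipc, int queue_ok, int is_self)
    {
            if (!ipc)
                    return -1;          /* lookup failed: nothing held */

            if (is_self || !queue_ok) {
                    port_put(ipc);      /* previously leaked here */
                    return -1;
            }
            port_put(ipc);              /* normal balance */
            return 0;
    }

    int main(void)
    {
            struct port_model p = { 1 };

            enqueue(&p, 0, 0);          /* failure path */
            return p.refcnt;            /* 0: reference released */
    }
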
index 998a237..7dd3a2d 100644 (file)
@@ -495,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
        p->tcfa_tm.install = jiffies;
        p->tcfa_tm.lastuse = jiffies;
        p->tcfa_tm.firstuse = 0;
-       p->tcfa_flags = flags;
+       p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK;
        if (est) {
                err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
                                        &p->tcfa_rate_est,
@@ -941,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[])
        }
 }
 
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
                                         bool rtnl_held,
                                         struct netlink_ext_ack *extack)
 {
@@ -951,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
        struct nlattr *kind;
        int err;
 
-       if (name == NULL) {
+       if (!police) {
                err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
                                                  tcf_action_policy, extack);
                if (err < 0)
@@ -967,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
                        return ERR_PTR(err);
                }
        } else {
-               if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+               if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
                        NL_SET_ERR_MSG(extack, "TC action name too long");
                        return ERR_PTR(-EINVAL);
                }
@@ -1004,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind,
                                    struct tc_action_ops *a_o, int *init_res,
-                                   bool rtnl_held,
-                                   struct netlink_ext_ack *extack)
+                                   u32 flags, struct netlink_ext_ack *extack)
 {
-       struct nla_bitfield32 flags = { 0, 0 };
+       bool police = flags & TCA_ACT_FLAGS_POLICE;
+       struct nla_bitfield32 userflags = { 0, 0 };
        u8 hw_stats = TCA_ACT_HW_STATS_ANY;
        struct nlattr *tb[TCA_ACT_MAX + 1];
        struct tc_cookie *cookie = NULL;
@@ -1017,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        int err;
 
        /* backward compatibility for policer */
-       if (name == NULL) {
+       if (!police) {
                err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
                                                  tcf_action_policy, extack);
                if (err < 0)
@@ -1032,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                }
                hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
                if (tb[TCA_ACT_FLAGS])
-                       flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+                       userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
 
-               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-                               rtnl_held, tp, flags.value, extack);
+               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp,
+                               userflags.value | flags, extack);
        } else {
-               err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
-                               tp, flags.value, extack);
+               err = a_o->init(net, nla, est, &a, tp, userflags.value | flags,
+                               extack);
        }
        if (err < 0)
                goto err_out;
        *init_res = err;
 
-       if (!name && tb[TCA_ACT_COOKIE])
+       if (!police && tb[TCA_ACT_COOKIE])
                tcf_set_action_cookie(&a->act_cookie, cookie);
 
-       if (!name)
+       if (!police)
                a->hw_stats = hw_stats;
 
        return a;
@@ -1063,9 +1062,9 @@ err_out:
 /* Returns numbers of initialized actions or negative error. */
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
-                   struct nlattr *est, char *name, int ovr, int bind,
-                   struct tc_action *actions[], int init_res[], size_t *attr_size,
-                   bool rtnl_held, struct netlink_ext_ack *extack)
+                   struct nlattr *est, struct tc_action *actions[],
+                   int init_res[], size_t *attr_size, u32 flags,
+                   struct netlink_ext_ack *extack)
 {
        struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -1082,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
                struct tc_action_ops *a_o;
 
-               a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+               a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
+                                        !(flags & TCA_ACT_FLAGS_NO_RTNL),
+                                        extack);
                if (IS_ERR(a_o)) {
                        err = PTR_ERR(a_o);
                        goto err_mod;
@@ -1091,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        }
 
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-               act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-                                       ops[i - 1], &init_res[i - 1], rtnl_held,
-                                       extack);
+               act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1],
+                                       &init_res[i - 1], flags, extack);
                if (IS_ERR(act)) {
                        err = PTR_ERR(act);
                        goto err;
@@ -1113,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        goto err_mod;
 
 err:
-       tcf_action_destroy(actions, bind);
+       tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
 err_mod:
        for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
                if (ops[i])
@@ -1495,7 +1495,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-                         struct nlmsghdr *n, u32 portid, int ovr,
+                         struct nlmsghdr *n, u32 portid, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        size_t attr_size = 0;
@@ -1504,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
        int init_res[TCA_ACT_MAX_PRIO] = {};
 
        for (loop = 0; loop < 10; loop++) {
-               ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
-                                     actions, init_res, &attr_size, true, extack);
+               ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
+                                     &attr_size, flags, extack);
                if (ret != -EAGAIN)
                        break;
        }
@@ -1535,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_ROOT_MAX + 1];
        u32 portid = NETLINK_CB(skb).portid;
-       int ret = 0, ovr = 0;
+       u32 flags = 0;
+       int ret = 0;
 
        if ((n->nlmsg_type != RTM_GETACTION) &&
            !netlink_capable(skb, CAP_NET_ADMIN))
@@ -1561,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
                 * is zero) then just set this
                 */
                if (n->nlmsg_flags & NLM_F_REPLACE)
-                       ovr = 1;
-               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+                       flags = TCA_ACT_FLAGS_REPLACE;
+               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
                                     extack);
                break;
        case RTM_DELACTION:
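The act_api.c hunks above collapse the old "name"/"ovr"/"bind"/"rtnl_held" arguments into a single u32 flags word: the low bits still carry the user-visible TCA_ACT_FLAGS bitfield32 value, and the kernel-internal controls (TCA_ACT_FLAGS_POLICE, _BIND, _REPLACE, _NO_RTNL) are ORed in above them. A minimal sketch of that layout and of the "existing index" pattern repeated in the per-action init paths below; the bit positions and the helper are illustrative, not the authoritative header definitions:

	#include <stdint.h>

	/* Hypothetical layout: 16 low bits for the userspace bitfield32,
	 * internal control bits stacked above them. */
	#define ACT_FLAGS_USER_BITS	16
	#define ACT_FLAGS_POLICE	(1U << (ACT_FLAGS_USER_BITS + 0))
	#define ACT_FLAGS_BIND		(1U << (ACT_FLAGS_USER_BITS + 1))
	#define ACT_FLAGS_REPLACE	(1U << (ACT_FLAGS_USER_BITS + 2))
	#define ACT_FLAGS_NO_RTNL	(1U << (ACT_FLAGS_USER_BITS + 3))

	/* What each tcf_*_init() below now does on finding an existing
	 * action index, instead of consulting separate ovr/bind ints. */
	static int existing_index_policy(uint32_t flags)
	{
		if (flags & ACT_FLAGS_BIND)
			return 0;	/* binding: keep current defaults */
		if (!(flags & ACT_FLAGS_REPLACE))
			return -17;	/* -EEXIST without NLM_F_REPLACE */
		return 1;		/* proceed with the replacement */
	}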
index e409a00..040807a 100644
@@ -275,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act,
-                       int replace, int bind, bool rtnl_held,
                        struct tcf_proto *tp, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tcf_bpf_cfg cfg, old;
@@ -317,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                if (bind)
                        return 0;
 
-               if (!replace) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*act, bind);
                        return -EEXIST;
                }
index e19885d..94e78ac 100644
@@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                             struct nlattr *est, struct tc_action **a,
-                            int ovr, int bind, bool rtnl_held,
                             struct tcf_proto *tp, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
        struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_chain *goto_ch = NULL;
        struct tcf_connmark_info *ci;
        struct tc_connmark *parm;
@@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                ci = to_connmark(*a);
                if (bind)
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 4fa4fcb..a15ec95 100644
@@ -41,11 +41,12 @@ static unsigned int csum_net_id;
 static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
-                        struct nlattr *est, struct tc_action **a, int ovr,
-                        int bind, bool rtnl_held, struct tcf_proto *tp,
+                        struct nlattr *est, struct tc_action **a,
+                        struct tcf_proto *tp,
                         u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_csum_params *params_new;
        struct nlattr *tb[TCA_CSUM_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)/* dont override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 1b4b351..ad9df0c 100644
@@ -1235,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net,
 
 static int tcf_ct_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a,
-                      int replace, int bind, bool rtnl_held,
                       struct tcf_proto *tp, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ct_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_ct_params *params = NULL;
        struct nlattr *tb[TCA_CT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -1279,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
                if (bind)
                        return 0;
 
-               if (!replace) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index b20c8ce..549374a 100644
@@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
 
 static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        u32 dscpmask = 0, dscpstatemask, index;
        struct nlattr *tb[TCA_CTINFO_MAX + 1];
        struct tcf_ctinfo_params *cp_new;
@@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind) /* don't override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 73c3926..d8dce17 100644
@@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_GACT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_gact *parm;
@@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)/* dont override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index a78cb79..7df72a4 100644
@@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
 
 static int tcf_gate_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gate_net_id);
        enum tk_offsets tk_offset = TK_OFFS_TAI;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_GATE_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        u64 cycletime = 0, basetime = 0;
@@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index a2ddea0..7064a36 100644
@@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a,
-                       int ovr, int bind, bool rtnl_held,
                        struct tcf_proto *tp, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_IFE_MAX + 1];
        struct nlattr *tb2[IFE_META_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        kfree(p);
                        return err;
                }
-               err = load_metalist(tb2, rtnl_held);
+               err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL));
                if (err) {
                        kfree(p);
                        return err;
@@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                kfree(p);
                return -EEXIST;
@@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
        }
 
        if (tb[TCA_IFE_METALST]) {
-               err = populate_metalist(ife, tb2, exists, rtnl_held);
+               err = populate_metalist(ife, tb2, exists,
+                                       !(flags & TCA_ACT_FLAGS_NO_RTNL));
                if (err)
                        goto metadata_parse_err;
        } else {
index ac7297f..265b144 100644
@@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
 
 static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         const struct tc_action_ops *ops, int ovr, int bind,
+                         const struct tc_action_ops *ops,
                          struct tcf_proto *tp, u32 flags)
 {
        struct tc_action_net *tn = net_generic(net, id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_IPT_MAX + 1];
        struct tcf_ipt *ipt;
        struct xt_entry_target *td, *t;
@@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
                if (bind)/* dont override defaults */
                        return 0;
 
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
@@ -201,21 +202,21 @@ err1:
 }
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
-                       struct nlattr *est, struct tc_action **a, int ovr,
-                       int bind, bool rtnl_held, struct tcf_proto *tp,
+                       struct nlattr *est, struct tc_action **a,
+                       struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack)
 {
-       return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
-                             bind, tp, flags);
+       return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops,
+                             tp, flags);
 }
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
-                      struct nlattr *est, struct tc_action **a, int ovr,
-                      int bind, bool unlocked, struct tcf_proto *tp,
+                      struct nlattr *est, struct tc_action **a,
+                      struct tcf_proto *tp,
                       u32 flags, struct netlink_ext_ack *extack)
 {
-       return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
-                             bind, tp, flags);
+       return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops,
+                             tp, flags);
 }
 
 static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
index 7153c67..37f51d7 100644
@@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a)
 
        /* last reference to action, no need to lock */
        dev = rcu_dereference_protected(m->tcfm_dev, 1);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 }
 
 static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -91,11 +90,11 @@ static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp,
                           u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_MIRRED_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        bool mac_header_xmit = false;
@@ -155,7 +154,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
@@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                mac_header_xmit = dev_is_mac_header_xmit(dev);
                dev = rcu_replace_pointer(m->tcfm_dev, dev,
                                          lockdep_is_held(&m->tcf_lock));
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                m->tcfm_mac_header_xmit = mac_header_xmit;
        }
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
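Orthogonal to the flags conversion, the mirred hunks also drop the "if (dev)" guards around dev_put(): the netdevice refcount helpers were made NULL-tolerant earlier in this cycle, so the guard now lives in one place. A reduced sketch of the idea, using a stand-in type rather than the real struct net_device:

	#include <linux/refcount.h>

	struct sketch_dev {
		refcount_t refcnt;	/* stand-in for netdev refcounting */
	};

	/* NULL-safe put: callers need no guard of their own. */
	static inline void sketch_dev_put(struct sketch_dev *dev)
	{
		if (dev)
			refcount_dec(&dev->refcnt);
	}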
index d1486ea..e4529b4 100644
@@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
 
 static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mpls_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_MPLS_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tcf_mpls_params *p;
@@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 1ebd2a8..7dd6b58 100644
@@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 };
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
-                       struct tc_action **a, int ovr, int bind,
-                       bool rtnl_held, struct tcf_proto *tp,
+                       struct tc_action **a, struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_NAT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_nat *parm;
@@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
        } else if (err > 0) {
                if (bind)
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index b453044..c6c862c 100644
@@ -136,11 +136,11 @@ nla_failure:
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         int ovr, int bind, bool rtnl_held,
                          struct tcf_proto *tp, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_PEDIT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_pedit_key *keys = NULL;
@@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)
                        goto out_free;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        ret = -EEXIST;
                        goto out_release;
                }
index 0fab8de..832157a 100644
@@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
                               struct nlattr *est, struct tc_action **a,
-                              int ovr, int bind, bool rtnl_held,
                               struct tcf_proto *tp, u32 flags,
                               struct netlink_ext_ack *extack)
 {
        int ret = 0, tcfp_result = TC_ACT_OK, err, size;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_POLICE_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_police *parm;
@@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
                }
                ret = ACT_P_CREATED;
                spin_lock_init(&(to_police(*a)->tcfp_lock));
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 6a0c16e..230501e 100644
@@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 };
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
-                          struct nlattr *est, struct tc_action **a, int ovr,
-                          int bind, bool rtnl_held, struct tcf_proto *tp,
+                          struct nlattr *est, struct tc_action **a,
+                          struct tcf_proto *tp,
                           u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_SAMPLE_MAX + 1];
        struct psample_group *psample_group;
        u32 psample_group_num, rate, index;
@@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 726cc95..cbbe186 100644
@@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_DEF_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_defact *parm;
@@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
                tcf_action_set_ctrlact(*a, parm->action, goto_ch);
                ret = ACT_P_CREATED;
        } else {
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        err = -EEXIST;
                        goto release_idr;
                }
index e5f3fb8..6054185 100644
@@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                            struct nlattr *est, struct tc_action **a,
-                           int ovr, int bind, bool rtnl_held,
                            struct tcf_proto *tp, u32 act_flags,
                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+       bool bind = act_flags & TCA_ACT_FLAGS_BIND;
        struct tcf_skbedit_params *params_new;
        struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                ret = ACT_P_CREATED;
        } else {
                d = to_skbedit(*a);
-               if (!ovr) {
+               if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 762ceec..ecb9ee6 100644
@@ -100,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+       bool ovr = flags & TCA_ACT_FLAGS_REPLACE;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_SKBMOD_MAX + 1];
        struct tcf_skbmod_params *p, *p_old;
        struct tcf_chain *goto_ch = NULL;
index 85c0d0d..d9cd174 100644
@@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 act_flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+       bool bind = act_flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
        struct tcf_tunnel_key_params *params_new;
        struct metadata_dst *metadata = NULL;
@@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
                NL_SET_ERR_MSG(extack, "TC IDR already exists");
                ret = -EEXIST;
                goto release_tun_meta;
index 71f2015..e4dc5a5 100644
@@ -114,11 +114,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_VLAN_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        bool push_prio_exists = false;
@@ -223,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 7be5b9d..4a7043a 100644
@@ -1949,6 +1949,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        int err;
        int tp_created;
        bool rtnl_held = false;
+       u32 flags;
 
        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
                return -EPERM;
@@ -1969,6 +1970,7 @@ replay:
        tp = NULL;
        cl = 0;
        block = NULL;
+       flags = 0;
 
        if (prio == 0) {
                /* If no priority is provided by the user,
@@ -2112,9 +2114,12 @@ replay:
                goto errout;
        }
 
+       if (!(n->nlmsg_flags & NLM_F_CREATE))
+               flags |= TCA_ACT_FLAGS_REPLACE;
+       if (!rtnl_held)
+               flags |= TCA_ACT_FLAGS_NO_RTNL;
        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
-                             n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
-                             rtnl_held, extack);
+                             flags, extack);
        if (err == 0) {
                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
                               RTM_NEWTFILTER, false, rtnl_held);
@@ -3020,8 +3025,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
-                     struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
-                     bool rtnl_held, struct netlink_ext_ack *extack)
+                     struct nlattr *rate_tlv, struct tcf_exts *exts,
+                     u32 flags, struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_NET_CLS_ACT
        {
@@ -3032,13 +3037,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                if (exts->police && tb[exts->police]) {
                        struct tc_action_ops *a_o;
 
-                       a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+                       a_o = tc_action_load_ops(tb[exts->police], true,
+                                                !(flags & TCA_ACT_FLAGS_NO_RTNL),
+                                                extack);
                        if (IS_ERR(a_o))
                                return PTR_ERR(a_o);
+                       flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
                        act = tcf_action_init_1(net, tp, tb[exts->police],
-                                               rate_tlv, "police", ovr,
-                                               TCA_ACT_BIND, a_o, init_res,
-                                               rtnl_held, extack);
+                                               rate_tlv, a_o, init_res, flags,
+                                               extack);
                        module_put(a_o->owner);
                        if (IS_ERR(act))
                                return PTR_ERR(act);
@@ -3050,10 +3057,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                } else if (exts->action && tb[exts->action]) {
                        int err;
 
+                       flags |= TCA_ACT_FLAGS_BIND;
                        err = tcf_action_init(net, tp, tb[exts->action],
-                                             rate_tlv, NULL, ovr, TCA_ACT_BIND,
-                                             exts->actions, init_res,
-                                             &attr_size, rtnl_held, extack);
+                                             rate_tlv, exts->actions, init_res,
+                                             &attr_size, flags, extack);
                        if (err < 0)
                                return err;
                        exts->nr_actions = err;
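With tcf_exts_validate() taking the same word, classifiers stop threading "ovr"/"rtnl_held" through every layer: cls_api builds the flags once from the netlink request and the lock state (see tc_new_tfilter above), and the extension code ORs in TCA_ACT_FLAGS_BIND, plus TCA_ACT_FLAGS_POLICE on the legacy policer path. Condensed, reusing the illustrative names from the earlier sketch:

	uint32_t flags = 0;

	if (!(nlmsg_flags & 0x400 /* NLM_F_CREATE */))
		flags |= ACT_FLAGS_REPLACE;	/* no create: replace mode */
	if (!rtnl_held)
		flags |= ACT_FLAGS_NO_RTNL;	/* unlocked classifier path */

	/* inside tcf_exts_validate(): */
	flags |= ACT_FLAGS_BIND;		/* actions bound via a filter */
	/* ...and ACT_FLAGS_POLICE when parsing tb[exts->police] */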
index f256a7c..8158fc9 100644
@@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
 static int basic_set_parms(struct net *net, struct tcf_proto *tp,
                           struct basic_filter *f, unsigned long base,
                           struct nlattr **tb,
-                          struct nlattr *est, bool ovr,
+                          struct nlattr *est, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
                        struct tcf_proto *tp, unsigned long base, u32 handle,
-                       struct nlattr **tca, void **arg, bool ovr,
-                       bool rtnl_held, struct netlink_ext_ack *extack)
+                       struct nlattr **tca, void **arg,
+                       u32 flags, struct netlink_ext_ack *extack)
 {
        int err;
        struct basic_head *head = rtnl_dereference(tp->root);
@@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
        }
 
-       err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+       err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags,
                              extack);
        if (err < 0) {
                if (!fold)
index fa739ef..3b472ba 100644
@@ -404,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 
 static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
                             struct cls_bpf_prog *prog, unsigned long base,
-                            struct nlattr **tb, struct nlattr *est, bool ovr,
+                            struct nlattr **tb, struct nlattr *est, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        bool is_bpf, is_ebpf, have_exts = false;
@@ -416,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;
 
-       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
                                extack);
        if (ret < 0)
                return ret;
@@ -455,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
-                         void **arg, bool ovr, bool rtnl_held,
+                         void **arg, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -500,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
        prog->handle = handle;
 
-       ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+       ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
                                extack);
        if (ret < 0)
                goto errout_idr;
index fb88114..ed00001 100644
@@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
                             struct tcf_proto *tp, unsigned long base,
                             u32 handle, struct nlattr **tca,
-                            void **arg, bool ovr, bool rtnl_held,
+                            void **arg, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto errout;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags,
+                               extack);
        if (err < 0)
                goto errout;
 
index 87398af..972303a 100644
@@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work)
 static int flow_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct flow_head *head = rtnl_dereference(tp->root);
@@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto err2;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags,
+                               extack);
        if (err < 0)
                goto err2;
 
index d7869a9..23b2125 100644
@@ -1915,23 +1915,22 @@ errout_cleanup:
 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
                        struct cls_fl_filter *f, struct fl_flow_mask *mask,
                        unsigned long base, struct nlattr **tb,
-                       struct nlattr *est, bool ovr,
-                       struct fl_flow_tmplt *tmplt, bool rtnl_held,
+                       struct nlattr *est,
+                       struct fl_flow_tmplt *tmplt, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
-                               extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
        if (tb[TCA_FLOWER_CLASSID]) {
                f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
-               if (!rtnl_held)
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
                        rtnl_lock();
                tcf_bind_filter(tp, &f->res, base);
-               if (!rtnl_held)
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
                        rtnl_unlock();
        }
 
@@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
 static int fl_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca,
-                    void **arg, bool ovr, bool rtnl_held,
+                    void **arg, u32 flags,
                     struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = fl_head_dereference(tp);
+       bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
        struct cls_fl_filter *fold = *arg;
        struct cls_fl_filter *fnew;
        struct fl_flow_mask *mask;
@@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
                }
        }
 
-       err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
-                          tp->chain->tmplt_priv, rtnl_held, extack);
+       err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
+                          tp->chain->tmplt_priv, flags, extack);
        if (err)
                goto errout;
 
index ec94529..8654b0c 100644
@@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
 
 static int fw_set_parms(struct net *net, struct tcf_proto *tp,
                        struct fw_filter *f, struct nlattr **tb,
-                       struct nlattr **tca, unsigned long base, bool ovr,
+                       struct nlattr **tca, unsigned long base, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        u32 mask;
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
+                               extack);
        if (err < 0)
                return err;
 
@@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 static int fw_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca, void **arg,
-                    bool ovr, bool rtnl_held,
-                    struct netlink_ext_ack *extack)
+                    u32 flags, struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        struct fw_filter *f = *arg;
@@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
                        return err;
                }
 
-               err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
+               err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
                if (err < 0) {
                        tcf_exts_destroy(&fnew->exts);
                        kfree(fnew);
@@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
        f->id = handle;
        f->tp = tp;
 
-       err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
+       err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
        if (err < 0)
                goto errout;
 
index cafb844..24f0046 100644
@@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
 static int mall_set_parms(struct net *net, struct tcf_proto *tp,
                          struct cls_mall_head *head,
                          unsigned long base, struct nlattr **tb,
-                         struct nlattr *est, bool ovr,
+                         struct nlattr *est, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
-                               extack);
+       err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 static int mall_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct cls_mall_head *head = rtnl_dereference(tp->root);
        struct nlattr *tb[TCA_MATCHALL_MAX + 1];
        struct cls_mall_head *new;
-       u32 flags = 0;
+       u32 userflags = 0;
        int err;
 
        if (!tca[TCA_OPTIONS])
@@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        if (tb[TCA_MATCHALL_FLAGS]) {
-               flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
-               if (!tc_flags_valid(flags))
+               userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
+               if (!tc_flags_valid(userflags))
                        return -EINVAL;
        }
 
@@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
        if (!handle)
                handle = 1;
        new->handle = handle;
-       new->flags = flags;
+       new->flags = userflags;
        new->pf = alloc_percpu(struct tc_matchall_pcnt);
        if (!new->pf) {
                err = -ENOMEM;
                goto err_alloc_percpu;
        }
 
-       err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+       err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags,
                             extack);
        if (err)
                goto err_set_parms;
index 5efa3e7..a35ab8c 100644
@@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
                            unsigned long base, struct route4_filter *f,
                            u32 handle, struct route4_head *head,
                            struct nlattr **tb, struct nlattr *est, int new,
-                           bool ovr, struct netlink_ext_ack *extack)
+                           u32 flags, struct netlink_ext_ack *extack)
 {
        u32 id = 0, to = 0, nhandle = 0x8000;
        struct route4_filter *fp;
@@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
        struct route4_bucket *b;
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int route4_change(struct net *net, struct sk_buff *in_skb,
                         struct tcf_proto *tp, unsigned long base, u32 handle,
-                        struct nlattr **tca, void **arg, bool ovr,
-                        bool rtnl_held, struct netlink_ext_ack *extack)
+                        struct nlattr **tca, void **arg, u32 flags,
+                        struct netlink_ext_ack *extack)
 {
        struct route4_head *head = rtnl_dereference(tp->root);
        struct route4_filter __rcu **fp;
@@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
        }
 
        err = route4_set_parms(net, tp, base, f, handle, head, tb,
-                              tca[TCA_RATE], new, ovr, extack);
+                              tca[TCA_RATE], new, flags, extack);
        if (err < 0)
                goto errout;
 
index 27a4b6d..5cd9d6b 100644
@@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 
 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
-                      u32 handle,
-                      struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      u32 handle, struct nlattr **tca,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct rsvp_head *data = rtnl_dereference(tp->root);
@@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
        err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
                                extack);
        if (err < 0)
                goto errout2;
index e9a8a2c..742c7d4 100644
@@ -330,7 +330,7 @@ static int
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                  u32 handle, struct tcindex_data *p,
                  struct tcindex_filter_result *r, struct nlattr **tb,
-                 struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+                 struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
 {
        struct tcindex_filter_result new_filter_result, *old_r = r;
        struct tcindex_data *cp = NULL, *oldp;
@@ -342,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
        if (err < 0)
                goto errout;
 
@@ -529,8 +529,8 @@ errout:
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
               struct tcf_proto *tp, unsigned long base, u32 handle,
-              struct nlattr **tca, void **arg, bool ovr,
-              bool rtnl_held, struct netlink_ext_ack *extack)
+              struct nlattr **tca, void **arg, u32 flags,
+              struct netlink_ext_ack *extack)
 {
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -551,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        return tcindex_set_parms(net, tp, base, handle, p, r, tb,
-                                tca[TCA_RATE], ovr, extack);
+                                tca[TCA_RATE], flags, extack);
 }
 
 static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
index 6e1abe8..4272814 100644
@@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
                         unsigned long base,
                         struct tc_u_knode *n, struct nlattr **tb,
-                        struct nlattr *est, bool ovr,
+                        struct nlattr *est, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
                      struct tcf_proto *tp, unsigned long base, u32 handle,
-                     struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
+                     struct nlattr **tca, void **arg, u32 flags,
                      struct netlink_ext_ack *extack)
 {
        struct tc_u_common *tp_c = tp->data;
@@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        struct tc_u32_sel *s;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_U32_MAX + 1];
-       u32 htid, flags = 0;
+       u32 htid, userflags = 0;
        size_t sel_size;
        int err;
 
@@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        if (tb[TCA_U32_FLAGS]) {
-               flags = nla_get_u32(tb[TCA_U32_FLAGS]);
-               if (!tc_flags_valid(flags)) {
+               userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
+               if (!tc_flags_valid(userflags)) {
                        NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
                        return -EINVAL;
                }
@@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                        return -EINVAL;
                }
 
-               if ((n->flags ^ flags) &
+               if ((n->flags ^ userflags) &
                    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
                        NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
                        return -EINVAL;
@@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                        return -ENOMEM;
 
                err = u32_set_parms(net, tp, base, new, tb,
-                                   tca[TCA_RATE], ovr, extack);
+                                   tca[TCA_RATE], flags, extack);
 
                if (err) {
                        u32_destroy_key(new, false);
@@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                ht->handle = handle;
                ht->prio = tp->prio;
                idr_init(&ht->handle_idr);
-               ht->flags = flags;
+               ht->flags = userflags;
 
-               err = u32_replace_hw_hnode(tp, ht, flags, extack);
+               err = u32_replace_hw_hnode(tp, ht, userflags, extack);
                if (err) {
                        idr_remove(&tp_c->handle_idr, handle);
                        kfree(ht);
@@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        RCU_INIT_POINTER(n->ht_up, ht);
        n->handle = handle;
        n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
-       n->flags = flags;
+       n->flags = userflags;
 
        err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
        if (err < 0)
@@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        }
 #endif
 
-       err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
+       err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags,
                            extack);
        if (err == 0) {
                struct tc_u_knode __rcu **ins;
index d9ac60f..a8dd06c 100644
@@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
        /* seqlock has the same scope of busylock, for NOLOCK qdisc */
        spin_lock_init(&sch->seqlock);
-       lockdep_set_class(&sch->busylock,
+       lockdep_set_class(&sch->seqlock,
                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 
        seqcount_init(&sch->running);
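The sch_generic.c one-liner fixes a copy-paste slip: qdisc_alloc() installed the per-device lockdep class on busylock twice, leaving the NOLOCK seqlock in its default class. With the fix, both locks share the device's tx_busylock key; condensed from the hunk and its surrounding context:

	spin_lock_init(&sch->busylock);
	lockdep_set_class(&sch->busylock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	/* seqlock has the same scope as busylock, so the same class;
	 * this second call previously re-keyed busylock by mistake. */
	spin_lock_init(&sch->seqlock);
	lockdep_set_class(&sch->seqlock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);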
index 07b30d0..9c79374 100644
@@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch)
                if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
                        qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
                        old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
-                       if (ntx < dev->real_num_tx_queues)
-                               qdisc_hash_add(qdisc, false);
                } else {
                        old = dev_graft_qdisc(qdisc->dev_queue, sch);
                        qdisc_refcount_inc(sch);
index fe74c5f..db6b737 100644
@@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
        memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
        cur_key->key = key;
 
-       if (replace) {
-               list_del_init(&shkey->key_list);
-               sctp_auth_shkey_release(shkey);
-               if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
-                       sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+       if (!replace) {
+               list_add(&cur_key->key_list, sh_keys);
+               return 0;
        }
+
+       list_del_init(&shkey->key_list);
+       sctp_auth_shkey_release(shkey);
        list_add(&cur_key->key_list, sh_keys);
 
+       if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
+               sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+
        return 0;
 }
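The sctp_auth_set_key() rework does two things: the non-replace path returns early so the replace path reads straight down, and the active-key recomputation moves below list_add(), so it runs only once the replacement key is actually visible on sh_keys. A reduced ordering sketch, with hypothetical helper names standing in for the SCTP internals:

	list_del_init(&old_key->key_list);	/* 1. unlink the stale entry */
	put_shared_key(old_key);		/* 2. drop its ref (hypothetical) */
	list_add(&new_key->key_list, sh_keys);	/* 3. publish the new key */
	if (key_is_active(asoc, key_id))	/* 4. only now re-derive the */
		recompute_active_key(asoc);	/*    association's active key */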
 
index eb3c2a3..5ef86fd 100644
@@ -1203,7 +1203,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
        if (unlikely(!af))
                return NULL;
 
-       if (af->from_addr_param(&paddr, param, peer_port, 0))
+       if (!af->from_addr_param(&paddr, param, peer_port, 0))
                return NULL;
 
        return __sctp_lookup_association(net, laddr, &paddr, transportp);
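The one-character sctp/input.c change un-inverts a success test: from_addr_param() returns bool, true when the address parameter parses cleanly, so the ASCONF lookup must bail on failure rather than on success. The corrected pattern, as in the hunk:

	/* give up only when the parameter fails to parse */
	if (!af->from_addr_param(&paddr, param, peer_port, 0))
		return NULL;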
index e48dd90..470dbdc 100644
@@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
                list_for_each_entry_safe(addr, temp,
                                        &net->sctp.local_addr_list, list) {
                        if (addr->a.sa.sa_family == AF_INET6 &&
-                                       ipv6_addr_equal(&addr->a.v6.sin6_addr,
-                                               &ifa->addr)) {
+                           ipv6_addr_equal(&addr->a.v6.sin6_addr,
+                                           &ifa->addr) &&
+                           addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) {
                                sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
                                found = 1;
                                addr->valid = 0;
index 09a8f23..32df65f 100644
@@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net,
        if (!sctp_transport_pl_enabled(transport))
                return SCTP_DISPOSITION_CONSUME;
 
-       sctp_transport_pl_send(transport);
-
-       reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
-       if (!reply)
-               return SCTP_DISPOSITION_NOMEM;
-       sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+       if (sctp_transport_pl_send(transport)) {
+               reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+               if (!reply)
+                       return SCTP_DISPOSITION_NOMEM;
+               sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+       }
        sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
                        SCTP_TRANSPORT(transport));
 
@@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
                    !sctp_transport_pl_enabled(link))
                        return SCTP_DISPOSITION_DISCARD;
 
-               sctp_transport_pl_recv(link);
-               if (link->pl.state == SCTP_PL_COMPLETE)
+               if (sctp_transport_pl_recv(link))
                        return SCTP_DISPOSITION_CONSUME;
 
                return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
index 397a624..a3d3ca6 100644
@@ -258,16 +258,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
        sctp_transport_pl_update(transport);
 }
 
-void sctp_transport_pl_send(struct sctp_transport *t)
+bool sctp_transport_pl_send(struct sctp_transport *t)
 {
-       pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
-                __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
-
-       if (t->pl.probe_count < SCTP_MAX_PROBES) {
-               t->pl.probe_count++;
-               return;
-       }
+       if (t->pl.probe_count < SCTP_MAX_PROBES)
+               goto out;
 
+       t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
+       t->pl.probe_count = 0;
        if (t->pl.state == SCTP_PL_BASE) {
                if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
                        t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
@@ -299,14 +296,27 @@ void sctp_transport_pl_send(struct sctp_transport *t)
                        sctp_assoc_sync_pmtu(t->asoc);
                }
        }
-       t->pl.probe_count = 1;
+
+out:
+       if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 &&
+           !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) {
+               t->pl.raise_count++;
+               return false;
+       }
+
+       pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+                __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+       t->pl.probe_count++;
+       return true;
 }
 
-void sctp_transport_pl_recv(struct sctp_transport *t)
+bool sctp_transport_pl_recv(struct sctp_transport *t)
 {
        pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
                 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
 
+       t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
        t->pl.pmtu = t->pl.probe_size;
        t->pl.probe_count = 0;
        if (t->pl.state == SCTP_PL_BASE) {
@@ -323,7 +333,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
                if (!t->pl.probe_high) {
                        t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
                                               SCTP_MAX_PLPMTU);
-                       return;
+                       return false;
                }
                t->pl.probe_size += SCTP_PL_MIN_STEP;
                if (t->pl.probe_size >= t->pl.probe_high) {
@@ -335,14 +345,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
                        t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
                        sctp_assoc_sync_pmtu(t->asoc);
                }
-       } else if (t->pl.state == SCTP_PL_COMPLETE) {
-               t->pl.raise_count++;
-               if (t->pl.raise_count == 30) {
-                       /* Raise probe_size again after 30 * interval in Search Complete */
-                       t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
-                       t->pl.probe_size += SCTP_PL_MIN_STEP;
-               }
+       } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) {
+               /* Raise probe_size again after 30 * interval in Search Complete */
+               t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+               t->pl.probe_size += SCTP_PL_MIN_STEP;
        }
+
+       return t->pl.state == SCTP_PL_COMPLETE;
 }
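
With both helpers now returning bool, the sm_statefuns.c callers above decide whether a probe actually goes out. A sketch of the contract as read from these hunks (an interpretation, not authoritative documentation):

    /*
     * if (sctp_transport_pl_send(t))    true:  transmit a HEARTBEAT probe
     *         send probe;                      of t->pl.probe_size
     *                                   false: Search Complete quiet period,
     *                                          skip this interval's probe
     *
     * if (sctp_transport_pl_recv(t))    true:  search complete, consume
     *         consume;                  false: keep probing
     * else
     *         send next probe;
     */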
 
 static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
index cd0d7c9..edc8962 100644 (file)
@@ -1752,21 +1752,30 @@ out:
        return rc;
 }
 
-/* convert the RMB size into the compressed notation - minimum 16K.
+#define SMCD_DMBE_SIZES                6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCR_RMBE_SIZES                5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
+
+/* convert the RMB size into the compressed notation (minimum 16K, see
+ * SMCD/R_DMBE_SIZES).
  * In contrast to plain ilog2, this rounds towards the next power of 2,
  * so the socket application gets at least its desired sndbuf / rcvbuf size.
  */
-static u8 smc_compress_bufsize(int size)
+static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
 {
+       const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
        u8 compressed;
 
        if (size <= SMC_BUF_MIN_SIZE)
                return 0;
 
-       size = (size - 1) >> 14;
-       compressed = ilog2(size) + 1;
-       if (compressed >= SMC_RMBE_SIZES)
-               compressed = SMC_RMBE_SIZES - 1;
+       size = (size - 1) >> 14;  /* convert to 16K multiple */
+       compressed = min_t(u8, ilog2(size) + 1,
+                          is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+
+       if (!is_smcd && is_rmb)
+               /* RMBs are backed by & limited to max size of scatterlists */
+               compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+
        return compressed;
 }
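
Per the size comments above, compressed value c encodes a buffer of 16KB << c. A standalone sketch of the inverse mapping (the in-tree counterpart is smc_uncompress_bufsize(); this version is an illustration only):

    #include <stdint.h>

    /* compressed 0 -> 16KB, 1 -> 32KB, ..., n -> 16KB << n */
    int uncompress_bufsize(uint8_t compressed)
    {
            return 0x1 << (compressed + 14);   /* 2^14 bytes = 16KB base */
    }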
 
@@ -1982,17 +1991,12 @@ out:
        return rc;
 }
 
-#define SMCD_DMBE_SIZES                6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
-
 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
                                                bool is_dmb, int bufsize)
 {
        struct smc_buf_desc *buf_desc;
        int rc;
 
-       if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
-               return ERR_PTR(-EAGAIN);
-
        /* try to alloc a new DMB */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
@@ -2041,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;
 
-       for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+       for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
             bufsize_short >= 0; bufsize_short--) {
-
                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
@@ -2052,8 +2055,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
-               if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
-                       continue;
 
                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
index 7d7ba03..a884534 100644 (file)
@@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event)
                        if (!libdev->ops.get_netdev)
                                continue;
                        lndev = libdev->ops.get_netdev(libdev, i + 1);
-                       if (lndev)
-                               dev_put(lndev);
+                       dev_put(lndev);
                        if (lndev != ndev)
                                continue;
                        if (event == NETDEV_REGISTER)
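
This hunk, like the smc_pnet, nl80211 and cfg80211 scan hunks below, relies on dev_hold() and dev_put() being NULL-tolerant, which makes the explicit guards redundant:

    /*
     *   if (ndev)               before: guard against NULL
     *           dev_put(ndev);
     *
     *   dev_put(ndev);          after: dev_put(NULL) is a no-op
     */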
index 6f6d33e..4a964e9 100644 (file)
@@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
        return 0;
 
 out_put:
-       if (ndev)
-               dev_put(ndev);
+       dev_put(ndev);
        return rc;
 }
 
index 0ae3478..0b2c18e 100644 (file)
@@ -809,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
        return err;
 }
 EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
+
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+                                 struct net_device *dev, const void *ctx,
+                                 struct notifier_block *atomic_nb,
+                                 struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
+                                 struct netlink_ext_ack *extack)
+{
+       struct switchdev_notifier_brport_info brport_info = {
+               .brport = {
+                       .dev = dev,
+                       .ctx = ctx,
+                       .atomic_nb = atomic_nb,
+                       .blocking_nb = blocking_nb,
+                       .tx_fwd_offload = tx_fwd_offload,
+               },
+       };
+       int err;
+
+       ASSERT_RTNL();
+
+       err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED,
+                                               brport_dev, &brport_info.info,
+                                               extack);
+       return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
+
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                                    const void *ctx,
+                                    struct notifier_block *atomic_nb,
+                                    struct notifier_block *blocking_nb)
+{
+       struct switchdev_notifier_brport_info brport_info = {
+               .brport = {
+                       .ctx = ctx,
+                       .atomic_nb = atomic_nb,
+                       .blocking_nb = blocking_nb,
+               },
+       };
+
+       ASSERT_RTNL();
+
+       call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED,
+                                         brport_dev, &brport_info.info,
+                                         NULL);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
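
A hypothetical driver-side use of the two new entry points; the foo_* names are assumptions for illustration and not part of this patch. A port driver would call these from its bridge join/leave paths:

    static int foo_port_bridge_join(struct foo_port *port,
                                    struct net_device *brport_dev,
                                    struct netlink_ext_ack *extack)
    {
            /* ctx identifies this port inside the shared notifier blocks */
            return switchdev_bridge_port_offload(brport_dev, port->netdev,
                                                 port, &foo_switchdev_nb,
                                                 &foo_switchdev_blocking_nb,
                                                 false, extack);
    }

    static void foo_port_bridge_leave(struct foo_port *port,
                                      struct net_device *brport_dev)
    {
            switchdev_bridge_port_unoffload(brport_dev, port,
                                            &foo_switchdev_nb,
                                            &foo_switchdev_blocking_nb);
    }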
index e5c43d4..c9391d3 100644 (file)
@@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
        if (unlikely(!aead))
                return -ENOKEY;
 
-       /* Cow skb data if needed */
-       if (likely(!skb_cloned(skb) &&
-                  (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) {
-               nsg = 1 + skb_shinfo(skb)->nr_frags;
-       } else {
-               nsg = skb_cow_data(skb, 0, &unused);
-               if (unlikely(nsg < 0)) {
-                       pr_err("RX: skb_cow_data() returned %d\n", nsg);
-                       return nsg;
-               }
+       nsg = skb_cow_data(skb, 0, &unused);
+       if (unlikely(nsg < 0)) {
+               pr_err("RX: skb_cow_data() returned %d\n", nsg);
+               return nsg;
        }
 
        /* Allocate memory for the AEAD operation */
index b0dd183..b15b2b1 100644 (file)
@@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk);
 static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
 static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
 
 static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
@@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
                rc = 0;
        }
 
-       if (unlikely(syn && !rc))
+       if (unlikely(syn && !rc)) {
                tipc_set_sk_state(sk, TIPC_CONNECTING);
+               if (timeout) {
+                       timeout = msecs_to_jiffies(timeout);
+                       tipc_wait_for_connect(sock, &timeout);
+               }
+       }
 
        return rc ? rc : dlen;
 }
@@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
                return -EMSGSIZE;
 
        /* Handle implicit connection setup */
-       if (unlikely(dest)) {
+       if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
                rc = __tipc_sendmsg(sock, m, dlen);
                if (dlen && dlen == rc) {
                        tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -2666,7 +2672,7 @@ static int tipc_listen(struct socket *sock, int len)
 static int tipc_wait_for_accept(struct socket *sock, long timeo)
 {
        struct sock *sk = sock->sk;
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int err;
 
        /* True wake-one mechanism for incoming connections: only
@@ -2675,12 +2681,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
         * anymore, the common case will execute the loop only once.
        */
        for (;;) {
-               prepare_to_wait_exclusive(sk_sleep(sk), &wait,
-                                         TASK_INTERRUPTIBLE);
                if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+                       add_wait_queue(sk_sleep(sk), &wait);
                        release_sock(sk);
-                       timeo = schedule_timeout(timeo);
+                       timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
                        lock_sock(sk);
+                       remove_wait_queue(sk_sleep(sk), &wait);
                }
                err = 0;
                if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -2692,7 +2698,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
                if (signal_pending(current))
                        break;
        }
-       finish_wait(sk_sleep(sk), &wait);
        return err;
 }
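
The switch to DEFINE_WAIT_FUNC()/wait_woken() closes a missed-wakeup window: woken_wake_function() latches the wakeup in the wait entry's WQ_FLAG_WOKEN, so an event landing between release_sock() and the sleep is still observed. Sketched:

    /*
     * add_wait_queue(sk_sleep(sk), &wait);
     * release_sock(sk);            <- a wakeup may fire right here ...
     * wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
     *                              ... and wait_woken() returns immediately
     *                              because WQ_FLAG_WOKEN is already set
     * lock_sock(sk);
     * remove_wait_queue(sk_sleep(sk), &wait);
     */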
 
@@ -2709,9 +2714,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
                       bool kern)
 {
        struct sock *new_sk, *sk = sock->sk;
-       struct sk_buff *buf;
        struct tipc_sock *new_tsock;
+       struct msghdr m = {NULL,};
        struct tipc_msg *msg;
+       struct sk_buff *buf;
        long timeo;
        int res;
 
@@ -2757,19 +2763,17 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
        }
 
        /*
-        * Respond to 'SYN-' by discarding it & returning 'ACK'-.
-        * Respond to 'SYN+' by queuing it on new socket.
+        * Respond to 'SYN-' by discarding it & returning 'ACK'.
+        * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
         */
        if (!msg_data_sz(msg)) {
-               struct msghdr m = {NULL,};
-
                tsk_advance_rx_queue(sk);
-               __tipc_sendstream(new_sock, &m, 0);
        } else {
                __skb_dequeue(&sk->sk_receive_queue);
                __skb_queue_head(&new_sk->sk_receive_queue, buf);
                skb_set_owner_r(buf, new_sk);
        }
+       __tipc_sendstream(new_sock, &m, 0);
        release_sock(new_sk);
 exit:
        release_sock(sk);
index b6c4282..b7f8112 100644 (file)
@@ -25,6 +25,11 @@ config UNIX_SCM
        depends on UNIX
        default y
 
+config AF_UNIX_OOB
+       bool
+       depends on UNIX
+       default y
+
 config UNIX_DIAG
        tristate "UNIX: socket monitoring interface"
        depends on UNIX
index 8992767..ec02e70 100644 (file)
@@ -503,6 +503,12 @@ static void unix_sock_destructor(struct sock *sk)
 
        skb_queue_purge(&sk->sk_receive_queue);
 
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       if (u->oob_skb) {
+               kfree_skb(u->oob_skb);
+               u->oob_skb = NULL;
+       }
+#endif
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
@@ -1543,6 +1549,53 @@ out:
        return err;
 }
 
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+       scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+       /*
+        * Garbage collection of unix sockets starts by selecting a set of
+        * candidate sockets which have a reference only from being in flight
+        * (total_refs == inflight_refs).  This condition is checked once during
+        * the candidate collection phase, and candidates are marked as such, so
+        * that non-candidates can later be ignored.  While inflight_refs is
+        * protected by unix_gc_lock, total_refs (file count) is not, hence this
+        * is an instantaneous decision.
+        *
+        * Once a candidate, however, the socket must not be reinstalled into a
+        * file descriptor while the garbage collection is in progress.
+        *
+        * If the above conditions are met, then the directed graph of
+        * candidates (*) does not change while unix_gc_lock is held.
+        *
+        * Any operation that changes the file count through file descriptors
+        * (dup, close, sendmsg) does not change the graph since candidates are
+        * not installed in fds.
+        *
+        * Dequeuing a candidate via recvmsg would install it into an fd, but
+        * that takes unix_gc_lock to decrement the inflight count, so it's
+        * serialized with garbage collection.
+        *
+        * MSG_PEEK is special in that it does not change the inflight count,
+        * yet does install the socket into an fd.  The following lock/unlock
+        * pair is to ensure serialization with garbage collection.  It must be
+        * done between incrementing the file count and installing the file into
+        * an fd.
+        *
+        * If garbage collection starts after the barrier provided by the
+        * lock/unlock, then it will see the elevated refcount and not mark this
+        * as a candidate.  If a garbage collection is already in progress
+        * before the file count was incremented, then the lock/unlock pair will
+        * ensure that garbage collection is finished before progressing to
+        * installing the fd.
+        *
+        * (*) A -> B where B is on the queue of A or B is on the queue of C
+        * which is on the queue of listening socket A.
+        */
+       spin_lock(&unix_gc_lock);
+       spin_unlock(&unix_gc_lock);
+}
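
The resulting ordering on the MSG_PEEK receive path, in simplified sketch form:

    /*
     * scm.fp = scm_fp_dup(UNIXCB(skb).fp);   file count now elevated
     * spin_lock(&unix_gc_lock);              barrier: wait out any GC that
     * spin_unlock(&unix_gc_lock);            is already past the count check
     * ...
     * fd_install(...);                       safe: a GC starting now sees
     *                                        the elevated refcount
     */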
+
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
 {
        int err = 0;
@@ -1842,6 +1895,46 @@ out:
  */
 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
 
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+{
+       struct unix_sock *ousk = unix_sk(other);
+       struct sk_buff *skb;
+       int err = 0;
+
+       skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
+
+       if (!skb)
+               return err;
+
+       skb_put(skb, 1);
+       skb->len = 1;
+       err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
+
+       if (err) {
+               kfree_skb(skb);
+               return err;
+       }
+
+       unix_state_lock(other);
+       maybe_add_creds(skb, sock, other);
+       skb_get(skb);
+
+       if (ousk->oob_skb)
+               kfree_skb(ousk->oob_skb);
+
+       ousk->oob_skb = skb;
+
+       scm_stat_add(other, skb);
+       skb_queue_tail(&other->sk_receive_queue, skb);
+       sk_send_sigurg(other);
+       unix_state_unlock(other);
+       other->sk_data_ready(other);
+
+       return err;
+}
+#endif
+
 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                               size_t len)
 {
@@ -1860,8 +1953,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                return err;
 
        err = -EOPNOTSUPP;
-       if (msg->msg_flags&MSG_OOB)
-               goto out_err;
+       if (msg->msg_flags & MSG_OOB) {
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+               if (len)
+                       len--;
+               else
+#endif
+                       goto out_err;
+       }
 
        if (msg->msg_namelen) {
                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
@@ -1926,6 +2025,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                sent += size;
        }
 
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+       if (msg->msg_flags & MSG_OOB) {
+               err = queue_oob(sock, msg, other);
+               if (err)
+                       goto out_err;
+               sent++;
+       }
+#endif
+
        scm_destroy(&scm);
 
        return sent;
@@ -2192,7 +2300,7 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
                sk_peek_offset_fwd(sk, size);
 
                if (UNIXCB(skb).fp)
-                       scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+                       unix_peek_fds(&scm, skb);
        }
        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
 
@@ -2311,6 +2419,59 @@ struct unix_stream_read_state {
        unsigned int splice_flags;
 };
 
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+static int unix_stream_recv_urg(struct unix_stream_read_state *state)
+{
+       struct socket *sock = state->socket;
+       struct sock *sk = sock->sk;
+       struct unix_sock *u = unix_sk(sk);
+       int chunk = 1;
+
+       if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb)
+               return -EINVAL;
+
+       chunk = state->recv_actor(u->oob_skb, 0, chunk, state);
+       if (chunk < 0)
+               return -EFAULT;
+
+       if (!(state->flags & MSG_PEEK)) {
+               UNIXCB(u->oob_skb).consumed += 1;
+               kfree_skb(u->oob_skb);
+               u->oob_skb = NULL;
+       }
+       state->msg->msg_flags |= MSG_OOB;
+       return 1;
+}
+
+static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+                                 int flags, int copied)
+{
+       struct unix_sock *u = unix_sk(sk);
+
+       if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
+               skb_unlink(skb, &sk->sk_receive_queue);
+               consume_skb(skb);
+               skb = NULL;
+       } else {
+               if (skb == u->oob_skb) {
+                       if (copied) {
+                               skb = NULL;
+                       } else if (sock_flag(sk, SOCK_URGINLINE)) {
+                               if (!(flags & MSG_PEEK)) {
+                                       u->oob_skb = NULL;
+                                       consume_skb(skb);
+                               }
+                       } else if (!(flags & MSG_PEEK)) {
+                               skb_unlink(skb, &sk->sk_receive_queue);
+                               consume_skb(skb);
+                               skb = skb_peek(&sk->sk_receive_queue);
+                       }
+               }
+       }
+       return skb;
+}
+#endif
+
 static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
 {
@@ -2336,6 +2497,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
 
        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+               mutex_lock(&u->iolock);
+               unix_state_lock(sk);
+
+               err = unix_stream_recv_urg(state);
+
+               unix_state_unlock(sk);
+               mutex_unlock(&u->iolock);
+#endif
                goto out;
        }
 
@@ -2364,6 +2534,18 @@ redo:
                }
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+               if (skb) {
+                       skb = manage_oob(skb, sk, flags, copied);
+                       if (!skb) {
+                               unix_state_unlock(sk);
+                               if (copied)
+                                       break;
+                               goto redo;
+                       }
+               }
+#endif
 again:
                if (skb == NULL) {
                        if (copied >= target)
@@ -2482,7 +2664,7 @@ unlock:
                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        if (UNIXCB(skb).fp)
-                               scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+                               unix_peek_fds(&scm, skb);
 
                        sk_peek_offset_fwd(sk, chunk);
 
@@ -2699,6 +2881,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        case SIOCUNIXFILE:
                err = unix_open_file(sk);
                break;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       case SIOCATMARK:
+               {
+                       struct sk_buff *skb;
+                       struct unix_sock *u = unix_sk(sk);
+                       int answ = 0;
+
+                       skb = skb_peek(&sk->sk_receive_queue);
+                       if (skb && skb == u->oob_skb)
+                               answ = 1;
+                       err = put_user(answ, (int __user *)arg);
+               }
+               break;
+#endif
        default:
                err = -ENOIOCTLCMD;
                break;
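
Taken together, queue_oob(), manage_oob(), unix_stream_recv_urg() and the SIOCATMARK case give AF_UNIX stream sockets TCP-style urgent data. A minimal userspace sketch, assuming a kernel built with CONFIG_AF_UNIX_OOB=y:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>

    int main(void)
    {
            int fds[2], atmark = 0;
            char buf[2], c;

            if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds))
                    return 1;
            send(fds[0], "ab", 2, 0);            /* inline data */
            send(fds[0], "x", 1, MSG_OOB);       /* becomes oob_skb */
            recv(fds[1], buf, 2, 0);             /* reads "ab", stops at mark */
            ioctl(fds[1], SIOCATMARK, &atmark);  /* 1: next byte is the mark */
            recv(fds[1], &c, 1, MSG_OOB);        /* fetches 'x' out of band */
            printf("oob=%c atmark=%d\n", c, atmark);
            return 0;
    }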
index db0cda2..20f5357 100644 (file)
@@ -44,7 +44,7 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
 {
        struct unix_sock *u = unix_sk(sk);
        struct sk_psock *psock;
-       int copied, ret;
+       int copied;
 
        psock = sk_psock_get(sk);
        if (unlikely(!psock))
@@ -53,8 +53,9 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
        mutex_lock(&u->iolock);
        if (!skb_queue_empty(&sk->sk_receive_queue) &&
            sk_psock_queue_empty(psock)) {
-               ret = __unix_dgram_recvmsg(sk, msg, len, flags);
-               goto out;
+               mutex_unlock(&u->iolock);
+               sk_psock_put(sk, psock);
+               return __unix_dgram_recvmsg(sk, msg, len, flags);
        }
 
 msg_bytes_ready:
@@ -68,16 +69,15 @@ msg_bytes_ready:
                if (data) {
                        if (!sk_psock_queue_empty(psock))
                                goto msg_bytes_ready;
-                       ret = __unix_dgram_recvmsg(sk, msg, len, flags);
-                       goto out;
+                       mutex_unlock(&u->iolock);
+                       sk_psock_put(sk, psock);
+                       return __unix_dgram_recvmsg(sk, msg, len, flags);
                }
                copied = -EAGAIN;
        }
-       ret = copied;
-out:
        mutex_unlock(&u->iolock);
        sk_psock_put(sk, psock);
-       return ret;
+       return copied;
 }
 
 static struct proto *unix_prot_saved __read_mostly;
@@ -105,6 +105,9 @@ static void unix_bpf_check_needs_rebuild(struct proto *ops)
 
 int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 {
+       if (sk->sk_type != SOCK_DGRAM)
+               return -EOPNOTSUPP;
+
        if (restore) {
                sk->sk_write_space = psock->saved_write_space;
                WRITE_ONCE(sk->sk_prot, psock->sk_proto);
index 169ba8b..081e7ae 100644 (file)
@@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk,
                virtio_transport_recv_enqueue(vsk, pkt);
                sk->sk_data_ready(sk);
                return err;
+       case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+               virtio_transport_send_credit_update(vsk);
+               break;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
                sk->sk_write_space(sk);
                break;
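
A sketch of the credit exchange this case completes; the exact trigger on the sending side is an assumption from the general virtio-vsock flow-control scheme:

    /*
     * sender                                receiver
     *   tx until peer credit exhausted
     *   OP_CREDIT_REQUEST            ---->
     *                                <----  OP_CREDIT_UPDATE (this hunk)
     *   resume tx with fresh credit
     */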
index 50eb405..dceed5b 100644 (file)
@@ -2351,7 +2351,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
                        goto nla_put_failure;
 
                for (band = state->band_start;
-                    band < NUM_NL80211_BANDS; band++) {
+                    band < (state->split ?
+                               NUM_NL80211_BANDS :
+                               NL80211_BAND_60GHZ + 1);
+                    band++) {
                        struct ieee80211_supported_band *sband;
 
                        /* omit higher bands for ancient software */
@@ -6524,8 +6527,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
        err = rdev_change_station(rdev, dev, mac_addr, &params);
 
  out_put_vlan:
-       if (params.vlan)
-               dev_put(params.vlan);
+       dev_put(params.vlan);
 
        return err;
 }
@@ -6760,8 +6762,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 
        err = rdev_add_station(rdev, dev, mac_addr, &params);
 
-       if (params.vlan)
-               dev_put(params.vlan);
+       dev_put(params.vlan);
        return err;
 }
 
@@ -8486,8 +8487,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
                goto out_free;
 
        nl80211_send_scan_start(rdev, wdev);
-       if (wdev->netdev)
-               dev_hold(wdev->netdev);
+       dev_hold(wdev->netdev);
 
        return 0;
 
@@ -14857,9 +14857,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
                        return -ENETDOWN;
                }
 
-               if (dev)
-                       dev_hold(dev);
-
+               dev_hold(dev);
                info->user_ptr[0] = rdev;
        }
 
@@ -14881,8 +14879,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
                if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
                        struct wireless_dev *wdev = info->user_ptr[1];
 
-                       if (wdev->netdev)
-                               dev_put(wdev->netdev);
+                       dev_put(wdev->netdev);
                } else {
                        dev_put(info->user_ptr[1]);
                }
index f03c7ac..11c68b1 100644 (file)
@@ -975,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
        }
 #endif
 
-       if (wdev->netdev)
-               dev_put(wdev->netdev);
+       dev_put(wdev->netdev);
 
        kfree(rdev->int_scan_req);
        rdev->int_scan_req = NULL;
@@ -1754,16 +1753,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
                         * be grouped with this beacon for updates ...
                         */
                        if (!cfg80211_combine_bsses(rdev, new)) {
-                               kfree(new);
+                               bss_ref_put(rdev, new);
                                goto drop;
                        }
                }
 
                if (rdev->bss_entries >= bss_entries_limit &&
                    !cfg80211_bss_expire_oldest(rdev)) {
-                       if (!list_empty(&new->hidden_list))
-                               list_del(&new->hidden_list);
-                       kfree(new);
+                       bss_ref_put(rdev, new);
                        goto drop;
                }
 
index a20aec9..2bf2693 100644 (file)
@@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
        len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]);
 
        nla_for_each_attr(nla, attrs, len, remaining) {
-               int err = xfrm_xlate64_attr(dst, nla);
+               int err;
 
+               switch (type) {
+               case XFRM_MSG_NEWSPDINFO:
+                       err = xfrm_nla_cpy(dst, nla, nla_len(nla));
+                       break;
+               default:
+                       err = xfrm_xlate64_attr(dst, nla);
+                       break;
+               }
                if (err)
                        return err;
        }
@@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src
 
 /* Calculates len of translated 64-bit message. */
 static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
-                                           struct nlattr *attrs[XFRMA_MAX+1])
+                                           struct nlattr *attrs[XFRMA_MAX + 1],
+                                           int maxtype)
 {
        size_t len = nlmsg_len(src);
 
@@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
        case XFRM_MSG_POLEXPIRE:
                len += 8;
                break;
+       case XFRM_MSG_NEWSPDINFO:
+       /* attributes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+               return len;
        default:
                break;
        }
 
+       /* Unexpected for anything but XFRM_MSG_NEWSPDINFO, please
+        * correct both 64=>32-bit and 32=>64-bit translators to copy
+        * new attributes.
+        */
+       if (WARN_ON_ONCE(maxtype))
+               return len;
+
        if (attrs[XFRMA_SA])
                len += 4;
        if (attrs[XFRMA_POLICY])
@@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
 
 static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
                        struct nlattr *attrs[XFRMA_MAX+1],
-                       size_t size, u8 type, struct netlink_ext_ack *extack)
+                       size_t size, u8 type, int maxtype,
+                       struct netlink_ext_ack *extack)
 {
        size_t pos;
        int i;
@@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
        }
        pos = dst->nlmsg_len;
 
+       if (maxtype) {
+               /* attributes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+               WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO);
+
+               for (i = 1; i <= maxtype; i++) {
+                       int err;
+
+                       if (!attrs[i])
+                               continue;
+
+                       /* just copy - no need for translation */
+                       err = xfrm_attr_cpy32(dst, &pos, attrs[i], size,
+                                       nla_len(attrs[i]), nla_len(attrs[i]));
+                       if (err)
+                               return err;
+               }
+               return 0;
+       }
+
        for (i = 1; i < XFRMA_MAX + 1; i++) {
                int err;
 
@@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
        if (err < 0)
                return ERR_PTR(err);
 
-       len = xfrm_user_rcv_calculate_len64(h32, attrs);
+       len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype);
        /* The message doesn't need translation */
        if (len == nlmsg_len(h32))
                return NULL;
@@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
        if (!h64)
                return ERR_PTR(-ENOMEM);
 
-       err = xfrm_xlate32(h64, h32, attrs, len, type, extack);
+       err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack);
        if (err < 0) {
                kvfree(h64);
                return ERR_PTR(err);
index 2e8afe0..cb40ff0 100644 (file)
@@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms)
                        break;
        }
 
-       WARN_ON(!pos);
+       WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list));
 
        if (--pos->users)
                return;
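
The old WARN_ON(!pos) could never fire: when list_for_each_entry() finishes without a break, pos holds the head sentinel cast to the entry type, never NULL. A sketch of the semantics the new check relies on:

    /*
     * list_for_each_entry(pos, &ipcomp_tfms_list, list)
     *         if (match(pos))
     *                 break;
     *
     * no match  =>  pos == list_entry(&ipcomp_tfms_list, ...), non-NULL,
     * so only list_entry_is_head(pos, &ipcomp_tfms_list, list) detects it
     */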
index 827d842..7f881f5 100644 (file)
@@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
                                                __read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
 
 static struct rhashtable xfrm_policy_inexact_table;
 static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
                return;
 
        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-       write_seqcount_begin(&xfrm_policy_hash_generation);
+       write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
        odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
                                lockdep_is_held(&net->xfrm.xfrm_policy_lock));
@@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
        rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
        net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
-       write_seqcount_end(&xfrm_policy_hash_generation);
+       write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        synchronize_rcu();
@@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
        } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-       write_seqcount_begin(&xfrm_policy_hash_generation);
+       write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
        /* make sure that we can insert the indirect policies again before
         * we start with destructive action.
@@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 
 out_unlock:
        __xfrm_policy_inexact_flush(net);
-       write_seqcount_end(&xfrm_policy_hash_generation);
+       write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        mutex_unlock(&hash_resize_mutex);
@@ -2091,15 +2090,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
        if (unlikely(!daddr || !saddr))
                return NULL;
 
- retry:
-       sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
        rcu_read_lock();
-
-       chain = policy_hash_direct(net, daddr, saddr, family, dir);
-       if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
-               rcu_read_unlock();
-               goto retry;
-       }
+ retry:
+       do {
+               sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
+               chain = policy_hash_direct(net, daddr, saddr, family, dir);
+       } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
 
        ret = NULL;
        hlist_for_each_entry_rcu(pol, chain, bydst) {
@@ -2130,15 +2126,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
        }
 
 skip_inexact:
-       if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
-               rcu_read_unlock();
+       if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
                goto retry;
-       }
 
-       if (ret && !xfrm_pol_hold_rcu(ret)) {
-               rcu_read_unlock();
+       if (ret && !xfrm_pol_hold_rcu(ret))
                goto retry;
-       }
 fail:
        rcu_read_unlock();
 
@@ -4089,6 +4081,7 @@ static int __net_init xfrm_net_init(struct net *net)
        /* Initialize the per-net locks here */
        spin_lock_init(&net->xfrm.xfrm_state_lock);
        spin_lock_init(&net->xfrm.xfrm_policy_lock);
+       seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
        mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
        rv = xfrm_statistics_init(net);
@@ -4133,7 +4126,6 @@ void __init xfrm_init(void)
 {
        register_pernet_subsys(&xfrm_net_ops);
        xfrm_dev_init();
-       seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
        xfrm_input_init();
 
 #ifdef CONFIG_XFRM_ESPINTCP
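
With the generation counter now a per-netns seqcount_spinlock_t initialised against xfrm_policy_lock (see xfrm_net_init() above), lockdep can verify that writers hold that lock, and the lookup retries inside a single RCU section. The write-side pairing, sketched:

    /*
     * spin_lock_bh(&net->xfrm.xfrm_policy_lock);
     * write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
     * ... resize / rebuild the policy hash ...
     * write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
     * spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
     */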
index b47d613..7aff641 100644 (file)
@@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        err = link->doit(skb, nlh, attrs);
 
+       /* We need to free skb allocated in xfrm_alloc_compat() before
+        * returning from this function, because consume_skb() won't take
+        * care of frag_list since netlink destructor sets
+        * skb->head to NULL (see netlink_skb_destructor()).
+        */
+       if (skb_has_frag_list(skb)) {
+               kfree_skb(skb_shinfo(skb)->frag_list);
+               skb_shinfo(skb)->frag_list = NULL;
+       }
+
 err:
        kvfree(nlh64);
        return err;
index 0b9548e..fcba217 100644 (file)
@@ -45,11 +45,13 @@ xdp_monitor
 xdp_redirect
 xdp_redirect_cpu
 xdp_redirect_map
+xdp_redirect_map_multi
 xdp_router_ipv4
 xdp_rxq_info
 xdp_sample_pkts
 xdp_tx_iptunnel
 xdpsock
+xdpsock_ctrl_proc
 xsk_fwd
 testfile.img
 hbm_out.log
index e68b9ee..35db26f 100755 (executable)
@@ -1,5 +1,6 @@
 #!/bin/bash
 
+rm -r tmpmnt
 rm -f testfile.img
 dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
 DEVICE=$(losetup --show -f testfile.img)
index fdcd658..8be7ce1 100644 (file)
@@ -14,6 +14,11 @@ int main(int argc, char **argv)
        int ret = 0;
        FILE *f;
 
+       if (!argv[1]) {
+               fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
+               return 0;
+       }
+
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
        obj = bpf_object__open_file(filename, NULL);
        if (libbpf_get_error(obj)) {
index 34b6439..f0c5d95 100644 (file)
@@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx)
 
        h_proto = eth->h_proto;
 
+       /* Handle VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
@@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx)
                        return rc;
                h_proto = vhdr->h_vlan_encapsulated_proto;
        }
+       /* Handle double VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
index c787f4b..d8a64ab 100644 (file)
@@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx)
 
        h_proto = eth->h_proto;
 
+       /* Handle VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
@@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx)
                        return rc;
                h_proto = vhdr->h_vlan_encapsulated_proto;
        }
+       /* Handle double VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
index d3ecdc1..9e225c9 100644 (file)
@@ -841,7 +841,7 @@ int main(int argc, char **argv)
        memset(cpu, 0, n_cpus * sizeof(int));
 
        /* Parse commands line args */
-       while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
+       while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
                                  long_options, &longindex)) != -1) {
                switch (opt) {
                case 'd':
index 33d0bde..49d7a6a 100644 (file)
@@ -1,12 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2017 - 2018 Intel Corporation. */
 
-#include <asm/barrier.h>
 #include <errno.h>
 #include <getopt.h>
 #include <libgen.h>
 #include <linux/bpf.h>
-#include <linux/compiler.h>
 #include <linux/if_link.h>
 #include <linux/if_xdp.h>
 #include <linux/if_ether.h>
@@ -653,17 +651,15 @@ out:
        return result;
 }
 
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
 /*
  *     This is a version of ip_compute_csum() optimized for IP headers,
  *     which always checksum on 4 octet boundaries.
  *     This function code has been taken from
  *     Linux kernel lib/checksum.c
  */
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-       return (__force __sum16)~do_csum(iph, ihl * 4);
+       return (__sum16)~do_csum(iph, ihl * 4);
 }
 
 /*
@@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
  */
 static inline __sum16 csum_fold(__wsum csum)
 {
-       u32 sum = (__force u32)csum;
+       u32 sum = (u32)csum;
 
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
-       return (__force __sum16)~sum;
+       return (__sum16)~sum;
 }
 
 /*
@@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
                          __u32 len, __u8 proto, __wsum sum)
 {
-       unsigned long long s = (__force u32)sum;
+       unsigned long long s = (u32)sum;
 
-       s += (__force u32)saddr;
-       s += (__force u32)daddr;
+       s += (u32)saddr;
+       s += (u32)daddr;
 #ifdef __BIG_ENDIAN__
        s += proto + len;
 #else
        s += (proto + len) << 8;
 #endif
-       return (__force __wsum)from64to32(s);
+       return (__wsum)from64to32(s);
 }
 
 /*
index c17e480..8f6b13a 100755 (executable)
@@ -173,39 +173,6 @@ my $mcount_regex;  # Find the call site to mcount (return offset)
 my $mcount_adjust;     # Address adjustment to mcount offset
 my $alignment;         # The .align value to use for $mcount_section
 my $section_type;      # Section header plus possible alignment command
-my $can_use_local = 0;         # If we can use local function references
-
-# Shut up recordmcount if user has older objcopy
-my $quiet_recordmcount = ".tmp_quiet_recordmcount";
-my $print_warning = 1;
-$print_warning = 0 if ( -f $quiet_recordmcount);
-
-##
-# check_objcopy - whether objcopy supports --globalize-symbols
-#
-#  --globalize-symbols came out in 2.17, we must test the version
-#  of objcopy, and if it is less than 2.17, then we can not
-#  record local functions.
-sub check_objcopy
-{
-    open (IN, "$objcopy --version |") or die "error running $objcopy";
-    while (<IN>) {
-       if (/objcopy.*\s(\d+)\.(\d+)/) {
-           $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17));
-           last;
-       }
-    }
-    close (IN);
-
-    if (!$can_use_local && $print_warning) {
-       print STDERR "WARNING: could not find objcopy version or version " .
-           "is less than 2.17.\n" .
-           "\tLocal function references are disabled.\n";
-       open (QUIET, ">$quiet_recordmcount");
-       printf QUIET "Disables the warning from recordmcount.pl\n";
-       close QUIET;
-    }
-}
 
 if ($arch =~ /(x86(_64)?)|(i386)/) {
     if ($bits == 64) {
@@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) {
 my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
 my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
 
-check_objcopy();
-
 #
 # Step 1: find all the local (static functions) and weak symbols.
 #         't' is local, 'w/W' is weak
@@ -473,11 +438,6 @@ sub update_funcs
 
     # is this function static? If so, note this fact.
     if (defined $locals{$ref_func}) {
-
-       # only use locals if objcopy supports globalize-symbols
-       if (!$can_use_local) {
-           return;
-       }
        $convert{$ref_func} = 1;
     }
 
index 74f8aad..7011fbe 100755 (executable)
@@ -17,7 +17,7 @@ Usage:
        $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func
        Wait some times but not too much, the script is a bit slow.
        Break the pipe (Ctrl + Z)
-       $ scripts/draw_functrace.py < raw_trace_func > draw_functrace
+       $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
        Then you have your drawn trace in draw_functrace
 """
 
@@ -103,10 +103,10 @@ def parseLine(line):
        line = line.strip()
        if line.startswith("#"):
                raise CommentLineException
-       m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
+       m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line)
        if m is None:
                raise BrokenLineException
-       return (m.group(1), m.group(2), m.group(3))
+       return (m.group(2), m.group(3), m.group(4))
 
 
 def main():
index defc5ef..0ae1b71 100644 (file)
@@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
        rc = sidtab_init(s);
        if (rc) {
                pr_err("SELinux:  out of memory on SID table init\n");
-               goto out;
+               return rc;
        }
 
        head = p->ocontexts[OCON_ISID];
@@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
                if (sid == SECSID_NULL) {
                        pr_err("SELinux:  SID 0 was assigned a context.\n");
                        sidtab_destroy(s);
-                       goto out;
+                       return -EINVAL;
                }
 
                /* Ignore initial SIDs unused by this kernel. */
@@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
                        pr_err("SELinux:  unable to load initial SID %s.\n",
                               name);
                        sidtab_destroy(s);
-                       goto out;
+                       return rc;
                }
        }
-       rc = 0;
-out:
-       return rc;
+       return 0;
 }
 
 int policydb_class_isvalid(struct policydb *p, unsigned int class)
index 14e3282..6a2971a 100644 (file)
@@ -246,12 +246,18 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
        if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
                return false;
 
-       if (substream->ops->mmap ||
-           (substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV &&
-            substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV_UC))
+       if (substream->ops->mmap)
                return true;
 
-       return dma_can_mmap(substream->dma_buffer.dev.dev);
+       switch (substream->dma_buffer.dev.type) {
+       case SNDRV_DMA_TYPE_UNKNOWN:
+               return false;
+       case SNDRV_DMA_TYPE_CONTINUOUS:
+       case SNDRV_DMA_TYPE_VMALLOC:
+               return true;
+       default:
+               return dma_can_mmap(substream->dma_buffer.dev.dev);
+       }
 }
 
 static int constrain_mask_params(struct snd_pcm_substream *substream,
@@ -3063,9 +3069,14 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
                boundary = 0x7fffffff;
        snd_pcm_stream_lock_irq(substream);
        /* FIXME: we should consider the boundary for the sync from app */
-       if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
-               control->appl_ptr = scontrol.appl_ptr;
-       else
+       if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) {
+               err = pcm_lib_apply_appl_ptr(substream,
+                               scontrol.appl_ptr);
+               if (err < 0) {
+                       snd_pcm_stream_unlock_irq(substream);
+                       return err;
+               }
+       } else
                scontrol.appl_ptr = control->appl_ptr % boundary;
        if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN))
                control->avail_min = scontrol.avail_min;
@@ -3664,6 +3675,8 @@ static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        if (substream->ops->page)
                page = substream->ops->page(substream, offset);
+       else if (!snd_pcm_get_dma_buf(substream))
+               page = virt_to_page(runtime->dma_area + offset);
        else
                page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset);
        if (!page)
index d8be146..c9d0ba3 100644 (file)
@@ -319,6 +319,10 @@ static const struct config_entry config_table[] = {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC,
                .device = 0x4b55,
        },
+       {
+               .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC,
+               .device = 0x4b58,
+       },
 #endif
 
 /* Alder Lake */
index 5bbe669..7ad8c5f 100644 (file)
@@ -816,6 +816,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel
        mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7);
+       spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
 
        spin_lock(&p->chip->reg_lock);
        set_mode_register(p->chip, 0xc0);       /* c0 = STOP */
@@ -855,6 +856,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel
        spin_unlock(&p->chip->reg_lock);
 
        /* restore PCM volume */
+       spin_lock_irqsave(&p->chip->mixer_lock, flags);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR);
        spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
@@ -880,6 +882,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p)
        mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7);
+       spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
 
        spin_lock(&p->chip->reg_lock);
        if (p->running & SNDRV_SB_CSP_ST_QSOUND) {
@@ -894,6 +897,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p)
        spin_unlock(&p->chip->reg_lock);
 
        /* restore PCM volume */
+       spin_lock_irqsave(&p->chip->mixer_lock, flags);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR);
        spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
index 4b2cc8c..e143e69 100644 (file)
@@ -1940,6 +1940,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid)
 static const struct snd_pci_quirk force_connect_list[] = {
        SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1),
        SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1),
+       SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1),
+       SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1),
        {}
 };
 
index 1389cfd..caaf0e8 100644 (file)
@@ -8626,6 +8626,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
        SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
        SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
        SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP),
index 84e3906..9449fb4 100644 (file)
@@ -576,6 +576,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                                | SND_SOC_DAIFMT_CBM_CFM,
                .init = cz_rt5682_init,
                .dpcm_playback = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_play_ops,
                SND_SOC_DAILINK_REG(designware1, rt5682, platform),
        },
@@ -585,6 +586,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_capture = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_cap_ops,
                SND_SOC_DAILINK_REG(designware2, rt5682, platform),
        },
@@ -594,6 +596,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_playback = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_max_play_ops,
                SND_SOC_DAILINK_REG(designware3, mx, platform),
        },
@@ -604,6 +607,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_capture = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_dmic0_cap_ops,
                SND_SOC_DAILINK_REG(designware3, adau, platform),
        },
@@ -614,6 +618,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_capture = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_dmic1_cap_ops,
                SND_SOC_DAILINK_REG(designware2, adau, platform),
        },
index 7ebae3f..a3b784e 100644 (file)
@@ -1325,7 +1325,7 @@ config SND_SOC_SSM2305
          high-efficiency mono Class-D audio power amplifiers.
 
 config SND_SOC_SSM2518
-       tristate
+       tristate "Analog Devices SSM2518 Class-D Amplifier"
        depends on I2C
 
 config SND_SOC_SSM2602
@@ -1557,6 +1557,7 @@ config SND_SOC_WCD934X
          Qualcomm SoCs like SDM845.
 
 config SND_SOC_WCD938X
+       depends on SND_SOC_WCD938X_SDW
        tristate
 
 config SND_SOC_WCD938X_SDW
@@ -1813,11 +1814,6 @@ config SND_SOC_ZL38060
          which consists of a Digital Signal Processor (DSP), several Digital
          Audio Interfaces (DAIs), analog outputs, and a block of 14 GPIOs.
 
-config SND_SOC_ZX_AUD96P22
-       tristate "ZTE ZX AUD96P22 CODEC"
-       depends on I2C
-       select REGMAP_I2C
-
 # Amp
 config SND_SOC_LM4857
        tristate
index 3000bc1..38356ea 100644 (file)
@@ -1695,6 +1695,8 @@ static const struct regmap_config rt5631_regmap_config = {
        .reg_defaults = rt5631_reg,
        .num_reg_defaults = ARRAY_SIZE(rt5631_reg),
        .cache_type = REGCACHE_RBTREE,
+       .use_single_read = true,
+       .use_single_write = true,
 };
 
 static int rt5631_i2c_probe(struct i2c_client *i2c,
index e4c9157..abcd6f4 100644 (file)
@@ -973,10 +973,14 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
                rt5682_enable_push_button_irq(component, false);
                snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
                        RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
-               if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS"))
+               if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL1") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL2B"))
                        snd_soc_component_update_bits(component,
                                RT5682_PWR_ANLG_1, RT5682_PWR_MB, 0);
-               if (!snd_soc_dapm_get_pin_status(dapm, "Vref2"))
+               if (!snd_soc_dapm_get_pin_status(dapm, "Vref2") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL1") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL2B"))
                        snd_soc_component_update_bits(component,
                                RT5682_PWR_ANLG_1, RT5682_PWR_VREF2, 0);
                snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3,
index 51870d5..b504d63 100644 (file)
@@ -1604,6 +1604,8 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c,
                        ret);
                return ret;
        }
+       regcache_cache_only(aic31xx->regmap, true);
+
        aic31xx->dev = &i2c->dev;
        aic31xx->irq = i2c->irq;
 
index 8195298..2513922 100644 (file)
@@ -151,8 +151,8 @@ struct aic31xx_pdata {
 #define AIC31XX_WORD_LEN_24BITS                0x02
 #define AIC31XX_WORD_LEN_32BITS                0x03
 #define AIC31XX_IFACE1_MASTER_MASK     GENMASK(3, 2)
-#define AIC31XX_BCLK_MASTER            BIT(2)
-#define AIC31XX_WCLK_MASTER            BIT(3)
+#define AIC31XX_BCLK_MASTER            BIT(3)
+#define AIC31XX_WCLK_MASTER            BIT(2)
 
 /* AIC31XX_DATA_OFFSET */
 #define AIC31XX_DATA_OFFSET_MASK       GENMASK(7, 0)
index c63b717..dcd8aeb 100644 (file)
@@ -250,8 +250,8 @@ static DECLARE_TLV_DB_SCALE(tlv_pcm, -6350, 50, 0);
 static DECLARE_TLV_DB_SCALE(tlv_driver_gain, -600, 100, 0);
 /* -12dB min, 0.5dB steps */
 static DECLARE_TLV_DB_SCALE(tlv_adc_vol, -1200, 50, 0);
-
-static DECLARE_TLV_DB_LINEAR(tlv_spk_vol, TLV_DB_GAIN_MUTE, 0);
+/* -58.5dB min, 0.5dB steps */
+static DECLARE_TLV_DB_SCALE(tlv_tas_driver_gain, -5850, 50, 0);
 static DECLARE_TLV_DB_SCALE(tlv_amp_vol, 0, 600, 1);
 
 static const char * const lo_cm_text[] = {
@@ -1063,21 +1063,20 @@ static const struct snd_soc_component_driver soc_component_dev_aic32x4 = {
 };
 
 static const struct snd_kcontrol_new aic32x4_tas2505_snd_controls[] = {
-       SOC_DOUBLE_R_S_TLV("PCM Playback Volume", AIC32X4_LDACVOL,
-                       AIC32X4_LDACVOL, 0, -0x7f, 0x30, 7, 0, tlv_pcm),
+       SOC_SINGLE_S8_TLV("PCM Playback Volume",
+                         AIC32X4_LDACVOL, -0x7f, 0x30, tlv_pcm),
        SOC_ENUM("DAC Playback PowerTune Switch", l_ptm_enum),
-       SOC_DOUBLE_R_S_TLV("HP Driver Playback Volume", AIC32X4_HPLGAIN,
-                       AIC32X4_HPLGAIN, 0, -0x6, 0x1d, 5, 0,
-                       tlv_driver_gain),
-       SOC_DOUBLE_R("HP DAC Playback Switch", AIC32X4_HPLGAIN,
-                       AIC32X4_HPLGAIN, 6, 0x01, 1),
 
-       SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0),
+       SOC_SINGLE_TLV("HP Driver Gain Volume",
+                       AIC32X4_HPLGAIN, 0, 0x74, 1, tlv_tas_driver_gain),
+       SOC_SINGLE("HP DAC Playback Switch", AIC32X4_HPLGAIN, 6, 1, 1),
 
-       SOC_SINGLE_RANGE_TLV("Speaker Driver Playback Volume", TAS2505_SPKVOL1,
-                       0, 0, 117, 1, tlv_spk_vol),
-       SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", TAS2505_SPKVOL2,
-                       4, 5, 0, tlv_amp_vol),
+       SOC_SINGLE_TLV("Speaker Driver Playback Volume",
+                       TAS2505_SPKVOL1, 0, 0x74, 1, tlv_tas_driver_gain),
+       SOC_SINGLE_TLV("Speaker Amplifier Playback Volume",
+                       TAS2505_SPKVOL2, 4, 5, 0, tlv_amp_vol),
+
+       SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0),
 };
 
 static const struct snd_kcontrol_new hp_output_mixer_controls[] = {
index 78b76ec..2fcc973 100644 (file)
@@ -3317,13 +3317,6 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
                             (WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0);
        }
 
-       ret = wcd938x_irq_init(wcd938x, component->dev);
-       if (ret) {
-               dev_err(component->dev, "%s: IRQ init failed: %d\n",
-                       __func__, ret);
-               return ret;
-       }
-
        wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
                                                       WCD938X_IRQ_HPHR_PDM_WD_INT);
        wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
@@ -3553,7 +3546,6 @@ static int wcd938x_bind(struct device *dev)
        }
        wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev);
        wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x;
-       wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq;
 
        wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode);
        if (!wcd938x->txdev) {
@@ -3562,7 +3554,6 @@ static int wcd938x_bind(struct device *dev)
        }
        wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev);
        wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x;
-       wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq;
        wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev);
        if (!wcd938x->tx_sdw_dev) {
                dev_err(dev, "could not get txslave with matching of dev\n");
@@ -3595,6 +3586,15 @@ static int wcd938x_bind(struct device *dev)
                return PTR_ERR(wcd938x->regmap);
        }
 
+       ret = wcd938x_irq_init(wcd938x, dev);
+       if (ret) {
+               dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret);
+               return ret;
+       }
+
+       wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq;
+       wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq;
+
        ret = wcd938x_set_micbias_data(wcd938x);
        if (ret < 0) {
                dev_err(dev, "%s: bad micbias pdata\n", __func__);
index 37aa020..549d982 100644 (file)
 /*
  * HALO_CCM_CORE_CONTROL
  */
+#define HALO_CORE_RESET                     0x00000200
 #define HALO_CORE_EN                        0x00000001
 
 /*
@@ -1213,7 +1214,7 @@ static int wm_coeff_tlv_get(struct snd_kcontrol *kctl,
 
        mutex_lock(&ctl->dsp->pwr_lock);
 
-       ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, size);
+       ret = wm_coeff_read_ctrl(ctl, ctl->cache, size);
 
        if (!ret && copy_to_user(bytes, ctl->cache, size))
                ret = -EFAULT;
@@ -3333,7 +3334,8 @@ static int wm_halo_start_core(struct wm_adsp *dsp)
 {
        return regmap_update_bits(dsp->regmap,
                                  dsp->base + HALO_CCM_CORE_CONTROL,
-                                 HALO_CORE_EN, HALO_CORE_EN);
+                                 HALO_CORE_RESET | HALO_CORE_EN,
+                                 HALO_CORE_RESET | HALO_CORE_EN);
 }
 
 static void wm_halo_stop_core(struct wm_adsp *dsp)
index 0e7ed90..25daef9 100644 (file)
@@ -55,43 +55,68 @@ static int spk_init(struct snd_soc_pcm_runtime *rtd)
        return ret;
 }
 
-static int max98373_sdw_trigger(struct snd_pcm_substream *substream, int cmd)
+static int mx8373_enable_spk_pin(struct snd_pcm_substream *substream, bool enable)
 {
+       struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+       struct snd_soc_dai *codec_dai;
+       struct snd_soc_dai *cpu_dai;
        int ret;
+       int j;
 
-       switch (cmd) {
-       case SNDRV_PCM_TRIGGER_START:
-       case SNDRV_PCM_TRIGGER_RESUME:
-       case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
-               /* enable max98373 first */
-               ret = max_98373_trigger(substream, cmd);
-               if (ret < 0)
-                       break;
-
-               ret = sdw_trigger(substream, cmd);
-               break;
-       case SNDRV_PCM_TRIGGER_STOP:
-       case SNDRV_PCM_TRIGGER_SUSPEND:
-       case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-               ret = sdw_trigger(substream, cmd);
-               if (ret < 0)
-                       break;
-
-               ret = max_98373_trigger(substream, cmd);
-               break;
-       default:
-               ret = -EINVAL;
-               break;
+       /* toggle the speaker pins for playback streams only */
+       if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+               return 0;
+
+       cpu_dai = asoc_rtd_to_cpu(rtd, 0);
+       for_each_rtd_codec_dais(rtd, j, codec_dai) {
+               struct snd_soc_dapm_context *dapm =
+                               snd_soc_component_get_dapm(cpu_dai->component);
+               char pin_name[16];
+
+               snprintf(pin_name, ARRAY_SIZE(pin_name), "%s Spk",
+                        codec_dai->component->name_prefix);
+
+               if (enable)
+                       ret = snd_soc_dapm_enable_pin(dapm, pin_name);
+               else
+                       ret = snd_soc_dapm_disable_pin(dapm, pin_name);
+
+               if (!ret)
+                       snd_soc_dapm_sync(dapm);
        }
 
-       return ret;
+       return 0;
+}
+
+static int mx8373_sdw_prepare(struct snd_pcm_substream *substream)
+{
+       int ret = 0;
+
+       /* per soc_pcm_prepare(), the dai link prepare callback runs first */
+       ret = sdw_prepare(substream);
+       if (ret < 0)
+               return ret;
+
+       return mx8373_enable_spk_pin(substream, true);
+}
+
+static int mx8373_sdw_hw_free(struct snd_pcm_substream *substream)
+{
+       int ret = 0;
+
+       /* per soc_pcm_hw_free(), the dai link hw_free callback runs first */
+       ret = sdw_hw_free(substream);
+       if (ret < 0)
+               return ret;
+
+       return mx8373_enable_spk_pin(substream, false);
 }
 
 static const struct snd_soc_ops max_98373_sdw_ops = {
        .startup = sdw_startup,
-       .prepare = sdw_prepare,
-       .trigger = max98373_sdw_trigger,
-       .hw_free = sdw_hw_free,
+       .prepare = mx8373_sdw_prepare,
+       .trigger = sdw_trigger,
+       .hw_free = mx8373_sdw_hw_free,
        .shutdown = sdw_shutdown,
 };
 
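The reordered callbacks above lean on the call sequence noted in their comments: the DAI-link ops run before the component and DAI handlers in both prepare and hw_free, so the machine driver can wrap the SoundWire helpers and toggle the speaker pins afterwards. A minimal sketch of the pin-toggle idiom, with an illustrative helper name not taken from the patch:

#include <sound/soc.h>
#include <sound/soc-dapm.h>

/* Illustrative helper (name not from the patch): flip a named DAPM pin
 * and, on success, re-run the DAPM graph so the change takes effect,
 * mirroring what mx8373_enable_spk_pin() does for each codec DAI.
 */
static int example_toggle_pin(struct snd_soc_dapm_context *dapm,
			      const char *pin, bool enable)
{
	int ret = enable ? snd_soc_dapm_enable_pin(dapm, pin)
			 : snd_soc_dapm_disable_pin(dapm, pin);

	if (!ret)
		return snd_soc_dapm_sync(dapm);
	return ret;
}
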
index 46513bb..d1c570c 100644 (file)
@@ -1015,6 +1015,7 @@ out:
 
 static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
+       struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
        int ret = -EINVAL, _ret = 0;
        int rollback = 0;
 
@@ -1055,14 +1056,23 @@ start_err:
        case SNDRV_PCM_TRIGGER_STOP:
        case SNDRV_PCM_TRIGGER_SUSPEND:
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-               ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
-               if (ret < 0)
-                       break;
+               if (rtd->dai_link->stop_dma_first) {
+                       ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
 
-               ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
-               if (ret < 0)
-                       break;
+                       ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
+               } else {
+                       ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
 
+                       ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
+               }
                ret = snd_soc_link_trigger(substream, cmd, rollback);
                break;
        }
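
The new dai_link flag lets a machine driver ask that the platform/component (typically the DMA) side be triggered before the DAI on the stop path, as the acp dai_links earlier in this section do. A hedged fragment showing the opt-in, with illustrative names:

#include <sound/soc.h>

/* Illustrative dai_link fragment: with stop_dma_first set,
 * soc_pcm_trigger() stops the component (DMA) side before the DAI on
 * STOP/SUSPEND/PAUSE_PUSH triggers.
 */
static struct snd_soc_dai_link example_dai_link = {
	.name		= "example-link",
	.dpcm_playback	= 1,
	.stop_dma_first	= 1,
};
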
index a002621..d04ce84 100644 (file)
@@ -89,6 +89,7 @@ static const struct sof_dev_desc adls_desc = {
 static const struct sof_dev_desc adl_desc = {
        .machines               = snd_soc_acpi_intel_adl_machines,
        .alt_machines           = snd_soc_acpi_intel_adl_sdw_machines,
+       .use_acpi_target_states = true,
        .resindex_lpe_base      = 0,
        .resindex_pcicfg_base   = -1,
        .resindex_imr_base      = -1,
index 573374b..d3276b4 100644 (file)
@@ -213,19 +213,19 @@ snd_pcm_uframes_t tegra_pcm_pointer(struct snd_soc_component *component,
 }
 EXPORT_SYMBOL_GPL(tegra_pcm_pointer);
 
-static int tegra_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream,
+static int tegra_pcm_preallocate_dma_buffer(struct device *dev, struct snd_pcm *pcm, int stream,
                                            size_t size)
 {
        struct snd_pcm_substream *substream = pcm->streams[stream].substream;
        struct snd_dma_buffer *buf = &substream->dma_buffer;
 
-       buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
+       buf->area = dma_alloc_wc(dev, size, &buf->addr, GFP_KERNEL);
        if (!buf->area)
                return -ENOMEM;
 
        buf->private_data = NULL;
        buf->dev.type = SNDRV_DMA_TYPE_DEV;
-       buf->dev.dev = pcm->card->dev;
+       buf->dev.dev = dev;
        buf->bytes = size;
 
        return 0;
@@ -244,31 +244,28 @@ static void tegra_pcm_deallocate_dma_buffer(struct snd_pcm *pcm, int stream)
        if (!buf->area)
                return;
 
-       dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
+       dma_free_wc(buf->dev.dev, buf->bytes, buf->area, buf->addr);
        buf->area = NULL;
 }
 
-static int tegra_pcm_dma_allocate(struct snd_soc_pcm_runtime *rtd,
+static int tegra_pcm_dma_allocate(struct device *dev, struct snd_soc_pcm_runtime *rtd,
                                  size_t size)
 {
-       struct snd_card *card = rtd->card->snd_card;
        struct snd_pcm *pcm = rtd->pcm;
        int ret;
 
-       ret = dma_set_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
        if (ret < 0)
                return ret;
 
        if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) {
-               ret = tegra_pcm_preallocate_dma_buffer(pcm,
-                       SNDRV_PCM_STREAM_PLAYBACK, size);
+               ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_PLAYBACK, size);
                if (ret)
                        goto err;
        }
 
        if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) {
-               ret = tegra_pcm_preallocate_dma_buffer(pcm,
-                       SNDRV_PCM_STREAM_CAPTURE, size);
+               ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_CAPTURE, size);
                if (ret)
                        goto err_free_play;
        }
@@ -284,7 +281,16 @@ err:
 int tegra_pcm_construct(struct snd_soc_component *component,
                        struct snd_soc_pcm_runtime *rtd)
 {
-       return tegra_pcm_dma_allocate(rtd, tegra_pcm_hardware.buffer_bytes_max);
+       struct device *dev = component->dev;
+
+       /*
+        * Fallback for backwards-compatibility with older device trees that
+        * have the iommus property in the virtual, top-level "sound" node.
+        */
+       if (!of_get_property(dev->of_node, "iommus", NULL))
+               dev = rtd->card->snd_card->dev;
+
+       return tegra_pcm_dma_allocate(dev, rtd, tegra_pcm_hardware.buffer_bytes_max);
 }
 EXPORT_SYMBOL_GPL(tegra_pcm_construct);
 
index a7c0484..265bbc5 100644 (file)
@@ -197,7 +197,7 @@ static int j721e_configure_refclk(struct j721e_priv *priv,
                return ret;
        }
 
-       if (priv->hsdiv_rates[domain->parent_clk_id] != scki) {
+       if (domain->parent_clk_id == -1 || priv->hsdiv_rates[domain->parent_clk_id] != scki) {
                dev_dbg(priv->dev,
                        "%s configuration for %u Hz: %s, %dxFS (SCKI: %u Hz)\n",
                        audio_domain == J721E_AUDIO_DOMAIN_CPB ? "CPB" : "IVI",
@@ -278,23 +278,29 @@ static int j721e_audio_startup(struct snd_pcm_substream *substream)
                                          j721e_rule_rate, &priv->rate_range,
                                          SNDRV_PCM_HW_PARAM_RATE, -1);
 
-       mutex_unlock(&priv->mutex);
 
        if (ret)
-               return ret;
+               goto out;
 
        /* Reset TDM slots to 32 */
        ret = snd_soc_dai_set_tdm_slot(cpu_dai, 0x3, 0x3, 2, 32);
        if (ret && ret != -ENOTSUPP)
-               return ret;
+               goto out;
 
        for_each_rtd_codec_dais(rtd, i, codec_dai) {
                ret = snd_soc_dai_set_tdm_slot(codec_dai, 0x3, 0x3, 2, 32);
                if (ret && ret != -ENOTSUPP)
-                       return ret;
+                       goto out;
        }
 
-       return 0;
+       if (ret == -ENOTSUPP)
+               ret = 0;
+out:
+       if (ret)
+               domain->active--;
+       mutex_unlock(&priv->mutex);
+
+       return ret;
 }
 
 static int j721e_audio_hw_params(struct snd_pcm_substream *substream,
index 30b3e12..f4cdaf1 100644 (file)
@@ -3295,7 +3295,15 @@ static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer,
 {
        struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);
        static const char * const val_types[] = {
-               "BOOLEAN", "INV_BOOLEAN", "S8", "U8", "S16", "U16", "S32", "U32",
+               [USB_MIXER_BOOLEAN] = "BOOLEAN",
+               [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN",
+               [USB_MIXER_S8] = "S8",
+               [USB_MIXER_U8] = "U8",
+               [USB_MIXER_S16] = "S16",
+               [USB_MIXER_U16] = "U16",
+               [USB_MIXER_S32] = "S32",
+               [USB_MIXER_U32] = "U32",
+               [USB_MIXER_BESPOKEN] = "BESPOKEN",
        };
        snd_iprintf(buffer, "    Info: id=%i, control=%i, cmask=0x%x, "
                            "channels=%i, type=\"%s\"\n", cval->head.id,
index 8b8bee3..e7accd8 100644 (file)
@@ -1897,6 +1897,9 @@ static const struct registration_quirk registration_quirks[] = {
        REG_QUIRK_ENTRY(0x0951, 0x16d8, 2),     /* Kingston HyperX AMP */
        REG_QUIRK_ENTRY(0x0951, 0x16ed, 2),     /* Kingston HyperX Cloud Alpha S */
        REG_QUIRK_ENTRY(0x0951, 0x16ea, 2),     /* Kingston HyperX Cloud Flight S */
+       REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2),     /* JBL Quantum 600 */
+       REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2),     /* JBL Quantum 400 */
+       REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2),     /* JBL Quantum 800 */
        { 0 }                                   /* terminator */
 };
 
index ff4d327..88b28aa 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **btf** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-B** | **--base-btf** } }
 
        *COMMANDS* := { **dump** | **help** }
 
@@ -73,6 +74,20 @@ OPTIONS
 =======
        .. include:: common_options.rst
 
+       -B, --base-btf *FILE*
+                 Pass a base BTF object. Base BTF objects are typically used
+                 with BTF objects for kernel modules. To avoid duplicating
+                 all kernel symbols required by modules, BTF objects for
+                 modules are "split": they are built incrementally on top of
+                 the kernel (vmlinux) BTF object. So the base BTF reference
+                 should usually point to the kernel BTF.
+
+                 When the main BTF object to process (for example, the
+                 module BTF to dump) is passed as a *FILE*, bpftool attempts
+                 to autodetect the path for the base object, so passing
+                 this option is optional. When the main BTF object is passed
+                 through other handles, this option becomes necessary.
+
 EXAMPLES
 ========
 **# bpftool btf dump id 1226**
@@ -217,3 +232,34 @@ All the standard ways to specify map or program are supported:
 **# bpftool btf dump prog tag b88e0a09b1d9759d**
 
 **# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+|
+| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus**
+| (or)
+| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')**
+| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux**
+
+::
+
+  [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2
+          'alert' type_id=393 bits_offset=0
+          'ara' type_id=56050 bits_offset=256
+  [104849] STRUCT 'alert_data' size=12 vlen=3
+          'addr' type_id=16 bits_offset=0
+          'type' type_id=56053 bits_offset=32
+          'data' type_id=7 bits_offset=64
+  [104850] PTR '(anon)' type_id=104848
+  [104851] PTR '(anon)' type_id=104849
+  [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static
+  [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static
+  [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1
+          'ara' type_id=56050
+  [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static
+  [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static
+  [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2
+          'ara' type_id=56050
+          'id' type_id=56056
+  [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static
+  [104859] FUNC 'smbalert_work' type_id=9695 linkage=static
+  [104860] FUNC 'smbus_alert' type_id=71367 linkage=static
+  [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static
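
Module BTF objects are split: their type IDs continue the vmlinux numbering (note the IDs above starting in the 104000s), so they only resolve against a base object, which is what **-B** supplies. A rough C equivalent using libbpf's split parser, with the same sysfs paths as the example above:

#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct btf *vmlinux_btf, *module_btf;

	/* Base object: the running kernel's BTF. */
	vmlinux_btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
	if (libbpf_get_error(vmlinux_btf))
		return 1;

	/* Split object: module types appended on top of the base IDs. */
	module_btf = btf__parse_split("/sys/kernel/btf/i2c_smbus", vmlinux_btf);
	if (libbpf_get_error(module_btf)) {
		btf__free(vmlinux_btf);
		return 1;
	}

	printf("module BTF carries %u types (base included)\n",
	       btf__get_nr_types(module_btf));

	btf__free(module_btf);
	btf__free(vmlinux_btf);
	return 0;
}
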
index baee859..3e4395e 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **cgroup** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } }
 
        *COMMANDS* :=
        { **show** | **list** | **tree** | **attach** | **detach** | **help** }
index dd3771b..ab9f57e 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **feature** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* := { **probe** | **help** }
 
index 7cd6681..2ef2f2d 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **gen** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-L** | **--use-loader** } }
 
        *COMMAND* := { **object** | **skeleton** | **help** }
 
@@ -152,6 +153,12 @@ OPTIONS
 =======
        .. include:: common_options.rst
 
+       -L, --use-loader
+                 For skeletons, generate a "light" skeleton (also known as a
+                 "loader" skeleton). A light skeleton contains a loader eBPF
+                 program; it does not use the majority of the libbpf
+                 infrastructure and does not need libelf.
+
 EXAMPLES
 ========
 **$ cat example1.bpf.c**
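
Complementing the listing above, a light skeleton built with **-L** is consumed much like a regular libbpf skeleton. A minimal sketch, assuming the conventional generated names (example1__open_and_load() and friends) rather than anything taken from the patch itself:

#include <stdio.h>
#include "example1.skel.h"	/* assumed: bpftool gen skeleton -L example1.bpf.o */

int main(void)
{
	struct example1 *skel;

	/* With -L, open_and_load() executes the embedded loader eBPF
	 * program to create maps and load programs, instead of going
	 * through the regular libbpf/libelf loading path.
	 */
	skel = example1__open_and_load();
	if (!skel) {
		fprintf(stderr, "failed to load skeleton\n");
		return 1;
	}

	/* ... use the skeleton's map and program handles here ... */

	example1__destroy(skel);
	return 0;
}
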
index 51f49be..471f363 100644 (file)
@@ -12,6 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **iter** *COMMAND*
 
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+
        *COMMANDS* := { **pin** | **help** }
 
 ITER COMMANDS
index 5f7db2a..0de90f0 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **link** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
 
        *COMMANDS* := { **show** | **list** | **pin** | **help** }
 
index 3d52256..d0c4abe 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **map** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
 
        *COMMANDS* :=
        { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
index d8165d5..1ae0375 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **net** *COMMAND*
 
-       *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **attach** | **detach** | **help** }
index e958ce9..ce52798 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **perf** *COMMAND*
 
-       *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **help** }
index fe1b38e..91608cb 100644 (file)
@@ -12,7 +12,9 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **prog** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+               { **-L** | **--use-loader** } }
 
        *COMMANDS* :=
        { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
@@ -48,10 +50,11 @@ PROG COMMANDS
 |              **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |      }
 |       *ATTACH_TYPE* := {
-|              **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+|              **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
 |      }
 |      *METRICs* := {
-|              **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+|              **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
+|              **itlb_misses** | **dtlb_misses**
 |      }
 
 
@@ -223,6 +226,20 @@ OPTIONS
                  Do not automatically attempt to mount any virtual file system
                  (such as tracefs or BPF virtual file system) when necessary.
 
+       -L, --use-loader
+                 Load the program as a "loader" program. This is useful
+                 for debugging the generation of such programs. When this
+                 option is in use, bpftool attempts to load the programs
+                 from the object file into the kernel, but does not pin
+                 them (therefore, the *PATH* must not be provided).
+
+                 When combined with the **-d**\ \|\ **--debug** option,
+                 additional debug messages are generated, and the execution
+                 of the loader program will use the **bpf_trace_printk**\ ()
+                 helper to log each step of loading BTF, creating the maps,
+                 and loading the programs (see **bpftool prog tracelog** as
+                 a way to dump those messages).
+
 EXAMPLES
 ========
 **# bpftool prog show**
@@ -326,3 +343,16 @@ EXAMPLES
       40176203 cycles                                                 (83.05%)
       42518139 instructions    #   1.06 insns per cycle               (83.39%)
            123 llc_misses      #   2.89 LLC misses per million insns  (83.15%)
+
+|
+| Output below is for the trace logs.
+| Run in separate terminals:
+| **# bpftool prog tracelog**
+| **# bpftool prog load -L -d file.o**
+
+::
+
+    bpftool-620059  [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5
+    bpftool-620059  [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6
+    bpftool-620059  [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7
+    bpftool-620059  [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0
index 506e70e..02afc0f 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **dump** | **register** | **unregister** | **help** }
index e7d9493..bb23f55 100644 (file)
@@ -18,15 +18,15 @@ SYNOPSIS
 
        *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
 
-       *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
-       | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-V** | **--version** } |
+               { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *MAP-COMMANDS* :=
-       { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
-       | **delete** | **pin** | **event_pipe** | **help** }
+       { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+               **delete** | **pin** | **event_pipe** | **help** }
 
-       *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
-       | **load** | **attach** | **detach** | **help** }
+       *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
+               **load** | **attach** | **detach** | **help** }
 
        *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
index cc33c58..88e2bcf 100644 (file)
@@ -260,7 +260,8 @@ _bpftool()
 
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
-        local c='--version --json --pretty --bpffs --mapcompat --debug'
+        local c='--version --json --pretty --bpffs --mapcompat --debug \
+              --use-loader --base-btf'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
@@ -278,7 +279,7 @@ _bpftool()
             _sysfs_get_netdevs
             return 0
             ;;
-        file|pinned)
+        file|pinned|-B|--base-btf)
             _filedir
             return 0
             ;;
@@ -291,7 +292,8 @@ _bpftool()
     # Remove all options so completions don't have to deal with them.
     local i
     for (( i=1; i < ${#words[@]}; )); do
-        if [[ ${words[i]::1} == - ]]; then
+        if [[ ${words[i]::1} == - ]] &&
+            [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
             words=( "${words[@]:0:i}" "${words[@]:i+1}" )
             [[ $i -le $cword ]] && cword=$(( cword - 1 ))
         else
@@ -343,7 +345,8 @@ _bpftool()
 
             local PROG_TYPE='id pinned tag name'
             local MAP_TYPE='id pinned name'
-            local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
+            local METRIC_TYPE='cycles instructions l1d_loads llc_misses \
+                itlb_misses dtlb_misses'
             case $command in
                 show|list)
                     [[ $prev != "$command" ]] && return 0
@@ -404,8 +407,10 @@ _bpftool()
                             return 0
                             ;;
                         5)
-                            COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \
-                                stream_parser flow_dissector' -- "$cur" ) )
+                            local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
+                                skb_verdict stream_verdict stream_parser \
+                                flow_dissector'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         6)
@@ -464,7 +469,7 @@ _bpftool()
 
                     case $prev in
                         type)
-                            COMPREPLY=( $( compgen -W "socket kprobe \
+                            local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \
                                 kretprobe classifier flow_dissector \
                                 action tracepoint raw_tracepoint \
                                 xdp perf_event cgroup/skb cgroup/sock \
@@ -479,8 +484,8 @@ _bpftool()
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
                                 cgroup/setsockopt cgroup/sock_release struct_ops \
-                                fentry fexit freplace sk_lookup" -- \
-                                                   "$cur" ) )
+                                fentry fexit freplace sk_lookup'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         id)
@@ -698,15 +703,15 @@ _bpftool()
                             return 0
                             ;;
                         type)
-                            COMPREPLY=( $( compgen -W 'hash array prog_array \
-                                perf_event_array percpu_hash percpu_array \
-                                stack_trace cgroup_array lru_hash \
+                            local BPFTOOL_MAP_CREATE_TYPES='hash array \
+                                prog_array perf_event_array percpu_hash \
+                                percpu_array stack_trace cgroup_array lru_hash \
                                 lru_percpu_hash lpm_trie array_of_maps \
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
                                 percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops inode_storage task_storage' -- \
-                                                   "$cur" ) )
+                                struct_ops inode_storage task_storage ringbuf'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         key|value|flags|entries)
@@ -1017,34 +1022,37 @@ _bpftool()
                     return 0
                     ;;
                 attach|detach)
-                    local ATTACH_TYPES='ingress egress sock_create sock_ops \
-                        device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+                    local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
+                        sock_create sock_ops device \
+                        bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
                         getpeername4 getpeername6 getsockname4 getsockname6 \
                         sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
                         setsockopt sock_release'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
-                    case $prev in
-                        $command)
-                            _filedir
-                            return 0
-                            ;;
-                        ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
-                        post_bind4|post_bind6|connect4|connect6|getpeername4|\
-                        getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
-                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
+                    # Check for $prev = $command first
+                    if [ $prev = $command ]; then
+                        _filedir
+                        return 0
+                    # Then check for attach type. This is done outside of the
+                    # "case $prev in" to avoid writing the whole list of attach
+                    # types again as a pattern to match (case patterns
+                    # cannot reuse our variable).
+                    elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
-                            ;;
+                    fi
+                    # case/esac for the other cases
+                    case $prev in
                         id)
                             _bpftool_get_prog_ids
                             return 0
                             ;;
                         *)
-                            if ! _bpftool_search_list "$ATTACH_TYPES"; then
-                                COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
-                                    "$cur" ) )
+                            if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then
+                                COMPREPLY=( $( compgen -W \
+                                    "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) )
                             elif [[ "$command" == "attach" ]]; then
                                 # We have an attach type on the command line,
                                 # but it is not the previous word, or
index 385d5c9..f7e5ff3 100644 (file)
@@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv)
        }
 
        if (!btf) {
-               err = btf__get_from_id(btf_id, &btf);
+               btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
+               err = libbpf_get_error(btf);
                if (err) {
                        p_err("get btf by id (%u): %s", btf_id, strerror(err));
                        goto done;
                }
-               if (!btf) {
-                       err = -ENOENT;
-                       p_err("can't find btf with ID (%u)", btf_id);
-                       goto done;
-               }
        }
 
        if (dump_c) {
@@ -985,7 +981,8 @@ static int do_help(int argc, char **argv)
                "       FORMAT  := { raw | c }\n"
                "       " HELP_SPEC_MAP "\n"
                "       " HELP_SPEC_PROGRAM "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-B|--base-btf} }\n"
                "",
                bin_name, "btf");
 
index 7ca54d0..9c25286 100644 (file)
@@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
        }
        info = &prog_info->info;
 
-       if (!info->btf_id || !info->nr_func_info ||
-           btf__get_from_id(info->btf_id, &prog_btf))
+       if (!info->btf_id || !info->nr_func_info)
+               goto print;
+       prog_btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(prog_btf))
                goto print;
        finfo = u64_to_ptr(info->func_info);
        func_type = btf__type_by_id(prog_btf, finfo->type_id);
index 6e53b1d..3571a28 100644 (file)
@@ -501,7 +501,8 @@ static int do_help(int argc, char **argv)
                HELP_SPEC_ATTACH_TYPES "\n"
                "       " HELP_SPEC_ATTACH_FLAGS "\n"
                "       " HELP_SPEC_PROGRAM "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} }\n"
                "",
                bin_name, argv[-2]);
 
index dc6daa1..d42d930 100644 (file)
@@ -67,6 +67,12 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
        [BPF_MODIFY_RETURN]             = "mod_ret",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
+       [BPF_TRACE_ITER]                = "trace_iter",
+       [BPF_XDP_DEVMAP]                = "xdp_devmap",
+       [BPF_XDP_CPUMAP]                = "xdp_cpumap",
+       [BPF_XDP]                       = "xdp",
+       [BPF_SK_REUSEPORT_SELECT]       = "sk_skb_reuseport_select",
+       [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_skb_reuseport_select_or_migrate",
 };
 
 void p_err(const char *fmt, ...)
index 40a88df..7f36385 100644 (file)
@@ -1005,6 +1005,7 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s help\n"
                "\n"
                "       COMPONENT := { kernel | dev NAME }\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index 1d71ff8..d40d92b 100644 (file)
@@ -1026,7 +1026,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
                "       %1$s %2$s help\n"
                "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-L|--use-loader} }\n"
                "",
                bin_name, "gen");
 
index 3b1aad7..84a9b01 100644 (file)
@@ -97,7 +97,9 @@ static int do_help(int argc, char **argv)
        fprintf(stderr,
                "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
                "       %1$s %2$s help\n"
+               "\n"
                "       " HELP_SPEC_MAP "\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, "iter");
 
index e77e152..8cc3e36 100644 (file)
@@ -401,7 +401,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s help\n"
                "\n"
                "       " HELP_SPEC_LINK "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-n|--nomount} }\n"
                "",
                bin_name, argv[-2]);
 
index 3ddfd48..02eaaf0 100644 (file)
@@ -64,7 +64,8 @@ static int do_help(int argc, char **argv)
                "       %s version\n"
                "\n"
                "       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-V|--version} }\n"
                "",
                bin_name, bin_name, bin_name);
 
index c1cf297..90caa42 100644 (file)
@@ -57,8 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
 #define HELP_SPEC_PROGRAM                                              \
        "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
 #define HELP_SPEC_OPTIONS                                              \
-       "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n"   \
-       "\t            {-m|--mapcompat} | {-n|--nomount} }"
+       "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
 #define HELP_SPEC_MAP                                                  \
        "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
 #define HELP_SPEC_LINK                                                 \
index 09ae038..407071d 100644 (file)
@@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
        } else if (info->btf_value_type_id) {
                int err;
 
-               err = btf__get_from_id(info->btf_id, &btf);
-               if (err || !btf) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               err = libbpf_get_error(btf);
+               if (err) {
                        p_err("failed to get btf");
-                       btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH);
+                       btf = ERR_PTR(err);
                }
        }
 
@@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key,
                            void *value)
 {
        json_writer_t *btf_wtr;
-       struct btf *btf = NULL;
-       int err;
+       struct btf *btf;
 
-       err = btf__get_from_id(info->btf_id, &btf);
-       if (err) {
+       btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(btf)) {
                p_err("failed to get btf");
                return;
        }
@@ -1466,8 +1466,9 @@ static int do_help(int argc, char **argv)
                "                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
                "                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
                "                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
-               "                 task_storage }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "                 task_storage }\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-n|--nomount} }\n"
                "",
                bin_name, argv[-2]);
 
index f836d11..6490537 100644 (file)
@@ -729,6 +729,7 @@ static int do_help(int argc, char **argv)
                "\n"
                "       " HELP_SPEC_PROGRAM "\n"
                "       ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "\n"
                "Note: Only xdp and tc attachments are supported now.\n"
                "      For progs attached to cgroups, use \"bpftool cgroup\"\n"
index ad23934..50de087 100644 (file)
@@ -231,7 +231,10 @@ static int do_show(int argc, char **argv)
 static int do_help(int argc, char **argv)
 {
        fprintf(stderr,
-               "Usage: %1$s %2$s { show | list | help }\n"
+               "Usage: %1$s %2$s { show | list }\n"
+               "       %1$s %2$s help\n"
+               "\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index cc48726..9c3e343 100644 (file)
@@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps)
        struct bpf_map_info map_info;
        struct btf_var_secinfo *vsi;
        bool printed_header = false;
-       struct btf *btf = NULL;
        unsigned int i, vlen;
        void *value = NULL;
        const char *name;
+       struct btf *btf;
        int err;
 
        if (!num_maps)
@@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps)
        if (!value)
                return;
 
-       err = btf__get_from_id(map_info.btf_id, &btf);
-       if (err || !btf)
+       btf = btf__load_from_kernel_by_id(map_info.btf_id);
+       if (libbpf_get_error(btf))
                goto out_free;
 
        t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
@@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
                member_len = info->xlated_prog_len;
        }
 
-       if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
-               p_err("failed to get btf");
-               return -1;
+       if (info->btf_id) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               if (libbpf_get_error(btf)) {
+                       p_err("failed to get btf");
+                       return -1;
+               }
        }
 
        func_info = u64_to_ptr(info->func_info);
@@ -781,6 +784,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
                kernel_syms_destroy(&dd);
        }
 
+       btf__free(btf);
+
        return 0;
 }
 
@@ -2002,8 +2007,8 @@ static char *profile_target_name(int tgt_fd)
        struct bpf_prog_info_linear *info_linear;
        struct bpf_func_info *func_info;
        const struct btf_type *t;
+       struct btf *btf = NULL;
        char *name = NULL;
-       struct btf *btf;
 
        info_linear = bpf_program__get_prog_info_linear(
                tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -2012,12 +2017,17 @@ static char *profile_target_name(int tgt_fd)
                return NULL;
        }
 
-       if (info_linear->info.btf_id == 0 ||
-           btf__get_from_id(info_linear->info.btf_id, &btf)) {
+       if (info_linear->info.btf_id == 0) {
                p_err("prog FD %d doesn't have valid btf", tgt_fd);
                goto out;
        }
 
+       btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+       if (libbpf_get_error(btf)) {
+               p_err("failed to load btf for prog FD %d", tgt_fd);
+               goto out;
+       }
+
        func_info = u64_to_ptr(info_linear->info.func_info);
        t = btf__type_by_id(btf, func_info[0].type_id);
        if (!t) {
@@ -2027,6 +2037,7 @@ static char *profile_target_name(int tgt_fd)
        }
        name = strdup(btf__name_by_offset(btf, t->name_off));
 out:
+       btf__free(btf);
        free(info_linear);
        return name;
 }
@@ -2245,10 +2256,12 @@ static int do_help(int argc, char **argv)
                "                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
                "                 cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
                "                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
-               "       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
-               "                        flow_dissector }\n"
+               "       ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
+               "                        stream_parser | flow_dissector }\n"
                "       METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
+               "                    {-L|--use-loader} }\n"
                "",
                bin_name, argv[-2]);
 
index b58b91f..ab2d229 100644 (file)
@@ -572,8 +572,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s unregister STRUCT_OPS_MAP\n"
                "       %1$s %2$s help\n"
                "\n"
-               "       OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
                "       STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index 3ad9301..de6365b 100644 (file)
@@ -291,7 +291,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
        sh->sh_addralign = expected;
 
        if (gelf_update_shdr(scn, sh) == 0) {
-               printf("FAILED cannot update section header: %s\n",
+               pr_err("FAILED cannot update section header: %s\n",
                        elf_errmsg(-1));
                return -1;
        }
@@ -317,6 +317,7 @@ static int elf_collect(struct object *obj)
 
        elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
        if (!elf) {
+               close(fd);
                pr_err("FAILED cannot create ELF descriptor: %s\n",
                        elf_errmsg(-1));
                return -1;
@@ -484,7 +485,7 @@ static int symbols_resolve(struct object *obj)
        err = libbpf_get_error(btf);
        if (err) {
                pr_err("FAILED: load BTF from %s: %s\n",
-                       obj->path, strerror(-err));
+                       obj->btf ?: obj->path, strerror(-err));
                return -1;
        }
 
@@ -555,8 +556,7 @@ static int id_patch(struct object *obj, struct btf_id *id)
        int i;
 
        if (!id->id) {
-               pr_err("FAILED unresolved symbol %s\n", id->name);
-               return -EINVAL;
+               pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
        }
 
        for (i = 0; i < id->addr_cnt; i++) {
@@ -734,8 +734,9 @@ int main(int argc, const char **argv)
 
        err = 0;
 out:
-       if (obj.efile.elf)
+       if (obj.efile.elf) {
                elf_end(obj.efile.elf);
-       close(obj.efile.fd);
+               close(obj.efile.fd);
+       }
        return err;
 }
index d208b2a..eb15f31 100644 (file)
@@ -653,6 +653,7 @@ enum {
        IFLA_BOND_AD_ACTOR_SYSTEM,
        IFLA_BOND_TLB_DYNAMIC_LB,
        IFLA_BOND_PEER_NOTIF_DELAY,
+       IFLA_BOND_AD_LACP_ACTIVE,
        __IFLA_BOND_MAX,
 };
 
index 430f687..94f0a14 100644 (file)
@@ -1,3 +1,3 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
            netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-           btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
+           btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
index b46760b..85de4fd 100644 (file)
@@ -1180,7 +1180,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
 
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
 
-int btf__load(struct btf *btf)
+int btf__load_into_kernel(struct btf *btf)
 {
        __u32 log_buf_size = 0, raw_size;
        char *log_buf = NULL;
@@ -1228,6 +1228,7 @@ done:
        free(log_buf);
        return libbpf_err(err);
 }
+int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
 
 int btf__fd(const struct btf *btf)
 {
@@ -1382,21 +1383,35 @@ exit_free:
        return btf;
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
 {
-       struct btf *res;
-       int err, btf_fd;
+       struct btf *btf;
+       int btf_fd;
 
-       *btf = NULL;
        btf_fd = bpf_btf_get_fd_by_id(id);
        if (btf_fd < 0)
-               return libbpf_err(-errno);
-
-       res = btf_get_from_fd(btf_fd, NULL);
-       err = libbpf_get_error(res);
+               return libbpf_err_ptr(-errno);
 
+       btf = btf_get_from_fd(btf_fd, base_btf);
        close(btf_fd);
 
+       return libbpf_ptr(btf);
+}
+
+struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+       return btf__load_from_kernel_by_id_split(id, NULL);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+       struct btf *res;
+       int err;
+
+       *btf = NULL;
+       res = btf__load_from_kernel_by_id(id);
+       err = libbpf_get_error(res);
+
        if (err)
                return libbpf_err(err);
 
@@ -4021,7 +4036,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
                 */
                if (d->hypot_adjust_canon)
                        continue;
-               
+
                if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
                        d->map[t_id] = c_id;
 
@@ -4394,7 +4409,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
  * Probe few well-known locations for vmlinux kernel image and try to load BTF
  * data out of it to use for target BTF.
  */
-struct btf *libbpf_find_kernel_btf(void)
+struct btf *btf__load_vmlinux_btf(void)
 {
        struct {
                const char *path_fmt;
@@ -4440,6 +4455,16 @@ struct btf *libbpf_find_kernel_btf(void)
        return libbpf_err_ptr(-ESRCH);
 }
 
+struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
+
+struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
+{
+       char path[80];
+
+       snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name);
+       return btf__parse_split(path, vmlinux_btf);
+}
+
 int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
 {
        int i, n, err;
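
The new loaders return the BTF object directly and signal failure through libbpf_get_error(), which is the pattern the converted bpftool call sites adopt. A condensed sketch of the migration away from the now-wrapped btf__get_from_id():

#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

/* Old pattern, kept only as a compatibility wrapper:
 *
 *	struct btf *btf = NULL;
 *	int err = btf__get_from_id(id, &btf);
 *	if (err || !btf)
 *		return NULL;
 *
 * New pattern, as adopted by the converted call sites:
 */
static struct btf *load_btf_by_id(__u32 id)
{
	struct btf *btf = btf__load_from_kernel_by_id(id);

	if (libbpf_get_error(btf)) {
		fprintf(stderr, "failed to load BTF object %u\n", id);
		return NULL;
	}
	return btf;
}
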
index b54f1c3..4a711f9 100644 (file)
@@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b
 LIBBPF_API struct btf *btf__parse_raw(const char *path);
 LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
 
+LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
+LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
+LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API int btf__load_into_kernel(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
                                   const char *type_name);
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
@@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
                                    __u32 expected_key_size,
                                    __u32 expected_value_size,
@@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf,
 LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
 LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
-
 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
@@ -184,6 +190,25 @@ LIBBPF_API int
 btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
                         const struct btf_dump_emit_type_decl_opts *opts);
 
+
+struct btf_dump_type_data_opts {
+       /* size of this struct, for forward/backward compatibility */
+       size_t sz;
+       const char *indent_str;
+       int indent_level;
+       /* below match "show" flags for the bpf_snprintf_btf() helper */
+       bool compact;           /* no newlines/indentation */
+       bool skip_names;        /* skip member/type names */
+       bool emit_zeroes;       /* show 0-valued fields */
+       size_t :0;
+};
+#define btf_dump_type_data_opts__last_field emit_zeroes
+
+LIBBPF_API int
+btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+                        const void *data, size_t data_sz,
+                        const struct btf_dump_type_data_opts *opts);
+
 /*
  * A set of helpers for easier BTF types handling
  */
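
A minimal usage sketch for the new typed-data dump API (the dump handle "d", type ID "id" and data buffer are assumed to exist; DECLARE_LIBBPF_OPTS fills in .sz):

	DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts,
		.compact = true,	/* single-line output */
		.emit_zeroes = false,	/* elide zero-valued fields */
	);
	int ret;

	ret = btf_dump__dump_type_data(d, id, data, data_sz, &opts);
	if (ret < 0)	/* e.g. -E2BIG if data_sz is too small for the type */
		return ret;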
index 5dc6b51..e4b483f 100644
@@ -10,6 +10,8 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
+#include <ctype.h>
+#include <endian.h>
 #include <errno.h>
 #include <linux/err.h>
 #include <linux/btf.h>
@@ -53,6 +55,26 @@ struct btf_dump_type_aux_state {
        __u8 referenced: 1;
 };
 
+/* indent string length; one indent string is added for each indent level */
+#define BTF_DATA_INDENT_STR_LEN                        32
+
+/*
+ * Common internal data for BTF type data dump operations.
+ */
+struct btf_dump_data {
+       const void *data_end;           /* end of valid data to show */
+       bool compact;
+       bool skip_names;
+       bool emit_zeroes;
+       __u8 indent_lvl;        /* base indent level */
+       char indent_str[BTF_DATA_INDENT_STR_LEN];
+       /* below are used during iteration */
+       int depth;
+       bool is_array_member;
+       bool is_array_terminated;
+       bool is_array_char;
+};
+
 struct btf_dump {
        const struct btf *btf;
        const struct btf_ext *btf_ext;
@@ -60,6 +82,7 @@ struct btf_dump {
        struct btf_dump_opts opts;
        int ptr_sz;
        bool strip_mods;
+       bool skip_anon_defs;
        int last_id;
 
        /* per-type auxiliary state */
@@ -89,6 +112,10 @@ struct btf_dump {
         * name occurrences
         */
        struct hashmap *ident_names;
+       /*
+        * data for typed display; set only for the duration of a
+        * btf_dump__dump_type_data() call.
+        */
+       struct btf_dump_data *typed_dump;
 };
 
 static size_t str_hash_fn(const void *key, void *ctx)
@@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
                break;
        case BTF_KIND_FUNC_PROTO: {
                const struct btf_param *p = btf_params(t);
-               __u16 vlen = btf_vlen(t);
+               __u16 n = btf_vlen(t);
                int i;
 
                btf_dump_emit_type(d, t->type, cont_id);
-               for (i = 0; i < vlen; i++, p++)
+               for (i = 0; i < n; i++, p++)
                        btf_dump_emit_type(d, p->type, cont_id);
 
                break;
@@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
                                     const struct btf_type *t)
 {
-       btf_dump_printf(d, "%s %s",
+       btf_dump_printf(d, "%s%s%s",
                        btf_is_struct(t) ? "struct" : "union",
+                       t->name_off ? " " : "",
                        btf_dump_type_name(d, id));
 }
 
@@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                case BTF_KIND_UNION:
                        btf_dump_emit_mods(d, decls);
                        /* inline anonymous struct/union */
-                       if (t->name_off == 0)
+                       if (t->name_off == 0 && !d->skip_anon_defs)
                                btf_dump_emit_struct_def(d, id, t, lvl);
                        else
                                btf_dump_emit_struct_fwd(d, id, t);
@@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                case BTF_KIND_ENUM:
                        btf_dump_emit_mods(d, decls);
                        /* inline anonymous enum */
-                       if (t->name_off == 0)
+                       if (t->name_off == 0 && !d->skip_anon_defs)
                                btf_dump_emit_enum_def(d, id, t, lvl);
                        else
                                btf_dump_emit_enum_fwd(d, id, t);
@@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
        btf_dump_emit_name(d, fname, last_was_ptr);
 }
 
+/* show type name as (type_name) */
+static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+                                   bool top_level)
+{
+       const struct btf_type *t;
+
+       /* for array members, we don't bother emitting type name for each
+        * member to avoid the redundancy of
+        * .name = (char[4])[(char)'f',(char)'o',(char)'o',]
+        */
+       if (d->typed_dump->is_array_member)
+               return;
+
+       /* avoid type name specification for variable/section; it will be done
+        * for the associated variable value(s).
+        */
+       t = btf__type_by_id(d->btf, id);
+       if (btf_is_var(t) || btf_is_datasec(t))
+               return;
+
+       if (top_level)
+               btf_dump_printf(d, "(");
+
+       d->skip_anon_defs = true;
+       d->strip_mods = true;
+       btf_dump_emit_type_decl(d, id, "", 0);
+       d->strip_mods = false;
+       d->skip_anon_defs = false;
+
+       if (top_level)
+               btf_dump_printf(d, ")");
+}
+
 /* return number of duplicates (occurrences) of a given name */
 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
                                 const char *orig_name)
@@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
 {
        return btf_dump_resolve_name(d, id, d->ident_names);
 }
+
+static int btf_dump_dump_type_data(struct btf_dump *d,
+                                  const char *fname,
+                                  const struct btf_type *t,
+                                  __u32 id,
+                                  const void *data,
+                                  __u8 bits_offset,
+                                  __u8 bit_sz);
+
+static const char *btf_dump_data_newline(struct btf_dump *d)
+{
+       return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n";
+}
+
+static const char *btf_dump_data_delim(struct btf_dump *d)
+{
+       return d->typed_dump->depth == 0 ? "" : ",";
+}
+
+static void btf_dump_data_pfx(struct btf_dump *d)
+{
+       int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth;
+
+       if (d->typed_dump->compact)
+               return;
+
+       for (i = 0; i < lvl; i++)
+               btf_dump_printf(d, "%s", d->typed_dump->indent_str);
+}
+
+/* A macro is used here as btf_dump_type_values() appends format specifiers
+ * to the format string passed in; these do the work of appending
+ * delimiters etc while the caller simply has to specify the type values
+ * in the format string + value(s).
+ */
+#define btf_dump_type_values(d, fmt, ...)                              \
+       btf_dump_printf(d, fmt "%s%s",                                  \
+                       ##__VA_ARGS__,                                  \
+                       btf_dump_data_delim(d),                         \
+                       btf_dump_data_newline(d))
+
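For illustration, at non-zero depth in non-compact mode a call expands as follows (sketch):

	/* btf_dump_type_values(d, "%d", 42);
	 * expands to
	 * btf_dump_printf(d, "%d" "%s%s", 42,
	 *                 btf_dump_data_delim(d),      -> ","
	 *                 btf_dump_data_newline(d));   -> "\n"
	 * i.e. it emits "42,\n"
	 */
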
+static int btf_dump_unsupported_data(struct btf_dump *d,
+                                    const struct btf_type *t,
+                                    __u32 id)
+{
+       btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t));
+       return -ENOTSUP;
+}
+
+static int btf_dump_get_bitfield_value(struct btf_dump *d,
+                                      const struct btf_type *t,
+                                      const void *data,
+                                      __u8 bits_offset,
+                                      __u8 bit_sz,
+                                      __u64 *value)
+{
+       __u16 left_shift_bits, right_shift_bits;
+       __u8 nr_copy_bits, nr_copy_bytes;
+       const __u8 *bytes = data;
+       int sz = t->size;
+       __u64 num = 0;
+       int i;
+
+       /* Maximum supported bitfield size is 64 bits */
+       if (sz > 8) {
+               pr_warn("unexpected bitfield size %d\n", sz);
+               return -EINVAL;
+       }
+
+       /* Bitfield value retrieval is done in two steps; first relevant bytes are
+        * stored in num, then we left/right shift num to eliminate irrelevant bits.
+        */
+       nr_copy_bits = bit_sz + bits_offset;
+       nr_copy_bytes = t->size;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       for (i = nr_copy_bytes - 1; i >= 0; i--)
+               num = num * 256 + bytes[i];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+       for (i = 0; i < nr_copy_bytes; i++)
+               num = num * 256 + bytes[i];
+#else
+# error "Unrecognized __BYTE_ORDER"
+#endif
+       left_shift_bits = 64 - nr_copy_bits;
+       right_shift_bits = 64 - bit_sz;
+
+       *value = (num << left_shift_bits) >> right_shift_bits;
+
+       return 0;
+}
+
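To make the shift arithmetic concrete, a worked example (values chosen purely for illustration):

	/* little-endian, data = {0xab, 0x01}, t->size = 2,
	 * bits_offset = 4, bit_sz = 5:
	 *   num          = 0x01ab  (bytes assembled LSB-first)
	 *   nr_copy_bits = 5 + 4 = 9
	 *   num << (64 - 9) moves the low 9 bits to the top of the u64;
	 *   ... >> (64 - 5) keeps the top 5 of those, i.e. bits [8:4],
	 *   so *value = 0b11010 = 0x1a
	 */
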
+static int btf_dump_bitfield_check_zero(struct btf_dump *d,
+                                       const struct btf_type *t,
+                                       const void *data,
+                                       __u8 bits_offset,
+                                       __u8 bit_sz)
+{
+       __u64 check_num;
+       int err;
+
+       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num);
+       if (err)
+               return err;
+       if (check_num == 0)
+               return -ENODATA;
+       return 0;
+}
+
+static int btf_dump_bitfield_data(struct btf_dump *d,
+                                 const struct btf_type *t,
+                                 const void *data,
+                                 __u8 bits_offset,
+                                 __u8 bit_sz)
+{
+       __u64 print_num;
+       int err;
+
+       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num);
+       if (err)
+               return err;
+
+       btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num);
+
+       return 0;
+}
+
+/* ints, floats and ptrs */
+static int btf_dump_base_type_check_zero(struct btf_dump *d,
+                                        const struct btf_type *t,
+                                        __u32 id,
+                                        const void *data)
+{
+       static __u8 bytecmp[16] = {};
+       int nr_bytes;
+
+       /* For pointer types, pointer size is not defined on a per-type basis.
+        * On dump creation however, we store the pointer size.
+        */
+       if (btf_kind(t) == BTF_KIND_PTR)
+               nr_bytes = d->ptr_sz;
+       else
+               nr_bytes = t->size;
+
+       if (nr_bytes < 1 || nr_bytes > 16) {
+               pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id);
+               return -EINVAL;
+       }
+
+       if (memcmp(data, bytecmp, nr_bytes) == 0)
+               return -ENODATA;
+       return 0;
+}
+
+static bool ptr_is_aligned(const void *data, int data_sz)
+{
+       return ((uintptr_t)data) % data_sz == 0;
+}
+
+static int btf_dump_int_data(struct btf_dump *d,
+                            const struct btf_type *t,
+                            __u32 type_id,
+                            const void *data,
+                            __u8 bits_offset)
+{
+       __u8 encoding = btf_int_encoding(t);
+       bool sign = encoding & BTF_INT_SIGNED;
+       int sz = t->size;
+
+       if (sz == 0) {
+               pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+
+       /* handle packed int data - accesses of integers not aligned on
+        * int boundaries can cause problems on some platforms.
+        */
+       if (!ptr_is_aligned(data, sz))
+               return btf_dump_bitfield_data(d, t, data, 0, 0);
+
+       switch (sz) {
+       case 16: {
+               const __u64 *ints = data;
+               __u64 lsi, msi;
+
+               /* avoid use of __int128 as some 32-bit platforms do not
+                * support it.
+                */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+               lsi = ints[0];
+               msi = ints[1];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+               lsi = ints[1];
+               msi = ints[0];
+#else
+# error "Unrecognized __BYTE_ORDER"
+#endif
+               if (msi == 0)
+                       btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi);
+               else
+                       btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi,
+                                            (unsigned long long)lsi);
+               break;
+       }
+       case 8:
+               if (sign)
+                       btf_dump_type_values(d, "%lld", *(long long *)data);
+               else
+                       btf_dump_type_values(d, "%llu", *(unsigned long long *)data);
+               break;
+       case 4:
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s32 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u32 *)data);
+               break;
+       case 2:
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s16 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u16 *)data);
+               break;
+       case 1:
+               if (d->typed_dump->is_array_char) {
+                       /* check for null terminator */
+                       if (d->typed_dump->is_array_terminated)
+                               break;
+                       if (*(char *)data == '\0') {
+                               d->typed_dump->is_array_terminated = true;
+                               break;
+                       }
+                       if (isprint(*(char *)data)) {
+                               btf_dump_type_values(d, "'%c'", *(char *)data);
+                               break;
+                       }
+               }
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s8 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u8 *)data);
+               break;
+       default:
+               pr_warn("unexpected sz %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+union float_data {
+       long double ld;
+       double d;
+       float f;
+};
+
+static int btf_dump_float_data(struct btf_dump *d,
+                              const struct btf_type *t,
+                              __u32 type_id,
+                              const void *data)
+{
+       const union float_data *flp = data;
+       union float_data fl;
+       int sz = t->size;
+
+       /* handle unaligned data; copy to local union */
+       if (!ptr_is_aligned(data, sz)) {
+               memcpy(&fl, data, sz);
+               flp = &fl;
+       }
+
+       switch (sz) {
+       case 16:
+               btf_dump_type_values(d, "%Lf", flp->ld);
+               break;
+       case 8:
+               btf_dump_type_values(d, "%lf", flp->d);
+               break;
+       case 4:
+               btf_dump_type_values(d, "%f", flp->f);
+               break;
+       default:
+               pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int btf_dump_var_data(struct btf_dump *d,
+                            const struct btf_type *v,
+                            __u32 id,
+                            const void *data)
+{
+       enum btf_func_linkage linkage = btf_var(v)->linkage;
+       const struct btf_type *t;
+       const char *l;
+       __u32 type_id;
+
+       switch (linkage) {
+       case BTF_FUNC_STATIC:
+               l = "static ";
+               break;
+       case BTF_FUNC_EXTERN:
+               l = "extern ";
+               break;
+       case BTF_FUNC_GLOBAL:
+       default:
+               l = "";
+               break;
+       }
+
+       /* format of output here is [linkage] [type] [varname] = (type)value,
+        * for example "static int cpu_profile_flip = (int)1"
+        */
+       btf_dump_printf(d, "%s", l);
+       type_id = v->type;
+       t = btf__type_by_id(d->btf, type_id);
+       btf_dump_emit_type_cast(d, type_id, false);
+       btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off));
+       return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
+}
+
+static int btf_dump_array_data(struct btf_dump *d,
+                              const struct btf_type *t,
+                              __u32 id,
+                              const void *data)
+{
+       const struct btf_array *array = btf_array(t);
+       const struct btf_type *elem_type;
+       __u32 i, elem_type_id;
+       __s64 elem_size;
+       bool is_array_member;
+
+       elem_type_id = array->type;
+       elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+       elem_size = btf__resolve_size(d->btf, elem_type_id);
+       if (elem_size <= 0) {
+               pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+               return -EINVAL;
+       }
+
+       if (btf_is_int(elem_type)) {
+               /*
+                * BTF_INT_CHAR encoding never seems to be set for
+                * char arrays, so if size is 1 and element is
+                * printable as a char, we'll do that.
+                */
+               if (elem_size == 1)
+                       d->typed_dump->is_array_char = true;
+       }
+
+       /* note that we increment depth before calling btf_dump_printf() below;
+        * this is intentional.  btf_dump_data_newline() will not print a
+        * newline for depth 0 (since this leaves us with trailing newlines
+        * at the end of typed display), so depth is incremented first.
+        * For similar reasons, we decrement depth before showing the closing
+        * bracket.
+        */
+       d->typed_dump->depth++;
+       btf_dump_printf(d, "[%s", btf_dump_data_newline(d));
+
+       /* may be a multidimensional array, so store current "is array member"
+        * status so we can restore it correctly later.
+        */
+       is_array_member = d->typed_dump->is_array_member;
+       d->typed_dump->is_array_member = true;
+       for (i = 0; i < array->nelems; i++, data += elem_size) {
+               if (d->typed_dump->is_array_terminated)
+                       break;
+               btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);
+       }
+       d->typed_dump->is_array_member = is_array_member;
+       d->typed_dump->depth--;
+       btf_dump_data_pfx(d);
+       btf_dump_type_values(d, "]");
+
+       return 0;
+}
+
+static int btf_dump_struct_data(struct btf_dump *d,
+                               const struct btf_type *t,
+                               __u32 id,
+                               const void *data)
+{
+       const struct btf_member *m = btf_members(t);
+       __u16 n = btf_vlen(t);
+       int i, err = 0;
+
+       /* note that we increment depth before calling btf_dump_printf() below;
+        * this is intentional.  btf_dump_data_newline() will not print a
+        * newline for depth 0 (since this leaves us with trailing newlines
+        * at the end of typed display), so depth is incremented first.
+        * For similar reasons, we decrement depth before showing the closing
+        * brace.
+        */
+       d->typed_dump->depth++;
+       btf_dump_printf(d, "{%s", btf_dump_data_newline(d));
+
+       for (i = 0; i < n; i++, m++) {
+               const struct btf_type *mtype;
+               const char *mname;
+               __u32 moffset;
+               __u8 bit_sz;
+
+               mtype = btf__type_by_id(d->btf, m->type);
+               mname = btf_name_of(d, m->name_off);
+               moffset = btf_member_bit_offset(t, i);
+
+               bit_sz = btf_member_bitfield_size(t, i);
+               err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,
+                                             moffset % 8, bit_sz);
+               if (err < 0)
+                       return err;
+       }
+       d->typed_dump->depth--;
+       btf_dump_data_pfx(d);
+       btf_dump_type_values(d, "}");
+       return err;
+}
+
+union ptr_data {
+       unsigned int p;
+       unsigned long long lp;
+};
+
+static int btf_dump_ptr_data(struct btf_dump *d,
+                             const struct btf_type *t,
+                             __u32 id,
+                             const void *data)
+{
+       if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) {
+               btf_dump_type_values(d, "%p", *(void **)data);
+       } else {
+               union ptr_data pt;
+
+               memcpy(&pt, data, d->ptr_sz);
+               if (d->ptr_sz == 4)
+                       btf_dump_type_values(d, "0x%x", pt.p);
+               else
+                       btf_dump_type_values(d, "0x%llx", pt.lp);
+       }
+       return 0;
+}
+
+static int btf_dump_get_enum_value(struct btf_dump *d,
+                                  const struct btf_type *t,
+                                  const void *data,
+                                  __u32 id,
+                                  __s64 *value)
+{
+       int sz = t->size;
+
+       /* handle unaligned enum value */
+       if (!ptr_is_aligned(data, sz)) {
+               __u64 val;
+               int err;
+
+               err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val);
+               if (err)
+                       return err;
+               *value = (__s64)val;
+               return 0;
+       }
+
+       switch (sz) {
+       case 8:
+               *value = *(__s64 *)data;
+               return 0;
+       case 4:
+               *value = *(__s32 *)data;
+               return 0;
+       case 2:
+               *value = *(__s16 *)data;
+               return 0;
+       case 1:
+               *value = *(__s8 *)data;
+               return 0;
+       default:
+               pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
+               return -EINVAL;
+       }
+}
+
+static int btf_dump_enum_data(struct btf_dump *d,
+                             const struct btf_type *t,
+                             __u32 id,
+                             const void *data)
+{
+       const struct btf_enum *e;
+       __s64 value;
+       int i, err;
+
+       err = btf_dump_get_enum_value(d, t, data, id, &value);
+       if (err)
+               return err;
+
+       for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+               if (value != e->val)
+                       continue;
+               btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+               return 0;
+       }
+
+       btf_dump_type_values(d, "%d", value);
+       return 0;
+}
+
+static int btf_dump_datasec_data(struct btf_dump *d,
+                                const struct btf_type *t,
+                                __u32 id,
+                                const void *data)
+{
+       const struct btf_var_secinfo *vsi;
+       const struct btf_type *var;
+       __u32 i;
+       int err;
+
+       btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off));
+
+       for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) {
+               var = btf__type_by_id(d->btf, vsi->type);
+               err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0);
+               if (err < 0)
+                       return err;
+               btf_dump_printf(d, ";");
+       }
+       return 0;
+}
+
+/* return the size of the type, or -E2BIG if a base type would overflow the available data. */
+static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+                                            const struct btf_type *t,
+                                            __u32 id,
+                                            const void *data,
+                                            __u8 bits_offset)
+{
+       __s64 size = btf__resolve_size(d->btf, id);
+
+       if (size < 0 || size >= INT_MAX) {
+               pr_warn("unexpected size [%zu] for id [%u]\n",
+                       (size_t)size, id);
+               return -EINVAL;
+       }
+
+       /* Only do overflow checking for base types; we do not want to
+        * avoid showing part of a struct, union or array, even if we
+        * do not have enough data to show the full object.  By
+        * restricting overflow checking to base types we can ensure
+        * that partial display succeeds, while avoiding overflowing
+        * and using bogus data for display.
+        */
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+       if (!t) {
+               pr_warn("unexpected error skipping mods/typedefs for id [%u]\n",
+                       id);
+               return -EINVAL;
+       }
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_INT:
+       case BTF_KIND_FLOAT:
+       case BTF_KIND_PTR:
+       case BTF_KIND_ENUM:
+               if (data + bits_offset / 8 + size > d->typed_dump->data_end)
+                       return -E2BIG;
+               break;
+       default:
+               break;
+       }
+       return (int)size;
+}
+
+static int btf_dump_type_data_check_zero(struct btf_dump *d,
+                                        const struct btf_type *t,
+                                        __u32 id,
+                                        const void *data,
+                                        __u8 bits_offset,
+                                        __u8 bit_sz)
+{
+       __s64 value;
+       int i, err;
+
+       /* toplevel exceptions; we show zero values if
+        * - we ask for them (emit_zeroes)
+        * - we are at top-level so we see "struct empty { }"
+        * - or we are an array member and the array is non-empty and
+        *   not a char array; we don't want to be in a situation where we
+        *   have an integer array 0, 1, 0, 1 and only show non-zero values.
+        *   If the array contains zeroes only, or is a char array starting
+        *   with a '\0', the array-level check_zero() will prevent showing it;
+        *   we are concerned with determining zero value at the array member
+        *   level here.
+        */
+       if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 ||
+           (d->typed_dump->is_array_member &&
+            !d->typed_dump->is_array_char))
+               return 0;
+
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_INT:
+               if (bit_sz)
+                       return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz);
+               return btf_dump_base_type_check_zero(d, t, id, data);
+       case BTF_KIND_FLOAT:
+       case BTF_KIND_PTR:
+               return btf_dump_base_type_check_zero(d, t, id, data);
+       case BTF_KIND_ARRAY: {
+               const struct btf_array *array = btf_array(t);
+               const struct btf_type *elem_type;
+               __u32 elem_type_id, elem_size;
+               bool ischar;
+
+               elem_type_id = array->type;
+               elem_size = btf__resolve_size(d->btf, elem_type_id);
+               elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+
+               ischar = btf_is_int(elem_type) && elem_size == 1;
+
+               /* check all elements; if _any_ element is nonzero, the
+                * whole array is displayed.  We make an exception however
+                * for char arrays where the first element is 0; these
+                * are considered zeroed also, even if later elements are
+                * non-zero because the string is terminated.
+                */
+               for (i = 0; i < array->nelems; i++) {
+                       if (i == 0 && ischar && *(char *)data == 0)
+                               return -ENODATA;
+                       err = btf_dump_type_data_check_zero(d, elem_type,
+                                                           elem_type_id,
+                                                           data +
+                                                           (i * elem_size),
+                                                           bits_offset, 0);
+                       if (err != -ENODATA)
+                               return err;
+               }
+               return -ENODATA;
+       }
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION: {
+               const struct btf_member *m = btf_members(t);
+               __u16 n = btf_vlen(t);
+
+               /* if any struct/union member is non-zero, the struct/union
+                * is considered non-zero and dumped.
+                */
+               for (i = 0; i < n; i++, m++) {
+                       const struct btf_type *mtype;
+                       __u32 moffset;
+
+                       mtype = btf__type_by_id(d->btf, m->type);
+                       moffset = btf_member_bit_offset(t, i);
+
+                       /* btf_int_bits() does not store member bitfield size;
+                        * bitfield size needs to be stored here so int display
+                        * of member can retrieve it.
+                        */
+                       bit_sz = btf_member_bitfield_size(t, i);
+                       err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+                                                           moffset % 8, bit_sz);
+                       if (err != -ENODATA)
+                               return err;
+               }
+               return -ENODATA;
+       }
+       case BTF_KIND_ENUM:
+               err = btf_dump_get_enum_value(d, t, data, id, &value);
+               if (err)
+                       return err;
+               if (value == 0)
+                       return -ENODATA;
+               return 0;
+       default:
+               return 0;
+       }
+}
+
+/* returns size of data dumped, or error. */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+                                  const char *fname,
+                                  const struct btf_type *t,
+                                  __u32 id,
+                                  const void *data,
+                                  __u8 bits_offset,
+                                  __u8 bit_sz)
+{
+       int size, err;
+
+       size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+       if (size < 0)
+               return size;
+       err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+       if (err) {
+               /* zeroed data is expected and not an error, so simply skip
+                * dumping such data.  Record other errors however.
+                */
+               if (err == -ENODATA)
+                       return size;
+               return err;
+       }
+       btf_dump_data_pfx(d);
+
+       if (!d->typed_dump->skip_names) {
+               if (fname && strlen(fname) > 0)
+                       btf_dump_printf(d, ".%s = ", fname);
+               btf_dump_emit_type_cast(d, id, true);
+       }
+
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_UNKN:
+       case BTF_KIND_FWD:
+       case BTF_KIND_FUNC:
+       case BTF_KIND_FUNC_PROTO:
+               err = btf_dump_unsupported_data(d, t, id);
+               break;
+       case BTF_KIND_INT:
+               if (bit_sz)
+                       err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz);
+               else
+                       err = btf_dump_int_data(d, t, id, data, bits_offset);
+               break;
+       case BTF_KIND_FLOAT:
+               err = btf_dump_float_data(d, t, id, data);
+               break;
+       case BTF_KIND_PTR:
+               err = btf_dump_ptr_data(d, t, id, data);
+               break;
+       case BTF_KIND_ARRAY:
+               err = btf_dump_array_data(d, t, id, data);
+               break;
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+               err = btf_dump_struct_data(d, t, id, data);
+               break;
+       case BTF_KIND_ENUM:
+               /* handle bitfield and int enum values */
+               if (bit_sz) {
+                       __u64 print_num;
+                       __s64 enum_val;
+
+                       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+                                                         &print_num);
+                       if (err)
+                               break;
+                       enum_val = (__s64)print_num;
+                       err = btf_dump_enum_data(d, t, id, &enum_val);
+               } else
+                       err = btf_dump_enum_data(d, t, id, data);
+               break;
+       case BTF_KIND_VAR:
+               err = btf_dump_var_data(d, t, id, data);
+               break;
+       case BTF_KIND_DATASEC:
+               err = btf_dump_datasec_data(d, t, id, data);
+               break;
+       default:
+               pr_warn("unexpected kind [%u] for id [%u]\n",
+                       BTF_INFO_KIND(t->info), id);
+               return -EINVAL;
+       }
+       if (err < 0)
+               return err;
+       return size;
+}
+
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+                            const void *data, size_t data_sz,
+                            const struct btf_dump_type_data_opts *opts)
+{
+       struct btf_dump_data typed_dump = {};
+       const struct btf_type *t;
+       int ret;
+
+       if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+               return libbpf_err(-EINVAL);
+
+       t = btf__type_by_id(d->btf, id);
+       if (!t)
+               return libbpf_err(-ENOENT);
+
+       d->typed_dump = &typed_dump;
+       d->typed_dump->data_end = data + data_sz;
+       d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+       /* default indent string is a tab */
+       if (!OPTS_GET(opts, indent_str, NULL))
+               d->typed_dump->indent_str[0] = '\t';
+       else
+               strncat(d->typed_dump->indent_str, OPTS_GET(opts, indent_str, NULL),
+                       sizeof(d->typed_dump->indent_str) - 1);
+
+       d->typed_dump->compact = OPTS_GET(opts, compact, false);
+       d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+       d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+       ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
+
+       d->typed_dump = NULL;
+
+       return libbpf_err(ret);
+}
index 4ccfae3..cb106e8 100644
@@ -498,6 +498,10 @@ struct bpf_object {
         * it at load time.
         */
        struct btf *btf_vmlinux;
+       /* Path to the custom BTF to be used for BPF CO-RE relocations as an
+        * override for vmlinux BTF.
+        */
+       char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
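The new field is driven by the open option of the same name; a usage sketch (paths illustrative, not from this patch):

	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.btf_custom_path = "/tmp/custom.btf",
	);
	struct bpf_object *obj;

	obj = bpf_object__open_file("prog.bpf.o", &opts);
	if (libbpf_get_error(obj))
		return -EINVAL;	/* handle open failure */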
@@ -591,11 +595,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
               insn->off == 0;
 }
 
-static bool is_ldimm64_insn(struct bpf_insn *insn)
-{
-       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
 static bool is_call_insn(const struct bpf_insn *insn)
 {
        return insn->code == (BPF_JMP | BPF_CALL);
@@ -2645,8 +2644,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
        struct bpf_program *prog;
        int i;
 
-       /* CO-RE relocations need kernel BTF */
-       if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+       /* CO-RE relocations need kernel BTF, but only when btf_custom_path
+        * is not specified
+        */
+       if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
                return true;
 
        /* Support for typed ksyms needs kernel BTF */
@@ -2679,7 +2680,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
        if (!force && !obj_needs_vmlinux_btf(obj))
                return 0;
 
-       obj->btf_vmlinux = libbpf_find_kernel_btf();
+       obj->btf_vmlinux = btf__load_vmlinux_btf();
        err = libbpf_get_error(obj->btf_vmlinux);
        if (err) {
                pr_warn("Error loading vmlinux BTF: %d\n", err);
@@ -2768,7 +2769,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
                 */
                btf__set_fd(kern_btf, 0);
        } else {
-               err = btf__load(kern_btf);
+               err = btf__load_into_kernel(kern_btf);
        }
        if (sanitize) {
                if (!err) {
@@ -4521,6 +4522,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 {
        struct bpf_create_map_attr create_attr;
        struct bpf_map_def *def = &map->def;
+       int err = 0;
 
        memset(&create_attr, 0, sizeof(create_attr));
 
@@ -4563,8 +4565,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 
        if (bpf_map_type__is_map_in_map(def->type)) {
                if (map->inner_map) {
-                       int err;
-
                        err = bpf_object__create_map(obj, map->inner_map, true);
                        if (err) {
                                pr_warn("map '%s': failed to create inner map: %d\n",
@@ -4589,8 +4589,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
        if (map->fd < 0 && (create_attr.btf_key_type_id ||
                            create_attr.btf_value_type_id)) {
                char *cp, errmsg[STRERR_BUFSIZE];
-               int err = -errno;
 
+               err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
                        map->name, cp, err);
@@ -4602,8 +4602,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
                map->fd = bpf_create_map_xattr(&create_attr);
        }
 
-       if (map->fd < 0)
-               return -errno;
+       err = map->fd < 0 ? -errno : 0;
 
        if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
                if (obj->gen_loader)
@@ -4612,7 +4611,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
                zfree(&map->inner_map);
        }
 
-       return 0;
+       return err;
 }
 
 static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
@@ -4658,10 +4657,13 @@ bpf_object__create_maps(struct bpf_object *obj)
        char *cp, errmsg[STRERR_BUFSIZE];
        unsigned int i, j;
        int err;
+       bool retried;
 
        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];
 
+               retried = false;
+retry:
                if (map->pin_path) {
                        err = bpf_object__reuse_map(map);
                        if (err) {
@@ -4669,6 +4671,12 @@ bpf_object__create_maps(struct bpf_object *obj)
                                        map->name);
                                goto err_out;
                        }
+                       if (retried && map->fd < 0) {
+                               pr_warn("map '%s': cannot find pinned map\n",
+                                       map->name);
+                               err = -ENOENT;
+                               goto err_out;
+                       }
                }
 
                if (map->fd >= 0) {
@@ -4702,9 +4710,13 @@ bpf_object__create_maps(struct bpf_object *obj)
                if (map->pin_path && !map->pinned) {
                        err = bpf_map__pin(map, NULL);
                        if (err) {
+                               zclose(map->fd);
+                               if (!retried && err == -EEXIST) {
+                                       retried = true;
+                                       goto retry;
+                               }
                                pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
                                        map->name, map->pin_path, err);
-                               zclose(map->fd);
                                goto err_out;
                        }
                }
@@ -4721,279 +4733,6 @@ err_out:
        return err;
 }
 
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
-       __u32 type_id;          /* struct/union type or array element type */
-       __u32 idx;              /* field index or array index */
-       const char *name;       /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
-       const struct btf *btf;
-       /* high-level spec: named fields and array indices only */
-       struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
-       /* original unresolved (no skip_mods_or_typedefs) root type ID */
-       __u32 root_type_id;
-       /* CO-RE relocation kind */
-       enum bpf_core_relo_kind relo_kind;
-       /* high-level spec length */
-       int len;
-       /* raw, low-level spec: 1-to-1 with accessor spec string */
-       int raw_spec[BPF_CORE_SPEC_MAX_LEN];
-       /* raw spec length */
-       int raw_len;
-       /* field bit offset represented by spec */
-       __u32 bit_offset;
-};
-
-static bool str_is_empty(const char *s)
-{
-       return !s || !s[0];
-}
-
-static bool is_flex_arr(const struct btf *btf,
-                       const struct bpf_core_accessor *acc,
-                       const struct btf_array *arr)
-{
-       const struct btf_type *t;
-
-       /* not a flexible array, if not inside a struct or has non-zero size */
-       if (!acc->name || arr->nelems > 0)
-               return false;
-
-       /* has to be the last member of enclosing struct */
-       t = btf__type_by_id(btf, acc->type_id);
-       return acc->idx == btf_vlen(t) - 1;
-}
-
-static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_FIELD_BYTE_OFFSET: return "byte_off";
-       case BPF_FIELD_BYTE_SIZE: return "byte_sz";
-       case BPF_FIELD_EXISTS: return "field_exists";
-       case BPF_FIELD_SIGNED: return "signed";
-       case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
-       case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
-       case BPF_TYPE_ID_LOCAL: return "local_type_id";
-       case BPF_TYPE_ID_TARGET: return "target_type_id";
-       case BPF_TYPE_EXISTS: return "type_exists";
-       case BPF_TYPE_SIZE: return "type_size";
-       case BPF_ENUMVAL_EXISTS: return "enumval_exists";
-       case BPF_ENUMVAL_VALUE: return "enumval_value";
-       default: return "unknown";
-       }
-}
-
-static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_FIELD_BYTE_OFFSET:
-       case BPF_FIELD_BYTE_SIZE:
-       case BPF_FIELD_EXISTS:
-       case BPF_FIELD_SIGNED:
-       case BPF_FIELD_LSHIFT_U64:
-       case BPF_FIELD_RSHIFT_U64:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_TYPE_ID_LOCAL:
-       case BPF_TYPE_ID_TARGET:
-       case BPF_TYPE_EXISTS:
-       case BPF_TYPE_SIZE:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_ENUMVAL_EXISTS:
-       case BPF_ENUMVAL_VALUE:
-               return true;
-       default:
-               return false;
-       }
-}
-
-/*
- * Turn bpf_core_relo into a low- and high-level spec representation,
- * validating correctness along the way, as well as calculating resulting
- * field bit offset, specified by accessor string. Low-level spec captures
- * every single level of nestedness, including traversing anonymous
- * struct/union members. High-level one only captures semantically meaningful
- * "turning points": named fields and array indicies.
- * E.g., for this case:
- *
- *   struct sample {
- *       int __unimportant;
- *       struct {
- *           int __1;
- *           int __2;
- *           int a[7];
- *       };
- *   };
- *
- *   struct sample *s = ...;
- *
- *   int x = &s->a[3]; // access string = '0:1:2:3'
- *
- * Low-level spec has 1:1 mapping with each element of access string (it's
- * just a parsed access string representation): [0, 1, 2, 3].
- *
- * High-level spec will capture only 3 points:
- *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
- *   - field 'a' access (corresponds to '2' in low-level spec);
- *   - array element #3 access (corresponds to '3' in low-level spec).
- *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
- * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
- * spec and raw_spec are kept empty.
- *
- * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
- * string to specify the enumerator's value index that needs to be relocated.
- */
-static int bpf_core_parse_spec(const struct btf *btf,
-                              __u32 type_id,
-                              const char *spec_str,
-                              enum bpf_core_relo_kind relo_kind,
-                              struct bpf_core_spec *spec)
-{
-       int access_idx, parsed_len, i;
-       struct bpf_core_accessor *acc;
-       const struct btf_type *t;
-       const char *name;
-       __u32 id;
-       __s64 sz;
-
-       if (str_is_empty(spec_str) || *spec_str == ':')
-               return -EINVAL;
-
-       memset(spec, 0, sizeof(*spec));
-       spec->btf = btf;
-       spec->root_type_id = type_id;
-       spec->relo_kind = relo_kind;
-
-       /* type-based relocations don't have a field access string */
-       if (core_relo_is_type_based(relo_kind)) {
-               if (strcmp(spec_str, "0"))
-                       return -EINVAL;
-               return 0;
-       }
-
-       /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
-       while (*spec_str) {
-               if (*spec_str == ':')
-                       ++spec_str;
-               if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
-                       return -EINVAL;
-               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                       return -E2BIG;
-               spec_str += parsed_len;
-               spec->raw_spec[spec->raw_len++] = access_idx;
-       }
-
-       if (spec->raw_len == 0)
-               return -EINVAL;
-
-       t = skip_mods_and_typedefs(btf, type_id, &id);
-       if (!t)
-               return -EINVAL;
-
-       access_idx = spec->raw_spec[0];
-       acc = &spec->spec[0];
-       acc->type_id = id;
-       acc->idx = access_idx;
-       spec->len++;
-
-       if (core_relo_is_enumval_based(relo_kind)) {
-               if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
-                       return -EINVAL;
-
-               /* record enumerator name in the first accessor */
-               acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
-               return 0;
-       }
-
-       if (!core_relo_is_field_based(relo_kind))
-               return -EINVAL;
-
-       sz = btf__resolve_size(btf, id);
-       if (sz < 0)
-               return sz;
-       spec->bit_offset = access_idx * sz * 8;
-
-       for (i = 1; i < spec->raw_len; i++) {
-               t = skip_mods_and_typedefs(btf, id, &id);
-               if (!t)
-                       return -EINVAL;
-
-               access_idx = spec->raw_spec[i];
-               acc = &spec->spec[spec->len];
-
-               if (btf_is_composite(t)) {
-                       const struct btf_member *m;
-                       __u32 bit_offset;
-
-                       if (access_idx >= btf_vlen(t))
-                               return -EINVAL;
-
-                       bit_offset = btf_member_bit_offset(t, access_idx);
-                       spec->bit_offset += bit_offset;
-
-                       m = btf_members(t) + access_idx;
-                       if (m->name_off) {
-                               name = btf__name_by_offset(btf, m->name_off);
-                               if (str_is_empty(name))
-                                       return -EINVAL;
-
-                               acc->type_id = id;
-                               acc->idx = access_idx;
-                               acc->name = name;
-                               spec->len++;
-                       }
-
-                       id = m->type;
-               } else if (btf_is_array(t)) {
-                       const struct btf_array *a = btf_array(t);
-                       bool flex;
-
-                       t = skip_mods_and_typedefs(btf, a->type, &id);
-                       if (!t)
-                               return -EINVAL;
-
-                       flex = is_flex_arr(btf, acc - 1, a);
-                       if (!flex && access_idx >= a->nelems)
-                               return -EINVAL;
-
-                       spec->spec[spec->len].type_id = id;
-                       spec->spec[spec->len].idx = access_idx;
-                       spec->len++;
-
-                       sz = btf__resolve_size(btf, id);
-                       if (sz < 0)
-                               return sz;
-                       spec->bit_offset += access_idx * sz * 8;
-               } else {
-                       pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
-                               type_id, spec_str, i, id, btf_kind_str(t));
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
 static bool bpf_core_is_flavor_sep(const char *s)
 {
        /* check X___Y name pattern, where X and Y are not underscores */
@@ -5006,7 +4745,7 @@ static bool bpf_core_is_flavor_sep(const char *s)
  * before last triple underscore. Struct name part after last triple
  * underscore is ignored by BPF CO-RE relocation during relocation matching.
  */
-static size_t bpf_core_essential_name_len(const char *name)
+size_t bpf_core_essential_name_len(const char *name)
 {
        size_t n = strlen(name);
        int i;
@@ -5018,34 +4757,20 @@ static size_t bpf_core_essential_name_len(const char *name)
        return n;
 }
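
For example (illustrative), the now-exported helper strips flavor suffixes as follows:

	/* bpf_core_essential_name_len("task_struct___v5_10")
	 *	returns strlen("task_struct") == 11;
	 * names without a "___" flavor suffix are returned at full length.
	 */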
 
-struct core_cand
-{
-       const struct btf *btf;
-       const struct btf_type *t;
-       const char *name;
-       __u32 id;
-};
-
-/* dynamically sized list of type IDs and its associated struct btf */
-struct core_cand_list {
-       struct core_cand *cands;
-       int len;
-};
-
-static void bpf_core_free_cands(struct core_cand_list *cands)
+static void bpf_core_free_cands(struct bpf_core_cand_list *cands)
 {
        free(cands->cands);
        free(cands);
 }
 
-static int bpf_core_add_cands(struct core_cand *local_cand,
+static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
                              size_t local_essent_len,
                              const struct btf *targ_btf,
                              const char *targ_btf_name,
                              int targ_start_id,
-                             struct core_cand_list *cands)
+                             struct bpf_core_cand_list *cands)
 {
-       struct core_cand *new_cands, *cand;
+       struct bpf_core_cand *new_cands, *cand;
        const struct btf_type *t;
        const char *targ_name;
        size_t targ_essent_len;
@@ -5181,11 +4906,11 @@ err_out:
        return 0;
 }
 
-static struct core_cand_list *
+static struct bpf_core_cand_list *
 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 {
-       struct core_cand local_cand = {};
-       struct core_cand_list *cands;
+       struct bpf_core_cand local_cand = {};
+       struct bpf_core_cand_list *cands;
        const struct btf *main_btf;
        size_t local_essent_len;
        int err, i;
@@ -5239,165 +4964,6 @@ err_out:
        return ERR_PTR(err);
 }
 
-/* Check two types for compatibility for the purpose of field access
- * relocation. const/volatile/restrict and typedefs are skipped to ensure we
- * are relocating semantically compatible entities:
- *   - any two STRUCTs/UNIONs are compatible and can be mixed;
- *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
- *   - any two PTRs are always compatible;
- *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
- *     least one of enums should be anonymous;
- *   - for ENUMs, check sizes, names are ignored;
- *   - for INT, size and signedness are ignored;
- *   - any two FLOATs are always compatible;
- *   - for ARRAY, dimensionality is ignored, element types are checked for
- *     compatibility recursively;
- *   - everything else shouldn't be ever a target of relocation.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_fields_are_compat(const struct btf *local_btf,
-                                     __u32 local_id,
-                                     const struct btf *targ_btf,
-                                     __u32 targ_id)
-{
-       const struct btf_type *local_type, *targ_type;
-
-recur:
-       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
-       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-       if (!local_type || !targ_type)
-               return -EINVAL;
-
-       if (btf_is_composite(local_type) && btf_is_composite(targ_type))
-               return 1;
-       if (btf_kind(local_type) != btf_kind(targ_type))
-               return 0;
-
-       switch (btf_kind(local_type)) {
-       case BTF_KIND_PTR:
-       case BTF_KIND_FLOAT:
-               return 1;
-       case BTF_KIND_FWD:
-       case BTF_KIND_ENUM: {
-               const char *local_name, *targ_name;
-               size_t local_len, targ_len;
-
-               local_name = btf__name_by_offset(local_btf,
-                                                local_type->name_off);
-               targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
-               local_len = bpf_core_essential_name_len(local_name);
-               targ_len = bpf_core_essential_name_len(targ_name);
-               /* one of them is anonymous or both w/ same flavor-less names */
-               return local_len == 0 || targ_len == 0 ||
-                      (local_len == targ_len &&
-                       strncmp(local_name, targ_name, local_len) == 0);
-       }
-       case BTF_KIND_INT:
-               /* just reject deprecated bitfield-like integers; all other
-                * integers are by default compatible between each other
-                */
-               return btf_int_offset(local_type) == 0 &&
-                      btf_int_offset(targ_type) == 0;
-       case BTF_KIND_ARRAY:
-               local_id = btf_array(local_type)->type;
-               targ_id = btf_array(targ_type)->type;
-               goto recur;
-       default:
-               pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
-                       btf_kind(local_type), local_id, targ_id);
-               return 0;
-       }
-}
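
To make the FWD/ENUM name rule above concrete, here is a standalone sketch of
the essential-name comparison (simplified to the first "___" occurrence; the
type names are hypothetical and not part of this patch):

	#include <stdio.h>
	#include <string.h>

	/* everything from the "___" flavor separator onward is ignored */
	static size_t essential_name_len(const char *name)
	{
		const char *flavor = strstr(name, "___");

		return flavor ? (size_t)(flavor - name) : strlen(name);
	}

	int main(void)
	{
		const char *a = "task_struct", *b = "task_struct___v510";
		size_t la = essential_name_len(a), lb = essential_name_len(b);

		/* equal essential lengths and prefixes => names match */
		printf("%d\n", la == lb && strncmp(a, b, la) == 0); /* 1 */
		return 0;
	}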
-
-/*
- * Given single high-level named field accessor in local type, find
- * corresponding high-level accessor for a target type. Along the way,
- * maintain low-level spec for target as well. Also keep updating target
- * bit offset.
- *
- * Searching is performed through recursive exhaustive enumeration of all
- * fields of a struct/union. If there are any anonymous (embedded)
- * structs/unions, they are recursively searched as well. If a field with
- * the desired name is found, local and target types are checked for
- * compatibility before returning the result.
- *
- * 1 is returned if the field is found.
- * 0 is returned if no compatible field is found.
- * <0 is returned on error.
- */
-static int bpf_core_match_member(const struct btf *local_btf,
-                                const struct bpf_core_accessor *local_acc,
-                                const struct btf *targ_btf,
-                                __u32 targ_id,
-                                struct bpf_core_spec *spec,
-                                __u32 *next_targ_id)
-{
-       const struct btf_type *local_type, *targ_type;
-       const struct btf_member *local_member, *m;
-       const char *local_name, *targ_name;
-       __u32 local_id;
-       int i, n, found;
-
-       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-       if (!targ_type)
-               return -EINVAL;
-       if (!btf_is_composite(targ_type))
-               return 0;
-
-       local_id = local_acc->type_id;
-       local_type = btf__type_by_id(local_btf, local_id);
-       local_member = btf_members(local_type) + local_acc->idx;
-       local_name = btf__name_by_offset(local_btf, local_member->name_off);
-
-       n = btf_vlen(targ_type);
-       m = btf_members(targ_type);
-       for (i = 0; i < n; i++, m++) {
-               __u32 bit_offset;
-
-               bit_offset = btf_member_bit_offset(targ_type, i);
-
-               /* too deep struct/union/array nesting */
-               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                       return -E2BIG;
-
-               /* speculate this member will be the good one */
-               spec->bit_offset += bit_offset;
-               spec->raw_spec[spec->raw_len++] = i;
-
-               targ_name = btf__name_by_offset(targ_btf, m->name_off);
-               if (str_is_empty(targ_name)) {
-                       /* embedded struct/union, we need to go deeper */
-                       found = bpf_core_match_member(local_btf, local_acc,
-                                                     targ_btf, m->type,
-                                                     spec, next_targ_id);
-                       if (found) /* either found or error */
-                               return found;
-               } else if (strcmp(local_name, targ_name) == 0) {
-                       /* matching named field */
-                       struct bpf_core_accessor *targ_acc;
-
-                       targ_acc = &spec->spec[spec->len++];
-                       targ_acc->type_id = targ_id;
-                       targ_acc->idx = i;
-                       targ_acc->name = targ_name;
-
-                       *next_targ_id = m->type;
-                       found = bpf_core_fields_are_compat(local_btf,
-                                                          local_member->type,
-                                                          targ_btf, m->type);
-                       if (!found)
-                               spec->len--; /* pop accessor */
-                       return found;
-               }
-               /* member turned out not to be what we looked for */
-               spec->bit_offset -= bit_offset;
-               spec->raw_len--;
-       }
-
-       return 0;
-}
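
For illustration, a hypothetical target layout in which the recursive branch
above is taken: matching a local accessor named "b" first tries member #0
("a"), then descends into the anonymous member (empty name) before finding
the named field.

	/* hypothetical target type, not part of this patch */
	struct sample {
		int a;
		struct {	/* anonymous member: searched recursively */
			int b;	/* found at raw spec 1:0, bit offset 32 */
		};
	};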
-
 /* Check local and target types for compatibility. This check is used for
  * type-based CO-RE relocations and follow slightly different rules than
  * field-based relocations. This function assumes that root types were already
@@ -5417,8 +4983,8 @@ static int bpf_core_match_member(const struct btf *local_btf,
  * These rules are not set in stone and probably will be adjusted as we get
  * more experience with using BPF CO-RE relocations.
  */
-static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
-                                    const struct btf *targ_btf, __u32 targ_id)
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                             const struct btf *targ_btf, __u32 targ_id)
 {
        const struct btf_type *local_type, *targ_type;
        int depth = 32; /* max recursion depth */
@@ -5492,671 +5058,6 @@ recur:
        }
 }
 
-/*
- * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + bit offset).
- */
-static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
-                              const struct btf *targ_btf, __u32 targ_id,
-                              struct bpf_core_spec *targ_spec)
-{
-       const struct btf_type *targ_type;
-       const struct bpf_core_accessor *local_acc;
-       struct bpf_core_accessor *targ_acc;
-       int i, sz, matched;
-
-       memset(targ_spec, 0, sizeof(*targ_spec));
-       targ_spec->btf = targ_btf;
-       targ_spec->root_type_id = targ_id;
-       targ_spec->relo_kind = local_spec->relo_kind;
-
-       if (core_relo_is_type_based(local_spec->relo_kind)) {
-               return bpf_core_types_are_compat(local_spec->btf,
-                                                local_spec->root_type_id,
-                                                targ_btf, targ_id);
-       }
-
-       local_acc = &local_spec->spec[0];
-       targ_acc = &targ_spec->spec[0];
-
-       if (core_relo_is_enumval_based(local_spec->relo_kind)) {
-               size_t local_essent_len, targ_essent_len;
-               const struct btf_enum *e;
-               const char *targ_name;
-
-               /* has to resolve to an enum */
-               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
-               if (!btf_is_enum(targ_type))
-                       return 0;
-
-               local_essent_len = bpf_core_essential_name_len(local_acc->name);
-
-               for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
-                       targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
-                       targ_essent_len = bpf_core_essential_name_len(targ_name);
-                       if (targ_essent_len != local_essent_len)
-                               continue;
-                       if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
-                               targ_acc->type_id = targ_id;
-                               targ_acc->idx = i;
-                               targ_acc->name = targ_name;
-                               targ_spec->len++;
-                               targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
-                               targ_spec->raw_len++;
-                               return 1;
-                       }
-               }
-               return 0;
-       }
-
-       if (!core_relo_is_field_based(local_spec->relo_kind))
-               return -EINVAL;
-
-       for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
-               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
-                                                  &targ_id);
-               if (!targ_type)
-                       return -EINVAL;
-
-               if (local_acc->name) {
-                       matched = bpf_core_match_member(local_spec->btf,
-                                                       local_acc,
-                                                       targ_btf, targ_id,
-                                                       targ_spec, &targ_id);
-                       if (matched <= 0)
-                               return matched;
-               } else {
-                       /* for i=0, targ_id is already treated as array element
-                        * type (because it's the original struct), for others
-                        * we should find array element type first
-                        */
-                       if (i > 0) {
-                               const struct btf_array *a;
-                               bool flex;
-
-                               if (!btf_is_array(targ_type))
-                                       return 0;
-
-                               a = btf_array(targ_type);
-                               flex = is_flex_arr(targ_btf, targ_acc - 1, a);
-                               if (!flex && local_acc->idx >= a->nelems)
-                                       return 0;
-                               if (!skip_mods_and_typedefs(targ_btf, a->type,
-                                                           &targ_id))
-                                       return -EINVAL;
-                       }
-
-                       /* too deep struct/union/array nesting */
-                       if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                               return -E2BIG;
-
-                       targ_acc->type_id = targ_id;
-                       targ_acc->idx = local_acc->idx;
-                       targ_acc->name = NULL;
-                       targ_spec->len++;
-                       targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
-                       targ_spec->raw_len++;
-
-                       sz = btf__resolve_size(targ_btf, targ_id);
-                       if (sz < 0)
-                               return sz;
-                       targ_spec->bit_offset += local_acc->idx * sz * 8;
-               }
-       }
-
-       return 1;
-}
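
A hypothetical target type for the flexible-array case above: when the
accessed member is a flex array, is_flex_arr() returns true and the
a->nelems bound check is skipped, so any local array index is accepted.

	/* hypothetical target type, not part of this patch */
	struct event {
		int len;
		char data[];	/* flexible array: no upper bound on index */
	};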
-
-static int bpf_core_calc_field_relo(const struct bpf_program *prog,
-                                   const struct bpf_core_relo *relo,
-                                   const struct bpf_core_spec *spec,
-                                   __u32 *val, __u32 *field_sz, __u32 *type_id,
-                                   bool *validate)
-{
-       const struct bpf_core_accessor *acc;
-       const struct btf_type *t;
-       __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
-       const struct btf_member *m;
-       const struct btf_type *mt;
-       bool bitfield;
-       __s64 sz;
-
-       *field_sz = 0;
-
-       if (relo->kind == BPF_FIELD_EXISTS) {
-               *val = spec ? 1 : 0;
-               return 0;
-       }
-
-       if (!spec)
-               return -EUCLEAN; /* request instruction poisoning */
-
-       acc = &spec->spec[spec->len - 1];
-       t = btf__type_by_id(spec->btf, acc->type_id);
-
-       /* a[n] accessor needs special handling */
-       if (!acc->name) {
-               if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
-                       *val = spec->bit_offset / 8;
-                       /* remember field size for load/store mem size */
-                       sz = btf__resolve_size(spec->btf, acc->type_id);
-                       if (sz < 0)
-                               return -EINVAL;
-                       *field_sz = sz;
-                       *type_id = acc->type_id;
-               } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
-                       sz = btf__resolve_size(spec->btf, acc->type_id);
-                       if (sz < 0)
-                               return -EINVAL;
-                       *val = sz;
-               } else {
-                       pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
-                               prog->name, relo->kind, relo->insn_off / 8);
-                       return -EINVAL;
-               }
-               if (validate)
-                       *validate = true;
-               return 0;
-       }
-
-       m = btf_members(t) + acc->idx;
-       mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
-       bit_off = spec->bit_offset;
-       bit_sz = btf_member_bitfield_size(t, acc->idx);
-
-       bitfield = bit_sz > 0;
-       if (bitfield) {
-               byte_sz = mt->size;
-               byte_off = bit_off / 8 / byte_sz * byte_sz;
-               /* figure out smallest int size necessary for bitfield load */
-               while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
-                       if (byte_sz >= 8) {
-                               /* bitfield can't be read with 64-bit read */
-                               pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
-                                       prog->name, relo->kind, relo->insn_off / 8);
-                               return -E2BIG;
-                       }
-                       byte_sz *= 2;
-                       byte_off = bit_off / 8 / byte_sz * byte_sz;
-               }
-       } else {
-               sz = btf__resolve_size(spec->btf, field_type_id);
-               if (sz < 0)
-                       return -EINVAL;
-               byte_sz = sz;
-               byte_off = spec->bit_offset / 8;
-               bit_sz = byte_sz * 8;
-       }
-
-       /* for bitfields, all the relocatable aspects are ambiguous and we
-        * might disagree with compiler, so turn off validation of expected
-        * value, except for signedness
-        */
-       if (validate)
-               *validate = !bitfield;
-
-       switch (relo->kind) {
-       case BPF_FIELD_BYTE_OFFSET:
-               *val = byte_off;
-               if (!bitfield) {
-                       *field_sz = byte_sz;
-                       *type_id = field_type_id;
-               }
-               break;
-       case BPF_FIELD_BYTE_SIZE:
-               *val = byte_sz;
-               break;
-       case BPF_FIELD_SIGNED:
-               /* enums will be assumed unsigned */
-               *val = btf_is_enum(mt) ||
-                      (btf_int_encoding(mt) & BTF_INT_SIGNED);
-               if (validate)
-                       *validate = true; /* signedness is never ambiguous */
-               break;
-       case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-               *val = 64 - (bit_off + bit_sz - byte_off  * 8);
-#else
-               *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
-#endif
-               break;
-       case BPF_FIELD_RSHIFT_U64:
-               *val = 64 - bit_sz;
-               if (validate)
-                       *validate = true; /* right shift is never ambiguous */
-               break;
-       case BPF_FIELD_EXISTS:
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
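
The bitfield sizing loop above is easiest to follow with numbers. A standalone
sketch, assuming a hypothetical bitfield with bit_off = 6 and bit_sz = 5 whose
base type is 1 byte (so the field straddles a byte boundary):

	#include <stdio.h>

	int main(void)
	{
		unsigned bit_off = 6, bit_sz = 5, byte_sz = 1;
		unsigned byte_off = bit_off / 8 / byte_sz * byte_sz;

		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
			byte_sz *= 2;	/* widen the load: 1 -> 2 -> 4 -> 8 */
			byte_off = bit_off / 8 / byte_sz * byte_sz;
		}
		/* prints "load 2 bytes at offset 0" */
		printf("load %u bytes at offset %u\n", byte_sz, byte_off);
		return 0;
	}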
-
-static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
-                                  const struct bpf_core_spec *spec,
-                                  __u32 *val)
-{
-       __s64 sz;
-
-       /* type-based relos return zero when target type is not found */
-       if (!spec) {
-               *val = 0;
-               return 0;
-       }
-
-       switch (relo->kind) {
-       case BPF_TYPE_ID_TARGET:
-               *val = spec->root_type_id;
-               break;
-       case BPF_TYPE_EXISTS:
-               *val = 1;
-               break;
-       case BPF_TYPE_SIZE:
-               sz = btf__resolve_size(spec->btf, spec->root_type_id);
-               if (sz < 0)
-                       return -EINVAL;
-               *val = sz;
-               break;
-       case BPF_TYPE_ID_LOCAL:
-       /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
-                                     const struct bpf_core_spec *spec,
-                                     __u32 *val)
-{
-       const struct btf_type *t;
-       const struct btf_enum *e;
-
-       switch (relo->kind) {
-       case BPF_ENUMVAL_EXISTS:
-               *val = spec ? 1 : 0;
-               break;
-       case BPF_ENUMVAL_VALUE:
-               if (!spec)
-                       return -EUCLEAN; /* request instruction poisoning */
-               t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
-               e = btf_enum(t) + spec->spec[0].idx;
-               *val = e->val;
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
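
On the BPF program side, these two kinds are produced by libbpf's
bpf_core_read.h helpers. A minimal sketch (the vmlinux.h include and the
enum/value choice are assumptions for illustration):

	#include "vmlinux.h"		/* assumed: generated kernel types */
	#include <bpf/bpf_core_read.h>

	int get_tgid_type_value(void)
	{
		int val = -1;

		/* emits a BPF_ENUMVAL_EXISTS relocation */
		if (bpf_core_enum_value_exists(enum pid_type, PIDTYPE_TGID))
			/* emits a BPF_ENUMVAL_VALUE relocation */
			val = bpf_core_enum_value(enum pid_type, PIDTYPE_TGID);
		return val;
	}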
-
-struct bpf_core_relo_res
-{
-       /* expected value in the instruction, unless validate == false */
-       __u32 orig_val;
-       /* new value that needs to be patched up to */
-       __u32 new_val;
-       /* relocation unsuccessful, poison instruction, but don't fail load */
-       bool poison;
-       /* some relocations can't be validated against orig_val */
-       bool validate;
-       /* for field byte offset relocations of the forms:
-        *     *(T *)(rX + <off>) = rY
-        *     rX = *(T *)(rY + <off>),
-        * we remember original and resolved field size to adjust direct
-        * memory loads of pointers and integers; this is necessary for 32-bit
-        * host kernel architectures, but also allows fields that were resized
-        * from, e.g., u32 to u64, to be relocated automatically.
-        */
-       bool fail_memsz_adjust;
-       __u32 orig_sz;
-       __u32 orig_type_id;
-       __u32 new_sz;
-       __u32 new_type_id;
-};
-
-/* Calculate original and target relocation values, given local and target
- * specs and relocation kind. These values are calculated for each candidate.
- * If there are multiple candidates, resulting values should all be consistent
- * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
- * If instruction has to be poisoned, *poison will be set to true.
- */
-static int bpf_core_calc_relo(const struct bpf_program *prog,
-                             const struct bpf_core_relo *relo,
-                             int relo_idx,
-                             const struct bpf_core_spec *local_spec,
-                             const struct bpf_core_spec *targ_spec,
-                             struct bpf_core_relo_res *res)
-{
-       int err = -EOPNOTSUPP;
-
-       res->orig_val = 0;
-       res->new_val = 0;
-       res->poison = false;
-       res->validate = true;
-       res->fail_memsz_adjust = false;
-       res->orig_sz = res->new_sz = 0;
-       res->orig_type_id = res->new_type_id = 0;
-
-       if (core_relo_is_field_based(relo->kind)) {
-               err = bpf_core_calc_field_relo(prog, relo, local_spec,
-                                              &res->orig_val, &res->orig_sz,
-                                              &res->orig_type_id, &res->validate);
-               err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
-                                                     &res->new_val, &res->new_sz,
-                                                     &res->new_type_id, NULL);
-               if (err)
-                       goto done;
-               /* Validate if it's safe to adjust load/store memory size.
-                * Adjustments are performed only if original and new memory
-                * sizes differ.
-                */
-               res->fail_memsz_adjust = false;
-               if (res->orig_sz != res->new_sz) {
-                       const struct btf_type *orig_t, *new_t;
-
-                       orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
-                       new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
-
-                       /* There are two use cases in which it's safe to
-                        * adjust load/store's mem size:
-                        *   - reading a 32-bit kernel pointer, while on the
-                        *   BPF side pointers are always 64-bit; in this case
-                        *   it's safe to "downsize" instruction size due to
-                        *   pointer being treated as unsigned integer with
-                        *   zero-extended upper 32-bits;
-                        *   - reading unsigned integers, again because
-                        *   zero-extension preserves the value correctly.
-                        *
-                        * In all other cases it's incorrect to attempt to
-                        * load/store field because read value will be
-                        * incorrect, so we poison relocated instruction.
-                        */
-                       if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
-                               goto done;
-                       if (btf_is_int(orig_t) && btf_is_int(new_t) &&
-                           btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
-                           btf_int_encoding(new_t) != BTF_INT_SIGNED)
-                               goto done;
-
-                       /* mark as invalid mem size adjustment, but this will
-                        * only be checked for LDX/STX/ST insns
-                        */
-                       res->fail_memsz_adjust = true;
-               }
-       } else if (core_relo_is_type_based(relo->kind)) {
-               err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
-               err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
-       } else if (core_relo_is_enumval_based(relo->kind)) {
-               err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
-               err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
-       }
-
-done:
-       if (err == -EUCLEAN) {
-               /* EUCLEAN is used to signal instruction poisoning request */
-               res->poison = true;
-               err = 0;
-       } else if (err == -EOPNOTSUPP) {
-               /* EOPNOTSUPP means unknown/unsupported relocation */
-               pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
-                       prog->name, relo_idx, core_relo_kind_str(relo->kind),
-                       relo->kind, relo->insn_off / 8);
-       }
-
-       return err;
-}
-
-/*
- * Turn an instruction for which CO-RE relocation failed into an invalid one
- * with a distinct signature.
- */
-static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
-                                int insn_idx, struct bpf_insn *insn)
-{
-       pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
-                prog->name, relo_idx, insn_idx);
-       insn->code = BPF_JMP | BPF_CALL;
-       insn->dst_reg = 0;
-       insn->src_reg = 0;
-       insn->off = 0;
-       /* if this instruction is reachable (not dead code),
-        * verifier will complain with the following message:
-        * invalid func unknown#195896080
-        */
-       insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
-}
-
-static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
-{
-       switch (BPF_SIZE(insn->code)) {
-       case BPF_DW: return 8;
-       case BPF_W: return 4;
-       case BPF_H: return 2;
-       case BPF_B: return 1;
-       default: return -1;
-       }
-}
-
-static int insn_bytes_to_bpf_size(__u32 sz)
-{
-       switch (sz) {
-       case 8: return BPF_DW;
-       case 4: return BPF_W;
-       case 2: return BPF_H;
-       case 1: return BPF_B;
-       default: return -1;
-       }
-}
-
-/*
- * Patch relocatable BPF instruction.
- *
- * Patched value is determined by relocation kind and target specification.
- * For existence relocations target spec will be NULL if field/type is not found.
- * Expected insn->imm value is determined using relocation kind and local
- * spec, and is checked before patching instruction. If actual insn->imm value
- * is wrong, bail out with error.
- *
- * Currently supported classes of BPF instruction are:
- * 1. rX = <imm> (assignment with immediate operand);
- * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value);
- * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
- * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
- * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
- */
-static int bpf_core_patch_insn(struct bpf_program *prog,
-                              const struct bpf_core_relo *relo,
-                              int relo_idx,
-                              const struct bpf_core_relo_res *res)
-{
-       __u32 orig_val, new_val;
-       struct bpf_insn *insn;
-       int insn_idx;
-       __u8 class;
-
-       if (relo->insn_off % BPF_INSN_SZ)
-               return -EINVAL;
-       insn_idx = relo->insn_off / BPF_INSN_SZ;
-       /* adjust insn_idx from section frame of reference to the local
-        * program's frame of reference; (sub-)program code is not yet
-        * relocated, so it's enough to just subtract in-section offset
-        */
-       insn_idx = insn_idx - prog->sec_insn_off;
-       insn = &prog->insns[insn_idx];
-       class = BPF_CLASS(insn->code);
-
-       if (res->poison) {
-poison:
-               /* poison second part of ldimm64 to avoid confusing error from
-                * verifier about "unknown opcode 00"
-                */
-               if (is_ldimm64_insn(insn))
-                       bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
-               bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
-               return 0;
-       }
-
-       orig_val = res->orig_val;
-       new_val = res->new_val;
-
-       switch (class) {
-       case BPF_ALU:
-       case BPF_ALU64:
-               if (BPF_SRC(insn->code) != BPF_K)
-                       return -EINVAL;
-               if (res->validate && insn->imm != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
-                               prog->name, relo_idx,
-                               insn_idx, insn->imm, orig_val, new_val);
-                       return -EINVAL;
-               }
-               orig_val = insn->imm;
-               insn->imm = new_val;
-               pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
-                        prog->name, relo_idx, insn_idx,
-                        orig_val, new_val);
-               break;
-       case BPF_LDX:
-       case BPF_ST:
-       case BPF_STX:
-               if (res->validate && insn->off != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
-                               prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
-                       return -EINVAL;
-               }
-               if (new_val > SHRT_MAX) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
-                               prog->name, relo_idx, insn_idx, new_val);
-                       return -ERANGE;
-               }
-               if (res->fail_memsz_adjust) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
-                               "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
-                               prog->name, relo_idx, insn_idx);
-                       goto poison;
-               }
-
-               orig_val = insn->off;
-               insn->off = new_val;
-               pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
-                        prog->name, relo_idx, insn_idx, orig_val, new_val);
-
-               if (res->new_sz != res->orig_sz) {
-                       int insn_bytes_sz, insn_bpf_sz;
-
-                       insn_bytes_sz = insn_bpf_size_to_bytes(insn);
-                       if (insn_bytes_sz != res->orig_sz) {
-                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
-                                       prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
-                               return -EINVAL;
-                       }
-
-                       insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
-                       if (insn_bpf_sz < 0) {
-                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
-                                       prog->name, relo_idx, insn_idx, res->new_sz);
-                               return -EINVAL;
-                       }
-
-                       insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
-                       pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
-                                prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
-               }
-               break;
-       case BPF_LD: {
-               __u64 imm;
-
-               if (!is_ldimm64_insn(insn) ||
-                   insn[0].src_reg != 0 || insn[0].off != 0 ||
-                   insn_idx + 1 >= prog->insns_cnt ||
-                   insn[1].code != 0 || insn[1].dst_reg != 0 ||
-                   insn[1].src_reg != 0 || insn[1].off != 0) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
-                               prog->name, relo_idx, insn_idx);
-                       return -EINVAL;
-               }
-
-               imm = insn[0].imm + ((__u64)insn[1].imm << 32);
-               if (res->validate && imm != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
-                               prog->name, relo_idx,
-                               insn_idx, (unsigned long long)imm,
-                               orig_val, new_val);
-                       return -EINVAL;
-               }
-
-               insn[0].imm = new_val;
-               insn[1].imm = 0; /* currently only 32-bit values are supported */
-               pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
-                        prog->name, relo_idx, insn_idx,
-                        (unsigned long long)imm, new_val);
-               break;
-       }
-       default:
-               pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
-                       prog->name, relo_idx, insn_idx, insn->code,
-                       insn->src_reg, insn->dst_reg, insn->off, insn->imm);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-/* Output spec definition in the format:
- * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
- * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
- */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
-{
-       const struct btf_type *t;
-       const struct btf_enum *e;
-       const char *s;
-       __u32 type_id;
-       int i;
-
-       type_id = spec->root_type_id;
-       t = btf__type_by_id(spec->btf, type_id);
-       s = btf__name_by_offset(spec->btf, t->name_off);
-
-       libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
-
-       if (core_relo_is_type_based(spec->relo_kind))
-               return;
-
-       if (core_relo_is_enumval_based(spec->relo_kind)) {
-               t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
-               e = btf_enum(t) + spec->raw_spec[0];
-               s = btf__name_by_offset(spec->btf, e->name_off);
-
-               libbpf_print(level, "::%s = %u", s, e->val);
-               return;
-       }
-
-       if (core_relo_is_field_based(spec->relo_kind)) {
-               for (i = 0; i < spec->len; i++) {
-                       if (spec->spec[i].name)
-                               libbpf_print(level, ".%s", spec->spec[i].name);
-                       else if (i > 0 || spec->spec[i].idx > 0)
-                               libbpf_print(level, "[%u]", spec->spec[i].idx);
-               }
-
-               libbpf_print(level, " (");
-               for (i = 0; i < spec->raw_len; i++)
-                       libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
-
-               if (spec->bit_offset % 8)
-                       libbpf_print(level, " @ offset %u.%u)",
-                                    spec->bit_offset / 8, spec->bit_offset % 8);
-               else
-                       libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
-               return;
-       }
-}
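
For orientation, a field-based spec printed by this helper looks roughly like
the following (type ID, raw spec, and offset are hypothetical):

	[78] struct sample.a[3].b (0:0:3:1 @ offset 44)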
-
 static size_t bpf_core_hash_fn(const void *key, void *ctx)
 {
        return (size_t)key;
@@ -6172,73 +5073,33 @@ static void *u32_as_hash_key(__u32 x)
        return (void *)(uintptr_t)x;
 }
 
-/*
- * CO-RE relocate single instruction.
- *
- * The outline and important points of the algorithm:
- * 1. For given local type, find corresponding candidate target types.
- *    Candidate type is a type with the same "essential" name, ignoring
- *    everything after last triple underscore (___). E.g., `sample`,
- *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
- *    for each other. Names with triple underscore are referred to as
- *    "flavors" and are useful, among other things, to allow to
- *    specify/support incompatible variations of the same kernel struct, which
- *    might differ between different kernel versions and/or build
- *    configurations.
- *
- *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
- *    converter, when deduplicated BTF of a kernel still contains more than
- *    one type with the same name. In that case, ___2, ___3, etc. are
- *    appended starting from the second name conflict. But such flavors are
- *    also useful when defined "locally", in a BPF program, to extract the
- *    same data from incompatible changes between different kernel
- *    versions/configurations. For instance, to handle field renames between
- *    kernel versions, one can use two flavors of the struct name with the
- *    same common name and use conditional relocations to extract that field,
- *    depending on target kernel version.
- * 2. For each candidate type, try to match local specification to this
- *    candidate target type. Matching involves finding corresponding
- *    high-level spec accessors, meaning that all named fields should match,
- *    as well as all array accesses should be within the actual bounds. Also,
- *    types should be compatible (see bpf_core_fields_are_compat for details).
- * 3. It is supported and expected that there might be multiple flavors
- *    matching the spec. As long as all the specs resolve to the same set of
- *    offsets across all candidates, there is no error. If there is any
- *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
- *    imperfections of BTF deduplication, which can cause slight duplication of
- *    the same BTF type, if some directly or indirectly referenced (by
- *    pointer) type gets resolved to different actual types in different
- *    object files. If such situation occurs, deduplicated BTF will end up
- *    with two (or more) structurally identical types, which differ only in
- *    types they refer to through pointer. This should be OK in most cases and
- *    is not an error.
- * 4. Candidate types search is performed by linearly scanning through all
- *    types in target BTF. It is anticipated that this is overall more
- *    efficient memory-wise and not significantly worse (if not better)
- *    CPU-wise compared to prebuilding a map from all local type names to
- *    a list of candidate type names. It's also sped up by caching the resolved
- *    list of matching candidates for each local "root" type ID that has at
- *    least one bpf_core_relo associated with it. This list is shared
- *    between multiple relocations for the same type ID and is updated as some
- *    of the candidates are pruned due to structural incompatibility.
- */
 static int bpf_core_apply_relo(struct bpf_program *prog,
                               const struct bpf_core_relo *relo,
                               int relo_idx,
                               const struct btf *local_btf,
                               struct hashmap *cand_cache)
 {
-       struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
        const void *type_key = u32_as_hash_key(relo->type_id);
-       struct bpf_core_relo_res cand_res, targ_res;
+       struct bpf_core_cand_list *cands = NULL;
+       const char *prog_name = prog->name;
        const struct btf_type *local_type;
        const char *local_name;
-       struct core_cand_list *cands = NULL;
-       __u32 local_id;
-       const char *spec_str;
-       int i, j, err;
+       __u32 local_id = relo->type_id;
+       struct bpf_insn *insn;
+       int insn_idx, err;
+
+       if (relo->insn_off % BPF_INSN_SZ)
+               return -EINVAL;
+       insn_idx = relo->insn_off / BPF_INSN_SZ;
+       /* adjust insn_idx from section frame of reference to the local
+        * program's frame of reference; (sub-)program code is not yet
+        * relocated, so it's enough to just subtract in-section offset
+        */
+       insn_idx = insn_idx - prog->sec_insn_off;
+       if (insn_idx >= prog->insns_cnt)
+               return -EINVAL;
+       insn = &prog->insns[insn_idx];
 
-       local_id = relo->type_id;
        local_type = btf__type_by_id(local_btf, local_id);
        if (!local_type)
                return -EINVAL;
@@ -6247,51 +5108,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
        if (!local_name)
                return -EINVAL;
 
-       spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
-       if (str_is_empty(spec_str))
-               return -EINVAL;
-
        if (prog->obj->gen_loader) {
-               pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+               pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
                        prog - prog->obj->programs, relo->insn_off / 8,
-                       local_name, spec_str, relo->kind);
+                       local_name, relo->kind);
                return -ENOTSUP;
        }
-       err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
-       if (err) {
-               pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
-                       prog->name, relo_idx, local_id, btf_kind_str(local_type),
-                       str_is_empty(local_name) ? "<anon>" : local_name,
-                       spec_str, err);
-               return -EINVAL;
-       }
-
-       pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
-                relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-       bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
-       libbpf_print(LIBBPF_DEBUG, "\n");
-
-       /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
-       if (relo->kind == BPF_TYPE_ID_LOCAL) {
-               targ_res.validate = true;
-               targ_res.poison = false;
-               targ_res.orig_val = local_spec.root_type_id;
-               targ_res.new_val = local_spec.root_type_id;
-               goto patch_insn;
-       }
 
-       /* libbpf doesn't support candidate search for anonymous types */
-       if (str_is_empty(spec_str)) {
-               pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
-                       prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-               return -EOPNOTSUPP;
-       }
-
-       if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+       if (relo->kind != BPF_TYPE_ID_LOCAL &&
+           !hashmap__find(cand_cache, type_key, (void **)&cands)) {
                cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
                if (IS_ERR(cands)) {
                        pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
-                               prog->name, relo_idx, local_id, btf_kind_str(local_type),
+                               prog_name, relo_idx, local_id, btf_kind_str(local_type),
                                local_name, PTR_ERR(cands));
                        return PTR_ERR(cands);
                }
@@ -6302,97 +5131,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
                }
        }
 
-       for (i = 0, j = 0; i < cands->len; i++) {
-               err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
-                                         cands->cands[i].id, &cand_spec);
-               if (err < 0) {
-                       pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
-                               prog->name, relo_idx, i);
-                       bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
-                       libbpf_print(LIBBPF_WARN, ": %d\n", err);
-                       return err;
-               }
-
-               pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
-                        relo_idx, err == 0 ? "non-matching" : "matching", i);
-               bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
-               libbpf_print(LIBBPF_DEBUG, "\n");
-
-               if (err == 0)
-                       continue;
-
-               err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
-               if (err)
-                       return err;
-
-               if (j == 0) {
-                       targ_res = cand_res;
-                       targ_spec = cand_spec;
-               } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
-                       /* if there are many field relo candidates, they
-                        * should all resolve to the same bit offset
-                        */
-                       pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
-                               prog->name, relo_idx, cand_spec.bit_offset,
-                               targ_spec.bit_offset);
-                       return -EINVAL;
-               } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
-                       /* all candidates should result in the same relocation
-                        * decision and value, otherwise it's dangerous to
-                        * proceed due to ambiguity
-                        */
-                       pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
-                               prog->name, relo_idx,
-                               cand_res.poison ? "failure" : "success", cand_res.new_val,
-                               targ_res.poison ? "failure" : "success", targ_res.new_val);
-                       return -EINVAL;
-               }
-
-               cands->cands[j++] = cands->cands[i];
-       }
-
-       /*
-        * For BPF_FIELD_EXISTS relo or when used BPF program has field
-        * existence checks or kernel version/config checks, it's expected
-        * that we might not find any candidates. In this case, if field
-        * wasn't found in any candidate, the list of candidates shouldn't
-        * change at all, we'll just handle relocating appropriately,
-        * depending on relo's kind.
-        */
-       if (j > 0)
-               cands->len = j;
-
-       /*
-        * If no candidates were found, it might be either a programmer error
-        * or an expected case, depending on whether the instruction with the
-        * relocation is guarded in some way that makes it unreachable (dead
-        * code) if relocation can't be resolved. This is handled in
-        * bpf_core_patch_insn() uniformly by replacing that instruction with
-        * BPF helper call insn (using invalid helper ID). If that instruction
-        * is indeed unreachable, then it will be ignored and eliminated by
-        * verifier. If it was an error, then verifier will complain and point
-        * to a specific instruction number in its log.
-        */
-       if (j == 0) {
-               pr_debug("prog '%s': relo #%d: no matching targets found\n",
-                        prog->name, relo_idx);
-
-               /* calculate single target relo result explicitly */
-               err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
-               if (err)
-                       return err;
-       }
-
-patch_insn:
-       /* bpf_core_patch_insn() should know how to handle missing targ_spec */
-       err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
-       if (err) {
-               pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
-                       prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
-               return -EINVAL;
-       }
-
-       return 0;
+       return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
 }
 
 static int
@@ -7232,7 +5971,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
 
        for (i = 0; i < obj->nr_programs; i++) {
                struct bpf_program *p = &obj->programs[i];
-               
+
                if (!p->nr_reloc)
                        continue;
 
@@ -7596,7 +6335,7 @@ static struct bpf_object *
 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
                   const struct bpf_object_open_opts *opts)
 {
-       const char *obj_name, *kconfig;
+       const char *obj_name, *kconfig, *btf_tmp_path;
        struct bpf_program *prog;
        struct bpf_object *obj;
        char tmp_name[64];
@@ -7627,11 +6366,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
        if (IS_ERR(obj))
                return obj;
 
+       btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
+       if (btf_tmp_path) {
+               if (strlen(btf_tmp_path) >= PATH_MAX) {
+                       err = -ENAMETOOLONG;
+                       goto out;
+               }
+               obj->btf_custom_path = strdup(btf_tmp_path);
+               if (!obj->btf_custom_path) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+       }
+
        kconfig = OPTS_GET(opts, kconfig, NULL);
        if (kconfig) {
                obj->kconfig = strdup(kconfig);
-               if (!obj->kconfig)
-                       return ERR_PTR(-ENOMEM);
+               if (!obj->kconfig) {
+                       err = -ENOMEM;
+                       goto out;
+               }
        }
 
        err = bpf_object__elf_init(obj);
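
A usage sketch for the new btf_custom_path option (the file paths are
assumptions for illustration):

	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.btf_custom_path = "/root/btf/5.4.0-custom.btf",
	);
	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);

	if (libbpf_get_error(obj))
		/* handle open error */;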
@@ -8097,7 +6851,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        err = err ? : bpf_object__sanitize_maps(obj);
        err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
        err = err ? : bpf_object__create_maps(obj);
-       err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+       err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
        err = err ? : bpf_object__load_progs(obj, attr->log_level);
 
        if (obj->gen_loader) {
@@ -8492,6 +7246,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map)
        return map->pin_path;
 }
 
+const char *bpf_map__pin_path(const struct bpf_map *map)
+{
+       return map->pin_path;
+}
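
The new getter mirrors bpf_map__get_pin_path() under libbpf's preferred
bpf_map__* naming; a one-line usage sketch (the map variable is assumed):

	const char *path = bpf_map__pin_path(map);	/* NULL if not set */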
+
 bool bpf_map__is_pinned(const struct bpf_map *map)
 {
        return map->pinned;
@@ -8744,6 +7503,7 @@ void bpf_object__close(struct bpf_object *obj)
        for (i = 0; i < obj->nr_maps; i++)
                bpf_map__destroy(&obj->maps[i]);
 
+       zfree(&obj->btf_custom_path);
        zfree(&obj->kconfig);
        zfree(&obj->externs);
        obj->nr_extern = 0;
@@ -9513,7 +8273,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
        ret = snprintf(btf_type_name, sizeof(btf_type_name),
                       "%s%s", prefix, name);
        /* snprintf returns the number of characters written excluding the
-        * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+        * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
         * indicates truncation.
         */
        if (ret < 0 || ret >= sizeof(btf_type_name))
@@ -9537,7 +8297,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
        struct btf *btf;
        int err;
 
-       btf = libbpf_find_kernel_btf();
+       btf = btf__load_vmlinux_btf();
        err = libbpf_get_error(btf);
        if (err) {
                pr_warn("vmlinux BTF is not found\n");
@@ -9556,8 +8316,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 {
        struct bpf_prog_info_linear *info_linear;
        struct bpf_prog_info *info;
-       struct btf *btf = NULL;
-       int err = -EINVAL;
+       struct btf *btf;
+       int err;
 
        info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
        err = libbpf_get_error(info_linear);
@@ -9566,12 +8326,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
                        attach_prog_fd);
                return err;
        }
+
+       err = -EINVAL;
        info = &info_linear->info;
        if (!info->btf_id) {
                pr_warn("The target program doesn't have BTF\n");
                goto out;
        }
-       if (btf__get_from_id(info->btf_id, &btf)) {
+       btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(btf)) {
                pr_warn("Failed to get BTF of the program\n");
                goto out;
        }
@@ -10055,7 +8818,7 @@ struct bpf_link {
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
        int ret;
-       
+
        ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
        return libbpf_err_errno(ret);
 }
@@ -10346,25 +9109,28 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
        return pfd;
 }
 
-struct bpf_program_attach_kprobe_opts {
-       bool retprobe;
-       unsigned long offset;
-};
-
-static struct bpf_link*
+struct bpf_link *
 bpf_program__attach_kprobe_opts(struct bpf_program *prog,
                                const char *func_name,
-                               struct bpf_program_attach_kprobe_opts *opts)
+                               struct bpf_kprobe_opts *opts)
 {
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
+       unsigned long offset;
+       bool retprobe;
        int pfd, err;
 
-       pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name,
-                                   opts->offset, -1 /* pid */);
+       if (!OPTS_VALID(opts, bpf_kprobe_opts))
+               return libbpf_err_ptr(-EINVAL);
+
+       retprobe = OPTS_GET(opts, retprobe, false);
+       offset = OPTS_GET(opts, offset, 0);
+
+       pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+                                   offset, -1 /* pid */);
        if (pfd < 0) {
                pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
-                       prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
+                       prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return libbpf_err_ptr(pfd);
        }
@@ -10373,7 +9139,7 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog,
        if (err) {
                close(pfd);
                pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
-                       prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
+                       prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return libbpf_err_ptr(err);
        }
@@ -10384,9 +9150,9 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
                                            bool retprobe,
                                            const char *func_name)
 {
-       struct bpf_program_attach_kprobe_opts opts = {
+       DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
                .retprobe = retprobe,
-       };
+       );
 
        return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
 }
@@ -10394,7 +9160,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
                                      struct bpf_program *prog)
 {
-       struct bpf_program_attach_kprobe_opts opts;
+       DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
        unsigned long offset = 0;
        struct bpf_link *link;
        const char *func_name;
@@ -10404,13 +9170,14 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
        func_name = prog->sec_name + sec->len;
        opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
 
-       n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%lx", &func, &offset);
+       n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
        if (n < 1) {
                err = -EINVAL;
                pr_warn("kprobe name is invalid: %s\n", func_name);
                return libbpf_err_ptr(err);
        }
        if (opts.retprobe && offset != 0) {
+               free(func);
                err = -EINVAL;
                pr_warn("kretprobes do not support offset specification\n");
                return libbpf_err_ptr(err);
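
The switch from "%lx" to "%li" lets sscanf() auto-detect the offset base, so
both decimal and 0x-prefixed offsets now parse. A standalone sketch (function
name and offset are hypothetical):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		long off = 0;
		char *func = NULL;

		/* "+16" and "+0x10" now both yield an offset of 16 */
		int n = sscanf("do_sys_open+0x10", "%m[a-zA-Z0-9_.]+%li",
			       &func, &off);

		printf("n=%d func=%s off=%ld\n", n, func, off);
		free(func);
		return 0;
	}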
index 6e61342..1271d99 100644 (file)
@@ -94,8 +94,26 @@ struct bpf_object_open_opts {
         * system Kconfig for CONFIG_xxx externs.
         */
        const char *kconfig;
+       /* Path to the custom BTF to be used for BPF CO-RE relocations.
+        * This custom BTF completely replaces the use of vmlinux BTF
+        * for the purpose of CO-RE relocations.
+        * NOTE: any other BPF feature (e.g., fentry/fexit programs,
+        * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
+        */
+       const char *btf_custom_path;
 };
-#define bpf_object_open_opts__last_field kconfig
+#define bpf_object_open_opts__last_field btf_custom_path
+
+struct bpf_kprobe_opts {
+       /* size of this struct, for forward/backward compatibility */
+       size_t sz;
+       /* function's offset to install kprobe to */
+       unsigned long offset;
+       /* kprobe is return probe */
+       bool retprobe;
+       size_t :0;
+};
+#define bpf_kprobe_opts__last_field retprobe
 
 LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
 LIBBPF_API struct bpf_object *
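
A usage sketch for the newly exported opts-based attach API (the probe target
and offset are assumptions for illustration):

	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
		.offset = 0x10,		/* offset within the traced function */
	);
	struct bpf_link *link;

	link = bpf_program__attach_kprobe_opts(prog, "do_sys_open", &opts);
	if (libbpf_get_error(link))
		/* handle attach error */;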
@@ -243,6 +261,10 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
                           const char *func_name);
 LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+                                const char *func_name,
+                                struct bpf_kprobe_opts *opts);
+LIBBPF_API struct bpf_link *
 bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
                           pid_t pid, const char *binary_path,
                           size_t func_offset);
@@ -477,6 +499,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
index 944c99d..58e0fb2 100644 (file)
@@ -371,7 +371,15 @@ LIBBPF_0.4.0 {
 LIBBPF_0.5.0 {
        global:
                bpf_map__initial_value;
+               bpf_map__pin_path;
                bpf_map_lookup_and_delete_elem_flags;
+               bpf_program__attach_kprobe_opts;
                bpf_object__gen_loader;
+               btf__load_from_kernel_by_id;
+               btf__load_from_kernel_by_id_split;
+               btf__load_into_kernel;
+               btf__load_module_btf;
+               btf__load_vmlinux_btf;
+               btf_dump__dump_type_data;
                libbpf_set_strict_mode;
 } LIBBPF_0.4.0;
index 016ca7c..f7b691d 100644 (file)
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <linux/err.h>
 #include "libbpf_legacy.h"
+#include "relo_core.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -366,76 +367,6 @@ struct bpf_line_info_min {
        __u32   line_col;
 };
 
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
-       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
-       BPF_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
-       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
-       BPF_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
-       BPF_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
-       BPF_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
-       BPF_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
-       BPF_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
-       BPF_TYPE_EXISTS = 8,            /* type existence in target kernel */
-       BPF_TYPE_SIZE = 9,              /* type size in bytes */
-       BPF_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
-       BPF_ENUMVAL_VALUE = 11,         /* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- *   its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- *   type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- *   interpretation depends on specific relocation kind:
- *     - for field-based relocations, string encodes an accessed field using
- *     a sequence of field and array indices, separated by colon (:). It's
- *     conceptually very close to LLVM's getelementptr ([0]) instruction's
- *     arguments for identifying offset to a field.
- *     - for type-based relocations, strings is expected to be just "0";
- *     - for enum value-based relocations, string contains an index of enum
- *     value within its enum type;
- *
- * Example to provide a better feel.
- *
- *   struct sample {
- *       int a;
- *       struct {
- *           int b[10];
- *       };
- *   };
- *
- *   struct sample *s = ...;
- *   int x = &s->a;     // encoded as "0:0" (a is field #0)
- *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1, 
- *                      // b is field #0 inside anon struct, accessing elem #5)
- *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example  will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- *               __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
-       __u32   insn_off;
-       __u32   type_id;
-       __u32   access_str_off;
-       enum bpf_core_relo_kind kind;
-};
 
 typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
 typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
@@ -494,4 +425,14 @@ static inline void *libbpf_ptr(void *ret)
        return ret;
 }
 
+static inline bool str_is_empty(const char *s)
+{
+       return !s || !s[0];
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
new file mode 100644 (file)
index 0000000..4016ed4
--- /dev/null
@@ -0,0 +1,1295 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Facebook */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <linux/err.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+       __u32 type_id;          /* struct/union type or array element type */
+       __u32 idx;              /* field index or array index */
+       const char *name;       /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+       const struct btf *btf;
+       /* high-level spec: named fields and array indices only */
+       struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+       /* original unresolved (no skip_mods_or_typedefs) root type ID */
+       __u32 root_type_id;
+       /* CO-RE relocation kind */
+       enum bpf_core_relo_kind relo_kind;
+       /* high-level spec length */
+       int len;
+       /* raw, low-level spec: 1-to-1 with accessor spec string */
+       int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+       /* raw spec length */
+       int raw_len;
+       /* field bit offset represented by spec */
+       __u32 bit_offset;
+};
+
+static bool is_flex_arr(const struct btf *btf,
+                       const struct bpf_core_accessor *acc,
+                       const struct btf_array *arr)
+{
+       const struct btf_type *t;
+
+       /* not a flexible array if it's not inside a struct or has a non-zero size */
+       if (!acc->name || arr->nelems > 0)
+               return false;
+
+       /* has to be the last member of enclosing struct */
+       t = btf__type_by_id(btf, acc->type_id);
+       return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+       case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+       case BPF_FIELD_EXISTS: return "field_exists";
+       case BPF_FIELD_SIGNED: return "signed";
+       case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+       case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+       case BPF_TYPE_ID_LOCAL: return "local_type_id";
+       case BPF_TYPE_ID_TARGET: return "target_type_id";
+       case BPF_TYPE_EXISTS: return "type_exists";
+       case BPF_TYPE_SIZE: return "type_size";
+       case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+       case BPF_ENUMVAL_VALUE: return "enumval_value";
+       default: return "unknown";
+       }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_FIELD_BYTE_OFFSET:
+       case BPF_FIELD_BYTE_SIZE:
+       case BPF_FIELD_EXISTS:
+       case BPF_FIELD_SIGNED:
+       case BPF_FIELD_LSHIFT_U64:
+       case BPF_FIELD_RSHIFT_U64:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_TYPE_ID_LOCAL:
+       case BPF_TYPE_ID_TARGET:
+       case BPF_TYPE_EXISTS:
+       case BPF_TYPE_SIZE:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_ENUMVAL_EXISTS:
+       case BPF_ENUMVAL_VALUE:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ *   struct sample {
+ *       int __unimportant;
+ *       struct {
+ *           int __1;
+ *           int __2;
+ *           int a[7];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *
+ *   int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
+ *   - field 'a' access (corresponds to '2' in low-level spec);
+ *   - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
+ * string to specify the enumerator's value index that needs to be relocated.
+ */
+static int bpf_core_parse_spec(const struct btf *btf,
+                              __u32 type_id,
+                              const char *spec_str,
+                              enum bpf_core_relo_kind relo_kind,
+                              struct bpf_core_spec *spec)
+{
+       int access_idx, parsed_len, i;
+       struct bpf_core_accessor *acc;
+       const struct btf_type *t;
+       const char *name;
+       __u32 id;
+       __s64 sz;
+
+       if (str_is_empty(spec_str) || *spec_str == ':')
+               return -EINVAL;
+
+       memset(spec, 0, sizeof(*spec));
+       spec->btf = btf;
+       spec->root_type_id = type_id;
+       spec->relo_kind = relo_kind;
+
+       /* type-based relocations don't have a field access string */
+       if (core_relo_is_type_based(relo_kind)) {
+               if (strcmp(spec_str, "0"))
+                       return -EINVAL;
+               return 0;
+       }
+
+       /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+       while (*spec_str) {
+               if (*spec_str == ':')
+                       ++spec_str;
+               if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+                       return -EINVAL;
+               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                       return -E2BIG;
+               spec_str += parsed_len;
+               spec->raw_spec[spec->raw_len++] = access_idx;
+       }
+
+       if (spec->raw_len == 0)
+               return -EINVAL;
+
+       t = skip_mods_and_typedefs(btf, type_id, &id);
+       if (!t)
+               return -EINVAL;
+
+       access_idx = spec->raw_spec[0];
+       acc = &spec->spec[0];
+       acc->type_id = id;
+       acc->idx = access_idx;
+       spec->len++;
+
+       if (core_relo_is_enumval_based(relo_kind)) {
+               if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+                       return -EINVAL;
+
+               /* record enumerator name in the first accessor */
+               acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+               return 0;
+       }
+
+       if (!core_relo_is_field_based(relo_kind))
+               return -EINVAL;
+
+       sz = btf__resolve_size(btf, id);
+       if (sz < 0)
+               return sz;
+       spec->bit_offset = access_idx * sz * 8;
+
+       for (i = 1; i < spec->raw_len; i++) {
+               t = skip_mods_and_typedefs(btf, id, &id);
+               if (!t)
+                       return -EINVAL;
+
+               access_idx = spec->raw_spec[i];
+               acc = &spec->spec[spec->len];
+
+               if (btf_is_composite(t)) {
+                       const struct btf_member *m;
+                       __u32 bit_offset;
+
+                       if (access_idx >= btf_vlen(t))
+                               return -EINVAL;
+
+                       bit_offset = btf_member_bit_offset(t, access_idx);
+                       spec->bit_offset += bit_offset;
+
+                       m = btf_members(t) + access_idx;
+                       if (m->name_off) {
+                               name = btf__name_by_offset(btf, m->name_off);
+                               if (str_is_empty(name))
+                                       return -EINVAL;
+
+                               acc->type_id = id;
+                               acc->idx = access_idx;
+                               acc->name = name;
+                               spec->len++;
+                       }
+
+                       id = m->type;
+               } else if (btf_is_array(t)) {
+                       const struct btf_array *a = btf_array(t);
+                       bool flex;
+
+                       t = skip_mods_and_typedefs(btf, a->type, &id);
+                       if (!t)
+                               return -EINVAL;
+
+                       flex = is_flex_arr(btf, acc - 1, a);
+                       if (!flex && access_idx >= a->nelems)
+                               return -EINVAL;
+
+                       spec->spec[spec->len].type_id = id;
+                       spec->spec[spec->len].idx = access_idx;
+                       spec->len++;
+
+                       sz = btf__resolve_size(btf, id);
+                       if (sz < 0)
+                               return sz;
+                       spec->bit_offset += access_idx * sz * 8;
+               } else {
+                       pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+                               type_id, spec_str, i, id, btf_kind_str(t));
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
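
As a concrete check of the arithmetic above, a hedged worked example using the `struct sample` layout from the function comment:

/* access string "0:1:2:3" (&s->a[3]) parses into:
 *   raw_spec = [0, 1, 2, 3], raw_len = 4
 *   high-level spec: s[0] -> field 'a' -> elem #3, so spec->len = 3
 *   bit_offset = 0 * sizeof(struct sample) * 8   (initial s[0] deref)
 *              + 32                              (anon struct at byte 4)
 *              + 64                              (a follows __1 and __2)
 *              + 3 * 32                          (array elem #3)
 *              = 192 bits, i.e. byte offset 24
 */
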
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ *   - any two STRUCTs/UNIONs are compatible and can be mixed;
+ *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ *   - any two PTRs are always compatible;
+ *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ *     least one of the enums should be anonymous; sizes are not checked;
+ *   - for INT, size and signedness are ignored;
+ *   - any two FLOATs are always compatible;
+ *   - for ARRAY, dimensionality is ignored, element types are checked for
+ *     compatibility recursively;
+ *   - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+                                     __u32 local_id,
+                                     const struct btf *targ_btf,
+                                     __u32 targ_id)
+{
+       const struct btf_type *local_type, *targ_type;
+
+recur:
+       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!local_type || !targ_type)
+               return -EINVAL;
+
+       if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+               return 1;
+       if (btf_kind(local_type) != btf_kind(targ_type))
+               return 0;
+
+       switch (btf_kind(local_type)) {
+       case BTF_KIND_PTR:
+       case BTF_KIND_FLOAT:
+               return 1;
+       case BTF_KIND_FWD:
+       case BTF_KIND_ENUM: {
+               const char *local_name, *targ_name;
+               size_t local_len, targ_len;
+
+               local_name = btf__name_by_offset(local_btf,
+                                                local_type->name_off);
+               targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+               local_len = bpf_core_essential_name_len(local_name);
+               targ_len = bpf_core_essential_name_len(targ_name);
+               /* one of them is anonymous or both w/ same flavor-less names */
+               return local_len == 0 || targ_len == 0 ||
+                      (local_len == targ_len &&
+                       strncmp(local_name, targ_name, local_len) == 0);
+       }
+       case BTF_KIND_INT:
+               /* just reject deprecated bitfield-like integers; all other
+                * integers are by default compatible between each other
+                */
+               return btf_int_offset(local_type) == 0 &&
+                      btf_int_offset(targ_type) == 0;
+       case BTF_KIND_ARRAY:
+               local_id = btf_array(local_type)->type;
+               targ_id = btf_array(targ_type)->type;
+               goto recur;
+       default:
+               pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+                       btf_kind(local_type), local_id, targ_id);
+               return 0;
+       }
+}
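
A few hedged illustrations of how these rules play out (type names are made up):

/*   struct a    vs union b         -> compatible (composites mix freely)
 *   enum e___v1 vs enum e___v2     -> compatible (same essential name)
 *   enum e      vs anonymous enum  -> compatible (one side is anonymous)
 *   __u32       vs __u64           -> compatible (INT size/signedness ignored)
 *   int         vs int *           -> incompatible (kind mismatch)
 */
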
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+                                const struct bpf_core_accessor *local_acc,
+                                const struct btf *targ_btf,
+                                __u32 targ_id,
+                                struct bpf_core_spec *spec,
+                                __u32 *next_targ_id)
+{
+       const struct btf_type *local_type, *targ_type;
+       const struct btf_member *local_member, *m;
+       const char *local_name, *targ_name;
+       __u32 local_id;
+       int i, n, found;
+
+       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!targ_type)
+               return -EINVAL;
+       if (!btf_is_composite(targ_type))
+               return 0;
+
+       local_id = local_acc->type_id;
+       local_type = btf__type_by_id(local_btf, local_id);
+       local_member = btf_members(local_type) + local_acc->idx;
+       local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+       n = btf_vlen(targ_type);
+       m = btf_members(targ_type);
+       for (i = 0; i < n; i++, m++) {
+               __u32 bit_offset;
+
+               bit_offset = btf_member_bit_offset(targ_type, i);
+
+               /* too deep struct/union/array nesting */
+               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                       return -E2BIG;
+
+               /* speculate this member will be the good one */
+               spec->bit_offset += bit_offset;
+               spec->raw_spec[spec->raw_len++] = i;
+
+               targ_name = btf__name_by_offset(targ_btf, m->name_off);
+               if (str_is_empty(targ_name)) {
+                       /* embedded struct/union, we need to go deeper */
+                       found = bpf_core_match_member(local_btf, local_acc,
+                                                     targ_btf, m->type,
+                                                     spec, next_targ_id);
+                       if (found) /* either found or error */
+                               return found;
+               } else if (strcmp(local_name, targ_name) == 0) {
+                       /* matching named field */
+                       struct bpf_core_accessor *targ_acc;
+
+                       targ_acc = &spec->spec[spec->len++];
+                       targ_acc->type_id = targ_id;
+                       targ_acc->idx = i;
+                       targ_acc->name = targ_name;
+
+                       *next_targ_id = m->type;
+                       found = bpf_core_fields_are_compat(local_btf,
+                                                          local_member->type,
+                                                          targ_btf, m->type);
+                       if (!found)
+                               spec->len--; /* pop accessor */
+                       return found;
+               }
+               /* member turned out not to be what we looked for */
+               spec->bit_offset -= bit_offset;
+               spec->raw_len--;
+       }
+
+       return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+                              const struct btf *targ_btf, __u32 targ_id,
+                              struct bpf_core_spec *targ_spec)
+{
+       const struct btf_type *targ_type;
+       const struct bpf_core_accessor *local_acc;
+       struct bpf_core_accessor *targ_acc;
+       int i, sz, matched;
+
+       memset(targ_spec, 0, sizeof(*targ_spec));
+       targ_spec->btf = targ_btf;
+       targ_spec->root_type_id = targ_id;
+       targ_spec->relo_kind = local_spec->relo_kind;
+
+       if (core_relo_is_type_based(local_spec->relo_kind)) {
+               return bpf_core_types_are_compat(local_spec->btf,
+                                                local_spec->root_type_id,
+                                                targ_btf, targ_id);
+       }
+
+       local_acc = &local_spec->spec[0];
+       targ_acc = &targ_spec->spec[0];
+
+       if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+               size_t local_essent_len, targ_essent_len;
+               const struct btf_enum *e;
+               const char *targ_name;
+
+               /* has to resolve to an enum */
+               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+               if (!btf_is_enum(targ_type))
+                       return 0;
+
+               local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+               for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+                       targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+                       targ_essent_len = bpf_core_essential_name_len(targ_name);
+                       if (targ_essent_len != local_essent_len)
+                               continue;
+                       if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+                               targ_acc->type_id = targ_id;
+                               targ_acc->idx = i;
+                               targ_acc->name = targ_name;
+                               targ_spec->len++;
+                               targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+                               targ_spec->raw_len++;
+                               return 1;
+                       }
+               }
+               return 0;
+       }
+
+       if (!core_relo_is_field_based(local_spec->relo_kind))
+               return -EINVAL;
+
+       for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+                                                  &targ_id);
+               if (!targ_type)
+                       return -EINVAL;
+
+               if (local_acc->name) {
+                       matched = bpf_core_match_member(local_spec->btf,
+                                                       local_acc,
+                                                       targ_btf, targ_id,
+                                                       targ_spec, &targ_id);
+                       if (matched <= 0)
+                               return matched;
+               } else {
+                       /* for i=0, targ_id is already treated as array element
+                        * type (because it's the original struct), for others
+                        * we should find array element type first
+                        */
+                       if (i > 0) {
+                               const struct btf_array *a;
+                               bool flex;
+
+                               if (!btf_is_array(targ_type))
+                                       return 0;
+
+                               a = btf_array(targ_type);
+                               flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+                               if (!flex && local_acc->idx >= a->nelems)
+                                       return 0;
+                               if (!skip_mods_and_typedefs(targ_btf, a->type,
+                                                           &targ_id))
+                                       return -EINVAL;
+                       }
+
+                       /* too deep struct/union/array nesting */
+                       if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                               return -E2BIG;
+
+                       targ_acc->type_id = targ_id;
+                       targ_acc->idx = local_acc->idx;
+                       targ_acc->name = NULL;
+                       targ_spec->len++;
+                       targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+                       targ_spec->raw_len++;
+
+                       sz = btf__resolve_size(targ_btf, targ_id);
+                       if (sz < 0)
+                               return sz;
+                       targ_spec->bit_offset += local_acc->idx * sz * 8;
+               }
+       }
+
+       return 1;
+}
+
+static int bpf_core_calc_field_relo(const char *prog_name,
+                                   const struct bpf_core_relo *relo,
+                                   const struct bpf_core_spec *spec,
+                                   __u32 *val, __u32 *field_sz, __u32 *type_id,
+                                   bool *validate)
+{
+       const struct bpf_core_accessor *acc;
+       const struct btf_type *t;
+       __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+       const struct btf_member *m;
+       const struct btf_type *mt;
+       bool bitfield;
+       __s64 sz;
+
+       *field_sz = 0;
+
+       if (relo->kind == BPF_FIELD_EXISTS) {
+               *val = spec ? 1 : 0;
+               return 0;
+       }
+
+       if (!spec)
+               return -EUCLEAN; /* request instruction poisoning */
+
+       acc = &spec->spec[spec->len - 1];
+       t = btf__type_by_id(spec->btf, acc->type_id);
+
+       /* a[n] accessor needs special handling */
+       if (!acc->name) {
+               if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+                       *val = spec->bit_offset / 8;
+                       /* remember field size for load/store mem size */
+                       sz = btf__resolve_size(spec->btf, acc->type_id);
+                       if (sz < 0)
+                               return -EINVAL;
+                       *field_sz = sz;
+                       *type_id = acc->type_id;
+               } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+                       sz = btf__resolve_size(spec->btf, acc->type_id);
+                       if (sz < 0)
+                               return -EINVAL;
+                       *val = sz;
+               } else {
+                       pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+                               prog_name, relo->kind, relo->insn_off / 8);
+                       return -EINVAL;
+               }
+               if (validate)
+                       *validate = true;
+               return 0;
+       }
+
+       m = btf_members(t) + acc->idx;
+       mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+       bit_off = spec->bit_offset;
+       bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+       bitfield = bit_sz > 0;
+       if (bitfield) {
+               byte_sz = mt->size;
+               byte_off = bit_off / 8 / byte_sz * byte_sz;
+               /* figure out smallest int size necessary for bitfield load */
+               while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+                       if (byte_sz >= 8) {
+                               /* bitfield can't be read with 64-bit read */
+                               pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+                                       prog_name, relo->kind, relo->insn_off / 8);
+                               return -E2BIG;
+                       }
+                       byte_sz *= 2;
+                       byte_off = bit_off / 8 / byte_sz * byte_sz;
+               }
+       } else {
+               sz = btf__resolve_size(spec->btf, field_type_id);
+               if (sz < 0)
+                       return -EINVAL;
+               byte_sz = sz;
+               byte_off = spec->bit_offset / 8;
+               bit_sz = byte_sz * 8;
+       }
+
+       /* for bitfields, all the relocatable aspects are ambiguous and we
+        * might disagree with the compiler, so turn off validation of the expected
+        * value, except for signedness
+        */
+       if (validate)
+               *validate = !bitfield;
+
+       switch (relo->kind) {
+       case BPF_FIELD_BYTE_OFFSET:
+               *val = byte_off;
+               if (!bitfield) {
+                       *field_sz = byte_sz;
+                       *type_id = field_type_id;
+               }
+               break;
+       case BPF_FIELD_BYTE_SIZE:
+               *val = byte_sz;
+               break;
+       case BPF_FIELD_SIGNED:
+               /* enums will be assumed unsigned */
+               *val = btf_is_enum(mt) ||
+                      (btf_int_encoding(mt) & BTF_INT_SIGNED);
+               if (validate)
+                       *validate = true; /* signedness is never ambiguous */
+               break;
+       case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+               *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+               *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+               break;
+       case BPF_FIELD_RSHIFT_U64:
+               *val = 64 - bit_sz;
+               if (validate)
+                       *validate = true; /* right shift is never ambiguous */
+               break;
+       case BPF_FIELD_EXISTS:
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
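
To make the bitfield sizing loop above concrete, a standalone sketch (a hypothetical helper, not part of this patch) that finds the smallest power-of-two load covering a bitfield; the real code bails out with -E2BIG once byte_sz reaches 8:

#include <stdio.h>

/* mirrors the sizing loop in bpf_core_calc_field_relo() */
static unsigned int bitfield_load_size(unsigned int bit_off,
				       unsigned int bit_sz,
				       unsigned int byte_sz)
{
	unsigned int byte_off = bit_off / 8 / byte_sz * byte_sz;

	while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
		byte_sz *= 2;
		byte_off = bit_off / 8 / byte_sz * byte_sz;
	}
	return byte_sz;
}

int main(void)
{
	/* a 5-bit field at bit 30 straddles a 4-byte boundary,
	 * so an 8-byte load is required
	 */
	printf("%u\n", bitfield_load_size(30, 5, 4)); /* prints 8 */
	return 0;
}
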
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+                                  const struct bpf_core_spec *spec,
+                                  __u32 *val)
+{
+       __s64 sz;
+
+       /* type-based relos return zero when target type is not found */
+       if (!spec) {
+               *val = 0;
+               return 0;
+       }
+
+       switch (relo->kind) {
+       case BPF_TYPE_ID_TARGET:
+               *val = spec->root_type_id;
+               break;
+       case BPF_TYPE_EXISTS:
+               *val = 1;
+               break;
+       case BPF_TYPE_SIZE:
+               sz = btf__resolve_size(spec->btf, spec->root_type_id);
+               if (sz < 0)
+                       return -EINVAL;
+               *val = sz;
+               break;
+       case BPF_TYPE_ID_LOCAL:
+       /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+                                     const struct bpf_core_spec *spec,
+                                     __u32 *val)
+{
+       const struct btf_type *t;
+       const struct btf_enum *e;
+
+       switch (relo->kind) {
+       case BPF_ENUMVAL_EXISTS:
+               *val = spec ? 1 : 0;
+               break;
+       case BPF_ENUMVAL_VALUE:
+               if (!spec)
+                       return -EUCLEAN; /* request instruction poisoning */
+               t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+               e = btf_enum(t) + spec->spec[0].idx;
+               *val = e->val;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+struct bpf_core_relo_res
+{
+       /* expected value in the instruction, unless validate == false */
+       __u32 orig_val;
+       /* new value that needs to be patched up to */
+       __u32 new_val;
+       /* relocation unsuccessful, poison instruction, but don't fail load */
+       bool poison;
+       /* some relocations can't be validated against orig_val */
+       bool validate;
+       /* for field byte offset relocations or the forms:
+        *     *(T *)(rX + <off>) = rY
+        *     rX = *(T *)(rY + <off>),
+        * we remember original and resolved field size to adjust direct
+        * memory loads of pointers and integers; this is necessary for 32-bit
+        * host kernel architectures, but also allows to automatically
+        * host kernel architectures, but it also allows automatically
+        * relocating fields that were resized from, e.g., u32 to u64, etc.
+       bool fail_memsz_adjust;
+       __u32 orig_sz;
+       __u32 orig_type_id;
+       __u32 new_sz;
+       __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const char *prog_name,
+                             const struct bpf_core_relo *relo,
+                             int relo_idx,
+                             const struct bpf_core_spec *local_spec,
+                             const struct bpf_core_spec *targ_spec,
+                             struct bpf_core_relo_res *res)
+{
+       int err = -EOPNOTSUPP;
+
+       res->orig_val = 0;
+       res->new_val = 0;
+       res->poison = false;
+       res->validate = true;
+       res->fail_memsz_adjust = false;
+       res->orig_sz = res->new_sz = 0;
+       res->orig_type_id = res->new_type_id = 0;
+
+       if (core_relo_is_field_based(relo->kind)) {
+               err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
+                                              &res->orig_val, &res->orig_sz,
+                                              &res->orig_type_id, &res->validate);
+               err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
+                                                     &res->new_val, &res->new_sz,
+                                                     &res->new_type_id, NULL);
+               if (err)
+                       goto done;
+               /* Validate if it's safe to adjust load/store memory size.
+                * Adjustments are performed only if original and new memory
+                * sizes differ.
+                */
+               res->fail_memsz_adjust = false;
+               if (res->orig_sz != res->new_sz) {
+                       const struct btf_type *orig_t, *new_t;
+
+                       orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+                       new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+                       /* There are two use cases in which it's safe to
+                        * adjust load/store's mem size:
+                        *   - reading a 32-bit kernel pointer, while on the
+                        *   BPF side pointers are always 64-bit; in this case
+                        *   it's safe to "downsize" instruction size due to
+                        *   the pointer being treated as an unsigned integer
+                        *   with zero-extended upper 32 bits;
+                        *   - reading unsigned integers, where zero-extension
+                        *   again preserves the value correctly.
+                        *
+                        * In all other cases it's incorrect to attempt to
+                        * load/store field because read value will be
+                        * incorrect, so we poison relocated instruction.
+                        */
+                       if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+                               goto done;
+                       if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+                           btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+                           btf_int_encoding(new_t) != BTF_INT_SIGNED)
+                               goto done;
+
+                       /* mark as invalid mem size adjustment, but this will
+                        * only be checked for LDX/STX/ST insns
+                        */
+                       res->fail_memsz_adjust = true;
+               }
+       } else if (core_relo_is_type_based(relo->kind)) {
+               err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+               err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+       } else if (core_relo_is_enumval_based(relo->kind)) {
+               err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+               err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+       }
+
+done:
+       if (err == -EUCLEAN) {
+               /* EUCLEAN is used to signal instruction poisoning request */
+               res->poison = true;
+               err = 0;
+       } else if (err == -EOPNOTSUPP) {
+               /* EOPNOTSUPP means unknown/unsupported relocation */
+               pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+                       prog_name, relo_idx, core_relo_kind_str(relo->kind),
+                       relo->kind, relo->insn_off / 8);
+       }
+
+       return err;
+}
+
+/*
+ * Turn instruction for which CO-RE relocation failed into an invalid one
+ * with a distinct signature.
+ */
+static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
+                                int insn_idx, struct bpf_insn *insn)
+{
+       pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+                prog_name, relo_idx, insn_idx);
+       insn->code = BPF_JMP | BPF_CALL;
+       insn->dst_reg = 0;
+       insn->src_reg = 0;
+       insn->off = 0;
+       /* if this instruction is reachable (not a dead code),
+        * verifier will complain with the following message:
+        * invalid func unknown#195896080
+        */
+       insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+       switch (BPF_SIZE(insn->code)) {
+       case BPF_DW: return 8;
+       case BPF_W: return 4;
+       case BPF_H: return 2;
+       case BPF_B: return 1;
+       default: return -1;
+       }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+       switch (sz) {
+       case 8: return BPF_DW;
+       case 4: return BPF_W;
+       case 2: return BPF_H;
+       case 1: return BPF_B;
+       default: return -1;
+       }
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations, the target spec will be NULL if the field/type
+ * is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ */
+static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+                              int insn_idx, const struct bpf_core_relo *relo,
+                              int relo_idx, const struct bpf_core_relo_res *res)
+{
+       __u32 orig_val, new_val;
+       __u8 class;
+
+       class = BPF_CLASS(insn->code);
+
+       if (res->poison) {
+poison:
+               /* poison second part of ldimm64 to avoid a confusing error from
+                * verifier about "unknown opcode 00"
+                */
+               if (is_ldimm64_insn(insn))
+                       bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
+               bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
+               return 0;
+       }
+
+       orig_val = res->orig_val;
+       new_val = res->new_val;
+
+       switch (class) {
+       case BPF_ALU:
+       case BPF_ALU64:
+               if (BPF_SRC(insn->code) != BPF_K)
+                       return -EINVAL;
+               if (res->validate && insn->imm != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+                               prog_name, relo_idx,
+                               insn_idx, insn->imm, orig_val, new_val);
+                       return -EINVAL;
+               }
+               orig_val = insn->imm;
+               insn->imm = new_val;
+               pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+                        prog_name, relo_idx, insn_idx,
+                        orig_val, new_val);
+               break;
+       case BPF_LDX:
+       case BPF_ST:
+       case BPF_STX:
+               if (res->validate && insn->off != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+                               prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+                       return -EINVAL;
+               }
+               if (new_val > SHRT_MAX) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+                               prog_name, relo_idx, insn_idx, new_val);
+                       return -ERANGE;
+               }
+               if (res->fail_memsz_adjust) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+                               "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+                               prog_name, relo_idx, insn_idx);
+                       goto poison;
+               }
+
+               orig_val = insn->off;
+               insn->off = new_val;
+               pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+                        prog_name, relo_idx, insn_idx, orig_val, new_val);
+
+               if (res->new_sz != res->orig_sz) {
+                       int insn_bytes_sz, insn_bpf_sz;
+
+                       insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+                       if (insn_bytes_sz != res->orig_sz) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+                                       prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+                               return -EINVAL;
+                       }
+
+                       insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+                       if (insn_bpf_sz < 0) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+                                       prog_name, relo_idx, insn_idx, res->new_sz);
+                               return -EINVAL;
+                       }
+
+                       insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+                       pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+                                prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+               }
+               break;
+       case BPF_LD: {
+               __u64 imm;
+
+               if (!is_ldimm64_insn(insn) ||
+                   insn[0].src_reg != 0 || insn[0].off != 0 ||
+                   insn[1].code != 0 || insn[1].dst_reg != 0 ||
+                   insn[1].src_reg != 0 || insn[1].off != 0) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+                               prog_name, relo_idx, insn_idx);
+                       return -EINVAL;
+               }
+
+               imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+               if (res->validate && imm != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+                               prog_name, relo_idx,
+                               insn_idx, (unsigned long long)imm,
+                               orig_val, new_val);
+                       return -EINVAL;
+               }
+
+               insn[0].imm = new_val;
+               insn[1].imm = 0; /* currently only 32-bit values are supported */
+               pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+                        prog_name, relo_idx, insn_idx,
+                        (unsigned long long)imm, new_val);
+               break;
+       }
+       default:
+               pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+                       prog_name, relo_idx, insn_idx, insn->code,
+                       insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+               return -EINVAL;
+       }
+
+       return 0;
+}
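
A hedged illustration of case 4 above, combining the offset patch with the mem-size adjustment (the offsets and sizes are invented): if a field moved from offset 8 to 16 and grew from u32 to u64, the LDX instruction is rewritten as:

/*   before: BPF_LDX | BPF_MEM | BPF_W,  off = 8     r1 = *(u32 *)(r2 + 8)
 *   after:  BPF_LDX | BPF_MEM | BPF_DW, off = 16    r1 = *(u64 *)(r2 + 16)
 *
 * the size change is permitted only for pointers and unsigned integers,
 * as validated in bpf_core_calc_relo()
 */
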
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+       const struct btf_type *t;
+       const struct btf_enum *e;
+       const char *s;
+       __u32 type_id;
+       int i;
+
+       type_id = spec->root_type_id;
+       t = btf__type_by_id(spec->btf, type_id);
+       s = btf__name_by_offset(spec->btf, t->name_off);
+
+       libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+       if (core_relo_is_type_based(spec->relo_kind))
+               return;
+
+       if (core_relo_is_enumval_based(spec->relo_kind)) {
+               t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+               e = btf_enum(t) + spec->raw_spec[0];
+               s = btf__name_by_offset(spec->btf, e->name_off);
+
+               libbpf_print(level, "::%s = %u", s, e->val);
+               return;
+       }
+
+       if (core_relo_is_field_based(spec->relo_kind)) {
+               for (i = 0; i < spec->len; i++) {
+                       if (spec->spec[i].name)
+                               libbpf_print(level, ".%s", spec->spec[i].name);
+                       else if (i > 0 || spec->spec[i].idx > 0)
+                               libbpf_print(level, "[%u]", spec->spec[i].idx);
+               }
+
+               libbpf_print(level, " (");
+               for (i = 0; i < spec->raw_len; i++)
+                       libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+               if (spec->bit_offset % 8)
+                       libbpf_print(level, " @ offset %u.%u)",
+                                    spec->bit_offset / 8, spec->bit_offset % 8);
+               else
+                       libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+               return;
+       }
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For a given local type, find corresponding candidate target types.
+ *    Candidate type is a type with the same "essential" name, ignoring
+ *    everything after last triple underscore (___). E.g., `sample`,
+ *    `sample___flavor_one`, `sample___flavor_another_one` are all candidates
+ *    for each other. Names with a triple underscore are referred to as
+ *    "flavors" and are useful, among other things, for specifying and
+ *    supporting incompatible variations of the same kernel struct, which
+ *    might differ between different kernel versions and/or build
+ *    configurations.
+ *
+ *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ *    converter when deduplicated BTF of a kernel still contains more than
+ *    one different type with the same name. In that case, ___2, ___3, etc.
+ *    are appended starting from the second name conflict. But struct flavors
+ *    are also useful when defined "locally", in the BPF program, to extract
+ *    the same data across incompatible changes between different kernel
+ *    versions/configurations. For instance, to handle field renames between
+ *    kernel versions, one can use two flavors of the struct name with the
+ *    same common name and use conditional relocations to extract that field,
+ *    depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ *    candidate target type. Matching involves finding corresponding
+ *    high-level spec accessors, meaning that all named fields should match,
+ *    as well as all array accesses should be within the actual bounds. Also,
+ *    types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ *    matching the spec. As long as all the specs resolve to the same set of
+ *    offsets across all candidates, there is no error. If there is any
+ *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ *    imperfection of BTF deduplication, which can cause slight duplication of
+ *    the same BTF type, if some directly or indirectly referenced (by
+ *    pointer) type gets resolved to different actual types in different
+ *    object files. If such a situation occurs, deduplicated BTF will end up
+ *    with two (or more) structurally identical types, which differ only in
+ *    types they refer to through pointer. This should be OK in most cases and
+ *    is not an error.
+ * 4. The candidate type search is performed by linearly scanning through all
+ *    types in the target BTF. It is anticipated that this is overall more
+ *    efficient memory-wise, and not significantly worse (if not better)
+ *    CPU-wise, than prebuilding a map from all local type names to
+ *    a list of candidate type names. It's also sped up by caching the resolved
+ *    list of matching candidates for each local "root" type ID that has at
+ *    least one bpf_core_relo associated with it. This list is shared
+ *    between multiple relocations for the same type ID and is updated as some
+ *    of the candidates are pruned due to structural incompatibility.
+ */
+int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
+                            int insn_idx,
+                            const struct bpf_core_relo *relo,
+                            int relo_idx,
+                            const struct btf *local_btf,
+                            struct bpf_core_cand_list *cands)
+{
+       struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+       struct bpf_core_relo_res cand_res, targ_res;
+       const struct btf_type *local_type;
+       const char *local_name;
+       __u32 local_id;
+       const char *spec_str;
+       int i, j, err;
+
+       local_id = relo->type_id;
+       local_type = btf__type_by_id(local_btf, local_id);
+       if (!local_type)
+               return -EINVAL;
+
+       local_name = btf__name_by_offset(local_btf, local_type->name_off);
+       if (!local_name)
+               return -EINVAL;
+
+       spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+       if (str_is_empty(spec_str))
+               return -EINVAL;
+
+       err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+       if (err) {
+               pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+                       prog_name, relo_idx, local_id, btf_kind_str(local_type),
+                       str_is_empty(local_name) ? "<anon>" : local_name,
+                       spec_str, err);
+               return -EINVAL;
+       }
+
+       pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+                relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+       bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+       libbpf_print(LIBBPF_DEBUG, "\n");
+
+       /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+       if (relo->kind == BPF_TYPE_ID_LOCAL) {
+               targ_res.validate = true;
+               targ_res.poison = false;
+               targ_res.orig_val = local_spec.root_type_id;
+               targ_res.new_val = local_spec.root_type_id;
+               goto patch_insn;
+       }
+
+       /* libbpf doesn't support candidate search for anonymous types */
+       if (str_is_empty(spec_str)) {
+               pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+                       prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+               return -EOPNOTSUPP;
+       }
+
+       for (i = 0, j = 0; i < cands->len; i++) {
+               err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+                                         cands->cands[i].id, &cand_spec);
+               if (err < 0) {
+                       pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+                               prog_name, relo_idx, i);
+                       bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+                       libbpf_print(LIBBPF_WARN, ": %d\n", err);
+                       return err;
+               }
+
+               pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+                        relo_idx, err == 0 ? "non-matching" : "matching", i);
+               bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+               libbpf_print(LIBBPF_DEBUG, "\n");
+
+               if (err == 0)
+                       continue;
+
+               err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+               if (err)
+                       return err;
+
+               if (j == 0) {
+                       targ_res = cand_res;
+                       targ_spec = cand_spec;
+               } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+                       /* if there are many field relo candidates, they
+                        * should all resolve to the same bit offset
+                        */
+                       pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+                               prog_name, relo_idx, cand_spec.bit_offset,
+                               targ_spec.bit_offset);
+                       return -EINVAL;
+               } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+                       /* all candidates should result in the same relocation
+                        * decision and value, otherwise it's dangerous to
+                        * proceed due to ambiguity
+                        */
+                       pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+                               prog_name, relo_idx,
+                               cand_res.poison ? "failure" : "success", cand_res.new_val,
+                               targ_res.poison ? "failure" : "success", targ_res.new_val);
+                       return -EINVAL;
+               }
+
+               cands->cands[j++] = cands->cands[i];
+       }
+
+       /*
+        * For a BPF_FIELD_EXISTS relo, or when the BPF program in use has field
+        * existence checks or kernel version/config checks, it's expected
+        * that we might not find any candidates. In this case, if field
+        * wasn't found in any candidate, the list of candidates shouldn't
+        * change at all, we'll just handle relocating appropriately,
+        * depending on relo's kind.
+        */
+       if (j > 0)
+               cands->len = j;
+
+       /*
+        * If no candidates were found, it might be either a programmer error
+        * or an expected case, depending on whether the instruction with the
+        * relocation is guarded in some way that makes it unreachable (dead
+        * code) when the relocation can't be resolved. This is handled
+        * uniformly in bpf_core_patch_insn() by replacing that instruction
+        * with a BPF helper call insn (using an invalid helper ID). If the
+        * instruction is indeed unreachable, it will be ignored and
+        * eliminated by the verifier. If it was an error, the verifier will
+        * complain and point to the specific instruction number in its log.
+        */
+       if (j == 0) {
+               pr_debug("prog '%s': relo #%d: no matching targets found\n",
+                        prog_name, relo_idx);
+
+               /* calculate single target relo result explicitly */
+               err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+               if (err)
+                       return err;
+       }
+
+patch_insn:
+       /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+       err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
+       if (err) {
+               pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
+                       prog_name, relo_idx, relo->insn_off / 8, err);
+               return -EINVAL;
+       }
+
+       return 0;
+}
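For reference, the poisoning step the comments above describe can be sketched as
below. This is an illustrative reimplementation, not the helper this patch adds;
the magic constant follows libbpf's convention but should be treated as an
assumption here:

#include <linux/bpf.h>

/* Overwrite an unresolvable relocated insn with a call to an invalid
 * helper ID; the verifier rejects it only if it is actually reachable. */
static void poison_relo_insn(struct bpf_insn *insn)
{
	insn->code = BPF_JMP | BPF_CALL;
	insn->dst_reg = 0;
	insn->src_reg = 0;
	insn->off = 0;
	insn->imm = 195896080;	/* 0xbad2310 => "bad relo" (assumed value) */
}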
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
new file mode 100644 (file)
index 0000000..3b9f8f1
--- /dev/null
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __RELO_CORE_H
+#define __RELO_CORE_H
+
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
+ */
+enum bpf_core_relo_kind {
+       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
+       BPF_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
+       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
+       BPF_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
+       BPF_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
+       BPF_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
+       BPF_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
+       BPF_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
+       BPF_TYPE_EXISTS = 8,            /* type existence in target kernel */
+       BPF_TYPE_SIZE = 9,              /* type size in bytes */
+       BPF_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
+       BPF_ENUMVAL_VALUE = 11,         /* enum value integer value */
+};
+
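For orientation, these kinds are what the usual bpf_core_read.h convenience
macros compile down to. A speculative BPF-side sketch, assuming vmlinux.h and
bpf_core_read.h (neither is part of this patch):

#include "vmlinux.h"
#include <bpf/bpf_core_read.h>

int probe_task(struct task_struct *t)
{
	if (bpf_core_field_exists(t->comm))		/* BPF_FIELD_EXISTS */
		return bpf_core_field_size(t->comm);	/* BPF_FIELD_BYTE_SIZE */
	/* BPF_TYPE_ID_TARGET: resolved to the kernel's BTF ID at load time */
	return bpf_core_type_id_kernel(struct task_struct);
}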
+/* The minimum bpf_core_relo record checked by the loader.
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ *   its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ *   type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ *   interpretation depends on specific relocation kind:
+ *     - for field-based relocations, string encodes an accessed field using
+ *     a sequence of field and array indices, separated by colon (:). It's
+ *     conceptually very close to LLVM's getelementptr ([0]) instruction's
+ *     arguments for identifying offset to a field.
+ *     - for type-based relocations, string is expected to be just "0";
+ *     - for enum value-based relocations, string contains an index of enum
+ *     value within its enum type;
+ *
+ * Example to provide a better feel.
+ *
+ *   struct sample {
+ *       int a;
+ *       struct {
+ *           int b[10];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *   int x = &s->a;     // encoded as "0:0" (a is field #0)
+ *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
+ *                      // b is field #0 inside anon struct, accessing elem #5)
+ *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture the BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ *               __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+       __u32   insn_off;
+       __u32   type_id;
+       __u32   access_str_off;
+       enum bpf_core_relo_kind kind;
+};
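As a concrete, hand-written instance: a record encoding the "0:0" access to
field `a` in the `struct sample` example above might look as follows. All
numeric values here are hypothetical; real records are emitted by Clang into
the .BTF.ext section:

static const struct bpf_core_relo sample_field_relo = {
	.insn_off	= 16,	/* byte offset of the insn whose imm gets patched */
	.type_id	= 42,	/* hypothetical BTF ID of struct sample */
	.access_str_off	= 7,	/* hypothetical offset of "0:0" in the string section */
	.kind		= BPF_FIELD_BYTE_OFFSET,
};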
+
+struct bpf_core_cand {
+       const struct btf *btf;
+       const struct btf_type *t;
+       const char *name;
+       __u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct bpf_core_cand_list {
+       struct bpf_core_cand *cands;
+       int len;
+};
+
+int bpf_core_apply_relo_insn(const char *prog_name,
+                            struct bpf_insn *insn, int insn_idx,
+                            const struct bpf_core_relo *relo, int relo_idx,
+                            const struct btf *local_btf,
+                            struct bpf_core_cand_list *cands);
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                             const struct btf *targ_btf, __u32 targ_id);
+
+size_t bpf_core_essential_name_len(const char *name);
+#endif
index cdecda1..996d025 100644 (file)
@@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
                        free(info_linear);
                        return -1;
                }
-               if (btf__get_from_id(info->btf_id, &btf)) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               if (libbpf_get_error(btf)) {
                        pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
                        err = -1;
-                       btf = NULL;
                        goto out;
                }
                perf_env__fetch_btf(env, info->btf_id, btf);
@@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
 
 out:
        free(info_linear);
-       free(btf);
+       btf__free(btf);
        return err ? -1 : 0;
 }
 
@@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
        if (btf_id == 0)
                goto out;
 
-       if (btf__get_from_id(btf_id, &btf)) {
+       btf = btf__load_from_kernel_by_id(btf_id);
+       if (libbpf_get_error(btf)) {
                pr_debug("%s: failed to get BTF of id %u, aborting\n",
                         __func__, btf_id);
                goto out;
@@ -486,7 +487,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
        perf_env__fetch_btf(env, btf_id, btf);
 
 out:
-       free(btf);
+       btf__free(btf);
        close(fd);
 }
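The same conversion recurs across these perf hunks. Shown as a standalone
sketch (the wrapper function is illustrative only), the old out-parameter
idiom gives way to libbpf's pointer-returning API:

#include <bpf/btf.h>
#include <bpf/libbpf.h>

static struct btf *fetch_btf(__u32 id)
{
	/* old idiom (removed above):
	 *	struct btf *btf = NULL;
	 *	if (btf__get_from_id(id, &btf)) ...;  ... free(btf);
	 */
	struct btf *btf = btf__load_from_kernel_by_id(id);

	if (libbpf_get_error(btf))
		return NULL;
	return btf;	/* release with btf__free(), not free() */
}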
 
index 8150e03..ba0f208 100644 (file)
@@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd)
        struct bpf_prog_info_linear *info_linear;
        struct bpf_func_info *func_info;
        const struct btf_type *t;
+       struct btf *btf = NULL;
        char *name = NULL;
-       struct btf *btf;
 
        info_linear = bpf_program__get_prog_info_linear(
                tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd)
                return NULL;
        }
 
-       if (info_linear->info.btf_id == 0 ||
-           btf__get_from_id(info_linear->info.btf_id, &btf)) {
+       if (info_linear->info.btf_id == 0) {
                pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
                goto out;
        }
 
+       btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+       if (libbpf_get_error(btf)) {
+               pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+               goto out;
+       }
+
        func_info = u64_to_ptr(info_linear->info.func_info);
        t = btf__type_by_id(btf, func_info[0].type_id);
        if (!t) {
@@ -89,6 +94,7 @@ static char *bpf_target_prog_name(int tgt_fd)
        }
        name = strdup(btf__name_by_offset(btf, t->name_off));
 out:
+       btf__free(btf);
        free(info_linear);
        return name;
 }
index 22f8326..bc1f648 100644 (file)
@@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session,
        return 0;
 }
 
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+                            struct perf_record_auxtrace *event)
+{
+       struct auxtrace_buffer *buf;
+       unsigned int i;
+       /*
+        * Find all buffers with same reference in the queues and dump them.
+        * This is because the queues can contain multiple entries of the same
+        * buffer that were split on aux records.
+        */
+       for (i = 0; i < etm->queues.nr_queues; ++i)
+               list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+                       if (buf->reference == event->reference)
+                               cs_etm__dump_event(etm, buf);
+}
+
 static int cs_etm__process_auxtrace_event(struct perf_session *session,
                                          union perf_event *event,
                                          struct perf_tool *tool __maybe_unused)
@@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
                                cs_etm__dump_event(etm, buffer);
                                auxtrace_buffer__put_data(buffer);
                        }
-       }
+       } else if (dump_trace)
+               dump_queued_data(etm, &event->auxtrace);
 
        return 0;
 }
@@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
        if (dump_trace) {
                cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
-               return 0;
        }
 
        err = cs_etm__synth_events(etm, session);
index 72e7f36..8af693d 100644 (file)
@@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
                        if (!(prot & PROT_EXEC))
                                dso__set_loaded(dso);
                }
-
-               nsinfo__put(dso->nsinfo);
                dso->nsinfo = nsi;
 
                if (build_id__is_defined(bid))
index a1bd700..fc683bc 100644 (file)
@@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void)
        return perf_pmu__find_map(NULL);
 }
 
-static bool perf_pmu__valid_suffix(char *pmu_name, char *tok)
+/*
+ * The suffix must be of the form tok_{digits} or tok{digits}, or be
+ * identical to pmu_name, to be valid.
+ */
+static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok)
 {
-       char *p;
+       const char *p;
 
        if (strncmp(pmu_name, tok, strlen(tok)))
                return false;
@@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok)
        if (*p == 0)
                return true;
 
-       if (*p != '_')
-               return false;
+       if (*p == '_')
+               ++p;
 
-       ++p;
-       if (*p == 0 || !isdigit(*p))
-               return false;
+       /* Ensure we end in a number */
+       while (1) {
+               if (!isdigit(*p))
+                       return false;
+               if (*(++p) == 0)
+                       break;
+       }
 
        return true;
 }
@@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
         *          match "socket" in "socketX_pmunameY" and then "pmuname" in
         *          "pmunameY".
         */
-       for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) {
+       while (1) {
+               char *next_tok = strtok_r(NULL, ",", &tmp);
+
                name = strstr(name, tok);
-               if (!name || !perf_pmu__valid_suffix((char *)name, tok)) {
+               if (!name ||
+                   (!next_tok && !perf_pmu__valid_suffix(name, tok))) {
                        res = false;
                        goto out;
                }
+               if (!next_tok)
+                       break;
+               tok = next_tok;
+               name += strlen(tok);
        }
 
        res = true;
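The accepted suffix shapes can be checked with a standalone restatement of the
rule above; this demo re-derives the helper's logic for illustration and is not
part of the patch:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool valid_suffix(const char *pmu_name, const char *tok)
{
	const char *p;

	if (strncmp(pmu_name, tok, strlen(tok)))
		return false;
	p = pmu_name + strlen(tok);
	if (*p == 0)
		return true;	/* exact match */
	if (*p == '_')
		++p;
	do {			/* must end in digits */
		if (!isdigit((unsigned char)*p))
			return false;
	} while (*++p);
	return true;
}

int main(void)
{
	printf("%d %d %d %d\n",
	       valid_suffix("uncore_cha_0", "uncore_cha"),	/* 1: tok_{digits} */
	       valid_suffix("uncore_cha12", "uncore_cha"),	/* 1: tok{digits} */
	       valid_suffix("uncore_cha", "uncore_cha"),	/* 1: same as tok */
	       valid_suffix("uncore_cha_x", "uncore_cha"));	/* 0: non-digit tail */
	return 0;
}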
index fb010a3..da9e8b6 100644 (file)
@@ -38,6 +38,7 @@ TARGETS += mount_setattr
 TARGETS += mqueue
 TARGETS += nci
 TARGETS += net
+TARGETS += net/af_unix
 TARGETS += net/forwarding
 TARGETS += net/mptcp
 TARGETS += netfilter
index addcfd8..433f8be 100644 (file)
@@ -23,7 +23,6 @@ test_skb_cgroup_id_user
 test_cgroup_storage
 test_flow_dissector
 flow_dissector_load
-test_netcnt
 test_tcpnotify_user
 test_libbpf
 test_tcp_check_syncookie_user
index f405b20..2a58b7b 100644 (file)
@@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
        test_verifier_log test_dev_cgroup \
        test_sock test_sockmap get_cgroup_id_user \
        test_cgroup_storage \
-       test_netcnt test_tcpnotify_user test_sysctl \
+       test_tcpnotify_user test_sysctl \
        test_progs-no_alu32
 
 # Also test bpf-gcc, if present
@@ -197,7 +197,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
 $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 
index 8deec1c..9b17f28 100644 (file)
@@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recomp
 bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
 saves the resulting output (by default in ``~/.bpf_selftests``).
 
+Script dependencies:
+
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
 For more information about using the script, run:
 
 .. code-block:: console
index 81084c1..0ab1c88 100644 (file)
@@ -6,19 +6,39 @@
 
 #define MAX_PERCPU_PACKETS 32
 
-struct percpu_net_cnt {
-       __u64 packets;
-       __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It really is about 128 bytes on x86_64, but allocate more to account for
+ * possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM          256
 
-       __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE       (0xFFFF - \
+                                                SIZEOF_BPF_LOCAL_STORAGE_ELEM)
 
-       __u64 prev_packets;
-       __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE                     32768
+
+union percpu_net_cnt {
+       struct {
+               __u64 packets;
+               __u64 bytes;
+
+               __u64 prev_ts;
+
+               __u64 prev_packets;
+               __u64 prev_bytes;
+       };
+       __u8 data[PCPU_MIN_UNIT_SIZE];
 };
 
-struct net_cnt {
-       __u64 packets;
-       __u64 bytes;
+union net_cnt {
+       struct {
+               __u64 packets;
+               __u64 bytes;
+       };
+       __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
 };
 
 #endif
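A quick compile-time sanity check of the sizing relationships above (a sketch,
not part of the header; `>=` because sizeof may round up for alignment):

#include <assert.h>
#include "netcnt_common.h"	/* the header above */

int main(void)
{
	static_assert(sizeof(union percpu_net_cnt) >= PCPU_MIN_UNIT_SIZE,
		      "per-cpu value spans a whole pcpu allocation unit");
	static_assert(sizeof(union net_cnt) >= BPF_LOCAL_STORAGE_MAX_VALUE_SIZE,
		      "storage value reaches the estimated maximum");
	return 0;
}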
index 2060bc1..d685768 100644 (file)
@@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms)
 
 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
 
-int start_server(int family, int type, const char *addr_str, __u16 port,
-                int timeout_ms)
+static int __start_server(int type, const struct sockaddr *addr,
+                         socklen_t addrlen, int timeout_ms, bool reuseport)
 {
-       struct sockaddr_storage addr = {};
-       socklen_t len;
+       int on = 1;
        int fd;
 
-       if (make_sockaddr(family, addr_str, port, &addr, &len))
-               return -1;
-
-       fd = socket(family, type, 0);
+       fd = socket(addr->sa_family, type, 0);
        if (fd < 0) {
                log_err("Failed to create server socket");
                return -1;
@@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
        if (settimeo(fd, timeout_ms))
                goto error_close;
 
-       if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+       if (reuseport &&
+           setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+               log_err("Failed to set SO_REUSEPORT");
+               goto error_close;
+       }
+
+       if (bind(fd, addr, addrlen) < 0) {
                log_err("Failed to bind socket");
                goto error_close;
        }
@@ -104,6 +106,69 @@ error_close:
        return -1;
 }
 
+int start_server(int family, int type, const char *addr_str, __u16 port,
+                int timeout_ms)
+{
+       struct sockaddr_storage addr;
+       socklen_t addrlen;
+
+       if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+               return -1;
+
+       return __start_server(type, (struct sockaddr *)&addr,
+                             addrlen, timeout_ms, false);
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+                           __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+       struct sockaddr_storage addr;
+       unsigned int nr_fds = 0;
+       socklen_t addrlen;
+       int *fds;
+
+       if (!nr_listens)
+               return NULL;
+
+       if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+               return NULL;
+
+       fds = malloc(sizeof(*fds) * nr_listens);
+       if (!fds)
+               return NULL;
+
+       fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+                               timeout_ms, true);
+       if (fds[0] == -1)
+               goto close_fds;
+       nr_fds = 1;
+
+       if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+               goto close_fds;
+
+       for (; nr_fds < nr_listens; nr_fds++) {
+               fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+                                            addrlen, timeout_ms, true);
+               if (fds[nr_fds] == -1)
+                       goto close_fds;
+       }
+
+       return fds;
+
+close_fds:
+       free_fds(fds, nr_fds);
+       return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+       if (fds) {
+               while (nr_close_fds)
+                       close(fds[--nr_close_fds]);
+               free(fds);
+       }
+}
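A hypothetical caller, showing the intended pairing of the two new helpers
(five IPv6 TCP listeners in one SO_REUSEPORT group, ephemeral port, no
timeout):

#include <sys/socket.h>
#include "network_helpers.h"	/* helpers added above */

static void example(void)
{
	int *fds = start_reuseport_server(AF_INET6, SOCK_STREAM, "::1",
					  0 /* ephemeral port */, 0, 5);

	if (!fds)
		return;
	/* ... exercise the listeners ... */
	free_fds(fds, 5);
}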
+
 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
                     int timeout_ms)
 {
@@ -217,6 +282,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        if (family == AF_INET) {
                struct sockaddr_in *sin = (void *)addr;
 
+               memset(addr, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                sin->sin_port = htons(port);
                if (addr_str &&
@@ -230,6 +296,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        } else if (family == AF_INET6) {
                struct sockaddr_in6 *sin6 = (void *)addr;
 
+               memset(addr, 0, sizeof(*sin6));
                sin6->sin6_family = AF_INET6;
                sin6->sin6_port = htons(port);
                if (addr_str &&
@@ -243,3 +310,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        }
        return -1;
 }
+
+char *ping_command(int family)
+{
+       if (family == AF_INET6) {
+               /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+               if (!system("which ping6 >/dev/null 2>&1"))
+                       return "ping6";
+               else
+                       return "ping -6";
+       }
+       return "ping";
+}
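A hypothetical wrapper around the new helper, sending a single probe to an
address of either family:

#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include "network_helpers.h"	/* for ping_command() */

static int ping_once(int family, const char *addr)
{
	char cmd[128];

	snprintf(cmd, sizeof(cmd), "%s -c 1 %s >/dev/null 2>&1",
		 ping_command(family), addr);
	return system(cmd);	/* 0 on success */
}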
index 5e0d51c..c59a8f6 100644 (file)
@@ -36,11 +36,16 @@ extern struct ipv6_packet pkt_v6;
 int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
                 int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+                           __u16 port, int timeout_ms,
+                           unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
 int connect_to_fd(int server_fd, int timeout_ms);
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
                     int timeout_ms);
 int make_sockaddr(int family, const char *addr_str, __u16 port,
                  struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644 (file)
index 0000000..85babb0
--- /dev/null
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+       if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+               return -1;
+
+       if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+               return -1;
+
+       return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+       unsigned int i;
+
+       for (i = 0; i < nr_fds; i++) {
+               if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+                              sizeof("bpf_cubic")))
+                       return i;
+       }
+
+       return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+       char tcp_cc[16];
+       socklen_t optlen = sizeof(tcp_cc);
+       unsigned int i;
+
+       for (i = 0; i < nr_fds; i++) {
+               if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+                              tcp_cc, &optlen) ||
+                   strcmp(tcp_cc, "bpf_dctcp"))
+                       return i;
+       }
+
+       return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+                            int **paccepted_fds)
+{
+       int *est_fds, *accepted_fds;
+       unsigned int i;
+
+       est_fds = malloc(sizeof(*est_fds) * nr_est);
+       if (!est_fds)
+               return NULL;
+
+       accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+       if (!accepted_fds) {
+               free(est_fds);
+               return NULL;
+       }
+
+       for (i = 0; i < nr_est; i++) {
+               est_fds[i] = connect_to_fd(listen_fd, 0);
+               if (est_fds[i] == -1)
+                       break;
+               if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+                       close(est_fds[i]);
+                       break;
+               }
+
+               accepted_fds[i] = accept(listen_fd, NULL, 0);
+               if (accepted_fds[i] == -1) {
+                       close(est_fds[i]);
+                       break;
+               }
+       }
+
+       if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+               free_fds(accepted_fds, i);
+               free_fds(est_fds, i);
+               return NULL;
+       }
+
+       *paccepted_fds = accepted_fds;
+       return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+       struct sockaddr_in6 addr;
+       socklen_t addrlen = sizeof(addr);
+
+       if (!getsockname(fd, (struct sockaddr *)&addr, &addrlen))
+               return ntohs(addr.sin6_port);
+
+       return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+                                  bool random_retry)
+{
+       int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+       unsigned int nr_reuse_listens = 256, nr_est = 256;
+       int err, iter_fd = -1, listen_fd = -1;
+       char buf;
+
+       /* Prepare non-reuseport listen_fd */
+       listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       if (!ASSERT_GE(listen_fd, 0, "start_server"))
+               return;
+       if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+                      "set listen_fd to cubic"))
+               goto done;
+       iter_skel->bss->listen_hport = get_local_port(listen_fd);
+       if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+                       "get_local_port(listen_fd)"))
+               goto done;
+
+       /* Connect to non-reuseport listen_fd */
+       est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+       if (!ASSERT_OK_PTR(est_fds, "create established"))
+               goto done;
+
+       /* Prepare reuseport listen fds */
+       reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+                                                 "::1", 0, 0,
+                                                 nr_reuse_listens);
+       if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+               goto done;
+       if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+                      nr_reuse_listens, "set reuse_listen_fds to cubic"))
+               goto done;
+       iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+       if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+                       "get_local_port(reuse_listen_fds[0])"))
+               goto done;
+
+       /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+       iter_skel->bss->random_retry = random_retry;
+       iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+       if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+               goto done;
+
+       while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+              errno == EAGAIN)
+               ;
+       if (!ASSERT_OK(err, "read iter error"))
+               goto done;
+
+       /* Check reuseport listen fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+                 nr_reuse_listens,
+                 "check reuse_listen_fds dctcp");
+
+       /* Check non reuseport listen fd for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+                 "check listen_fd dctcp");
+
+       /* Check established fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+                 "check est_fds dctcp");
+
+       /* Check accepted fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+                 "check accepted_fds dctcp");
+
+done:
+       if (iter_fd != -1)
+               close(iter_fd);
+       if (listen_fd != -1)
+               close(listen_fd);
+       free_fds(reuse_listen_fds, nr_reuse_listens);
+       free_fds(accepted_fds, nr_est);
+       free_fds(est_fds, nr_est);
+}
+
+void test_bpf_iter_setsockopt(void)
+{
+       struct bpf_iter_setsockopt *iter_skel = NULL;
+       struct bpf_cubic *cubic_skel = NULL;
+       struct bpf_dctcp *dctcp_skel = NULL;
+       struct bpf_link *cubic_link = NULL;
+       struct bpf_link *dctcp_link = NULL;
+
+       if (create_netns())
+               return;
+
+       /* Load iter_skel */
+       iter_skel = bpf_iter_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+               return;
+       iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+       if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+               goto done;
+
+       /* Load bpf_cubic */
+       cubic_skel = bpf_cubic__open_and_load();
+       if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+               goto done;
+       cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+       if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+               goto done;
+
+       /* Load bpf_dctcp */
+       dctcp_skel = bpf_dctcp__open_and_load();
+       if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+               goto done;
+       dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+       if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+               goto done;
+
+       do_bpf_iter_setsockopt(iter_skel, true);
+       do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+       bpf_link__destroy(cubic_link);
+       bpf_link__destroy(dctcp_link);
+       bpf_cubic__destroy(cubic_skel);
+       bpf_dctcp__destroy(dctcp_skel);
+       bpf_iter_setsockopt__destroy(iter_skel);
+}
index 857e3f2..649f873 100644 (file)
@@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num)
                goto done;
        }
 
-       err = btf__get_from_id(info.btf_id, &btf);
+       btf = btf__load_from_kernel_by_id(info.btf_id);
+       err = libbpf_get_error(btf);
        if (CHECK(err, "cannot get btf from kernel, err: %d", err))
                goto done;
 
@@ -4386,6 +4387,7 @@ skip:
        fprintf(stderr, "OK");
 
 done:
+       btf__free(btf);
        free(func_info);
        bpf_object__close(obj);
 }
index 1b90e68..52ccf0c 100644 (file)
@@ -232,7 +232,593 @@ err_out:
        btf__free(btf);
 }
 
+#define STRSIZE                                4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+       char *s = ctx, new[STRSIZE];
+
+       vsnprintf(new, STRSIZE, fmt, args);
+       if (strlen(s) < STRSIZE)
+               strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+                        char *name, char *prefix, __u64 flags, void *ptr,
+                        size_t ptr_sz, char *str, const char *expected_val)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       size_t type_sz;
+       __s32 type_id;
+       int ret = 0;
+
+       if (flags & BTF_F_COMPACT)
+               opts.compact = true;
+       if (flags & BTF_F_NONAME)
+               opts.skip_names = true;
+       if (flags & BTF_F_ZERO)
+               opts.emit_zeroes = true;
+       if (prefix) {
+               ASSERT_STRNEQ(name, prefix, strlen(prefix),
+                             "verify prefix match");
+               name += strlen(prefix) + 1;
+       }
+       type_id = btf__find_by_name(btf, name);
+       if (!ASSERT_GE(type_id, 0, "find type id"))
+               return -ENOENT;
+       type_sz = btf__resolve_size(btf, type_id);
+       str[0] = '\0';
+       ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+       if (type_sz <= ptr_sz) {
+               if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+                       return -EINVAL;
+       } else {
+               if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+                       return -EINVAL;
+       }
+       if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+               return -EFAULT;
+       return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags,       \
+                          _expected, ...)                              \
+       do {                                                            \
+               char __ptrtype[64] = #_type;                            \
+               char *_ptrtype = (char *)__ptrtype;                     \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+                                    _ptr, sizeof(_type), _str,         \
+                                    _expected);                        \
+       } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix,  _str, _type, _flags,    \
+                            ...)                                       \
+       TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags,        \
+                          "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure -E2BIG is returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz,        \
+                               _expected, ...)                         \
+       do {                                                            \
+               char __ptrtype[64] = #_type;                            \
+               char *_ptrtype = (char *)__ptrtype;                     \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0,      \
+                                    _ptr, _type_sz, _str, _expected);  \
+       } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags,  \
+                         _expected, ...)                               \
+       do {                                                            \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _var, _prefix, _flags,     \
+                                    _ptr, sizeof(_type), _str,         \
+                                    _expected);                        \
+       } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+                                  char *str)
+{
+#ifdef __SIZEOF_INT128__
+       __int128 i = 0xffffffffffffffff;
+
+       /* this dance is required because we cannot directly initialize
+        * a 128-bit value to anything larger than a 64-bit value.
+        */
+       i = (i << 64) | (i - 1);
+#endif
+       /* simple int */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "1234", 1234);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(int)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "-4567", -4567);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
+                          "(__int128)0xffffffffffffffff",
+                          0xffffffffffffffff);
+       ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
+                               "(__int128)0xfffffffffffffffffffffffffffffffe"),
+                 "dump __int128");
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+                                    char *str)
+{
+       float t1 = 1.234567;
+       float t2 = -1.234567;
+       float t3 = 0.0;
+       double t4 = 5.678912;
+       double t5 = -5.678912;
+       double t6 = 0.0;
+       long double t7 = 9.876543;
+       long double t8 = -9.876543;
+       long double t9 = 0.0;
+
+       /* since the kernel likely does not have any float types in its BTF,
+        * we need to add some of various sizes ourselves.
+        */
+
+       ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+                               "(test_float)1.234567"), "dump float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+                               "(test_float)-1.234567"), "dump float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+                               "(test_float)0.000000"), "dump float");
+
+       ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+                 "(test_double)5.678912"), "dump double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+                 "(test_double)-5.678912"), "dump double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+                               "(test_double)0.000000"), "dump double");
+
+       ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+                               str, "(test_long_double)9.876543"),
+                               "dump long_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+                               str, "(test_long_double)-9.876543"),
+                               "dump long_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+                               str, "(test_long_double)0.000000"),
+                               "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+                                   char *str)
+{
+       /* simple char */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+                          "100", 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+                          "(char)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(char)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+                                      char *str)
+{
+       /* simple typedef */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+                          "1", 1);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(u64)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+       /* typedef struct */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+                            {.counter = (int)1,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+                          "{1,}", { .counter = 1 });
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"      .counter = (int)1,\n"
+"}",
+                          {.counter = 1,});
+       /* typedef with 0 value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+                          "{}", {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(atomic_t){.counter = (int)0,}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "{0,}", {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+"      .counter = (int)0,\n"
+"}",
+                          { .counter = 0,});
+
+       /* overflow should show type but not value since it overflows */
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+                               "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+                                   char *str)
+{
+       /* enum where enum value does (and does not) exist */
+       TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+                            BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+                          "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+                          "(enum bpf_cmd)BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "BPF_MAP_CREATE", 0);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_ZERO,
+                          "(enum bpf_cmd)BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "BPF_MAP_CREATE", BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "2000", 2000);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+                          "(enum bpf_cmd)2000", 2000);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+                               sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+                                     char *str)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       char zero_data[512] = { };
+       char type_data[512];
+       void *fops = type_data;
+       void *skb = type_data;
+       size_t type_sz;
+       __s32 type_id;
+       char *cmpstr;
+       int ret;
+
+       memset(type_data, 255, sizeof(type_data));
+
+       /* simple struct */
+       TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+                            {.name_off = (__u32)3,.val = (__s32)-1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{3,-1,}",
+                          { .name_off = 3, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"      .name_off = (__u32)3,\n"
+"      .val = (__s32)-1,\n"
+"}",
+                          { .name_off = 3, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{-1,}",
+                          { .name_off = 0, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "{0,-1,}",
+                          { .name_off = 0, .val = -1,});
+       /* empty struct should be printed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+                          "(struct btf_enum){}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_ZERO,
+                          "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_ZERO,
+"(struct btf_enum){\n"
+"      .name_off = (__u32)0,\n"
+"      .val = (__s32)0,\n"
+"}",
+                          { .name_off = 0, .val = 0,});
+
+       /* struct with pointers */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+                          "(struct list_head){.next = (struct list_head *)0x1,}",
+                          { .next = (struct list_head *)1 });
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"      .next = (struct list_head *)0x1,\n"
+"}",
+                          { .next = (struct list_head *)1 });
+       /* NULL pointer should not be displayed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+                          "(struct list_head){}",
+                          { .next = (struct list_head *)0 });
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+                          { .next = (struct list_head *)0 });
+
+       /* struct with function pointers */
+       type_id = btf__find_by_name(btf, "file_operations");
+       if (ASSERT_GT(type_id, 0, "find type id")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping file_operations");
+               cmpstr =
+"(struct file_operations){\n"
+"      .owner = (struct module *)0xffffffffffffffff,\n"
+"      .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+               ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+       }
+
+       /* struct with char array */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+                          { .name = "foo",});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{['f','o','o',],}",
+                          {.name = "foo",});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+"      .name = (char[16])[\n"
+"              'f',\n"
+"              'o',\n"
+"              'o',\n"
+"      ],\n"
+"}",
+                          {.name = "foo",});
+       /* leading null char means do not display string */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){}",
+                          {.name = {'\0', 'f', 'o', 'o'}});
+       /* handle non-printable characters */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+                          { .name = {1, 2, 3, 0}});
+
+       /* struct with non-char array */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+                          "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+                          { .cb = {1, 2, 3, 4, 5,},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{[1,2,3,4,5,],}",
+                          { .cb = { 1, 2, 3, 4, 5},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+"      .cb = (__u32[5])[\n"
+"              1,\n"
+"              2,\n"
+"              3,\n"
+"              4,\n"
+"              5,\n"
+"      ],\n"
+"}",
+                          { .cb = { 1, 2, 3, 4, 5},});
+       /* For non-char arrays, show non-zero values only */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+                          "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+                          { .cb = { 0, 0, 1, 0, 0},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+"      .cb = (__u32[5])[\n"
+"              0,\n"
+"              0,\n"
+"              1,\n"
+"              0,\n"
+"              0,\n"
+"      ],\n"
+"}",
+                          { .cb = { 0, 0, 1, 0, 0},});
+
+       /* struct with bitfields */
+       TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+               {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{1,0x2,0x3,4,5,}",
+                          { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+                            .imm = 5,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+"      .code = (__u8)1,\n"
+"      .dst_reg = (__u8)0x2,\n"
+"      .src_reg = (__u8)0x3,\n"
+"      .off = (__s16)4,\n"
+"      .imm = (__s32)5,\n"
+"}",
+                          {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+       /* zeroed bitfields should not be displayed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+                          "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+                          { .code = 0, .dst_reg = 1});
+
+       /* struct with enum bitfield */
+       type_id = btf__find_by_name(btf, "fs_context");
+       if (ASSERT_GT(type_id, 0, "find fs_context")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               opts.emit_zeroes = true;
+               ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping fs_context");
+
+               ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+                                 "bitfield value not present");
+       }
+
+       /* struct with nested anon union */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+                          "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+                          { .op = 1, .args = { 1, 2, 3, 4}});
+
+       /* union with nested struct */
+       TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+                          "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+                          { .map = { .map_fd = 1 }});
+
+       /* struct skb with nested structs/unions; because type output is so
+        * complex, we don't do a string comparison, just verify we return
+        * the type size as the amount of data displayed.
+        */
+       type_id = btf__find_by_name(btf, "sk_buff");
+       if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping sk_buff");
+       }
+
+       /* overflow bpf_sock_ops struct with final element nonzero/zero.
+        * Regardless of the value of the final field, we don't have all the
+        * data we need to display it, so we should trigger an overflow.
+        * In other words, overflow checking should trump "is field zero?"
+        * checks because if we've overflowed, it shouldn't matter what the
+        * field is - we can't trust its value so shouldn't display it.
+        */
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+                               sizeof(struct bpf_sock_ops) - 1,
+                               "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+                               { .op = 1, .skb_tcp_flags = 2});
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+                               sizeof(struct bpf_sock_ops) - 1,
+                               "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+                               { .op = 1, .skb_tcp_flags = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+                                  char *str)
+{
+       TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+                         "int cpu_number = (int)100", 100);
+       TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+                         "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+                            const char *name, const char *expected_val,
+                            void *data, size_t data_sz)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       int ret = 0, cmp;
+       size_t secsize;
+       __s32 type_id;
+
+       opts.compact = true;
+
+       type_id = btf__find_by_name(btf, name);
+       if (!ASSERT_GT(type_id, 0, "find type id"))
+               return;
+
+       secsize = btf__resolve_size(btf, type_id);
+       ASSERT_EQ(secsize, 0, "verify section size");
+
+       str[0] = '\0';
+       ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+       ASSERT_EQ(ret, 0, "unexpected return value");
+
+       cmp = strcmp(str, expected_val);
+       ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+       struct btf *btf = btf__parse("xdping_kern.o", NULL);
+       struct btf_dump_opts opts = { .ctx = str };
+       char license[4] = "GPL";
+       struct btf_dump *d;
+
+       if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+               return;
+
+       d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+       if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+               return;
+
+       test_btf_datasec(btf, d, str, "license",
+                        "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+                        license, sizeof(license));
+}
+
 void test_btf_dump() {
+       char str[STRSIZE];
+       struct btf_dump_opts opts = { .ctx = str };
+       struct btf_dump *d;
+       struct btf *btf;
        int i;
 
        for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -245,4 +831,33 @@ void test_btf_dump() {
        }
        if (test__start_subtest("btf_dump: incremental"))
                test_btf_dump_incremental();
+
+       btf = libbpf_find_kernel_btf();
+       if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+               return;
+
+       d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+       if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+               return;
+
+       /* Verify type display for various types. */
+       if (test__start_subtest("btf_dump: int_data"))
+               test_btf_dump_int_data(btf, d, str);
+       if (test__start_subtest("btf_dump: float_data"))
+               test_btf_dump_float_data(btf, d, str);
+       if (test__start_subtest("btf_dump: char_data"))
+               test_btf_dump_char_data(btf, d, str);
+       if (test__start_subtest("btf_dump: typedef_data"))
+               test_btf_dump_typedef_data(btf, d, str);
+       if (test__start_subtest("btf_dump: enum_data"))
+               test_btf_dump_enum_data(btf, d, str);
+       if (test__start_subtest("btf_dump: struct_data"))
+               test_btf_dump_struct_data(btf, d, str);
+       if (test__start_subtest("btf_dump: var_data"))
+               test_btf_dump_var_data(btf, d, str);
+       btf_dump__free(d);
+       btf__free(btf);
+
+       if (test__start_subtest("btf_dump: datasec_data"))
+               test_btf_dump_datasec_data(str);
 }
index 981c251..3d4b2a3 100644 (file)
@@ -53,8 +53,8 @@ void test_core_autosize(void)
        char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
        int err, fd = -1, zero = 0;
        int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
        struct test_core_autosize* skel = NULL;
-       struct bpf_object_load_attr load_attr = {};
        struct bpf_program *prog;
        struct bpf_map *bss_map;
        struct btf *btf = NULL;
@@ -125,9 +125,10 @@ void test_core_autosize(void)
        fd = -1;
 
        /* open and load BPF program with custom BTF as the kernel BTF */
-       skel = test_core_autosize__open();
+       open_opts.btf_custom_path = btf_file;
+       skel = test_core_autosize__open_opts(&open_opts);
        if (!ASSERT_OK_PTR(skel, "skel_open"))
-               return;
+               goto cleanup;
 
        /* disable handle_signed() for now */
        prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
@@ -135,9 +136,7 @@ void test_core_autosize(void)
                goto cleanup;
        bpf_program__set_autoload(prog, false);
 
-       load_attr.obj = skel->obj;
-       load_attr.target_btf_path = btf_file;
-       err = bpf_object__load_xattr(&load_attr);
+       err = bpf_object__load(skel->obj);
        if (!ASSERT_OK(err, "prog_load"))
                goto cleanup;
 
@@ -204,14 +203,13 @@ void test_core_autosize(void)
        skel = NULL;
 
        /* now re-load with handle_signed() enabled, it should fail loading */
-       skel = test_core_autosize__open();
+       open_opts.btf_custom_path = btf_file;
+       skel = test_core_autosize__open_opts(&open_opts);
        if (!ASSERT_OK_PTR(skel, "skel_open"))
-               return;
+               goto cleanup;
 
-       load_attr.obj = skel->obj;
-       load_attr.target_btf_path = btf_file;
-       err = bpf_object__load_xattr(&load_attr);
-       if (!ASSERT_ERR(err, "bad_prog_load"))
+       err = test_core_autosize__load(skel);
+       if (!ASSERT_ERR(err, "skel_load"))
                goto cleanup;
 
 cleanup:
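
These hunks replace the load-time bpf_object__load_xattr() with a custom BTF path passed at open time. The resulting pattern looks roughly like this (hedged sketch; the skeleton name is hypothetical):

    DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
    		    .btf_custom_path = "/tmp/custom.btf");
    struct my_skel *skel = my_skel__open_opts(&open_opts);

    /* CO-RE relocations are now resolved against /tmp/custom.btf */
    if (skel && !my_skel__load(skel))
    	run_test(skel);
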
index d02e064..4739b15 100644 (file)
@@ -816,7 +816,7 @@ static size_t roundup_page(size_t sz)
 void test_core_reloc(void)
 {
        const size_t mmap_sz = roundup_page(sizeof(struct data));
-       struct bpf_object_load_attr load_attr = {};
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
        struct core_reloc_test_case *test_case;
        const char *tp_name, *probe_name;
        int err, i, equal;
@@ -846,9 +846,16 @@ void test_core_reloc(void)
                                continue;
                }
 
-               obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
+               if (test_case->btf_src_file) {
+                       err = access(test_case->btf_src_file, R_OK);
+                       if (!ASSERT_OK(err, "btf_src_file"))
+                               goto cleanup;
+               }
+
+               open_opts.btf_custom_path = test_case->btf_src_file;
+               obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
                if (!ASSERT_OK_PTR(obj, "obj_open"))
-                       continue;
+                       goto cleanup;
 
                probe_name = "raw_tracepoint/sys_enter";
                tp_name = "sys_enter";
@@ -862,17 +869,7 @@ void test_core_reloc(void)
                          "prog '%s' not found\n", probe_name))
                        goto cleanup;
 
-
-               if (test_case->btf_src_file) {
-                       err = access(test_case->btf_src_file, R_OK);
-                       if (!ASSERT_OK(err, "btf_src_file"))
-                               goto cleanup;
-               }
-
-               load_attr.obj = obj;
-               load_attr.log_level = 0;
-               load_attr.target_btf_path = test_case->btf_src_file;
-               err = bpf_object__load_xattr(&load_attr);
+               err = bpf_object__load(obj);
                if (err) {
                        if (!test_case->fails)
                                ASSERT_OK(err, "obj_load");
index 088b365..02a465f 100644 (file)
@@ -17,6 +17,7 @@ void test_get_func_ip_test(void)
         */
 #ifndef __x86_64__
        bpf_program__set_autoload(skel->progs.test6, false);
+       bpf_program__set_autoload(skel->progs.test7, false);
 #endif
 
        err = get_func_ip_test__load(skel);
@@ -46,6 +47,7 @@ void test_get_func_ip_test(void)
        ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
 #ifdef __x86_64__
        ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+       ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
 #endif
 
 cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644 (file)
index 0000000..6ede48b
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void test_netcnt(void)
+{
+       union percpu_net_cnt *percpu_netcnt = NULL;
+       struct bpf_cgroup_storage_key key;
+       int map_fd, percpu_map_fd;
+       struct netcnt_prog *skel;
+       unsigned long packets;
+       union net_cnt netcnt;
+       unsigned long bytes;
+       int cpu, nproc;
+       int cg_fd = -1;
+       char cmd[128];
+
+       skel = netcnt_prog__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+               return;
+
+       nproc = get_nprocs_conf();
+       percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+       if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+               goto err;
+
+       cg_fd = test__join_cgroup(CG_NAME);
+       if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+               goto err;
+
+       skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+       if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+                          "attach_cgroup(bpf_nextcnt)"))
+               goto err;
+
+       snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+       ASSERT_OK(system(cmd), cmd);
+
+       map_fd = bpf_map__fd(skel->maps.netcnt);
+       if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+               goto err;
+
+       if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+               goto err;
+
+       percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+       if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+                      "bpf_map_lookup_elem(percpu_netcnt)"))
+               goto err;
+
+       /* Some packets can still be in the per-cpu cache, but no more than
+        * MAX_PERCPU_PACKETS.
+        */
+       packets = netcnt.packets;
+       bytes = netcnt.bytes;
+       for (cpu = 0; cpu < nproc; cpu++) {
+               ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+               packets += percpu_netcnt[cpu].packets;
+               bytes += percpu_netcnt[cpu].bytes;
+       }
+
+       /* No packets should be lost */
+       ASSERT_EQ(packets, 10000, "packets");
+
+       /* Check that the bytes counter matches the number of packets
+        * multiplied by the size of an ICMPv6 echo request: 40 bytes of
+        * IPv6 header + 8 bytes of ICMPv6 header + 56 bytes of default
+        * ping payload = 104 bytes per packet.
+        */
+       ASSERT_EQ(bytes, packets * 104, "bytes");
+
+err:
+       if (cg_fd != -1)
+               close(cg_fd);
+       free(percpu_netcnt);
+       netcnt_prog__destroy(skel);
+}
index fcf54b3..d4b953a 100644 (file)
@@ -125,6 +125,10 @@ void test_pinning(void)
        if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
                goto out;
 
+       /* get pinning path */
+       if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+               goto out;
+
        /* set pinning path of other map and re-pin all */
        map = bpf_object__find_map_by_name(obj, "nopinmap");
        if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
        if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
                goto out;
 
+       /* get pinning path after set */
+       if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+                         "get pin path after set"))
+               goto out;
+
        /* should only pin the one unpinned map */
        err = bpf_object__pin_maps(obj, NULL);
        if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
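
For reference, the bpf_map__pin_path() getter these hunks exercise pairs with the existing setter roughly as follows (a hedged sketch; error handling elided):

    if (!bpf_map__set_pin_path(map, "/sys/fs/bpf/custom"))
    	/* bpf_map__pin_path() reads back what set_pin_path() stored */
    	ASSERT_STREQ(bpf_map__pin_path(map), "/sys/fs/bpf/custom", "pin path");
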
index de26881..4e91f4d 100644 (file)
@@ -34,8 +34,8 @@ void test_reference_tracking(void)
                if (!test__start_subtest(title))
                        continue;
 
-               /* Expect verifier failure if test name has 'fail' */
-               if (strstr(title, "fail") != NULL) {
+               /* Expect verifier failure if test name has 'err' */
+               if (strstr(title, "err_") != NULL) {
                        libbpf_print_fn_t old_print_fn;
 
                        old_print_fn = libbpf_set_print(NULL);
index 5703c91..e7201ba 100644 (file)
 #define _GNU_SOURCE
 
 #include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
 #include <linux/limits.h>
 #include <linux/sysctl.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
-#include <sys/stat.h>
 #include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
 
 #include "test_progs.h"
 #include "network_helpers.h"
@@ -391,9 +392,7 @@ done:
 
 static int test_ping(int family, const char *addr)
 {
-       const char *ping = family == AF_INET6 ? "ping6" : "ping";
-
-       SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+       SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
        return 0;
 fail:
        return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644 (file)
index 0000000..6b186b4
--- /dev/null
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that an XDP_TX program loaded on a bond device is
+ * correctly propagated to the slave devices and that XDP_TX'd
+ * packets are balanced according to the bonding mode.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+       ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+       int nsfd, err;
+       char nspath[PATH_MAX];
+
+       snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+       nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+       if (nsfd < 0)
+               return -1;
+
+       err = setns(nsfd, CLONE_NEWNET);
+       close(nsfd);
+       return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+       FILE *f;
+       char line[512];
+       int iface_len = strlen(iface);
+
+       f = fopen("/proc/net/dev", "r");
+       if (!f)
+               return -1;
+
+       while (fgets(line, sizeof(line), f)) {
+               char *p = line;
+
+               while (*p == ' ')
+                       p++; /* skip whitespace */
+               if (!strncmp(p, iface, iface_len)) {
+                       p += iface_len;
+                       if (*p++ != ':')
+                               continue;
+                       while (*p == ' ')
+                               p++; /* skip whitespace */
+                       while (*p && *p != ' ')
+                               p++; /* skip rx bytes */
+                       while (*p == ' ')
+                               p++; /* skip whitespace */
+                       fclose(f);
+                       return atoi(p);
+               }
+       }
+       fclose(f);
+       return -1;
+}
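
For context, get_rx_packets() above walks /proc/net/dev lines of roughly this shape; after skipping the interface name and the RX bytes column, the value it atoi()s is the RX packets column (illustrative values):

    /*
     *  veth1_1: 104000    1000    0    0    0     0          0         0 ...
     *           ^rx bytes ^rx packets (the value returned)
     */
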
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+       struct xdp_dummy *xdp_dummy;
+       struct xdp_tx *xdp_tx;
+       struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+       int nlinks;
+       struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+       struct bpf_link *link;
+       int ifindex;
+
+       ifindex = if_nametoindex(iface);
+       if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+               return -1;
+
+       if (!ASSERT_LE(skeletons->nlinks + 1, MAX_BPF_LINKS, "too many XDP programs attached"))
+               return -1;
+
+       link = bpf_program__attach_xdp(prog, ifindex);
+       if (!ASSERT_OK_PTR(link, "attach xdp program"))
+               return -1;
+
+       skeletons->links[skeletons->nlinks++] = link;
+       return 0;
+}
+
+enum {
+       BOND_ONE_NO_ATTACH = 0,
+       BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+       [BOND_MODE_ROUNDROBIN]   = "balance-rr",
+       [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+       [BOND_MODE_XOR]          = "balance-xor",
+       [BOND_MODE_BROADCAST]    = "broadcast",
+       [BOND_MODE_8023AD]       = "802.3ad",
+       [BOND_MODE_TLB]          = "balance-tlb",
+       [BOND_MODE_ALB]          = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+       [BOND_XMIT_POLICY_LAYER2]       = "layer2",
+       [BOND_XMIT_POLICY_LAYER34]      = "layer3+4",
+       [BOND_XMIT_POLICY_LAYER23]      = "layer2+3",
+       [BOND_XMIT_POLICY_ENCAP23]      = "encap2+3",
+       [BOND_XMIT_POLICY_ENCAP34]      = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+                        int bond_both_attach)
+{
+#define SYS(fmt, ...)                                          \
+       ({                                                      \
+               char cmd[1024];                                 \
+               snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+               if (!ASSERT_OK(system(cmd), cmd))               \
+                       return -1;                              \
+       })
+
+       SYS("ip netns add ns_dst");
+       SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+       SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+       SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+           mode_names[mode], xmit_policy_names[xmit_policy]);
+       SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+       SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+           mode_names[mode], xmit_policy_names[xmit_policy]);
+       SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+       SYS("ip link set veth1_1 master bond1");
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+               SYS("ip link set veth1_2 master bond1");
+       } else {
+               SYS("ip link set veth1_2 up addrgenmode none");
+
+               if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+                       return -1;
+       }
+
+       SYS("ip -netns ns_dst link set veth2_1 master bond2");
+
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+               SYS("ip -netns ns_dst link set veth2_2 master bond2");
+       else
+               SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+       /* Load a dummy program on the sending side: the veth peer needs
+        * to have an XDP program attached as well for XDP_TX on the
+        * other end to work.
+        */
+       if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+               return -1;
+
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+               if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+                       return -1;
+
+               if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+                       return -1;
+
+               restore_root_netns();
+       }
+
+       return 0;
+
+#undef SYS
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+       restore_root_netns();
+       while (skeletons->nlinks) {
+               skeletons->nlinks--;
+               bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+       }
+       ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+       ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+       ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+       ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+       struct ethhdr eh = {
+               .h_source = BOND1_MAC,
+               .h_dest = BOND2_MAC,
+               .h_proto = htons(ETH_P_IP),
+       };
+       uint8_t buf[128] = {};
+       struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
+       struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+       int i, s = -1;
+       int ifindex;
+
+       s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+       if (!ASSERT_GE(s, 0, "socket"))
+               goto err;
+
+       ifindex = if_nametoindex("bond1");
+       if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+               goto err;
+
+       memcpy(buf, &eh, sizeof(eh));
+       iph->ihl = 5;
+       iph->version = 4;
+       iph->tos = 16;
+       iph->id = 1;
+       iph->ttl = 64;
+       iph->protocol = IPPROTO_UDP;
+       iph->saddr = 1;
+       iph->daddr = 2;
+       iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
+       iph->check = 0;
+
+       for (i = 1; i <= NPACKETS; i++) {
+               int n;
+               struct sockaddr_ll saddr_ll = {
+                       .sll_ifindex = ifindex,
+                       .sll_halen = ETH_ALEN,
+                       .sll_addr = BOND2_MAC,
+               };
+
+               /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+               uh->dest++;
+
+               if (vary_dst_ip)
+                       iph->daddr++;
+
+               n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+               if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+                       goto err;
+       }
+
+       return 0;
+
+err:
+       if (s >= 0)
+               close(s);
+       return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+       int bond1_rx;
+
+       if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+               goto out;
+
+       if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+               goto out;
+
+       bond1_rx = get_rx_packets("bond1");
+       ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+       switch (mode) {
+       case BOND_MODE_ROUNDROBIN:
+       case BOND_MODE_XOR: {
+               int veth1_rx = get_rx_packets("veth1_1");
+               int veth2_rx = get_rx_packets("veth1_2");
+               int diff = abs(veth1_rx - veth2_rx);
+
+               ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+               switch (xmit_policy) {
+               case BOND_XMIT_POLICY_LAYER2:
+                       ASSERT_GE(diff, NPACKETS,
+                                 "expected packets on only one of the interfaces");
+                       break;
+               case BOND_XMIT_POLICY_LAYER23:
+               case BOND_XMIT_POLICY_LAYER34:
+                       ASSERT_LT(diff, NPACKETS/2,
+                                 "expected even distribution of packets");
+                       break;
+               default:
+                       PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+                       break;
+               }
+               break;
+       }
+       case BOND_MODE_ACTIVEBACKUP: {
+               int veth1_rx = get_rx_packets("veth1_1");
+               int veth2_rx = get_rx_packets("veth1_2");
+               int diff = abs(veth1_rx - veth2_rx);
+
+               ASSERT_GE(diff, NPACKETS,
+                         "expected packets on only one of the interfaces");
+               break;
+       }
+       default:
+               PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+               break;
+       }
+
+out:
+       bonding_cleanup(skeletons);
+}
+
+/* Test broadcast redirection using xdp_redirect_map_multi_prog: add all
+ * the interfaces to the devmap and check that broadcasting won't send
+ * the packet back to either the ingress bond device (bond2) or its
+ * slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+       static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+       int veth1_1_rx, veth1_2_rx;
+       int err;
+
+       if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+                         BOND_ONE_NO_ATTACH))
+               goto out;
+
+       if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+               goto out;
+
+       /* populate the devmap with the relevant interfaces */
+       for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+               int ifindex = if_nametoindex(ifaces[i]);
+               int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+               if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+                       goto out;
+
+               err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+               if (!ASSERT_OK(err, "add interface to map_all"))
+                       goto out;
+       }
+
+       if (xdp_attach(skeletons,
+                      skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+                      "bond2"))
+               goto out;
+
+       restore_root_netns();
+
+       if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+               goto out;
+
+       veth1_1_rx = get_rx_packets("veth1_1");
+       veth1_2_rx = get_rx_packets("veth1_2");
+
+       ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+       ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+       restore_root_netns();
+       bonding_cleanup(skeletons);
+}
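
The devmap program attached above is not shown in this diff; assuming the xdp_redirect_multi_kern selftest object referenced here, it is essentially a one-liner broadcasting to every map_all entry except the ingress interface, which is why neither bond2 nor veth2_1 sees the packet again (hedged sketch):

    SEC("xdp")
    int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
    {
    	/* replicate to all map_all entries, minus the receiving ifindex */
    	return bpf_redirect_map(&map_all, 0,
    				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
    }
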
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+       struct bpf_link *link = NULL;
+       struct bpf_link *link2 = NULL;
+       int veth, bond;
+       int err;
+
+       if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+               goto out;
+       if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+               goto out;
+
+       veth = if_nametoindex("veth");
+       if (!ASSERT_GT(veth, 0, "if_nametoindex veth"))
+               goto out;
+       bond = if_nametoindex("bond");
+       if (!ASSERT_GT(bond, 0, "if_nametoindex bond"))
+               goto out;
+
+       /* enslaving with an XDP program loaded fails */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       if (!ASSERT_OK_PTR(link, "attach program to veth"))
+               goto out;
+
+       err = system("ip link set veth master bond");
+       if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
+               goto out;
+
+       bpf_link__destroy(link);
+       link = NULL;
+
+       err = system("ip link set veth master bond");
+       if (!ASSERT_OK(err, "set veth master"))
+               goto out;
+
+       /* attaching to slave when master has no program is allowed */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+               goto out;
+
+       /* attaching to master not allowed when slave has program loaded */
+       link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+       if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+               goto out;
+
+       bpf_link__destroy(link);
+       link = NULL;
+
+       /* attaching XDP program to master allowed when slave has no program */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+       if (!ASSERT_OK_PTR(link, "attach program to master"))
+               goto out;
+
+       /* attaching to slave not allowed when master has program loaded */
+       link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
+
+out:
+       bpf_link__destroy(link);
+       bpf_link__destroy(link2);
+
+       system("ip link del veth");
+       system("ip link del bond");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+                             const char *format, va_list args)
+{
+       if (level != LIBBPF_WARN)
+               vprintf(format, args);
+       return 0;
+}
+
+struct bond_test_case {
+       char *name;
+       int mode;
+       int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+       { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+       { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+       { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+       { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+       { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void test_xdp_bonding(void)
+{
+       libbpf_print_fn_t old_print_fn;
+       struct skeletons skeletons = {};
+       int i;
+
+       old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+       root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+       if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+               goto out;
+
+       skeletons.xdp_dummy = xdp_dummy__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+               goto out;
+
+       skeletons.xdp_tx = xdp_tx__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+               goto out;
+
+       skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+                          "xdp_redirect_multi_kern__open_and_load"))
+               goto out;
+
+       if (!test__start_subtest("xdp_bonding_attach"))
+               test_xdp_bonding_attach(&skeletons);
+
+       for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+               struct bond_test_case *test_case = &bond_test_cases[i];
+
+               if (!test__start_subtest(test_case->name))
+                       test_xdp_bonding_with_mode(
+                               &skeletons,
+                               test_case->mode,
+                               test_case->xmit_policy);
+       }
+
+       if (!test__start_subtest("xdp_bonding_redirect_multi"))
+               test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+       xdp_dummy__destroy(skeletons.xdp_dummy);
+       xdp_tx__destroy(skeletons.xdp_tx);
+       xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+       libbpf_set_print(old_print_fn);
+       if (root_netns_fd >= 0)
+               close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644 (file)
index 0000000..b77adfd
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc)        ({                              \
+       struct sock_common *_skc = skc;                 \
+       sk = NULL;                                      \
+       tp = NULL;                                      \
+       if (_skc) {                                     \
+               tp = bpf_skc_to_tcp_sock(_skc);         \
+               sk = (struct sock *)tp;                 \
+       }                                               \
+       tp;                                             \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+       int i;
+
+       for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+               if (a[i] != b[i])
+                       return false;
+               if (!a[i])
+                       break;
+       }
+
+       return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+       char cur_cc[TCP_CA_NAME_MAX];
+       struct tcp_sock *tp;
+       struct sock *sk;
+       int ret;
+
+       if (!bpf_tcp_sk(ctx->sk_common))
+               return 0;
+
+       if (sk->sk_family != AF_INET6 ||
+           (sk->sk_state != TCP_LISTEN &&
+            sk->sk_state != TCP_ESTABLISHED) ||
+           (sk->sk_num != reuse_listen_hport &&
+            sk->sk_num != listen_hport &&
+            bpf_ntohs(sk->sk_dport) != listen_hport))
+               return 0;
+
+       if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+                          cur_cc, sizeof(cur_cc)))
+               return 0;
+
+       if (!tcp_cc_eq(cur_cc, cubic_cc))
+               return 0;
+
+       if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+               return 1;
+
+       bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
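
An iter/tcp program like the one above only runs when user space reads the iterator; a hedged sketch of the driving side (assuming the libbpf API of this tree; error handling trimmed, function name illustrative):

    #include <unistd.h>
    #include <bpf/libbpf.h>
    #include <bpf/bpf.h>	/* bpf_iter_create() */

    static void run_tcp_iter(struct bpf_program *prog)
    {
    	struct bpf_link *link = bpf_program__attach_iter(prog, NULL);
    	char buf[64];
    	int iter_fd;

    	if (!link)
    		return;
    	iter_fd = bpf_iter_create(bpf_link__fd(link));
    	if (iter_fd >= 0) {
    		/* each read() walks the sockets and runs the program */
    		while (read(iter_fd, buf, sizeof(buf)) > 0)
    			;
    		close(iter_fd);
    	}
    	bpf_link__destroy(link);
    }
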
index 2e4775c..92267ab 100644 (file)
@@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
        }
 
        BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
-                      seq_num, src, srcp, destp, destp);
+                      seq_num, src, srcp, dest, destp);
        BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
                       state,
                       tp->write_seq - tp->snd_una, rx_queue,
index 0137891..3af0998 100644 (file)
@@ -5,6 +5,10 @@
 #define AF_INET                        2
 #define AF_INET6               10
 
+#define SOL_TCP                        6
+#define TCP_CONGESTION         13
+#define TCP_CA_NAME_MAX                16
+
 #define ICSK_TIME_RETRANS      1
 #define ICSK_TIME_PROBE0       3
 #define ICSK_TIME_LOSS_PROBE   5
@@ -32,6 +36,8 @@
 #define ir_v6_rmt_addr         req.__req_common.skc_v6_daddr
 #define ir_v6_loc_addr         req.__req_common.skc_v6_rcv_saddr
 
+#define sk_num                 __sk_common.skc_num
+#define sk_dport               __sk_common.skc_dport
 #define sk_family              __sk_common.skc_family
 #define sk_rmem_alloc          sk_backlog.rmem_alloc
 #define sk_refcnt              __sk_common.skc_refcnt
index acd587b..a587aec 100644 (file)
@@ -11,6 +11,7 @@ extern const void bpf_fentry_test3 __ksym;
 extern const void bpf_fentry_test4 __ksym;
 extern const void bpf_modify_return_test __ksym;
 extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
 
 __u64 test1_result = 0;
 SEC("fentry/bpf_fentry_test1")
@@ -71,3 +72,13 @@ int test6(struct pt_regs *ctx)
        test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
        return 0;
 }
+
+__u64 test7_result = 0;
+SEC("kprobe/bpf_fentry_test7+5")
+int test7(struct pt_regs *ctx)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+       return 0;
+}
index d071adf..43649bc 100644 (file)
 struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
        __type(key, struct bpf_cgroup_storage_key);
-       __type(value, struct percpu_net_cnt);
+       __type(value, union percpu_net_cnt);
 } percpu_netcnt SEC(".maps");
 
 struct {
        __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
        __type(key, struct bpf_cgroup_storage_key);
-       __type(value, struct net_cnt);
+       __type(value, union net_cnt);
 } netcnt SEC(".maps");
 
 SEC("cgroup/skb")
 int bpf_nextcnt(struct __sk_buff *skb)
 {
-       struct percpu_net_cnt *percpu_cnt;
+       union percpu_net_cnt *percpu_cnt;
        char fmt[] = "%d %llu %llu\n";
-       struct net_cnt *cnt;
+       union net_cnt *cnt;
        __u64 ts, dt;
        int ret;
 
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644 (file)
index 0000000..703c08e
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, int);
+       __uint(max_entries, 4);
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+       __uint(max_entries, 0); /* This will make map creation fail */
+       __uint(key_size, sizeof(__u32));
+       __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
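
The expected consumer of this object is a loader that asserts the failure; roughly (hedged sketch, object path assumed):

    struct bpf_object *obj = bpf_object__open("test_map_in_map_invalid.o");

    if (ASSERT_OK_PTR(obj, "open")) {
    	/* "mim" has max_entries == 0, so loading must fail */
    	ASSERT_ERR(bpf_object__load(obj), "load must fail");
    	bpf_object__close(obj);
    }
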
index e83d0b4..8249075 100644 (file)
@@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_use_after_free")
+SEC("classifier/err_use_after_free")
 int bpf_sk_lookup_uaf(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
        return family;
 }
 
-SEC("classifier/fail_modify_sk_pointer")
+SEC("classifier/err_modify_sk_pointer")
 int bpf_sk_lookup_modptr(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_modify_sk_or_null_pointer")
+SEC("classifier/err_modify_sk_or_null_pointer")
 int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_no_release")
+SEC("classifier/err_no_release")
 int bpf_sk_lookup_test2(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_release_twice")
+SEC("classifier/err_release_twice")
 int bpf_sk_lookup_test3(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_release_unchecked")
+SEC("classifier/err_release_unchecked")
 int bpf_sk_lookup_test4(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
        bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("classifier/fail_no_release_subcall")
+SEC("classifier/err_no_release_subcall")
 int bpf_sk_lookup_test5(struct __sk_buff *skb)
 {
        lookup_no_release(skb);
index 94e6c2b..5f725c7 100644 (file)
@@ -3,7 +3,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-SEC("tx")
+SEC("xdp")
 int xdp_tx(struct xdp_md *xdp)
 {
        return XDP_TX;
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755 (executable)
index 0000000..be54b73
--- /dev/null
@@ -0,0 +1,586 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+    os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+retval = 0
+
+class BlockParser(object):
+    """
+    A parser for extracting set of values from blocks such as enums.
+    @reader: a pointer to the open file to parse
+    """
+    def __init__(self, reader):
+        self.reader = reader
+
+    def search_block(self, start_marker):
+        """
+        Search for a given structure in a file.
+        @start_marker: regex marking the beginning of a structure to parse
+        """
+        offset = self.reader.tell()
+        array_start = re.search(start_marker, self.reader.read())
+        if array_start is None:
+            raise Exception('Failed to find start of block')
+        self.reader.seek(offset + array_start.start())
+
+    def parse(self, pattern, end_marker):
+        """
+        Parse a block and return a set of values. Values to extract must be
+        on separate lines in the file.
+        @pattern: pattern used to identify the values to extract
+        @end_marker: regex marking the end of the block to parse
+        """
+        entries = set()
+        while True:
+            line = self.reader.readline()
+            if not line or re.match(end_marker, line):
+                break
+            capture = pattern.search(line)
+            if capture and pattern.groups >= 1:
+                entries.add(capture.group(1))
+        return entries
+
+class ArrayParser(BlockParser):
+    """
+    A parser for extracting dictionaries of values from some BPF-related arrays.
+    @reader: a pointer to the open file to parse
+    @array_name: name of the array to parse
+    """
+    end_marker = re.compile('^};')
+
+    def __init__(self, reader, array_name):
+        self.array_name = array_name
+        self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+        super().__init__(reader)
+
+    def search_block(self):
+        """
+        Search for the given array in a file.
+        """
+        super().search_block(self.start_marker)
+
+    def parse(self):
+        """
+        Parse a block and return data as a dictionary. Items to extract must be
+        on separate lines in the file.
+        """
+        pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
+        entries = {}
+        while True:
+            line = self.reader.readline()
+            if line == '' or re.match(self.end_marker, line):
+                break
+            capture = pattern.search(line)
+            if capture:
+                entries[capture.group(1)] = capture.group(2)
+        return entries
+
+class InlineListParser(BlockParser):
+    """
+    A parser for extracting set of values from inline lists.
+    """
+    def parse(self, pattern, end_marker):
+        """
+        Parse a block and return a set of values. Multiple values to extract
+        can be on a same line in the file.
+        @pattern: pattern used to identify the values to extract
+        @end_marker: regex marking the end of the block to parse
+        """
+        entries = set()
+        while True:
+            line = self.reader.readline()
+            if not line:
+                break
+            entries.update(pattern.findall(line))
+            if re.search(end_marker, line):
+                break
+        return entries
+
+class FileExtractor(object):
+    """
+    A generic reader for extracting data from a given file. This class contains
+    several helper methods that wrap around parser objects to extract values
+    from different structures.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def __init__(self):
+        self.reader = open(self.filename, 'r')
+
+    def close(self):
+        """
+        Close the file used by the parser.
+        """
+        self.reader.close()
+
+    def reset_read(self):
+        """
+        Reset the file position indicator for this parser. This is useful when
+        parsing several structures in the file without respecting the order in
+        which those structures appear in the file.
+        """
+        self.reader.seek(0)
+
+    def get_types_from_array(self, array_name):
+        """
+        Search for and parse an array associating names to BPF_* enum members,
+        for example:
+
+            const char * const prog_type_name[] = {
+                    [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
+                    [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
+                    [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
+            };
+
+        Return a dictionary with the enum member names as keys and the
+        associated names as values, for example:
+
+            {'BPF_PROG_TYPE_UNSPEC': 'unspec',
+             'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
+             'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+
+        @array_name: name of the array to parse
+        """
+        array_parser = ArrayParser(self.reader, array_name)
+        array_parser.search_block()
+        return array_parser.parse()
+
+    def get_enum(self, enum_name):
+        """
+        Search for and parse an enum containing BPF_* members, for example:
+
+            enum bpf_prog_type {
+                    BPF_PROG_TYPE_UNSPEC,
+                    BPF_PROG_TYPE_SOCKET_FILTER,
+                    BPF_PROG_TYPE_KPROBE,
+            };
+
+        Return a set containing all member names, for example:
+
+            {'BPF_PROG_TYPE_UNSPEC',
+             'BPF_PROG_TYPE_SOCKET_FILTER',
+             'BPF_PROG_TYPE_KPROBE'}
+
+        @enum_name: name of the enum to parse
+        """
+        start_marker = re.compile(f'enum {enum_name} {{\n')
+        pattern = re.compile('^\s*(BPF_\w+),?$')
+        end_marker = re.compile('^};')
+        parser = BlockParser(self.reader)
+        parser.search_block(start_marker)
+        return parser.parse(pattern, end_marker)
+
+    def __get_description_list(self, start_marker, pattern, end_marker):
+        parser = InlineListParser(self.reader)
+        parser.search_block(start_marker)
+        return parser.parse(pattern, end_marker)
+
+    def get_rst_list(self, block_name):
+        """
+        Search for and parse a list of type names from RST documentation, for
+        example:
+
+             |       *TYPE* := {
+             |               **socket** | **kprobe** |
+             |               **kretprobe**
+             |       }
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'\*{block_name}\* := {{')
+        pattern = re.compile('\*\*([\w/-]+)\*\*')
+        end_marker = re.compile('}\n')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def get_help_list(self, block_name):
+        """
+        Search for and parse a list of type names from a help message in
+        bpftool, for example:
+
+            "       TYPE := { socket | kprobe |\\n"
+            "               kretprobe }\\n"
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'"\s*{block_name} := {{')
+        pattern = re.compile('([\w/]+) [|}]')
+        end_marker = re.compile('}')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def get_help_list_macro(self, macro):
+        """
+        Search for and parse a list of values from a help message starting with
+        a macro in bpftool, for example:
+
+            "       " HELP_SPEC_OPTIONS " |\\n"
+            "                    {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+        Return a set containing all item names, for example:
+
+            {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+        @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+        """
+        start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+        pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+        end_marker = re.compile('}\\\\n')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def default_options(self):
+        """
+        Return the default options contained in HELP_SPEC_OPTIONS
+        """
+        return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
+
+    def get_bashcomp_list(self, block_name):
+        """
+        Search for and parse a list of type names from a variable in bash
+        completion file, for example:
+
+            local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+                kretprobe'
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'local {block_name}=\'')
+        pattern = re.compile('(?:.*=\')?([\w/]+)')
+        end_marker = re.compile('\'$')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+    """
+    An abstract extractor for a source file with usage message.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def get_options(self):
+        return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+
+class ProgFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's prog.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+    def get_prog_types(self):
+        return self.get_types_from_array('prog_type_name')
+
+    def get_attach_types(self):
+        return self.get_types_from_array('attach_type_strings')
+
+    def get_prog_attach_help(self):
+        return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's map.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+    def get_map_types(self):
+        return self.get_types_from_array('map_type_name')
+
+    def get_map_help(self):
+        return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's cgroup.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+    def get_prog_attach_help(self):
+        return self.get_help_list('ATTACH_TYPE')
+
+class CommonFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's common.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'common.c')
+
+    def __init__(self):
+        super().__init__()
+        self.attach_types = {}
+
+    def get_attach_types(self):
+        if not self.attach_types:
+            self.attach_types = self.get_types_from_array('attach_type_name')
+        return self.attach_types
+
+    def get_cgroup_attach_types(self):
+        if not self.attach_types:
+            self.get_attach_types()
+        cgroup_types = {}
+        for (key, value) in self.attach_types.items():
+            if key.find('BPF_CGROUP') != -1:
+                cgroup_types[key] = value
+        return cgroup_types
+
+class GenericSourceExtractor(SourceFileExtractor):
+    """
+    An extractor for generic source code files.
+    """
+    filename = ""
+
+    def __init__(self, filename):
+        self.filename = os.path.join(BPFTOOL_DIR, filename)
+        super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+    """
+    An extractor for the UAPI BPF header.
+    """
+    filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+
+    def get_prog_types(self):
+        return self.get_enum('bpf_prog_type')
+
+    def get_map_types(self):
+        return self.get_enum('bpf_map_type')
+
+    def get_attach_types(self):
+        return self.get_enum('bpf_attach_type')
+
+class ManPageExtractor(FileExtractor):
+    """
+    An abstract extractor for an RST documentation page.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def get_options(self):
+        return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-prog.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+
+    def get_attach_types(self):
+        return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-map.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+
+    def get_map_types(self):
+        return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-cgroup.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+
+    def get_attach_types(self):
+        return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+    """
+    An extractor for generic RST documentation pages.
+    """
+    filename = ""
+
+    def __init__(self, filename):
+        self.filename = os.path.join(BPFTOOL_DIR, filename)
+        super().__init__()
+
+class BashcompExtractor(FileExtractor):
+    """
+    An extractor for bpftool's bash completion file.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+
+    def get_prog_attach_types(self):
+        return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+    def get_map_types(self):
+        return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
+
+    def get_cgroup_attach_types(self):
+        return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+    """
+    Print all values that differ between two sets.
+    @first_set: one set to compare
+    @second_set: another set to compare
+    @message: message to print for values belonging to only one of the sets
+    """
+    global retval
+    diff = first_set.symmetric_difference(second_set)
+    if diff:
+        print(message, diff)
+        retval = 1
+
+def main():
+    # No arguments supported at this time, but print usage for -h|--help
+    argParser = argparse.ArgumentParser(description="""
+    Verify that bpftool's code, help messages, documentation and bash
+    completion are all in sync on program types, map types, attach types, and
+    options. Also check that bpftool is in sync with the UAPI BPF header.
+    """)
+    args = argParser.parse_args()
+
+    # Map types (enum)
+
+    bpf_info = BpfHeaderExtractor()
+    ref = bpf_info.get_map_types()
+
+    map_info = MapFileExtractor()
+    source_map_items = map_info.get_map_types()
+    map_types_enum = set(source_map_items.keys())
+
+    verify(ref, map_types_enum,
+            f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
+
+    # Map types (names)
+
+    source_map_types = set(source_map_items.values())
+    source_map_types.discard('unspec')
+
+    help_map_types = map_info.get_map_help()
+    help_map_options = map_info.get_options()
+    map_info.close()
+
+    man_map_info = ManMapExtractor()
+    man_map_options = man_map_info.get_options()
+    man_map_types = man_map_info.get_map_types()
+    man_map_info.close()
+
+    bashcomp_info = BashcompExtractor()
+    bashcomp_map_types = bashcomp_info.get_map_types()
+
+    verify(source_map_types, help_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+    verify(source_map_types, man_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+    verify(help_map_options, man_map_options,
+            f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+    verify(source_map_types, bashcomp_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
+
+    # Program types (enum)
+
+    ref = bpf_info.get_prog_types()
+
+    prog_info = ProgFileExtractor()
+    prog_types = set(prog_info.get_prog_types().keys())
+
+    verify(ref, prog_types,
+            f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
+
+    # Attach types (enum)
+
+    ref = bpf_info.get_attach_types()
+    bpf_info.close()
+
+    common_info = CommonFileExtractor()
+    attach_types = common_info.get_attach_types()
+
+    verify(ref, attach_types,
+            f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+
+    # Attach types (names)
+
+    source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+    help_prog_attach_types = prog_info.get_prog_attach_help()
+    help_prog_options = prog_info.get_options()
+    prog_info.close()
+
+    man_prog_info = ManProgExtractor()
+    man_prog_options = man_prog_info.get_options()
+    man_prog_attach_types = man_prog_info.get_attach_types()
+    man_prog_info.close()
+
+    bashcomp_info.reset_read() # We stopped at map types, rewind
+    bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+
+    verify(source_prog_attach_types, help_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+    verify(source_prog_attach_types, man_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+    verify(help_prog_options, man_prog_options,
+            f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+    verify(source_prog_attach_types, bashcomp_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+    # Cgroup attach types
+
+    source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
+    common_info.close()
+
+    cgroup_info = CgroupFileExtractor()
+    help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+    help_cgroup_options = cgroup_info.get_options()
+    cgroup_info.close()
+
+    man_cgroup_info = ManCgroupExtractor()
+    man_cgroup_options = man_cgroup_info.get_options()
+    man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+    man_cgroup_info.close()
+
+    bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
+    bashcomp_info.close()
+
+    verify(source_cgroup_attach_types, help_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+    verify(source_cgroup_attach_types, man_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+    verify(help_cgroup_options, man_cgroup_options,
+            f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+    verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+
+    # Options for remaining commands
+
+    for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+        source_info = GenericSourceExtractor(cmd + '.c')
+        help_cmd_options = source_info.get_options()
+        source_info.close()
+
+        man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+        man_cmd_options = man_cmd_info.get_options()
+        man_cmd_info.close()
+
+        verify(help_cmd_options, man_cmd_options,
+                f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+    source_main_info = GenericSourceExtractor('main.c')
+    help_main_options = source_main_info.get_options()
+    source_main_info.close()
+
+    man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+    man_main_options = man_main_info.get_options()
+    man_main_info.close()
+
+    verify(help_main_options, man_main_options,
+            f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+    sys.exit(retval)
+
+if __name__ == "__main__":
+    main()
index 30cbf5d..14cea86 100644 (file)
@@ -764,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data)
        udp = socket(AF_INET, SOCK_DGRAM, 0);
        i = 0;
        err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
-       if (!err) {
-               printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+       if (err) {
+               printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
                       i, udp);
                goto out_sockmap;
        }
@@ -1153,12 +1153,17 @@ out_sockmap:
 }
 
 #define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
 static void test_map_in_map(void)
 {
        struct bpf_object *obj;
        struct bpf_map *map;
        int mim_fd, fd, err;
        int pos = 0;
+       struct bpf_map_info info = {};
+       __u32 len = sizeof(info);
+       __u32 id = 0;
+       libbpf_print_fn_t old_print_fn;
 
        obj = bpf_object__open(MAPINMAP_PROG);
 
@@ -1228,11 +1233,72 @@ static void test_map_in_map(void)
        }
 
        close(fd);
+       fd = -1;
        bpf_object__close(obj);
+
+       /* Test that failing bpf_object__create_map() destroys the inner map */
+       obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+       err = libbpf_get_error(obj);
+       if (err) {
+               printf("Failed to load %s program: %d %d",
+                      MAPINMAP_INVALID_PROG, err, errno);
+               goto out_map_in_map;
+       }
+
+       map = bpf_object__find_map_by_name(obj, "mim");
+       if (!map) {
+               printf("Failed to load array of maps from test prog\n");
+               goto out_map_in_map;
+       }
+
+       old_print_fn = libbpf_set_print(NULL);
+
+       err = bpf_object__load(obj);
+       if (!err) {
+               printf("Loading obj was supposed to fail\n");
+               goto out_map_in_map;
+       }
+
+       libbpf_set_print(old_print_fn);
+
+       /* Iterate over all maps to check whether the inner map
+        * ("mim.inner") has been destroyed.
+        */
+       while (true) {
+               err = bpf_map_get_next_id(id, &id);
+               if (err) {
+                       if (errno == ENOENT)
+                               break;
+                       printf("Failed to get next map: %d", errno);
+                       goto out_map_in_map;
+               }
+
+               fd = bpf_map_get_fd_by_id(id);
+               if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
+                       printf("Failed to get map by id %u: %d", id, errno);
+                       goto out_map_in_map;
+               }
+
+               err = bpf_obj_get_info_by_fd(fd, &info, &len);
+               if (err) {
+                       printf("Failed to get map info by fd %d: %d", fd,
+                              errno);
+                       goto out_map_in_map;
+               }
+
+               if (!strcmp(info.name, "mim.inner")) {
+                       printf("Inner map mim.inner was not destroyed\n");
+                       goto out_map_in_map;
+               }
+       }
+
        return;
 
 out_map_in_map:
-       close(fd);
+       if (fd >= 0)
+               close(fd);
        exit(1);
 }
 
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644 (file)
index a7b9a69..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
-                       const char *name)
-{
-       struct bpf_map *map;
-
-       map = bpf_object__find_map_by_name(obj, name);
-       if (!map) {
-               printf("%s:FAIL:map '%s' not found\n", test, name);
-               return -1;
-       }
-       return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
-       struct percpu_net_cnt *percpu_netcnt;
-       struct bpf_cgroup_storage_key key;
-       int map_fd, percpu_map_fd;
-       int error = EXIT_FAILURE;
-       struct net_cnt netcnt;
-       struct bpf_object *obj;
-       int prog_fd, cgroup_fd;
-       unsigned long packets;
-       unsigned long bytes;
-       int cpu, nproc;
-       __u32 prog_cnt;
-
-       nproc = get_nprocs_conf();
-       percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
-       if (!percpu_netcnt) {
-               printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
-               goto err;
-       }
-
-       if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
-                         &obj, &prog_fd)) {
-               printf("Failed to load bpf program\n");
-               goto out;
-       }
-
-       cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
-       if (cgroup_fd < 0)
-               goto err;
-
-       /* Attach bpf program */
-       if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
-               printf("Failed to attach bpf program");
-               goto err;
-       }
-
-       if (system("which ping6 &>/dev/null") == 0)
-               assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
-       else
-               assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
-       if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
-                          &prog_cnt)) {
-               printf("Failed to query attached programs");
-               goto err;
-       }
-
-       map_fd = bpf_find_map(__func__, obj, "netcnt");
-       if (map_fd < 0) {
-               printf("Failed to find bpf map with net counters");
-               goto err;
-       }
-
-       percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
-       if (percpu_map_fd < 0) {
-               printf("Failed to find bpf map with percpu net counters");
-               goto err;
-       }
-
-       if (bpf_map_get_next_key(map_fd, NULL, &key)) {
-               printf("Failed to get key in cgroup storage\n");
-               goto err;
-       }
-
-       if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
-               printf("Failed to lookup cgroup storage\n");
-               goto err;
-       }
-
-       if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
-               printf("Failed to lookup percpu cgroup storage\n");
-               goto err;
-       }
-
-       /* Some packets can be still in per-cpu cache, but not more than
-        * MAX_PERCPU_PACKETS.
-        */
-       packets = netcnt.packets;
-       bytes = netcnt.bytes;
-       for (cpu = 0; cpu < nproc; cpu++) {
-               if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
-                       printf("Unexpected percpu value: %llu\n",
-                              percpu_netcnt[cpu].packets);
-                       goto err;
-               }
-
-               packets += percpu_netcnt[cpu].packets;
-               bytes += percpu_netcnt[cpu].bytes;
-       }
-
-       /* No packets should be lost */
-       if (packets != 10000) {
-               printf("Unexpected packet count: %lu\n", packets);
-               goto err;
-       }
-
-       /* Let's check that bytes counter matches the number of packets
-        * multiplied by the size of ipv6 ICMP packet.
-        */
-       if (bytes != packets * 104) {
-               printf("Unexpected bytes count: %lu\n", bytes);
-               goto err;
-       }
-
-       error = 0;
-       printf("test_netcnt:PASS\n");
-
-err:
-       cleanup_cgroup_environment();
-       free(percpu_netcnt);
-
-out:
-       return error;
-}
index 8ef7f33..c8c2bf8 100644 (file)
@@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path);
        ___ok;                                                          \
 })
 
+#define ASSERT_STRNEQ(actual, expected, len, name) ({                  \
+       static int duration = 0;                                        \
+       const char *___act = actual;                                    \
+       const char *___exp = expected;                                  \
+       int ___len = len;                                               \
+       bool ___ok = strncmp(___act, ___exp, ___len) == 0;              \
+       CHECK(!___ok, (name),                                           \
+             "unexpected %s: actual '%.*s' != expected '%.*s'\n",      \
+             (name), ___len, ___act, ___len, ___exp);                  \
+       ___ok;                                                          \
+})
+
 #define ASSERT_OK(res, name) ({                                                \
        static int duration = 0;                                        \
        long long ___res = (res);                                       \
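Note on the new ASSERT_STRNEQ macro above: like the other ASSERT_* helpers it evaluates to true on success, so callers can bail out early. A hypothetical call site (the names here are invented for illustration), checking only a fixed-length prefix:

    /* Hypothetical: passes when the first 4 bytes match the "mim." prefix. */
    if (!ASSERT_STRNEQ(info.name, "mim.", 4, "map_name_prefix"))
            return;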
index c9dde9b..088fcad 100755 (executable)
@@ -69,7 +69,7 @@ cleanup() {
 }
 
 server_listen() {
-       ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+       ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
        server_pid=$!
        sleep 0.2
 }
index ba8ffcd..995278e 100755 (executable)
@@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
 
 ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
 ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
 
 trap cleanup EXIT
index a3e593d..2debba4 100644 (file)
@@ -1,3 +1,232 @@
+{
+       "map access: known scalar += value_ptr unknown vs const",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_1, 3),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr const vs unknown",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 3),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr const vs const (ne)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 3),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_1, 5),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr const vs const (eq)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 5),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_1, 5),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr unknown vs unknown (eq)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr unknown vs unknown (lt)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr unknown vs unknown (gt)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
 {
        "map access: known scalar += value_ptr from different maps",
        .insns = {
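Read as C, the new "known scalar += value_ptr" cases above share one skeleton. A rough illustrative rendering of the "unknown vs const" variant follows (simplified to a single map); this is hypothetical source, not the tests' actual input: the tests emit raw instructions precisely because a C compiler would constant-fold the NEG/AND sequence that the verifier must treat as an unknown-but-bounded scalar.

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, char[48]);
    } map_48b SEC(".maps");

    SEC("socket")
    int unknown_vs_const(struct __sk_buff *skb)
    {
            __u32 key = 0;
            unsigned long off;
            char *val = bpf_map_lookup_elem(&map_48b, &key);

            if (!val)
                    return 1;
            if (val[0] == 1)
                    off = (0UL - 6) & 0x7;  /* "unknown" scalar, bounded to [0, 7] */
            else
                    off = 3;                /* constant scalar */
            return *(val + off);            /* scalar += value_ptr, then load */
    }

Privileged mode accepts this since both paths stay in bounds; unprivileged mode rejects it with "R1 tried to add from different maps, paths or scalars" because its pointer-arithmetic sanitation refuses scalars whose value was chosen on different verifier paths.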
index 06a351b..0709af0 100644 (file)
@@ -38,6 +38,7 @@
 /x86_64/xen_vmcall_test
 /x86_64/xss_msr_test
 /x86_64/vmx_pmu_msrs_test
+/access_tracking_perf_test
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
index b853be2..5832f51 100644 (file)
@@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
index a16c8f0..cc89818 100644 (file)
@@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = {
 #define VREGS_SUBLIST \
        { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
 #define PMU_SUBLIST \
-       { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+       { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+         .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
 #define SVE_SUBLIST \
        { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
          .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
new file mode 100644 (file)
index 0000000..e2baa18
--- /dev/null
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * access_tracking_perf_test
+ *
+ * Copyright (C) 2021, Google, Inc.
+ *
+ * This test measures the performance effects of KVM's access tracking.
+ * Access tracking is driven by the MMU notifiers test_young, clear_young, and
+ * clear_flush_young. These notifiers do not have a direct userspace API,
+ * however the clear_young notifier can be triggered by marking a pages as idle
+ * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
+ * enable access tracking on guest memory.
+ *
+ * To measure performance this test runs a VM with a configurable number of
+ * vCPUs that each touch every page in disjoint regions of memory. Performance
+ * is measured in the time it takes all vCPUs to finish touching their
+ * predefined region.
+ *
+ * Note that a deterministic correctness test of access tracking is not possible
+ * by using page_idle as it exists today. This is for a few reasons:
+ *
+ * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
+ *    means subsequent guest accesses are not guaranteed to see page table
+ *    updates made by KVM until some time in the future.
+ *
+ * 2. page_idle only operates on LRU pages. Newly allocated pages are not
+ *    immediately added to LRU lists. Instead they are held in a "pagevec",
+ *    which is drained to LRU lists some time in the future. There is no
+ *    userspace API to force this drain to occur.
+ *
+ * These limitations are worked around in this test by using a large enough
+ * region of memory for each vCPU such that the number of translations cached in
+ * the TLB and the number of pages held in pagevecs are a small fraction of the
+ * overall workload. And if either of those conditions is not true, this test
+ * will fail rather than silently passing.
+ */
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration = -1;
+
+/* Defines what vCPU threads should do during a given iteration. */
+static enum {
+       /* Run the vCPU to access all its memory. */
+       ITERATION_ACCESS_MEMORY,
+       /* Mark the vCPU's memory idle in page_idle. */
+       ITERATION_MARK_IDLE,
+} iteration_work;
+
+/* Set to true when vCPU threads should exit. */
+static bool done;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+struct test_params {
+       /* The backing source for the region of memory. */
+       enum vm_mem_backing_src_type backing_src;
+
+       /* The amount of memory to allocate for each vCPU. */
+       uint64_t vcpu_memory_bytes;
+
+       /* The number of vCPUs to create in the VM. */
+       int vcpus;
+};
+
+static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+{
+       uint64_t value;
+       off_t offset = index * sizeof(value);
+
+       TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
+                   "pread from %s offset 0x%" PRIx64 " failed!",
+                   filename, offset);
+
+       return value;
+}
+
+#define PAGEMAP_PRESENT (1ULL << 63)
+#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
+
+static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+{
+       uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
+       uint64_t entry;
+       uint64_t pfn;
+
+       entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+       if (!(entry & PAGEMAP_PRESENT))
+               return 0;
+
+       pfn = entry & PAGEMAP_PFN_MASK;
+       if (!pfn) {
+               print_skip("Looking up PFNs requires CAP_SYS_ADMIN");
+               exit(KSFT_SKIP);
+       }
+
+       return pfn;
+}
+
+static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+{
+       uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+
+       return !!((bits >> (pfn % 64)) & 1);
+}
+
+static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+{
+       uint64_t bits = 1ULL << (pfn % 64);
+
+       TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
+                   "Set page_idle bits for PFN 0x%" PRIx64, pfn);
+}
+
+static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
+{
+       uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva;
+       uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages;
+       uint64_t page;
+       uint64_t still_idle = 0;
+       uint64_t no_pfn = 0;
+       int page_idle_fd;
+       int pagemap_fd;
+
+       /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
+       if (overlap_memory_access && vcpu_id)
+               return;
+
+       page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+       TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
+
+       pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+       TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
+
+       for (page = 0; page < pages; page++) {
+               uint64_t gva = base_gva + page * perf_test_args.guest_page_size;
+               uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+
+               if (!pfn) {
+                       no_pfn++;
+                       continue;
+               }
+
+               if (is_page_idle(page_idle_fd, pfn)) {
+                       still_idle++;
+                       continue;
+               }
+
+               mark_page_idle(page_idle_fd, pfn);
+       }
+
+       /*
+        * Assumption: Less than 1% of pages are going to be swapped out from
+        * under us during this test.
+        */
+       TEST_ASSERT(no_pfn < pages / 100,
+                   "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
+                   vcpu_id, no_pfn, pages);
+
+       /*
+        * Test that at least 90% of memory has been marked idle (the rest might
+        * not be marked idle because the pages have not yet made it to an LRU
+        * list or the translations are still cached in the TLB). 90% is
+        * arbitrary; high enough that we ensure most memory access went through
+        * access tracking but low enough so as not to make the test too brittle
+        * over time and across architectures.
+        */
+       TEST_ASSERT(still_idle < pages / 10,
+                   "vCPU%d: Too many pages still idle (%"PRIu64 " out of %"
+                   PRIu64 ").\n",
+                   vcpu_id, still_idle, pages);
+
+       close(page_idle_fd);
+       close(pagemap_fd);
+}
+
+static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id,
+                        uint64_t expected_ucall)
+{
+       struct ucall uc;
+       uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc);
+
+       TEST_ASSERT(expected_ucall == actual_ucall,
+                   "Guest exited unexpectedly (expected ucall %" PRIu64
+                   ", got %" PRIu64 ")",
+                   expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+       int last_iteration = *current_iteration;
+
+       do {
+               if (READ_ONCE(done))
+                       return false;
+
+               *current_iteration = READ_ONCE(iteration);
+       } while (last_iteration == *current_iteration);
+
+       return true;
+}
+
+static void *vcpu_thread_main(void *arg)
+{
+       struct perf_test_vcpu_args *vcpu_args = arg;
+       struct kvm_vm *vm = perf_test_args.vm;
+       int vcpu_id = vcpu_args->vcpu_id;
+       int current_iteration = -1;
+
+       vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+       while (spin_wait_for_next_iteration(&current_iteration)) {
+               switch (READ_ONCE(iteration_work)) {
+               case ITERATION_ACCESS_MEMORY:
+                       vcpu_run(vm, vcpu_id);
+                       assert_ucall(vm, vcpu_id, UCALL_SYNC);
+                       break;
+               case ITERATION_MARK_IDLE:
+                       mark_vcpu_memory_idle(vm, vcpu_id);
+                       break;
+               }
+
+               vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+       }
+
+       return NULL;
+}
+
+static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
+{
+       while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+              target_iteration) {
+               continue;
+       }
+}
+
+/* The type of memory accesses to perform in the VM. */
+enum access_type {
+       ACCESS_READ,
+       ACCESS_WRITE,
+};
+
+static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
+{
+       struct timespec ts_start;
+       struct timespec ts_elapsed;
+       int next_iteration;
+       int vcpu_id;
+
+       /* Kick off the vCPUs by incrementing iteration. */
+       next_iteration = ++iteration;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+       /* Wait for all vCPUs to finish the iteration. */
+       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+               spin_wait_for_vcpu(vcpu_id, next_iteration);
+
+       ts_elapsed = timespec_elapsed(ts_start);
+       pr_info("%-30s: %ld.%09lds\n",
+               description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
+                         const char *description)
+{
+       perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
+       sync_global_to_guest(vm, perf_test_args);
+       iteration_work = ITERATION_ACCESS_MEMORY;
+       run_iteration(vm, vcpus, description);
+}
+
+static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
+{
+       /*
+        * Even though this parallelizes the work across vCPUs, this is still a
+        * very slow operation because page_idle forces the test to mark one pfn
+        * at a time and the clear_young notifier serializes on the KVM MMU
+        * lock.
+        */
+       pr_debug("Marking VM memory idle (slow)...\n");
+       iteration_work = ITERATION_MARK_IDLE;
+       run_iteration(vm, vcpus, "Mark memory idle");
+}
+
+static pthread_t *create_vcpu_threads(int vcpus)
+{
+       pthread_t *vcpu_threads;
+       int i;
+
+       vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
+       TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
+
+       for (i = 0; i < vcpus; i++) {
+               vcpu_last_completed_iteration[i] = iteration;
+               pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
+                              &perf_test_args.vcpu_args[i]);
+       }
+
+       return vcpu_threads;
+}
+
+static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
+{
+       int i;
+
+       /* Set done to signal the vCPU threads to exit */
+       done = true;
+
+       for (i = 0; i < vcpus; i++)
+               pthread_join(vcpu_threads[i], NULL);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+       struct test_params *params = arg;
+       struct kvm_vm *vm;
+       pthread_t *vcpu_threads;
+       int vcpus = params->vcpus;
+
+       vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes,
+                                params->backing_src);
+
+       perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
+                             !overlap_memory_access);
+
+       vcpu_threads = create_vcpu_threads(vcpus);
+
+       pr_info("\n");
+       access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
+
+       /* As a control, read and write to the populated memory first. */
+       access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory");
+       access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory");
+
+       /* Repeat on memory that has been marked as idle. */
+       mark_memory_idle(vm, vcpus);
+       access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory");
+       mark_memory_idle(vm, vcpus);
+       access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
+
+       terminate_vcpu_threads(vcpu_threads, vcpus);
+       free(vcpu_threads);
+       perf_test_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
+              name);
+       puts("");
+       printf(" -h: Display this help message.\n");
+       guest_modes_help();
+       printf(" -b: specify the size of the memory region which should be\n"
+              "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+              "     (default: 1G)\n");
+       printf(" -v: specify the number of vCPUs to run.\n");
+       printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+              "     them into a separate region of memory for each vCPU.\n");
+       printf(" -s: specify the type of memory that should be used to\n"
+              "     back the guest data region.\n\n");
+       backing_src_help();
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct test_params params = {
+               .backing_src = VM_MEM_SRC_ANONYMOUS,
+               .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+               .vcpus = 1,
+       };
+       int page_idle_fd;
+       int opt;
+
+       guest_modes_append_default();
+
+       while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+               switch (opt) {
+               case 'm':
+                       guest_modes_cmdline(optarg);
+                       break;
+               case 'b':
+                       params.vcpu_memory_bytes = parse_size(optarg);
+                       break;
+               case 'v':
+                       params.vcpus = atoi(optarg);
+                       break;
+               case 'o':
+                       overlap_memory_access = true;
+                       break;
+               case 's':
+                       params.backing_src = parse_backing_src_type(optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+       if (page_idle_fd < 0) {
+               print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled");
+               exit(KSFT_SKIP);
+       }
+       close(page_idle_fd);
+
+       for_each_guest_mode(run_test, &params);
+
+       return 0;
+}
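The page_idle interface that the file's header comment describes reduces to a read-modify-write of one bit per PFN in a bitmap file. A minimal sketch of the round trip (assumptions: CAP_SYS_ADMIN, CONFIG_IDLE_PAGE_TRACKING=y, and a PFN already resolved via /proc/self/pagemap; error handling elided):

    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>

    /* Sketch: mark one PFN idle, then test later whether it was referenced. */
    static int pfn_was_accessed(uint64_t pfn)
    {
            int fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
            uint64_t bits = 1ULL << (pfn % 64);
            off_t off = 8 * (pfn / 64);

            pwrite(fd, &bits, sizeof(bits), off); /* set idle; triggers clear_young */
            /* ... let the workload touch (or not touch) the page ... */
            pread(fd, &bits, sizeof(bits), off);
            close(fd);
            /* The kernel clears the idle bit when the page is referenced. */
            return !(bits & (1ULL << (pfn % 64)));
    }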
index 04a2641..80cbd3a 100644 (file)
@@ -312,6 +312,7 @@ int main(int argc, char *argv[])
                        break;
                case 'o':
                        p.partition_vcpu_memory_access = false;
+                       break;
                case 's':
                        p.backing_src = parse_backing_src_type(optarg);
                        break;
index 412eaee..b669107 100644 (file)
 #define HV_X64_GUEST_DEBUGGING_AVAILABLE               BIT(1)
 #define HV_X64_PERF_MONITOR_AVAILABLE                  BIT(2)
 #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE      BIT(3)
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE          BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE           BIT(4)
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE              BIT(5)
 #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE            BIT(8)
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE           BIT(10)
 #define HV_STATUS_INVALID_CONNECTION_ID                18
 #define HV_STATUS_INSUFFICIENT_BUFFERS         19
 
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT          BIT(16)
+
 #endif /* !SELFTEST_KVM_HYPERV_H */
index b0031f2..ecec308 100644 (file)
@@ -320,7 +320,7 @@ int main(int ac, char **av)
                run_delay = get_run_delay();
                pthread_create(&thread, &attr, do_steal_time, NULL);
                do
-                       pthread_yield();
+                       sched_yield();
                while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
                pthread_join(thread, NULL);
                run_delay = get_run_delay() - run_delay;
index bab10ae..e0b2bb1 100644 (file)
@@ -215,7 +215,7 @@ int main(void)
        vcpu_set_hv_cpuid(vm, VCPU_ID);
 
        tsc_page_gva = vm_vaddr_alloc_page(vm);
-       memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
+       memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
        TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
                "TSC page has to be page aligned\n");
        vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
index af27c7e..91d88aa 100644 (file)
@@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val)
 }
 
 static int nr_gp;
+static int nr_ud;
 
 static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
                            vm_vaddr_t output_address)
@@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs)
                regs->rip = (uint64_t)&wrmsr_end;
 }
 
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       nr_ud++;
+       regs->rip += 3;
+}
+
 struct msr_data {
        uint32_t idx;
        bool available;
@@ -90,6 +97,7 @@ struct msr_data {
 struct hcall_data {
        uint64_t control;
        uint64_t expect;
+       bool ud_expected;
 };
 
 static void guest_msr(struct msr_data *msr)
@@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr)
 static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 {
        int i = 0;
+       u64 res, input, output;
 
        wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
        wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
 
        while (hcall->control) {
-               GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
-                                      pgs_gpa + 4096) == hcall->expect);
+               nr_ud = 0;
+               if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+                       input = pgs_gpa;
+                       output = pgs_gpa + 4096;
+               } else {
+                       input = output = 0;
+               }
+
+               res = hypercall(hcall->control, input, output);
+               if (hcall->ud_expected)
+                       GUEST_ASSERT(nr_ud == 1);
+               else
+                       GUEST_ASSERT(res == hcall->expect);
+
                GUEST_SYNC(i++);
        }
 
@@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
                        recomm.ebx = 0xfff;
                        hcall->expect = HV_STATUS_SUCCESS;
                        break;
-
                case 17:
+                       /* XMM fast hypercall */
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+                       hcall->ud_expected = true;
+                       break;
+               case 18:
+                       feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+                       hcall->ud_expected = false;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+
+               case 19:
                        /* END */
                        hcall->control = 0;
                        break;
@@ -625,6 +656,10 @@ int main(void)
        /* Test hypercalls */
        vm = vm_create_default(VCPU_ID, 0, guest_hcall);
 
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
        /* Hypercall input/output */
        hcall_page = vm_vaddr_alloc_pages(vm, 2);
        memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
index 5b169e9..4f9f73e 100644 (file)
@@ -38,8 +38,10 @@ TEST_GEN_FILES += reuseaddr_ports_exhausted
 TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
 TEST_GEN_FILES += ipsec
 TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644 (file)
index 0000000..cfc7f4f
--- /dev/null
@@ -0,0 +1,5 @@
+##TEST_GEN_FILES := test_unix_oob
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644 (file)
index 0000000..0f3e376
--- /dev/null
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+       signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+       struct sigaction sa;
+
+       sa.sa_sigaction = sig_hand;
+       sigemptyset(&sa.sa_mask);
+       sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+       return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+       int flags = fcntl(fd, F_GETFL, 0);
+
+       if (set)
+               flags &= ~O_NONBLOCK;
+       else
+               flags |= O_NONBLOCK;
+       fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+       char cmd;
+
+       cmd = 'S';
+       write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+       char buf[5];
+
+       read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+       fflush(NULL);
+       unlink(sock_name);
+       kill(producer_id, SIGTERM);
+       exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+       int ans = -1;
+
+       if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+               perror("SIOCATMARK Failed");
+#endif
+       }
+       return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+       *c = ' ';
+       if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+               perror("Reading MSG_OOB Failed");
+#endif
+       }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+       int len = 0;
+
+       memset(buf, 0, size);
+       len = read(pfd, buf, size);
+#ifdef DEBUG
+       if (len < 0)
+               perror("read failed");
+#endif
+       return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+       struct pollfd pfds[1];
+
+       pfds[0].fd = pfd;
+       pfds[0].events = event;
+       poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+       int cfd;
+       char buf[64];
+       int i;
+
+       memset(buf, 'x', sizeof(buf));
+       cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+       wait_for_signal(pipefd[0]);
+       if (connect(cfd, (struct sockaddr *)consumer_addr,
+                    sizeof(struct sockaddr_un)) != 0) {
+               perror("Connect failed");
+               kill(0, SIGTERM);
+               exit(1);
+       }
+
+       for (i = 0; i < 2; i++) {
+               /* Test 1: Test for SIGURG and OOB */
+               wait_for_signal(pipefd[0]);
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '@';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 2: Test for OOB being overwritten */
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '%';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '#';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 3: Test for SIOCATMARK */
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '@';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '%';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               send(cfd, buf, sizeof(buf), 0);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 4: Test for 1byte OOB msg */
+               memset(buf, 'x', sizeof(buf));
+               buf[0] = '@';
+               send(cfd, buf, 1, MSG_OOB);
+       }
+}
+
+int
+main(int argc, char **argv)
+{
+       int lfd, pfd;
+       struct sockaddr_un consumer_addr, paddr;
+       socklen_t len = sizeof(consumer_addr);
+       char buf[1024];
+       int on = 0;
+       char oob;
+       int flags;
+       int atmark;
+       char *tmp_file;
+
+       lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+       memset(&consumer_addr, 0, sizeof(consumer_addr));
+       consumer_addr.sun_family = AF_UNIX;
+       sprintf(sock_name, "unix_oob_%d", getpid());
+       unlink(sock_name);
+       strcpy(consumer_addr.sun_path, sock_name);
+
+       if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+                 sizeof(consumer_addr))) != 0) {
+               perror("socket bind failed");
+               exit(1);
+       }
+
+       pipe(pipefd);
+
+       listen(lfd, 1);
+
+       producer_id = fork();
+       if (producer_id == 0) {
+               producer(&consumer_addr);
+               exit(0);
+       }
+
+       set_sig_handler(SIGURG);
+       signal_producer(pipefd[1]);
+
+       pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+       fcntl(pfd, F_SETOWN, getpid());
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 1:
+        * verify that SIGURG is
+        * delivered and 63 bytes are
+        * read and oob is '@'
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       read_oob(pfd, &oob);
+       len = read_data(pfd, buf, 1024);
+       if (!signal_recvd || len != 63 || oob != '@') {
+               fprintf(stderr, "Test 1 failed, sigurg %d len %d OOB %c\n",
+                       signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 2:
+        * Verify that the first OOB is overwritten by
+        * the 2nd one and the first OOB is returned as
+        * part of the read, and sigurg is received.
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = 0;
+       while (len < 70)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       read_oob(pfd, &oob);
+       if (!signal_recvd || len != 127 || oob != '#') {
+               fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+                       signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 3:
+        * verify that the 2nd oob overwrites
+        * the first one and read breaks at
+        * oob boundary returning 127 bytes
+        * and sigurg is received and atmark
+        * is set.
+        * oob is '%' and second read returns
+        * 64 bytes.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 150)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       read_oob(pfd, &oob);
+
+       if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+               fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+                       signal_recvd, len, oob, atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+
+       len = read_data(pfd, buf, 1024);
+       if (len != 64) {
+               fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+                       signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 4:
+        * verify that a single byte
+        * oob message is delivered.
+        * set non blocking mode and
+        * check proper error is
+        * returned and sigurg is
+        * received and correct
+        * oob is read.
+        */
+
+       set_filemode(pfd, 0);
+
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+       if ((len == -1) && (errno == EAGAIN))
+               len = 0;
+
+       read_oob(pfd, &oob);
+
+       if (!signal_recvd || len != 0 || oob != '@') {
+               fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+                        signal_recvd, len, oob);
+               die(1);
+       }
+
+       set_filemode(pfd, 1);
+
+       /* Inline Testing */
+
+       on = 1;
+       if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+               perror("SO_OOBINLINE");
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 1 -- Inline:
+        * Check that SIGURG is
+        * delivered and 63 bytes are
+        * read and oob is '@'
+        */
+
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+
+       if (!signal_recvd || len != 63) {
+               fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+                       signal_recvd, len);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+
+       if (len != 1) {
+               fprintf(stderr,
+                        "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+                        signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 2 -- Inline:
+        * Verify that the first OOB is overwritten by
+        * the 2nd one and read breaks correctly on
+        * 2nd OOB boundary with the first OOB returned as
+        * part of the read, and sigurg is delivered and
+        * siocatmark returns true.
+        * next read returns one byte, the oob byte
+        * and siocatmark returns false.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 70)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 127 || atmark != 1 || !signal_recvd) {
+               fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+                        len, atmark);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 1 || buf[0] != '#' || atmark == 1) {
+               fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+                       len, buf[0], atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 3 -- Inline:
+        * verify that the 2nd oob overwrites
+        * the first one and read breaks at
+        * oob boundary returning 127 bytes
+        * and sigurg is received and siocatmark
+        * is true after the read.
+        * subsequent read returns 65 bytes
+        * because of oob which should be '%'.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 126)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (!signal_recvd || len != 127 || !atmark) {
+               fprintf(stderr,
+                        "Test 3 Inline failed, sigurg %d len %d data %c\n",
+                        signal_recvd, len, buf[0]);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 65 || buf[0] != '%' || atmark != 0) {
+               fprintf(stderr,
+                        "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+                        len, buf[0], atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 4 -- Inline:
+        * verify that a single
+        * byte oob message is delivered
+        * and read returns one byte, the oob
+        * byte and sigurg is received
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+       if (!signal_recvd || len != 1 || buf[0] != '@') {
+               fprintf(stderr,
+                       "Test 4 Inline failed, signal %d len %d data %c\n",
+               signal_recvd, len, buf[0]);
+               die(1);
+       }
+       die(0);
+}
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644 (file)
index 0000000..cf37ce8
--- /dev/null
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ *  Data packets of the same size and same header setup with correct
+ *  sequence numbers coalesce. The one exception is the last data
+ *  packet: it can be smaller than the rest and still coalesce
+ *  as long as it is in the same flow.
+ * 2.ack
+ *  Pure ACKs do not coalesce.
+ * 3.flags
+ *  Specific test cases: no packets with PSH, SYN, URG or RST set will
+ *  be coalesced.
+ * 4.tcp
+ *  Packets with an incorrect checksum, non-consecutive seqno or
+ *  different TCP header options shouldn't coalesce. Nit: given that
+ *  some extension headers have padding, such as timestamp, headers
+ *  that are padded differently will not be coalesced.
+ * 5.ip
+ *  Packets with different (ECN, TTL, TOS) header fields, ip options or
+ *  ip fragments (ipv6) shouldn't coalesce.
+ * 6.large
+ *  Packets larger than GRO_MAX_SIZE shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (e.g. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because,
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note that the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
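+
+/* For reference: with the usual 20-byte tcphdr and 40-byte ipv6hdr,
+ * MSS = 4096 - 20 - 40 = 4036 and MAX_PAYLOAD = 65535 - 20 - 40 = 65475,
+ * so the "large" test sends NUM_LARGE_PKT = 65475 / 4036 = 16 full segments.
+ */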
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+       va_list args;
+
+       if (verbose) {
+               va_start(args, fmt);
+               vfprintf(stderr, fmt, args);
+               va_end(args);
+       }
+}
+
+static void setup_sock_filter(int fd)
+{
+       const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+       const int ethproto_off = offsetof(struct ethhdr, h_proto);
+       int optlen = 0;
+       int ipproto_off;
+       int next_off;
+
+       if (proto == PF_INET)
+               next_off = offsetof(struct iphdr, protocol);
+       else
+               next_off = offsetof(struct ipv6hdr, nexthdr);
+       ipproto_off = ETH_HLEN + next_off;
+
+       if (strcmp(testname, "ip") == 0) {
+               if (proto == PF_INET)
+                       optlen = sizeof(struct ip_timestamp);
+               else
+                       optlen = sizeof(struct ip6_frag);
+       }
+
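+       /* cBPF program: accept a packet only if its EtherType matches the
+        * configured protocol, its IP protocol is TCP, and its TCP dest
+        * port is DPORT -- checked both at dport_off and at dport_off +
+        * optlen, so packets from the "ip" test case, whose option or
+        * extension header shifts the TCP header, also match. Everything
+        * else falls through to the final RET 0 (drop).
+        */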
+       struct sock_filter filter[] = {
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+                       BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+                       BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+                       BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+
+       struct sock_fprog bpf = {
+               .len = ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+               error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+       uint16_t *words = data;
+       int i;
+
+       for (i = 0; i < len / 2; i++)
+               sum += words[i];
+       if (len & 1)
+               sum += ((char *)data)[len - 1];
+       return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+       sum = checksum_nofold(data, len, sum);
+       while (sum > 0xFFFF)
+               sum = (sum & 0xFFFF) + (sum >> 16);
+       return ~sum;
+}
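+
+/* Worked example for the fold above: a running sum of 0x12345 folds to
+ * 0x2345 + 0x1 = 0x2346, and its one's complement 0xdcb9 is the checksum.
+ */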
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+       struct pseudo_header6 {
+               struct in6_addr saddr;
+               struct in6_addr daddr;
+               uint16_t protocol;
+               uint16_t payload_len;
+       } ph6;
+       struct pseudo_header4 {
+               struct in_addr saddr;
+               struct in_addr daddr;
+               uint16_t protocol;
+               uint16_t payload_len;
+       } ph4;
+       uint32_t sum = 0;
+
+       if (proto == PF_INET6) {
+               if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+                       error(1, errno, "inet_pton6 source ip pseudo");
+               if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+                       error(1, errno, "inet_pton6 dest ip pseudo");
+               ph6.protocol = htons(IPPROTO_TCP);
+               ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+               sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+       } else if (proto == PF_INET) {
+               if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+                       error(1, errno, "inet_pton source ip pseudo");
+               if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+                       error(1, errno, "inet_pton dest ip pseudo");
+               ph4.protocol = htons(IPPROTO_TCP);
+               ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+               sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+       }
+
+       return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+       if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+                  &mac_addr[0], &mac_addr[1], &mac_addr[2],
+                  &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+               error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+       struct ethhdr *eth = buf;
+
+       memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+       memcpy(eth->h_source, src_mac, ETH_ALEN);
+       eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+       struct ipv6hdr *ip6h = buf;
+       struct iphdr *iph = buf;
+
+       if (proto == PF_INET6) {
+               memset(ip6h, 0, sizeof(*ip6h));
+
+               ip6h->version = 6;
+               ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+               ip6h->nexthdr = IPPROTO_TCP;
+               ip6h->hop_limit = 8;
+               if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+                       error(1, errno, "inet_pton source ip6");
+               if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+                       error(1, errno, "inet_pton dest ip6");
+       } else if (proto == PF_INET) {
+               memset(iph, 0, sizeof(*iph));
+
+               iph->version = 4;
+               iph->ihl = 5;
+               iph->ttl = 8;
+               iph->protocol   = IPPROTO_TCP;
+               iph->tot_len = htons(sizeof(struct tcphdr) +
+                               payload_len + sizeof(struct iphdr));
+               iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+               if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+                       error(1, errno, "inet_pton source ip");
+               if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+                       error(1, errno, "inet_pton dest ip");
+               iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+       }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+                               int payload_len, int fin)
+{
+       struct tcphdr *tcph = buf;
+
+       memset(tcph, 0, sizeof(*tcph));
+
+       tcph->source = htons(SPORT);
+       tcph->dest = htons(DPORT);
+       tcph->seq = htonl(START_SEQ + seq_offset);
+       tcph->ack_seq = htonl(START_ACK + ack_offset);
+       tcph->ack = 1;
+       tcph->fin = fin;
+       tcph->doff = 5;
+       tcph->window = htons(TCP_MAXWIN);
+       tcph->urg_ptr = 0;
+       tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+       int ret = -1;
+
+       ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+       if (ret == -1)
+               error(1, errno, "sendto failure");
+       if (ret != len)
+               error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+                         int payload_len, int fin)
+{
+       memset(buf, 0, total_hdr_len);
+       memset(buf + total_hdr_len, 'a', payload_len);
+       fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+                           payload_len, fin);
+       fill_networklayer(buf + ETH_HLEN, payload_len);
+       fill_datalinklayer(buf);
+}
+
+/* Send one packet with an extra flag set, neither first nor last in the burst */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+                      int rst, int urg)
+{
+       static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       int payload_len, pkt_size, flag, i;
+       struct tcphdr *tcph;
+
+       payload_len = PAYLOAD_LEN * psh;
+       pkt_size = total_hdr_len + payload_len;
+       flag = NUM_PACKETS / 2;
+
+       create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+       tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+       tcph->psh = psh;
+       tcph->syn = syn;
+       tcph->rst = rst;
+       tcph->urg = urg;
+       tcph->check = 0;
+       tcph->check = tcp_checksum(tcph, payload_len);
+
+       for (i = 0; i < NUM_PACKETS + 1; i++) {
+               if (i == flag) {
+                       write_packet(fd, flag_buf, pkt_size, daddr);
+                       continue;
+               }
+               create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+               write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+       }
+}
+
+/* Send two data packets; covers payloads of the same length,
+ * a payload smaller than the previous one, and different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+                          int payload_len1, int payload_len2)
+{
+       static char buf[ETH_HLEN + IP_MAXPACKET];
+
+       create_packet(buf, 0, 0, payload_len1, 0);
+       write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+       create_packet(buf, payload_len1, 0, payload_len2, 0);
+       write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make the tracked segment length exceed
+ * the legal IP datagram length, they must not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+       static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+       static char last[TOTAL_HDR_LEN + MSS];
+       static char new_seg[TOTAL_HDR_LEN + MSS];
+       int i;
+
+       for (i = 0; i < NUM_LARGE_PKT; i++)
+               create_packet(pkts[i], i * MSS, 0, MSS, 0);
+       create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+       create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+       for (i = 0; i < NUM_LARGE_PKT; i++)
+               write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+       write_packet(fd, last, total_hdr_len + remainder, daddr);
+       write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure ACKs and dup ACKs don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN];
+
+       create_packet(buf, 0, 0, 0, 0);
+       write_packet(fd, buf, total_hdr_len, daddr);
+       write_packet(fd, buf, total_hdr_len, daddr);
+       create_packet(buf, 0, 1, 0, 0);
+       write_packet(fd, buf, total_hdr_len, daddr);
+}
+
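+/* Rebuild the packet from no_ext, leaving extlen bytes of room for TCP
+ * options between the headers and the payload, then fix up doff, the TCP
+ * checksum and the IP length fields (plus the IPv4 header checksum).
+ */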
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+       struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       memmove(buf, no_ext, total_hdr_len);
+       memmove(buf + total_hdr_len + extlen,
+               no_ext + total_hdr_len, PAYLOAD_LEN);
+
+       tcphdr->doff = tcphdr->doff + (extlen / 4);
+       tcphdr->check = 0;
+       tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+       if (proto == PF_INET) {
+               iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else {
+               ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+       }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+       struct tcp_option_ts {
+               uint8_t kind;
+               uint8_t len;
+               uint32_t tsval;
+               uint32_t tsecr;
+       } *opt_ts = (void *)buf;
+       struct tcp_option_window {
+               uint8_t kind;
+               uint8_t len;
+               uint8_t shift;
+       } *opt_window = (void *)buf;
+
+       switch (kind) {
+       case TCPOPT_NOP:
+               buf[0] = TCPOPT_NOP;
+               break;
+       case TCPOPT_WINDOW:
+               memset(opt_window, 0, sizeof(struct tcp_option_window));
+               opt_window->kind = TCPOPT_WINDOW;
+               opt_window->len = TCPOLEN_WINDOW;
+               opt_window->shift = 0;
+               break;
+       case TCPOPT_TIMESTAMP:
+               memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+               opt_ts->kind = TCPOPT_TIMESTAMP;
+               opt_ts->len = TCPOLEN_TIMESTAMP;
+               opt_ts->tsval = ts;
+               opt_ts->tsecr = 0;
+               break;
+       default:
+               error(1, 0, "unimplemented TCP option");
+               break;
+       }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
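+/* Note: all three orders occupy TCPOLEN_TSTAMP_APPA (12) bytes -- the
+ * 10-byte timestamp option plus two NOPs -- so doff is identical across
+ * the variants.
+ */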
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+       switch (order) {
+       case 0:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+                                 TCPOPT_TIMESTAMP, ts);
+               break;
+       case 1:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 1,
+                                 TCPOPT_TIMESTAMP, ts);
+               tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+                                 TCPOPT_NOP, 0);
+               break;
+       case 2:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+               tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+                                 TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+                                 TCPOPT_NOP, 0);
+               break;
+       default:
+               error(1, 0, "unknown order");
+               break;
+       }
+       recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with an invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       tcph->check = tcph->check - 1;
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with non-consecutive sequence numbers don't coalesce. */
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       tcph->seq = htonl(ntohl(tcph->seq) + 1);
+       tcph->check = 0;
+       tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with a different timestamp option or different
+ * timestamp values don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+       int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 0, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 0, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 1);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 2);
+       write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packets with different TCP options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+       static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+       int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+       int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt1, buf, 0, 0);
+       write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt1, buf, 0, 0);
+       write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+       tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+       recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+       write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+       struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+       int optlen = sizeof(struct ip_timestamp);
+       struct iphdr *iph;
+
+       if (optlen % 4)
+               error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+       ts->ipt_code = IPOPT_TS;
+       ts->ipt_len = optlen;
+       ts->ipt_ptr = 5;
+       ts->ipt_flg = IPOPT_TS_TSONLY;
+
+       memcpy(optpkt, buf, tcp_offset);
+       memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+              sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+       iph = (struct iphdr *)(optpkt + ETH_HLEN);
+       iph->ihl = 5 + (optlen / 4);
+       iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* Packets with IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+       int optlen = sizeof(struct ip_timestamp);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+       add_ipv4_ts_option(buf, optpkt);
+       write_packet(fd, optpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[IP_MAXPACKET];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       /* Once fragmented, the packet retains its total length.
+        * The TCP header is prepared as if the rest of the data were in
+        * follow-up fragments, but those fragments are never actually sent.
+        */
+       memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+       fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+       fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+       fill_datalinklayer(buf);
+
+       iph->frag_off = htons(0x6000); /* DF = 1, MF = 1 */
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with a different TTL don't coalesce. */
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       iph->ttl = 7;
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with a different ToS don't coalesce. */
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       if (proto == PF_INET) {
+               iph->tos = 1;
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else if (proto == PF_INET6) {
+               ip6h->priority = 0xf;
+       }
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce. */
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       if (proto == PF_INET) {
+               buf[ETH_HLEN + 1] ^= 0x2; /* ECN set to 10 */
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else {
+               buf[ETH_HLEN + 1] ^= 0x20; /* ECN set to 10 */
+       }
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extension headers don't coalesce. */
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+                          sizeof(struct ip6_frag)];
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+       struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+       int extlen = sizeof(struct ip6_frag);
+       int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+       int extpkt_len = bufpkt_len + extlen;
+       int i;
+
+       for (i = 0; i < 2; i++) {
+               create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+               write_packet(fd, buf, bufpkt_len, daddr);
+       }
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       memset(extpkt, 0, extpkt_len);
+
+       ip6h->nexthdr = IPPROTO_FRAGMENT;
+       ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+       frag->ip6f_nxt = IPPROTO_TCP;
+
+       memcpy(extpkt, buf, tcp_offset);
+       memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+              sizeof(struct tcphdr) + PAYLOAD_LEN);
+       write_packet(fd, extpkt, extpkt_len, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+       struct sockaddr_ll daddr = {};
+
+       daddr.sll_family = AF_PACKET;
+       daddr.sll_protocol = ethhdr_proto;
+       daddr.sll_ifindex = if_nametoindex(ifname);
+       if (daddr.sll_ifindex == 0)
+               error(1, errno, "if_nametoindex");
+
+       if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+               error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+       struct timeval timeout;
+
+       timeout.tv_sec = 120;
+       timeout.tv_usec = 0;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+                      sizeof(timeout)) < 0)
+               error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+                           int correct_num_pkts)
+{
+       static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+       struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+       struct tcphdr *tcph;
+       bool bad_packet = false;
+       int tcp_ext_len = 0;
+       int ip_ext_len = 0;
+       int pkt_size = -1;
+       int data_len = 0;
+       int num_pkt = 0;
+       int i;
+
+       vlog("Expected {");
+       for (i = 0; i < correct_num_pkts; i++)
+               vlog("%d ", correct_payload[i]);
+       vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+       while (1) {
+               pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+               if (pkt_size < 0)
+                       error(1, errno, "could not receive");
+
+               if (iph->version == 4)
+                       ip_ext_len = (iph->ihl - 5) * 4;
+               else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+                       ip_ext_len = sizeof(struct ip6_frag);
+
+               tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+               if (tcph->fin)
+                       break;
+
+               tcp_ext_len = (tcph->doff - 5) * 4;
+               data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+               /* Min Ethernet frame payload is 46 (ETH_ZLEN - ETH_HLEN) per
+                * IEEE 802.3. IPv4/TCP packets without at least 6 bytes of
+                * data will be padded. Packet sockets are protocol agnostic
+                * and will not trim the padding.
+                */
+               if (pkt_size == ETH_ZLEN && iph->version == 4) {
+                       data_len = ntohs(iph->tot_len)
+                               - sizeof(struct tcphdr) - sizeof(struct iphdr);
+               }
+               vlog("%d ", data_len);
+               if (data_len != correct_payload[num_pkt]) {
+                       vlog("[!=%d]", correct_payload[num_pkt]);
+                       bad_packet = true;
+               }
+               num_pkt++;
+       }
+       vlog("}, Total %d packets.\n", num_pkt);
+       if (num_pkt != correct_num_pkts)
+               error(1, 0, "incorrect number of packets");
+       if (bad_packet)
+               error(1, 0, "incorrect packet geometry");
+
+       printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+       static char fin_pkt[MAX_HDR_LEN];
+       struct sockaddr_ll daddr = {};
+       int txfd = -1;
+
+       txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+       if (txfd < 0)
+               error(1, errno, "socket creation");
+
+       memset(&daddr, 0, sizeof(daddr));
+       daddr.sll_ifindex = if_nametoindex(ifname);
+       if (daddr.sll_ifindex == 0)
+               error(1, errno, "if_nametoindex");
+       daddr.sll_family = AF_PACKET;
+       memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+       daddr.sll_halen = ETH_ALEN;
+       create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+       if (strcmp(testname, "data") == 0) {
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "ack") == 0) {
+               send_ack(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "flags") == 0) {
+               send_flags(txfd, &daddr, 1, 0, 0, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 1, 0, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 0, 1, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 0, 0, 1);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "tcp") == 0) {
+               send_changed_checksum(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_seq(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_ts(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_diff_opt(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "ip") == 0) {
+               send_changed_ECN(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_tos(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               if (proto == PF_INET) {
+                       /* Modified packets may be received out of order.
+                        * Sleep calls enforce test boundaries so that
+                        * FIN packets are not received before the other
+                        * packets.
+                        */
+                       sleep(1);
+                       send_changed_ttl(txfd, &daddr);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+                       sleep(1);
+                       send_ip_options(txfd, &daddr);
+                       sleep(1);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+                       sleep(1);
+                       send_fragment4(txfd, &daddr);
+                       sleep(1);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               } else if (proto == PF_INET6) {
+                       send_fragment6(txfd, &daddr);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               }
+       } else if (strcmp(testname, "large") == 0) {
+               /* 20 is the difference between the min iphdr size
+                * and the min ipv6hdr size. Like MAX_HDR_LEN,
+                * MAX_PAYLOAD is defined with the larger header of the two.
+                */
+               int offset = proto == PF_INET ? 20 : 0;
+               int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+               send_large(txfd, &daddr, remainder);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_large(txfd, &daddr, remainder + 1);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else {
+               error(1, 0, "Unknown testcase");
+       }
+
+       if (close(txfd))
+               error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+       static int correct_payload[NUM_PACKETS];
+       int rxfd = -1;
+
+       rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+       if (rxfd < 0)
+               error(1, errno, "socket creation");
+       setup_sock_filter(rxfd);
+       set_timeout(rxfd);
+       bind_packetsocket(rxfd);
+
+       memset(correct_payload, 0, sizeof(correct_payload));
+
+       if (strcmp(testname, "data") == 0) {
+               printf("pure data packet of same size: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 1);
+
+               printf("large data packets followed by a smaller one: ");
+               correct_payload[0] = PAYLOAD_LEN * 1.5;
+               check_recv_pkts(rxfd, correct_payload, 1);
+
+               printf("small data packets followed by a larger one: ");
+               correct_payload[0] = PAYLOAD_LEN / 2;
+               correct_payload[1] = PAYLOAD_LEN;
+               check_recv_pkts(rxfd, correct_payload, 2);
+       } else if (strcmp(testname, "ack") == 0) {
+               printf("duplicate ack and pure ack: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else if (strcmp(testname, "flags") == 0) {
+               correct_payload[0] = PAYLOAD_LEN * 3;
+               correct_payload[1] = PAYLOAD_LEN * 2;
+
+               printf("psh flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               correct_payload[1] = 0;
+               correct_payload[2] = PAYLOAD_LEN * 2;
+               printf("syn flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+
+               printf("rst flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+
+               printf("urg flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else if (strcmp(testname, "tcp") == 0) {
+               correct_payload[0] = PAYLOAD_LEN;
+               correct_payload[1] = PAYLOAD_LEN;
+               correct_payload[2] = PAYLOAD_LEN;
+               correct_payload[3] = PAYLOAD_LEN;
+
+               printf("changed checksum does not coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("Wrong Seq number doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("Different timestamp doesn't coalesce: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 4);
+
+               printf("Different options don't coalesce: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 2);
+       } else if (strcmp(testname, "ip") == 0) {
+               correct_payload[0] = PAYLOAD_LEN;
+               correct_payload[1] = PAYLOAD_LEN;
+
+               printf("different ECN doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("different tos doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               if (proto == PF_INET) {
+                       printf("different ttl doesn't coalesce: ");
+                       check_recv_pkts(rxfd, correct_payload, 2);
+
+                       printf("ip options don't coalesce: ");
+                       correct_payload[2] = PAYLOAD_LEN;
+                       check_recv_pkts(rxfd, correct_payload, 3);
+
+                       printf("fragmented ip4 doesn't coalesce: ");
+                       check_recv_pkts(rxfd, correct_payload, 2);
+               } else if (proto == PF_INET6) {
+                       /* GRO doesn't check for ipv6 hop limit when flushing.
+                        * Hence no corresponding test to the ipv4 case.
+                        */
+                       printf("fragmented ip6 doesn't coalesce: ");
+                       correct_payload[0] = PAYLOAD_LEN * 2;
+                       check_recv_pkts(rxfd, correct_payload, 2);
+               }
+       } else if (strcmp(testname, "large") == 0) {
+               int offset = proto == PF_INET ? 20 : 0;
+               int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+               correct_payload[0] = (MAX_PAYLOAD + offset);
+               correct_payload[1] = remainder;
+               printf("Shouldn't coalesce if exceeding IP max packet size: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               /* last segment sent individually, doesn't start new segment */
+               correct_payload[0] = correct_payload[0] - remainder;
+               correct_payload[1] = remainder + 1;
+               correct_payload[2] = remainder + 1;
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else {
+               error(1, 0, "Test case error, should never trigger");
+       }
+
+       if (close(rxfd))
+               error(1, errno, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+       static const struct option opts[] = {
+               { "dmac", required_argument, NULL, 'D' },
+               { "iface", required_argument, NULL, 'i' },
+               { "ipv4", no_argument, NULL, '4' },
+               { "ipv6", no_argument, NULL, '6' },
+               { "rx", no_argument, NULL, 'r' },
+               { "smac", required_argument, NULL, 'S' },
+               { "test", required_argument, NULL, 't' },
+               { "verbose", no_argument, NULL, 'v' },
+               { 0, 0, 0, 0 }
+       };
+       int c;
+
+       while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+               switch (c) {
+               case '4':
+                       proto = PF_INET;
+                       ethhdr_proto = htons(ETH_P_IP);
+                       break;
+               case '6':
+                       proto = PF_INET6;
+                       ethhdr_proto = htons(ETH_P_IPV6);
+                       break;
+               case 'D':
+                       dmac = optarg;
+                       break;
+               case 'i':
+                       ifname = optarg;
+                       break;
+               case 'r':
+                       tx_socket = false;
+                       break;
+               case 'S':
+                       smac = optarg;
+                       break;
+               case 't':
+                       testname = optarg;
+                       break;
+               case 'v':
+                       verbose = true;
+                       break;
+               default:
+                       error(1, 0, "%s invalid option %c\n", __func__, c);
+                       break;
+               }
+       }
+}
+
+int main(int argc, char **argv)
+{
+       parse_args(argc, argv);
+
+       if (proto == PF_INET) {
+               tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+               total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+       } else if (proto == PF_INET6) {
+               tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+               total_hdr_len = MAX_HDR_LEN;
+       } else {
+               error(1, 0, "Protocol family is not ipv4 or ipv6");
+       }
+
+       read_MAC(src_mac, smac);
+       read_MAC(dst_mac, dmac);
+
+       if (tx_socket)
+               gro_sender();
+       else
+               gro_receiver();
+       return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755 (executable)
index 0000000..794d2bf
--- /dev/null
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source setup_loopback.sh
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev="eth0"
+test="all"
+proto="ipv4"
+
+setup_interrupt() {
+  # Use a timer on the host to trigger the network stack.
+  # Also disable device interrupts so we do not depend on NIC interrupts,
+  # reducing test flakiness caused by unexpected interrupts.
+  echo 100000 >"${FLUSH_PATH}"
+  echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+  # Set up server_ns namespace and client_ns namespace
+  setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+  setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+  cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+  setup_loopback_environment "${dev}"
+  setup_interrupt
+}
+
+cleanup() {
+  cleanup_loopback "${dev}"
+
+  echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+  echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
+
+run_test() {
+  local server_pid=0
+  local exit_code=0
+  local protocol=$1
+  local test=$2
+  local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+  "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+  setup_ns
+  # Each test is run 3 times to deflake, because given the receive timing,
+  # not all packets that should coalesce will be considered in the same flow
+  # on every try.
+  for tries in {1..3}; do
+    # Actual test starts here
+    ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+      1>>log.txt &
+    server_pid=$!
+    sleep 0.5  # to allow for socket init
+    ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+      1>>log.txt
+    wait "${server_pid}"
+    exit_code=$?
+    if [[ "${exit_code}" -eq 0 ]]; then
+        break;
+    fi
+  done
+  cleanup_ns
+  echo ${exit_code}
+}
+
+run_all_tests() {
+  local failed_tests=()
+  for proto in "${PROTOS[@]}"; do
+    for test in "${TESTS[@]}"; do
+      echo "running test ${proto} ${test}" >&2
+      exit_code=$(run_test $proto $test)
+      if [[ "${exit_code}" -ne 0 ]]; then
+        failed_tests+=("${proto}_${test}")
+      fi;
+    done;
+  done
+  if [[ ${#failed_tests[@]} -ne 0 ]]; then
+    echo "failed tests: ${failed_tests[*]}. Please see log.txt for more logs"
+    exit 1
+  else
+    echo "All Tests Succeeded!"
+  fi;
+}
+
+usage() {
+  echo "Usage: $0 \
+  [-i <DEV>] \
+  [-t data|ack|flags|tcp|ip|large] \
+  [-p <ipv4|ipv6>]" 1>&2;
+  exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+  case "${opt}" in
+    i)
+      dev="${OPTARG}"
+      ;;
+    t)
+      test="${OPTARG}"
+      ;;
+    p)
+      proto="${OPTARG}"
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+  run_all_tests
+else
+  run_test "${proto}" "${test}"
+fi;
index f23438d..3d7dde2 100644 (file)
@@ -484,13 +484,16 @@ enum desc_type {
        MONITOR_ACQUIRE,
        EXPIRE_STATE,
        EXPIRE_POLICY,
+       SPDINFO_ATTRS,
 };
 const char *desc_name[] = {
        "create tunnel",
        "alloc spi",
        "monitor acquire",
        "expire state",
-       "expire policy"
+       "expire policy",
+       "spdinfo attributes",
+       ""
 };
 struct xfrm_desc {
        enum desc_type  type;
@@ -1593,6 +1596,155 @@ out_close:
        return ret;
 }
 
+static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
+               unsigned thresh4_l, unsigned thresh4_r,
+               unsigned thresh6_l, unsigned thresh6_r,
+               bool add_bad_attr)
+{
+       struct {
+               struct nlmsghdr         nh;
+               union {
+                       uint32_t        unused;
+                       int             error;
+               };
+               char                    attrbuf[MAX_PAYLOAD];
+       } req;
+       struct xfrmu_spdhthresh thresh;
+
+       memset(&req, 0, sizeof(req));
+       req.nh.nlmsg_len        = NLMSG_LENGTH(sizeof(req.unused));
+       req.nh.nlmsg_type       = XFRM_MSG_NEWSPDINFO;
+       req.nh.nlmsg_flags      = NLM_F_REQUEST | NLM_F_ACK;
+       req.nh.nlmsg_seq        = (*seq)++;
+
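+       /* lbits/rbits are the local/remote address prefix-length thresholds
+        * that control which SPD entries the kernel hashes by prefix (see
+        * struct xfrmu_spdhthresh in the xfrm UAPI); noted here for context.
+        */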
+       thresh.lbits = thresh4_l;
+       thresh.rbits = thresh4_r;
+       if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
+               return -1;
+
+       thresh.lbits = thresh6_l;
+       thresh.rbits = thresh6_r;
+       if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
+               return -1;
+
+       if (add_bad_attr) {
+               BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
+               if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
+                       pr_err("adding attribute failed: no space");
+                       return -1;
+               }
+       }
+
+       if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+               pr_err("send()");
+               return -1;
+       }
+
+       if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+               pr_err("recv()");
+               return -1;
+       } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+               printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+               return -1;
+       }
+
+       if (req.error) {
+               printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+               return -1;
+       }
+
+       return 0;
+}
+
+static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
+{
+       struct {
+               struct nlmsghdr                 nh;
+               union {
+                       uint32_t        unused;
+                       int             error;
+               };
+               char                    attrbuf[MAX_PAYLOAD];
+       } req;
+
+       if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
+               pr_err("Can't set SPD HTHRESH");
+               return KSFT_FAIL;
+       }
+
+       memset(&req, 0, sizeof(req));
+
+       req.nh.nlmsg_len        = NLMSG_LENGTH(sizeof(req.unused));
+       req.nh.nlmsg_type       = XFRM_MSG_GETSPDINFO;
+       req.nh.nlmsg_flags      = NLM_F_REQUEST;
+       req.nh.nlmsg_seq        = (*seq)++;
+       if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+               pr_err("send()");
+               return KSFT_FAIL;
+       }
+
+       if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+               pr_err("recv()");
+               return KSFT_FAIL;
+       } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
+               size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
+               struct rtattr *attr = (void *)req.attrbuf;
+               int got_thresh = 0;
+
+               for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+                       if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
+                               struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+                               got_thresh++;
+                               if (t->lbits != 32 || t->rbits != 31) {
+                                       pr_err("thresh differ: %u, %u",
+                                                       t->lbits, t->rbits);
+                                       return KSFT_FAIL;
+                               }
+                       }
+                       if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
+                               struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+                               got_thresh++;
+                               if (t->lbits != 120 || t->rbits != 16) {
+                                       pr_err("thresh differ: %u, %u",
+                                                       t->lbits, t->rbits);
+                                       return KSFT_FAIL;
+                               }
+                       }
+               }
+               if (got_thresh != 2) {
+                       pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
+                       return KSFT_FAIL;
+               }
+       } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+               printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+               return KSFT_FAIL;
+       } else {
+               printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+               return KSFT_FAIL;
+       }
+
+       /* Restore the default */
+       if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
+               pr_err("Can't restore SPD HTHRESH");
+               return KSFT_FAIL;
+       }
+
+       /*
+        * At this moment xfrm uses nlmsg_parse_deprecated(), which
+        * implies NL_VALIDATE_LIBERAL - ignoring attributes with
+        * (type > maxtype). nla_parse_deprecated_strict() would enforce
+        * it, as would the even stricter nla_parse().
+        * Right now the bad attribute is not expected to cause a failure,
+        * but to be ignored.
+        */
+       if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
+               return KSFT_PASS;
+
+       return KSFT_PASS;
+}
+
 static int child_serv(int xfrm_sock, uint32_t *seq,
                unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
 {
@@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
                case EXPIRE_POLICY:
                        ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
                        break;
+               case SPDINFO_ATTRS:
+                       ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
+                       break;
                default:
                        printk("Unknown desc type %d", desc.type);
                        exit(KSFT_FAIL);
@@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto)
  *   sizeof(xfrm_user_polexpire)  = 168  |  sizeof(xfrm_user_polexpire)  = 176
  *
  * Check the affected by the UABI difference structures.
+ * Also, check translation for xfrm_set_spdinfo: it has its own attributes
+ * which need to be correctly copied, but not translated.
  */
-const unsigned int compat_plan = 4;
+const unsigned int compat_plan = 5;
 static int write_compat_struct_tests(int test_desc_fd)
 {
        struct xfrm_desc desc = {};
@@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd)
        if (__write_desc(test_desc_fd, &desc))
                return -1;
 
+       desc.type = SPDINFO_ATTRS;
+       if (__write_desc(test_desc_fd, &desc))
+               return -1;
+
        return 0;
 }
 
index 170be65..1cbfeb5 100755 (executable)
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
 echo "raw gso min size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
 
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
 echo "raw gso max size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
 
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755 (executable)
index 0000000..0a8ad97
--- /dev/null
@@ -0,0 +1,82 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+netdev_check_for_carrier() {
+       local -r dev="$1"
+
+       for i in {1..5}; do
+               carrier="$(cat /sys/class/net/${dev}/carrier)"
+               if [[ "${carrier}" -ne 1 ]] ; then
+                       echo "carrier not ready yet..." >&2
+                       sleep 1
+               else
+                       echo "carrier ready" >&2
+                       break
+               fi
+       done
+       echo "${carrier}"
+}
+
+# Assumes that there is no existing macvlan device on the physical device
+setup_loopback_environment() {
+	local dev="$1"
+
+       # Fail hard if we cannot turn on loopback mode for the current NIC
+       ethtool -K "${dev}" loopback on || exit 1
+       sleep 1
+
+       # Check for the carrier
+       carrier=$(netdev_check_for_carrier ${dev})
+       if [[ "${carrier}" -ne 1 ]] ; then
+               echo "setup_loopback_environment failed"
+               exit 1
+       fi
+}
+
+setup_macvlan_ns(){
+       local -r link_dev="$1"
+       local -r ns_name="$2"
+       local -r ns_dev="$3"
+       local -r ns_mac="$4"
+       local -r addr="$5"
+
+       ip link add link "${link_dev}" dev "${ns_dev}" \
+               address "${ns_mac}" type macvlan
+       exit_code=$?
+       if [[ "${exit_code}" -ne 0 ]]; then
+               echo "setup_macvlan_ns failed"
+               exit $exit_code
+       fi
+
+       [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+       ip link set dev "${ns_dev}" netns "${ns_name}"
+       ip -netns "${ns_name}" link set dev "${ns_dev}" up
+       if [[ -n "${addr}" ]]; then
+               ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+       fi
+
+       sleep 1
+}
+
+cleanup_macvlan_ns(){
+       while (( $# >= 2 )); do
+               ns_name="$1"
+               ns_dev="$2"
+               ip -netns "${ns_name}" link del dev "${ns_dev}"
+               ip netns del "${ns_name}"
+               shift 2
+       done
+}
+
+cleanup_loopback(){
+       local -r dev="$1"
+
+       ethtool -K "${dev}" loopback off
+       sleep 1
+
+       # Check for the carrier
+       carrier=$(netdev_check_for_carrier ${dev})
+       if [[ "${carrier}" -ne 1 ]] ; then
+               echo "cleanup_loopback failed"
+               exit 1
+       fi
+}
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644 (file)
index 0000000..710ac95
--- /dev/null
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
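+ *
+ * Example invocation (illustrative; toeplitz.sh reads the key from
+ * /proc/sys/net/core/netdev_rss_key):
+ *   ./toeplitz -u -4 -i eth0 -k "$KEY" -T 1000 -C 0,1,2,3 -s -v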
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN   40
+#define TOEPLITZ_KEY_MAX_LEN   60
+
+#define TOEPLITZ_STR_LEN(K)    (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN   TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN   TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN     ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL      /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport =    8000;
+static int cfg_family =                AF_INET6;
+static char *cfg_ifname =      "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type =          SOCK_STREAM;
+static int cfg_timeout_msec =  1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...)   do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+       int fd;
+       char *mmap;
+       int idx;
+       int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
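+/* Classic Toeplitz: slide a 32-bit window across the key, XOR-ing the
+ * current window into the hash for every set bit of the four-tuple.
+ */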
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+                               const unsigned char *key)
+{
+       int i, bit, ret = 0;
+       uint32_t key32;
+
+       key32 = ntohl(*((uint32_t *)key));
+       key += 4;
+
+       for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+               for (bit = 7; bit >= 0; bit--) {
+                       if (four_tuple[i] & (1 << bit))
+                               ret ^= key32;
+
+                       key32 <<= 1;
+                       key32 |= !!(key[0] & (1 << bit));
+               }
+               key++;
+       }
+
+       return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+       int queue = rx_hash % cfg_num_queues;
+
+       log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+       if (rx_irq_cpus[queue] != cpu) {
+               log_verbose(". error: rss cpu mismatch (%d)", cpu);
+               frames_error++;
+       }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
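+       /* Mirror the kernel's RPS bucket selection: scale the 32-bit hash
+        * into [0, cfg_num_rps_cpus) with a fixed-point multiply and shift.
+        */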
+       int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+       log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+       if (rps_silo_to_cpu[silo] != cpu) {
+               log_verbose(". error: rps cpu mismatch (%d)", cpu);
+               frames_error++;
+       }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+                      const char *addrs, int addr_len)
+{
+       char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+       uint16_t *ports;
+
+       if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+           !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+               error(1, 0, "address parse error");
+
+       ports = (void *)addrs + (addr_len * 2);
+       log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+                   cpu, rx_hash, saddr, daddr,
+                   ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+       unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+       uint32_t rx_hash_sw;
+       const char *addrs;
+       int addr_len;
+
+       if (cfg_family == AF_INET) {
+               addr_len = sizeof(struct in_addr);
+               addrs = pkt + offsetof(struct iphdr, saddr);
+       } else {
+               addr_len = sizeof(struct in6_addr);
+               addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+       }
+
+       memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+       rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+       if (cfg_verbose)
+               log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+       if (rx_hash != rx_hash_sw) {
+               log_verbose(" != expected 0x%x\n", rx_hash_sw);
+               frames_error++;
+               return;
+       }
+
+       log_verbose(" OK");
+       if (cfg_num_queues)
+               verify_rss(rx_hash, cpu);
+       else if (cfg_num_rps_cpus)
+               verify_rps(rx_hash, cpu);
+       log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+       struct tpacket3_hdr *hdr = (void *)frame;
+
+       if (hdr->hv1.tp_rxhash)
+               verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+                             ring->cpu);
+       else
+               frames_nohash++;
+
+       return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+       struct tpacket_block_desc *block;
+       char *frame;
+       int i;
+
+       block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+       if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+               return;
+
+       frame = (char *)block;
+       frame += block->hdr.bh1.offset_to_first_pkt;
+
+       for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+               frame = recv_frame(ring, frame);
+               frames_received++;
+       }
+
+       block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+       ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+       int i;
+
+       usleep(1000 * cfg_timeout_msec);
+
+       for (i = 0; i < num_cpus; i++)
+               recv_block(&rings[i]);
+
+       fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+               frames_received - frames_nohash - frames_error,
+               frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+       struct tpacket_req3 req3 = {0};
+       void *ring;
+
+       req3.tp_retire_blk_tov = cfg_timeout_msec;
+       req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
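+       /* 1024 frames of 2 KiB each (2 MiB in total), split into two blocks
+        * so one block can be handed to userspace while the other fills.
+        */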
+       req3.tp_frame_size = 2048;
+       req3.tp_frame_nr = 1 << 10;
+       req3.tp_block_nr = 2;
+
+       req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+       req3.tp_block_size /= req3.tp_block_nr;
+
+       if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+               error(1, errno, "setsockopt PACKET_RX_RING");
+
+       ring_block_sz = req3.tp_block_size;
+       ring_block_nr = req3.tp_block_nr;
+
+       ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+                   PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+       if (ring == MAP_FAILED)
+               error(1, errno, "mmap failed");
+
+       return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
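+       /* Accept only host-bound packets with the requested transport
+        * protocol and destination port; every jump offset below targets
+        * one of the two final RET statements: drop (0) or accept (0xFFFF).
+        */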
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+               BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+               BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+               BPF_STMT(BPF_RET + BPF_K, 0),
+               BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+       };
+       struct sock_fprog prog = {};
+
+       prog.filter = filter;
+       prog.len = sizeof(filter) / sizeof(struct sock_filter);
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+               error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+       const int off_dport = offsetof(struct tcphdr, dest);    /* same for udp */
+       uint8_t proto;
+
+       proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+       if (cfg_family == AF_INET)
+               __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+                            sizeof(struct iphdr) + off_dport);
+       else
+               __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+                            sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+       struct sock_fprog prog = {};
+
+       prog.filter = filter;
+       prog.len = sizeof(filter) / sizeof(struct sock_filter);
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+               error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+       struct fanout_args args = {
+               .id = 1,
+               .type_flags = PACKET_FANOUT_CPU,
+               .max_num_members = RSS_MAX_CPUS
+       };
+       struct sockaddr_ll ll = { 0 };
+       int fd, val;
+
+       fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+       if (fd == -1)
+               error(1, errno, "socket creation failed");
+
+       val = TPACKET_V3;
+       if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+               error(1, errno, "setsockopt PACKET_VERSION");
+       *ring = setup_ring(fd);
+
+       /* block packets until all rings are added to the fanout group:
+        * else packets can arrive during setup and get misclassified
+        */
+       set_filter_null(fd);
+
+       ll.sll_family = AF_PACKET;
+       ll.sll_ifindex = if_nametoindex(cfg_ifname);
+       ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+                                                 htons(ETH_P_IPV6);
+       if (bind(fd, (void *)&ll, sizeof(ll)))
+               error(1, errno, "bind");
+
+       /* must come after bind: verifies all programs in group match */
+       if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+               /* on failure, retry using old API if that is sufficient:
+                * it has a hard limit of 256 sockets, so only try if
+                * (a) only testing rxhash, not RSS, or (b) <= 256 cpus.
+                * in this API, the third argument is left implicit.
+                */
+               if (cfg_num_queues || num_cpus > 256 ||
+                   setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+                              &args, sizeof(uint32_t)))
+                       error(1, errno, "setsockopt PACKET_FANOUT cpu");
+       }
+
+       return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+       int fd, val;
+
+       fd = socket(cfg_family, cfg_type, 0);
+       if (fd == -1)
+               error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+       val = 1 << 20;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+               error(1, errno, "setsockopt rcvbuf");
+
+       return fd;
+}
+
+static void setup_rings(void)
+{
+       int i;
+
+       for (i = 0; i < num_cpus; i++) {
+               rings[i].cpu = i;
+               rings[i].fd = create_ring(&rings[i].mmap);
+       }
+
+       /* accept packets once all rings in the fanout group are up */
+       for (i = 0; i < num_cpus; i++)
+               set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+       int i;
+
+       for (i = 0; i < num_cpus; i++) {
+               if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+                       error(1, errno, "munmap");
+               if (close(rings[i].fd))
+                       error(1, errno, "close");
+       }
+}
+
+static void parse_cpulist(const char *arg)
+{
+       do {
+               rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+               arg = strchr(arg, ',');
+               if (!arg)
+                       break;
+               arg++;                  /* skip ',' */
+       } while (1);
+}
+
+static void show_cpulist(void)
+{
+       int i;
+
+       for (i = 0; i < cfg_num_queues; i++)
+               fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+       int i;
+
+       for (i = 0; i < cfg_num_rps_cpus; i++)
+               fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+       int i, ret, off;
+
+       if (slen < TOEPLITZ_STR_MIN_LEN ||
+           slen > TOEPLITZ_STR_MAX_LEN + 1)
+               error(1, 0, "invalid toeplitz key");
+
+       for (i = 0, off = 0; off < slen; i++, off += 3) {
+               ret = sscanf(str + off, "%hhx", &key[i]);
+               if (ret != 1)
+                       error(1, 0, "key parse error at %d off %d len %d",
+                             i, off, slen);
+       }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+       unsigned long bitmap;
+       int i;
+
+       bitmap = strtoul(arg, NULL, 0);
+
+       if (bitmap & ~(RPS_MAX_CPUS - 1))
+               error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+                     bitmap, RPS_MAX_CPUS - 1);
+
+       for (i = 0; i < RPS_MAX_CPUS; i++)
+               if (bitmap & 1UL << i)
+                       rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+       static struct option long_options[] = {
+           {"dport",   required_argument, 0, 'd'},
+           {"cpus",    required_argument, 0, 'C'},
+           {"key",     required_argument, 0, 'k'},
+           {"iface",   required_argument, 0, 'i'},
+           {"ipv4",    no_argument, 0, '4'},
+           {"ipv6",    no_argument, 0, '6'},
+           {"sink",    no_argument, 0, 's'},
+           {"tcp",     no_argument, 0, 't'},
+           {"timeout", required_argument, 0, 'T'},
+           {"udp",     no_argument, 0, 'u'},
+           {"verbose", no_argument, 0, 'v'},
+           {"rps",     required_argument, 0, 'r'},
+           {0, 0, 0, 0}
+       };
+       bool have_toeplitz = false;
+       int index, c;
+
+       while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+               switch (c) {
+               case '4':
+                       cfg_family = AF_INET;
+                       break;
+               case '6':
+                       cfg_family = AF_INET6;
+                       break;
+               case 'C':
+                       parse_cpulist(optarg);
+                       break;
+               case 'd':
+                       cfg_dport = strtol(optarg, NULL, 0);
+                       break;
+               case 'i':
+                       cfg_ifname = optarg;
+                       break;
+               case 'k':
+                       parse_toeplitz_key(optarg, strlen(optarg),
+                                          toeplitz_key);
+                       have_toeplitz = true;
+                       break;
+               case 'r':
+                       parse_rps_bitmap(optarg);
+                       break;
+               case 's':
+                       cfg_sink = true;
+                       break;
+               case 't':
+                       cfg_type = SOCK_STREAM;
+                       break;
+               case 'T':
+                       cfg_timeout_msec = strtol(optarg, NULL, 0);
+                       break;
+               case 'u':
+                       cfg_type = SOCK_DGRAM;
+                       break;
+               case 'v':
+                       cfg_verbose = true;
+                       break;
+
+               default:
+                       error(1, 0, "unknown option %c", optopt);
+                       break;
+               }
+       }
+
+       if (!have_toeplitz)
+               error(1, 0, "Must supply rss key ('-k')");
+
+       num_cpus = get_nprocs();
+       if (num_cpus > RSS_MAX_CPUS)
+               error(1, 0, "increase RSS_MAX_CPUS");
+
+       if (cfg_num_queues && cfg_num_rps_cpus)
+               error(1, 0,
+                     "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+       if (cfg_verbose) {
+               show_cpulist();
+               show_silos();
+       }
+}
+
+int main(int argc, char **argv)
+{
+       const int min_tests = 10;
+       int fd_sink = -1;
+
+       parse_opts(argc, argv);
+
+       if (cfg_sink)
+               fd_sink = setup_sink();
+
+       setup_rings();
+       process_rings();
+       cleanup_rings();
+
+       if (cfg_sink && close(fd_sink))
+               error(1, errno, "close sink");
+
+       if (frames_received - frames_nohash < min_tests)
+               error(1, 0, "too few frames for verification");
+
+       return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755 (executable)
index 0000000..0a49907
--- /dev/null
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+       echo $(ethtool -x "${DEV}" |
+               egrep "[[:space:]]+[0-9]+:[[:space:]]+" |
+               cut -d: -f2- |
+               awk '{$1=$1};1' |
+               tr ' ' '\n' |
+               sort -u |
+               wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+       CPUS=""
+       # sort so that irq 2 is read before irq 10
+       SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+       # Consider only as many queues as RSS actually uses. We assume that
+       # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+       RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+       RXQ_COUNT=0
+
+       for i in ${SORTED_IRQS}
+       do
+               [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+               # lookup relevant IRQs by action name
+               [[ -e "$i/actions" ]] || continue
+               cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+               irqname=$(<"$i/actions")
+
+               # skip IRQs that have never fired
+               irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+               [[ -n "${irqcount}" ]] || continue
+
+               # lookup CPU
+               irq=$(basename "$i")
+               cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+               if [[ -z "${CPUS}" ]]; then
+                       CPUS="${cpu}"
+               else
+                       CPUS="${CPUS},${cpu}"
+               fi
+               RXQ_COUNT=$((RXQ_COUNT+1))
+       done
+
+       echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+       echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
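+# Emit one "echo <bitmap> > .../rps_cpus" command per rxq of ${DEV}.
+# For example (illustrative), an argument of "f" would target cpus 0-3.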
+get_set_rps_bitmaps_cmd() {
+       CMD=""
+       for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+       do
+               CMD="${CMD} echo $1 > ${i};"
+       done
+
+       echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+       echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+       echo "$1"
+       exit 1
+}
+
+check_nic_rxhash_enabled() {
+       local -r pattern="receive-hashing:\ on"
+
+       ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+       local prog="$1"
+       shift 1
+
+       while [[ "$1" =~ "-" ]]; do
+               if [[ "$1" = "-irq_prefix" ]]; then
+                       shift
+                       IRQ_PATTERN="^$1-[0-9]*$"
+               elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+                       PROTO_FLAG="$1"
+               elif [[ "$1" = "-4" ]]; then
+                       IP_FLAG="$1"
+                       SERVER_IP="${SERVER_IP4}"
+                       CLIENT_IP="${CLIENT_IP4}"
+               elif [[ "$1" = "-6" ]]; then
+                       IP_FLAG="$1"
+                       SERVER_IP="${SERVER_IP6}"
+                       CLIENT_IP="${CLIENT_IP6}"
+               elif [[ "$1" = "-rss" ]]; then
+                       TEST_RSS=true
+               elif [[ "$1" = "-rps" ]]; then
+                       shift
+                       RPS_MAP="$1"
+               elif [[ "$1" = "-i" ]]; then
+                       shift
+                       DEV="$1"
+               else
+                       die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+                            [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+               fi
+               shift
+       done
+}
+
+setup() {
+       setup_loopback_environment "${DEV}"
+
+       # Set up server_ns namespace and client_ns namespace
+       setup_macvlan_ns "${DEV}" server_ns server \
+       "${SERVER_MAC}" "${SERVER_IP}"
+       setup_macvlan_ns "${DEV}" client_ns client \
+       "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+       cleanup_macvlan_ns server_ns server client_ns client
+       cleanup_loopback "${DEV}"
+}
+
+parse_opts "$0" "$@"
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+       # RPS/RFS must be disabled because they move packets between cpus,
+       # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+       eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+         ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+         -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+       eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+         ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+         -r "0x${RPS_MAP}" -s -v &
+else
+       ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+  "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+       echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755 (executable)
index 0000000..2fef34f
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for a conservative 20 seconds. The
+# intent is for the calling program to kill it once it is no longer needed,
+# rather than waiting for the 20-second expiration.
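+#
+# Example (as invoked by toeplitz.sh): ./toeplitz_client.sh -u -4 192.168.1.200 8000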
+
+send_traffic() {
+       expiration=$((SECONDS+20))
+       while [[ "${SECONDS}" -lt "${expiration}" ]]
+       do
+               if [[ "${PROTO}" == "-u" ]]; then
+                       echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+               else
+                       echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+               fi
+               sleep 0.001
+       done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
new file mode 100644 (file)
index 0000000..88a20c7
--- /dev/null
@@ -0,0 +1,137 @@
+[
+       {
+           "id": "ce7d",
+           "name": "Add mq Qdisc to multi-queue device (4 queues)",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "0",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "4",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "2f82",
+           "name": "Add mq Qdisc to multi-queue device (256 queues)",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "0",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "256",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "c525",
+           "name": "Add duplicate mq Qdisc",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: mq"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "4",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "128a",
+           "name": "Delete nonexistent mq Qdisc",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "03a9",
+           "name": "Delete mq Qdisc twice",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: mq",
+            "$TC qdisc del dev $ETH root handle 1: mq"
+           ],
+           "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "be0f",
+           "name": "Add mq Qdisc to single-queue device",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       }
+]
index cd4a27e..ea04f04 100644 (file)
@@ -17,6 +17,7 @@ NAMES = {
           'DEV1': 'v0p1',
           'DEV2': '',
           'DUMMY': 'dummy1',
+         'ETH': 'eth0',
           'BATCH_FILE': './batch.txt',
           'BATCH_DIR': 'tmp',
           # Length of time in seconds to wait before terminating a command
index e363bda..2ea438e 100644 (file)
@@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area)
 
 static void anon_allocate_area(void **alloc_area)
 {
-       if (posix_memalign(alloc_area, page_size, nr_pages * page_size))
-               err("posix_memalign() failed");
+       *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+                          MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+       if (*alloc_area == MAP_FAILED)
+               err("mmap of anonymous memory failed");
 }
 
 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
index 9869598..b50dbe2 100644 (file)
@@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 
 static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 {
+       static DEFINE_MUTEX(kvm_debugfs_lock);
+       struct dentry *dent;
        char dir_name[ITOA_MAX_LEN * 2];
        struct kvm_stat_data *stat_data;
        const struct _kvm_stats_desc *pdesc;
@@ -903,8 +905,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
                return 0;
 
        snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
-       kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
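+       /* A duplicate pid-fd name would make debugfs_create_dir() fail and
+        * complain; detect it up front and skip stats for this VM instead.
+        */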
+       mutex_lock(&kvm_debugfs_lock);
+       dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
+       if (dent) {
+               pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name);
+               dput(dent);
+               mutex_unlock(&kvm_debugfs_lock);
+               return 0;
+       }
+       dent = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+       mutex_unlock(&kvm_debugfs_lock);
+       if (IS_ERR(dent))
+               return 0;
 
+       kvm->debugfs_dentry = dent;
        kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
                                         sizeof(*kvm->debugfs_stat_data),
                                         GFP_KERNEL_ACCOUNT);
@@ -3110,6 +3124,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                                        ++vcpu->stat.generic.halt_poll_invalid;
                                goto out;
                        }
+                       cpu_relax();
                        poll_end = cur = ktime_get();
                } while (kvm_vcpu_can_poll(cur, stop));
        }
@@ -4390,6 +4405,19 @@ struct compat_kvm_dirty_log {
        };
 };
 
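+/* 32-bit layout of struct kvm_clear_dirty_log: identical except that the
+ * userspace bitmap pointer is a compat_uptr_t.
+ */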
+struct compat_kvm_clear_dirty_log {
+       __u32 slot;
+       __u32 num_pages;
+       __u64 first_page;
+       union {
+               compat_uptr_t dirty_bitmap; /* one bit per page */
+               __u64 padding2;
+       };
+};
+
 static long kvm_vm_compat_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
 {
@@ -4399,6 +4424,24 @@ static long kvm_vm_compat_ioctl(struct file *filp,
        if (kvm->mm != current->mm)
                return -EIO;
        switch (ioctl) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+       case KVM_CLEAR_DIRTY_LOG: {
+               struct compat_kvm_clear_dirty_log compat_log;
+               struct kvm_clear_dirty_log log;
+
+               if (copy_from_user(&compat_log, (void __user *)arg,
+                                  sizeof(compat_log)))
+                       return -EFAULT;
+               log.slot         = compat_log.slot;
+               log.num_pages    = compat_log.num_pages;
+               log.first_page   = compat_log.first_page;
+               log.padding2     = compat_log.padding2;
+               log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
+
+               r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+               break;
+       }
+#endif
        case KVM_GET_DIRTY_LOG: {
                struct compat_kvm_dirty_log compat_log;
                struct kvm_dirty_log log;
@@ -5172,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
        }
        add_uevent_var(env, "PID=%d", kvm->userspace_pid);
 
-       if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
+       if (kvm->debugfs_dentry) {
                char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
 
                if (p) {