Merge tag 'batadv-next-for-davem-20180302' of git://git.open-mesh.org/linux-merge
author    David S. Miller <davem@davemloft.net>
          Sun, 4 Mar 2018 23:45:39 +0000 (18:45 -0500)
committer David S. Miller <davem@davemloft.net>
          Sun, 4 Mar 2018 23:45:39 +0000 (18:45 -0500)
Simon Wunderlich says:

====================
This cleanup patchset includes the following patches:

 - bump version strings, by Simon Wunderlich

 - bump copyright years, by Sven Eckelmann

 - fix macro indentation for checkpatch, by Sven Eckelmann

 - fix comparison operator for bool returning functions,
   by Sven Eckelmann

 - assume 2-byte packet alignments for all packet types,
   by Matthias Schiffer
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
1188 files changed:
.gitignore
Documentation/ABI/testing/sysfs-devices-platform-dock [new file with mode: 0644]
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/ABI/testing/sysfs-platform-dptf [new file with mode: 0644]
Documentation/atomic_bitops.txt
Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/sff,sfp.txt
Documentation/devicetree/bindings/net/ti,dp83867.txt
Documentation/devicetree/bindings/power/mti,mips-cpc.txt [new file with mode: 0644]
Documentation/features/sched/membarrier-sync-core/arch-support.txt [new file with mode: 0644]
Documentation/gpu/tve200.rst
Documentation/i2c/busses/i2c-i801
Documentation/locking/mutex-design.txt
Documentation/networking/ip-sysctl.txt
Documentation/networking/segmentation-offloads.txt
MAINTAINERS
Makefile
arch/arc/include/asm/bug.h
arch/arm/boot/dts/armada-370-rd.dts
arch/arm/mach-ux500/cpu-db8500.c
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/hugetlb.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/stacktrace.h
arch/arm64/include/asm/uaccess.h
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/efi.c
arch/arm64/kernel/hibernate.c
arch/arm64/kernel/perf_event.c
arch/arm64/kernel/process.c
arch/arm64/kernel/stacktrace.c
arch/arm64/kernel/sys_compat.c
arch/arm64/kernel/time.c
arch/arm64/kernel/traps.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/hugetlbpage.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmu.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc.S
arch/arm64/net/bpf_jit_comp.c
arch/cris/include/arch-v10/arch/bug.h
arch/ia64/include/asm/bug.h
arch/ia64/kernel/Makefile
arch/m68k/include/asm/bug.h
arch/m68k/mac/config.c
arch/mips/boot/Makefile
arch/mips/include/asm/compat.h
arch/mips/kernel/mips-cpc.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp-bmips.c
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/book3s/64/hash-4k.h
arch/powerpc/include/asm/book3s/64/hash-64k.h
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgalloc.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/kexec.h
arch/powerpc/include/asm/nohash/32/pgtable.h
arch/powerpc/include/asm/nohash/64/pgtable.h
arch/powerpc/include/asm/topology.h
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/sysfs.c
arch/powerpc/mm/drmem.c
arch/powerpc/mm/hash64_4k.c
arch/powerpc/mm/hash64_64k.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/hugetlbpage-hash64.c
arch/powerpc/mm/init-common.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/pgtable-radix.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/tlb_hash64.c
arch/powerpc/platforms/powernv/opal-imc.c
arch/powerpc/platforms/powernv/vas-window.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/sysdev/xive/spapr.c
arch/riscv/Kconfig
arch/riscv/kernel/entry.S
arch/riscv/kernel/head.S
arch/riscv/kernel/setup.c
arch/sparc/Kconfig
arch/sparc/include/asm/bug.h
arch/x86/.gitignore
arch/x86/Kconfig
arch/x86/Kconfig.cpu
arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
arch/x86/entry/calling.h
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/events/intel/core.c
arch/x86/events/intel/lbr.c
arch/x86/events/intel/p6.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/page_64.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/tlbflush.h
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/centaur.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cyrix.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_rdt.c
arch/x86/kernel/cpu/mcheck/mce-internal.h
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/head_32.S
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/module.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/lib/cpu.c
arch/x86/lib/error-inject.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/kmmio.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/tlb.c
arch/x86/net/bpf_jit_comp.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/tools/relocs.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/smp.c
block/blk-mq.c
certs/blacklist_nohashes.c
crypto/asymmetric_keys/pkcs7_trust.c
crypto/asymmetric_keys/pkcs7_verify.c
crypto/asymmetric_keys/public_key.c
crypto/asymmetric_keys/restrict.c
crypto/sha3_generic.c
drivers/acpi/bus.c
drivers/acpi/ec.c
drivers/acpi/property.c
drivers/acpi/spcr.c
drivers/android/binder.c
drivers/atm/idt77252.c
drivers/base/core.c
drivers/base/power/wakeirq.c
drivers/base/property.c
drivers/bluetooth/ath3k.c
drivers/bluetooth/btmrvl_main.c
drivers/bluetooth/btrtl.c
drivers/bluetooth/btusb.c
drivers/bluetooth/hci_ath.c
drivers/bluetooth/hci_ll.c
drivers/char/hw_random/via-rng.c
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/longhaul.c
drivers/cpufreq/p4-clockmod.c
drivers/cpufreq/powernow-k7.c
drivers/cpufreq/speedstep-centrino.c
drivers/cpufreq/speedstep-lib.c
drivers/crypto/caam/ctrl.c
drivers/crypto/padlock-aes.c
drivers/crypto/s5p-sss.c
drivers/crypto/sunxi-ss/sun4i-ss-prng.c
drivers/crypto/talitos.c
drivers/edac/amd64_edac.c
drivers/extcon/extcon-axp288.c
drivers/extcon/extcon-intel-int3496.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
drivers/gpu/drm/cirrus/cirrus_mode.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_mm.c
drivers/gpu/drm/drm_probe_helper.c
drivers/gpu/drm/exynos/exynos_drm_g2d.c
drivers/gpu/drm/exynos/exynos_drm_rotator.h [deleted file]
drivers/gpu/drm/exynos/exynos_hdmi.c
drivers/gpu/drm/exynos/regs-fimc.h
drivers/gpu/drm/exynos/regs-hdmi.h
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/gvt/trace.h
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_oa_cflgt3.c
drivers/gpu/drm/i915/i915_oa_cnl.c
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_pmu.h
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_cdclk.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/meson/meson_crtc.c
drivers/gpu/drm/meson/meson_drv.h
drivers/gpu/drm/meson/meson_plane.c
drivers/gpu/drm/nouveau/nouveau_connector.c
drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/ipu-v3/ipu-common.c
drivers/gpu/ipu-v3/ipu-cpmem.c
drivers/gpu/ipu-v3/ipu-csi.c
drivers/gpu/ipu-v3/ipu-pre.c
drivers/gpu/ipu-v3/ipu-prg.c
drivers/hid/hid-ids.h
drivers/hid/hid-quirks.c
drivers/hwmon/coretemp.c
drivers/hwmon/hwmon-vid.c
drivers/hwmon/k10temp.c
drivers/hwmon/k8temp.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-bcm2835.c
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-sirf.c
drivers/iio/adc/aspeed_adc.c
drivers/iio/adc/stm32-adc.c
drivers/iio/imu/adis_trigger.c
drivers/iio/industrialio-buffer.c
drivers/iio/proximity/Kconfig
drivers/infiniband/core/cma.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/rdma_core.c
drivers/infiniband/core/restrack.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_ioctl.c
drivers/infiniband/core/uverbs_ioctl_merge.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_std_types.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/bnxt_re/bnxt_re.h
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/ib_verbs.h
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_fp.h
drivers/infiniband/hw/bnxt_re/qplib_sp.c
drivers/infiniband/hw/mlx5/Makefile
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/ib_rep.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/ib_rep.h [new file with mode: 0644]
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/usnic/usnic_transport.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_fs.c
drivers/irqchip/irq-bcm7038-l1.c
drivers/irqchip/irq-bcm7120-l2.c
drivers/irqchip/irq-brcmstb-l2.c
drivers/irqchip/irq-gic-v2m.c
drivers/irqchip/irq-gic-v3-its-pci-msi.c
drivers/irqchip/irq-gic-v3-its-platform-msi.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-mips-gic.c
drivers/isdn/mISDN/socket.c
drivers/macintosh/macio_asic.c
drivers/md/dm.c
drivers/message/fusion/mptctl.c
drivers/misc/mei/bus.c
drivers/misc/mei/client.c
drivers/misc/mei/hw-me-regs.h
drivers/misc/mei/pci-me.c
drivers/misc/ocxl/file.c
drivers/mmc/host/bcm2835.c
drivers/mmc/host/meson-gx-mmc.c
drivers/mtd/nand/Kconfig
drivers/mtd/nand/vf610_nfc.c
drivers/net/Kconfig
drivers/net/Space.c
drivers/net/bonding/bond_main.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/dsa_loop.c
drivers/net/dsa/lan9303-core.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mv88e6xxx/Kconfig
drivers/net/dsa/mv88e6xxx/Makefile
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/mv88e6xxx/global2_avb.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/global2_scratch.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/hwtstamp.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/hwtstamp.h [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/ptp.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/ptp.h [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/serdes.c
drivers/net/dsa/mv88e6xxx/serdes.h
drivers/net/dsa/qca8k.c
drivers/net/ethernet/8390/Makefile
drivers/net/ethernet/8390/ax88796.c
drivers/net/ethernet/8390/axnet_cs.c
drivers/net/ethernet/8390/etherh.c
drivers/net/ethernet/8390/hydra.c
drivers/net/ethernet/8390/lib8390.c
drivers/net/ethernet/8390/mac8390.c
drivers/net/ethernet/8390/mcf8390.c
drivers/net/ethernet/8390/ne.c
drivers/net/ethernet/8390/pcnet_cs.c
drivers/net/ethernet/8390/wd.c
drivers/net/ethernet/8390/zorro8390.c
drivers/net/ethernet/amd/amd8111e.c
drivers/net/ethernet/amd/xgbe/xgbe-pci.c
drivers/net/ethernet/apple/macmace.c
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile [new file with mode: 0644]
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/cavium/common/cavium_ptp.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.h
drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/cirrus/mac89x0.c
drivers/net/ethernet/cisco/enic/enic.h
drivers/net/ethernet/cisco/enic/enic_ethtool.c
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/cisco/enic/vnic_dev.c
drivers/net/ethernet/cisco/enic/vnic_dev.h
drivers/net/ethernet/cisco/enic/vnic_devcmd.h
drivers/net/ethernet/cisco/enic/vnic_nic.h
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
drivers/net/ethernet/freescale/fman/fman_dtsec.c
drivers/net/ethernet/freescale/fman/fman_dtsec.h
drivers/net/ethernet/freescale/fman/fman_memac.c
drivers/net/ethernet/freescale/fman/fman_memac.h
drivers/net/ethernet/freescale/fman/fman_tgec.c
drivers/net/ethernet/freescale/fman/fman_tgec.h
drivers/net/ethernet/freescale/fman/mac.c
drivers/net/ethernet/freescale/fman/mac.h
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/fm10k/fm10k_common.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/fm10k/fm10k_pf.c
drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_type.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.h
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbevf/ethtool.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
drivers/net/ethernet/mellanox/mlx5/core/alloc.c
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/wq.c
drivers/net/ethernet/mellanox/mlx5/core/wq.h
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/Makefile
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/natsemi/jazzsonic.c
drivers/net/ethernet/natsemi/macsonic.c
drivers/net/ethernet/natsemi/sonic.c
drivers/net/ethernet/natsemi/sonic.h
drivers/net/ethernet/natsemi/xtsonic.c
drivers/net/ethernet/netronome/nfp/bpf/Makefile [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/flower/Makefile [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/flower/cmsg.h
drivers/net/ethernet/netronome/nfp/flower/main.h
drivers/net/ethernet/netronome/nfp/flower/match.c
drivers/net/ethernet/netronome/nfp/flower/offload.c
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
drivers/net/ethernet/netronome/nfp/nfpcore/Makefile [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/nic/Makefile [new file with mode: 0644]
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/renesas/ravb.h
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/renesas/sh_eth.h
drivers/net/ethernet/sfc/falcon/enum.h
drivers/net/ethernet/smsc/Kconfig
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/geneve.c
drivers/net/gtp.c
drivers/net/ieee802154/Kconfig
drivers/net/ieee802154/Makefile
drivers/net/ieee802154/mcr20a.c [new file with mode: 0644]
drivers/net/ieee802154/mcr20a.h [new file with mode: 0644]
drivers/net/ipvlan/ipvlan.h
drivers/net/ipvlan/ipvlan_core.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/loopback.c
drivers/net/macvlan.c
drivers/net/phy/aquantia.c
drivers/net/phy/bcm7xxx.c
drivers/net/phy/cortina.c
drivers/net/phy/dp83867.c
drivers/net/phy/marvell.c
drivers/net/phy/marvell10g.c
drivers/net/phy/phy-c45.c
drivers/net/phy/phy-core.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/sfp-bus.c
drivers/net/phy/sfp.c
drivers/net/phy/teranetics.c
drivers/net/ppp/ppp_generic.c
drivers/net/ppp/pppoe.c
drivers/net/ppp/pptp.c
drivers/net/team/team.c
drivers/net/thunderbolt.c
drivers/net/tun.c
drivers/net/usb/kalmia.c
drivers/net/usb/smsc75xx.c
drivers/net/virtio_net.c
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wireless/ath/wil6210/cfg80211.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/mac80211_hwsim.h
drivers/net/xen-netback/rx.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/io-cmd.c
drivers/of/property.c
drivers/opp/cpu.c
drivers/pci/quirks.c
drivers/perf/arm_pmu.c
drivers/perf/arm_pmu_acpi.c
drivers/perf/arm_pmu_platform.c
drivers/platform/x86/dell-laptop.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/wmi.c
drivers/s390/virtio/virtio_ccw.c
drivers/scsi/Makefile
drivers/scsi/aacraid/linit.c
drivers/scsi/aic7xxx/aiclib.c [deleted file]
drivers/scsi/bnx2fc/bnx2fc_io.c
drivers/scsi/csiostor/csio_lnode.c
drivers/scsi/device_handler/scsi_dh_alua.c
drivers/scsi/ibmvscsi/ibmvfc.h
drivers/scsi/iscsi_tcp.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/qedi/qedi_main.c
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla4xxx/ql4_def.h
drivers/scsi/qla4xxx/ql4_os.c
drivers/scsi/storvsc_drv.c
drivers/scsi/sym53c8xx_2/sym_hipd.c
drivers/scsi/ufs/ufshcd.c
drivers/soc/qcom/qmi_interface.c
drivers/staging/android/ashmem.c
drivers/staging/android/ion/ion_cma_heap.c
drivers/staging/fsl-mc/bus/Kconfig
drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
drivers/staging/iio/adc/ad7192.c
drivers/staging/iio/impedance-analyzer/ad5933.c
drivers/staging/ipx/af_ipx.c
drivers/staging/irda/net/af_irda.c
drivers/staging/lustre/lnet/lnet/lib-socket.c
drivers/target/iscsi/iscsi_target_login.c
drivers/usb/Kconfig
drivers/usb/class/cdc-acm.c
drivers/usb/core/quirks.c
drivers/usb/dwc2/gadget.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/dwc3-of-simple.c
drivers/usb/dwc3/dwc3-omap.c
drivers/usb/dwc3/ep0.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/function/f_uac2.c
drivers/usb/gadget/udc/Kconfig
drivers/usb/gadget/udc/bdc/bdc_pci.c
drivers/usb/gadget/udc/core.c
drivers/usb/gadget/udc/fsl_udc_core.c
drivers/usb/gadget/udc/renesas_usb3.c
drivers/usb/host/Kconfig
drivers/usb/host/ehci-hub.c
drivers/usb/host/ehci-q.c
drivers/usb/host/ohci-hcd.c
drivers/usb/host/ohci-hub.c
drivers/usb/host/ohci-q.c
drivers/usb/host/pci-quirks.c
drivers/usb/host/pci-quirks.h
drivers/usb/host/xhci-debugfs.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/misc/ldusb.c
drivers/usb/musb/musb_core.c
drivers/usb/musb/musb_host.c
drivers/usb/phy/phy-mxs-usb.c
drivers/usb/renesas_usbhs/fifo.c
drivers/usb/serial/option.c
drivers/usb/usbip/stub_dev.c
drivers/usb/usbip/vhci_hcd.c
drivers/vhost/net.c
drivers/video/fbdev/geode/video_gx.c
drivers/xen/pvcalls-front.c
drivers/xen/tmem.c
drivers/xen/xenbus/xenbus.h
drivers/xen/xenbus/xenbus_comms.c
drivers/xen/xenbus/xenbus_xs.c
fs/btrfs/backref.c
fs/btrfs/delayed-ref.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/dlm/lowcomms.c
fs/efivarfs/file.c
fs/gfs2/bmap.c
fs/lockd/svc.c
fs/nfs/inode.c
fs/nfs_common/grace.c
fs/nsfs.c
fs/ocfs2/cluster/tcp.c
fs/proc/kcore.c
fs/proc/proc_net.c
fs/signalfd.c
include/asm-generic/bitops/lock.h
include/asm-generic/bug.h
include/drm/drm_atomic.h
include/drm/drm_crtc_helper.h
include/dt-bindings/net/ti-dp83867.h
include/linux/acpi.h
include/linux/atalk.h
include/linux/avf/virtchnl.h
include/linux/blkdev.h
include/linux/bpf-cgroup.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/cpuidle.h
include/linux/cpumask.h
include/linux/dma-mapping.h
include/linux/filter.h
include/linux/fwnode.h
include/linux/ieee80211.h
include/linux/kconfig.h
include/linux/kcore.h
include/linux/memcontrol.h
include/linux/mlx5/cq.h
include/linux/mlx5/driver.h
include/linux/mlx5/eswitch.h [new file with mode: 0644]
include/linux/mm_inline.h
include/linux/mroute.h
include/linux/mroute6.h
include/linux/mroute_base.h [new file with mode: 0644]
include/linux/net.h
include/linux/netdevice.h
include/linux/nospec.h
include/linux/perf/arm_pmu.h
include/linux/phy.h
include/linux/property.h
include/linux/ptp_classify.h
include/linux/ptr_ring.h
include/linux/rtnetlink.h
include/linux/sched/mm.h
include/linux/sched/user.h
include/linux/semaphore.h
include/linux/sfp.h
include/linux/skbuff.h
include/linux/socket.h
include/linux/swap.h
include/linux/workqueue.h
include/net/Space.h
include/net/act_api.h
include/net/ax25.h
include/net/cfg80211.h
include/net/devlink.h
include/net/dsa.h
include/net/ethoc.h
include/net/fib_rules.h
include/net/flow.h
include/net/gre.h
include/net/ieee80211_radiotap.h
include/net/inet_common.h
include/net/inet_connection_sock.h
include/net/ip.h
include/net/ip6_fib.h
include/net/ip6_route.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/ipv6.h
include/net/lwtunnel.h
include/net/mac80211.h
include/net/net_namespace.h
include/net/netevent.h
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/pkt_cls.h
include/net/regulatory.h
include/net/route.h
include/net/sch_generic.h
include/net/sock.h
include/net/tcp.h
include/net/tcp_states.h
include/net/udplite.h
include/net/xfrm.h
include/rdma/restrack.h
include/rdma/uverbs_ioctl.h
include/sound/ac97/regs.h
include/trace/events/xen.h
include/uapi/linux/bpf.h
include/uapi/linux/fib_rules.h
include/uapi/linux/if_ether.h
include/uapi/linux/if_link.h
include/uapi/linux/libc-compat.h
include/uapi/linux/nl80211.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/ptrace.h
include/uapi/linux/rds.h
include/uapi/linux/tc_ematch/tc_em_ipt.h [new file with mode: 0644]
include/uapi/rdma/rdma_user_ioctl.h
kernel/audit.c
kernel/bpf/arraymap.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/lpm_trie.c
kernel/bpf/sockmap.c
kernel/bpf/verifier.c
kernel/fork.c
kernel/irq/irqdomain.c
kernel/kprobes.c
kernel/locking/qspinlock.c
kernel/relay.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/deadline.c
kernel/sched/rt.c
kernel/seccomp.c
kernel/trace/bpf_trace.c
kernel/user.c
kernel/workqueue.c
lib/Kconfig.debug
lib/dma-direct.c
lib/idr.c
lib/kobject_uevent.c
lib/radix-tree.c
lib/vsprintf.c
mm/memory-failure.c
mm/memory.c
mm/mlock.c
mm/page_alloc.c
mm/swap.c
mm/vmalloc.c
mm/vmscan.c
mm/zpool.c
mm/zswap.c
net/8021q/vlan.c
net/9p/trans_virtio.c
net/appletalk/ddp.c
net/atm/pvc.c
net/atm/svc.c
net/ax25/af_ax25.c
net/bluetooth/hci_request.c
net/bluetooth/hci_sock.c
net/bluetooth/l2cap_sock.c
net/bluetooth/rfcomm/sock.c
net/bluetooth/sco.c
net/bridge/br.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_sysfs_if.c
net/bridge/netfilter/ebt_among.c
net/bridge/netfilter/ebt_limit.c
net/can/bcm.c
net/can/raw.c
net/core/dev.c
net/core/devlink.c
net/core/fib_notifier.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow_dissector.c
net/core/gen_estimator.c
net/core/net-procfs.c
net/core/net_namespace.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/core/sock_diag.c
net/core/sysctl_net_core.c
net/decnet/af_decnet.c
net/dsa/dsa.c
net/dsa/master.c
net/dsa/slave.c
net/ieee802154/6lowpan/core.c
net/ieee802154/core.c
net/ipv4/Kconfig
net/ipv4/Makefile
net/ipv4/af_inet.c
net/ipv4/arp.c
net/ipv4/devinet.c
net/ipv4/fib_frontend.c
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/inetpeer.c
net/ipv4/ip_fragment.c
net/ipv4/ip_gre.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel.c
net/ipv4/ip_vti.c
net/ipv4/ipip.c
net/ipv4/ipmr.c
net/ipv4/ipmr_base.c [new file with mode: 0644]
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/ipt_ECN.c
net/ipv4/netfilter/ipt_REJECT.c
net/ipv4/netfilter/ipt_rpfilter.c
net/ipv4/netfilter/iptable_filter.c
net/ipv4/netfilter/nf_defrag_ipv4.c
net/ipv4/ping.c
net/ipv4/proc.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_metrics.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tunnel4.c
net/ipv4/udp.c
net/ipv4/udplite.c
net/ipv4/xfrm4_policy.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/addrlabel.c
net/ipv6/af_inet6.c
net/ipv6/anycast.c
net/ipv6/exthdrs_core.c
net/ipv6/fib6_rules.c
net/ipv6/icmp.c
net/ipv6/ila/ila_xlat.c
net/ipv6/ip6_checksum.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_flowlabel.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/ndisc.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_REJECT.c
net/ipv6/netfilter/ip6t_rpfilter.c
net/ipv6/netfilter/ip6t_srh.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
net/ipv6/netfilter/nft_fib_ipv6.c
net/ipv6/ping.c
net/ipv6/proc.c
net/ipv6/raw.c
net/ipv6/reassembly.c
net/ipv6/route.c
net/ipv6/seg6.c
net/ipv6/seg6_local.c
net/ipv6/sit.c
net/ipv6/sysctl_net_ipv6.c
net/ipv6/tcp_ipv6.c
net/ipv6/udplite.c
net/ipv6/xfrm6_policy.c
net/ipv6/xfrm6_state.c
net/ipv6/xfrm6_tunnel.c
net/iucv/af_iucv.c
net/kcm/kcmproc.c
net/kcm/kcmsock.c
net/key/af_key.c
net/l2tp/l2tp_ip.c
net/l2tp/l2tp_ip6.c
net/l2tp/l2tp_ppp.c
net/llc/af_llc.c
net/mac80211/agg-rx.c
net/mac80211/cfg.c
net/mac80211/debugfs.c
net/mac80211/debugfs_sta.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/mesh.c
net/mac80211/michael.c
net/mac80211/mlme.c
net/mac80211/rc80211_minstrel_ht.c
net/mac80211/rx.c
net/mac80211/spectmgmt.c
net/mac80211/sta_info.c
net/mac80211/sta_info.h
net/mac80211/status.c
net/mac80211/tx.c
net/mac80211/vht.c
net/mac80211/wpa.c
net/netfilter/core.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/nf_log.c
net/netfilter/nf_nat_proto_common.c
net/netfilter/nf_synproxy_core.c
net/netfilter/x_tables.c
net/netfilter/xt_AUDIT.c
net/netfilter/xt_CHECKSUM.c
net/netfilter/xt_CONNSECMARK.c
net/netfilter/xt_CT.c
net/netfilter/xt_DSCP.c
net/netfilter/xt_HL.c
net/netfilter/xt_HMARK.c
net/netfilter/xt_IDLETIMER.c
net/netfilter/xt_LED.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_SECMARK.c
net/netfilter/xt_TCPMSS.c
net/netfilter/xt_TPROXY.c
net/netfilter/xt_addrtype.c
net/netfilter/xt_bpf.c
net/netfilter/xt_cgroup.c
net/netfilter/xt_cluster.c
net/netfilter/xt_connbytes.c
net/netfilter/xt_connlabel.c
net/netfilter/xt_connmark.c
net/netfilter/xt_conntrack.c
net/netfilter/xt_dscp.c
net/netfilter/xt_ecn.c
net/netfilter/xt_hashlimit.c
net/netfilter/xt_helper.c
net/netfilter/xt_ipcomp.c
net/netfilter/xt_ipvs.c
net/netfilter/xt_l2tp.c
net/netfilter/xt_limit.c
net/netfilter/xt_nat.c
net/netfilter/xt_nfacct.c
net/netfilter/xt_physdev.c
net/netfilter/xt_policy.c
net/netfilter/xt_recent.c
net/netfilter/xt_set.c
net/netfilter/xt_socket.c
net/netfilter/xt_state.c
net/netfilter/xt_time.c
net/netlink/af_netlink.c
net/netlink/genetlink.c
net/netrom/af_netrom.c
net/nfc/llcp_commands.c
net/nfc/llcp_sock.c
net/nfc/netlink.c
net/packet/af_packet.c
net/phonet/pn_dev.c
net/phonet/socket.c
net/qrtr/qrtr.c
net/rds/af_rds.c
net/rds/connection.c
net/rds/message.c
net/rds/rds.h
net/rds/recv.c
net/rds/send.c
net/rds/tcp.c
net/rose/af_rose.c
net/rxrpc/output.c
net/rxrpc/recvmsg.c
net/sched/Kconfig
net/sched/Makefile
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_u32.c
net/sched/em_ipt.c [new file with mode: 0644]
net/sched/sch_api.c
net/sched/sch_prio.c
net/sctp/Makefile
net/sctp/debug.c
net/sctp/diag.c [new file with mode: 0644]
net/sctp/input.c
net/sctp/ipv6.c
net/sctp/sctp_diag.c [deleted file]
net/sctp/socket.c
net/sctp/stream.c
net/sctp/stream_interleave.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_clc.c
net/smc/smc_clc.h
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_llc.c
net/smc/smc_llc.h
net/socket.c
net/sunrpc/clnt.c
net/sunrpc/svcsock.c
net/sunrpc/xprtsock.c
net/sysctl_net.c
net/tipc/Makefile
net/tipc/bearer.c
net/tipc/bearer.h
net/tipc/core.h
net/tipc/group.c
net/tipc/link.c
net/tipc/name_table.c
net/tipc/name_table.h
net/tipc/net.c
net/tipc/net.h
net/tipc/netlink_compat.c
net/tipc/node.c
net/tipc/node.h
net/tipc/server.c [deleted file]
net/tipc/server.h [deleted file]
net/tipc/socket.c
net/tipc/subscr.c
net/tipc/subscr.h
net/tipc/topsrv.c [new file with mode: 0644]
net/tipc/topsrv.h [new file with mode: 0644]
net/tls/tls_main.c
net/unix/af_unix.c
net/vmw_vsock/af_vsock.c
net/wireless/core.c
net/wireless/mesh.c
net/wireless/nl80211.c
net/wireless/rdev-ops.h
net/wireless/sme.c
net/wireless/trace.h
net/wireless/util.c
net/wireless/wext-core.c
net/x25/af_x25.c
net/x25/x25_subr.c
net/xfrm/xfrm_policy.c
samples/bpf/Makefile
samples/bpf/cpustat_kern.c [new file with mode: 0644]
samples/bpf/cpustat_user.c [new file with mode: 0644]
samples/bpf/tcbpf2_kern.c
samples/bpf/test_cgrp2_sock.sh
samples/bpf/test_cgrp2_sock2.sh
samples/bpf/test_tunnel_bpf.sh
samples/bpf/xdp_redirect_user.c
samples/sockmap/Makefile
samples/sockmap/sockmap_user.c
security/integrity/digsig.c
security/keys/big_key.c
security/selinux/hooks.c
security/smack/smack_netfilter.c
security/tomoyo/network.c
sound/ac97/Kconfig
sound/core/seq/seq_clientmgr.c
sound/pci/hda/patch_realtek.c
sound/usb/mixer.c
sound/usb/pcm.c
sound/usb/quirks.c
tools/arch/powerpc/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/unistd.h [deleted file]
tools/arch/x86/include/asm/cpufeatures.h
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/cfg.c [new file with mode: 0644]
tools/bpf/bpftool/cfg.h [new file with mode: 0644]
tools/bpf/bpftool/main.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/xlated_dumper.c [new file with mode: 0644]
tools/bpf/bpftool/xlated_dumper.h [new file with mode: 0644]
tools/cgroup/Makefile
tools/gpio/Makefile
tools/hv/Makefile
tools/iio/Makefile
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/if_link.h
tools/include/uapi/linux/kvm.h
tools/laptop/freefall/Makefile
tools/leds/Makefile
tools/lib/bpf/libbpf.c
tools/objtool/check.c
tools/objtool/check.h
tools/perf/Documentation/perf-data.txt
tools/perf/Makefile.perf
tools/perf/arch/s390/Makefile
tools/perf/arch/s390/entry/syscalls/mksyscalltbl
tools/perf/arch/s390/entry/syscalls/syscall.tbl [new file with mode: 0644]
tools/perf/builtin-c2c.c
tools/perf/builtin-report.c
tools/perf/builtin-top.c
tools/perf/check-headers.sh
tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/mapfile.csv
tools/perf/tests/backward-ring-buffer.c
tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
tools/perf/ui/browsers/hists.c
tools/perf/ui/browsers/hists.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/hist.h
tools/perf/util/mmap.c
tools/perf/util/mmap.h
tools/perf/util/util.c
tools/power/acpi/Makefile.config
tools/scripts/Makefile.include
tools/spi/Makefile
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_rlimit.h [new file with mode: 0644]
tools/testing/selftests/bpf/test_align.c
tools/testing/selftests/bpf/test_dev_cgroup.c
tools/testing/selftests/bpf/test_lpm_map.c
tools/testing/selftests/bpf/test_lru_map.c
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_tag.c
tools/testing/selftests/bpf/test_tcpbpf_kern.c
tools/testing/selftests/bpf/test_tcpbpf_user.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/test_verifier_log.c
tools/testing/selftests/memfd/Makefile
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/fib-onlink-tests.sh [new file with mode: 0755]
tools/testing/selftests/net/fib_tests.sh
tools/testing/selftests/net/forwarding/.gitignore [new file with mode: 0644]
tools/testing/selftests/net/forwarding/README [new file with mode: 0644]
tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/config [new file with mode: 0644]
tools/testing/selftests/net/forwarding/forwarding.config.sample [new file with mode: 0644]
tools/testing/selftests/net/forwarding/lib.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/router.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/router_multipath.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_actions.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_chains.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_common.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/tc_flower.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_shblocks.sh [new file with mode: 0755]
tools/testing/selftests/net/in_netns.sh [new file with mode: 0755]
tools/testing/selftests/net/msg_zerocopy.c
tools/testing/selftests/net/psock_fanout.c
tools/testing/selftests/net/rtnetlink.sh
tools/testing/selftests/net/run_afpackettests
tools/testing/selftests/powerpc/alignment/alignment_handler.c
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/tc-testing/README
tools/testing/selftests/tc-testing/TODO.txt
tools/testing/selftests/tc-testing/TdcPlugin.py [new file with mode: 0644]
tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt [new file with mode: 0644]
tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS [new file with mode: 0644]
tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py [new file with mode: 0644]
tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py [new file with mode: 0644]
tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py [new file with mode: 0644]
tools/testing/selftests/tc-testing/plugins/__init__.py [new file with mode: 0644]
tools/testing/selftests/tc-testing/tdc.py
tools/testing/selftests/tc-testing/tdc_batch.py
tools/testing/selftests/tc-testing/tdc_helper.py
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/mpx-mini-test.c
tools/testing/selftests/x86/protection_keys.c
tools/testing/selftests/x86/single_step_syscall.c
tools/testing/selftests/x86/test_mremap_vdso.c
tools/testing/selftests/x86/test_vdso.c
tools/testing/selftests/x86/test_vsyscall.c
tools/usb/Makefile
tools/vm/Makefile
tools/wmi/Makefile

index 705e099..1be78fd 100644 (file)
@@ -127,3 +127,7 @@ all.config
 
 # Kdevelop4
 *.kdev4
+
+#Automatically generated by ASN.1 compiler
+net/ipv4/netfilter/nf_nat_snmp_basic-asn1.c
+net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-dock b/Documentation/ABI/testing/sysfs-devices-platform-dock
new file mode 100644 (file)
index 0000000..1d8c18f
--- /dev/null
@@ -0,0 +1,39 @@
+What:          /sys/devices/platform/dock.N/docked
+Date:          Dec, 2006
+KernelVersion: 2.6.19
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Value 1 or 0 indicates whether the software believes the
+               laptop is docked in a docking station.
+
+What:          /sys/devices/platform/dock.N/undock
+Date:          Dec, 2006
+KernelVersion: 2.6.19
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (WO) Writing to this file causes the software to initiate an
+               undock request to the firmware.
+
+What:          /sys/devices/platform/dock.N/uid
+Date:          Feb, 2007
+KernelVersion: v2.6.21
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Displays the docking station the laptop is docked to.
+
+What:          /sys/devices/platform/dock.N/flags
+Date:          May, 2007
+KernelVersion: v2.6.21
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Show dock station flags, useful for checking if undock
+               request has been made by the user (from the immediate_undock
+               option).
+
+What:          /sys/devices/platform/dock.N/type
+Date:          Aug, 2008
+KernelVersion: v2.6.27
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Display the dock station type - dock_station, ata_bay or
+               battery_bay.
index bfd29bc..4ed63b6 100644 (file)
@@ -108,6 +108,8 @@ Description:        CPU topology files that describe a logical CPU's relationship
 
 What:          /sys/devices/system/cpu/cpuidle/current_driver
               /sys/devices/system/cpu/cpuidle/current_governor_ro
+               /sys/devices/system/cpu/cpuidle/available_governors
+               /sys/devices/system/cpu/cpuidle/current_governor
 Date:          September 2007
 Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Discover cpuidle policy and mechanism
@@ -119,13 +121,84 @@ Description:      Discover cpuidle policy and mechanism
                Idle policy (governor) is differentiated from idle mechanism
                (driver)
 
-               current_driver: displays current idle mechanism
+               current_driver: (RO) displays current idle mechanism
 
-               current_governor_ro: displays current idle policy
+               current_governor_ro: (RO) displays current idle policy
+
+               With the cpuidle_sysfs_switch boot option enabled (meant for
+               developer testing), the following three attributes are visible
+               instead:
+
+               current_driver: same as described above
+
+               available_governors: (RO) displays a space separated list of
+               available governors
+
+               current_governor: (RW) displays current idle policy. Users can
+               switch the governor at runtime by writing to this file.
 
                See files in Documentation/cpuidle/ for more information.
 
 
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/power
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/time
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+Date:          September 2007
+KernelVersion: v2.6.24
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               The directory /sys/devices/system/cpu/cpuX/cpuidle contains per
+               logical CPU specific cpuidle information for each online cpu X.
+               The processor idle states which are available for use have the
+               following attributes:
+
+               name: (RO) Name of the idle state (string).
+
+               latency: (RO) The latency to exit out of this idle state (in
+               microseconds).
+
+               power: (RO) The power consumed while in this idle state (in
+               milliwatts).
+
+               time: (RO) The total time spent in this idle state (in microseconds).
+
+               usage: (RO) Number of times this state was entered (a count).
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
+Date:          February 2008
+KernelVersion: v2.6.25
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RO) A small description about the idle state (string).
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
+Date:          March 2012
+KernelVersion: v3.10
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RW) Option to disable this idle state (bool). The behavior and
+               the effect of the disable variable depends on the implementation
+               of a particular governor. In the ladder governor, for example,
+               it is not coherent, i.e. if one is disabling a light state, then
+               all deeper states are disabled as well, but the disable variable
+               does not reflect it. Likewise, if one enables a deep state but a
+               lighter state still is disabled, then this has no effect.
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
+Date:          March 2014
+KernelVersion: v3.15
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RO) Display the target residency i.e. the minimum amount of
+               time (in microseconds) this cpu should spend in this idle state
+               to make the transition worth the effort.
+
+
 What:          /sys/devices/system/cpu/cpu#/cpufreq/*
 Date:          pre-git history
 Contact:       linux-pm@vger.kernel.org
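
To make the cpuidle state attributes documented above concrete, here is a
minimal userspace sketch that reads one of them; the cpu0/state0 path and
the program itself are illustrative examples, not part of this merge:

    #include <stdio.h>

    int main(void)
    {
            char buf[64];
            /* Any documented stateN attribute (name, latency, residency,
             * time, usage, desc, disable) can be read the same way. */
            FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpuidle/state0/residency", "r");

            if (!f)
                    return 1;
            if (fgets(buf, sizeof(buf), f))
                    printf("cpu0 state0 target residency (us): %s", buf);
            fclose(f);
            return 0;
    }
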
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
new file mode 100644 (file)
index 0000000..325dc06
--- /dev/null
@@ -0,0 +1,40 @@
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) The charger type - Traditional, Hybrid or NVDC.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Adapter rating in milliwatts (the maximum Adapter power).
+               Must be 0 if no AC Adaptor is plugged in.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Maximum platform power that can be supported by the battery
+               in milliwatts.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Display the platform power source
+               0x00 = DC
+               0x01 = AC
+               0x02 = USB
+               0x03 = Wireless Charger
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) The maximum sustained power for battery in milliwatts.
index 5550bfd..be70b32 100644 (file)
@@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
 
  - RMW operations that have a return value are fully ordered.
 
-Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+ - RMW operations that are conditional are unordered on FAILURE,
+   otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+   if the bit in memory is unchanged by the operation then it is deemed to have
+   failed.
+
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
 clear_bit_unlock() which has RELEASE semantics.
 
 Since a platform only has a single means of achieving atomic operations
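
As a sketch of how the ACQUIRE/RELEASE pair above is meant to be used
(illustrative only; the my_* names are invented and this is not code from
the merge), a minimal bit lock built on these operations looks like:

    #include <linux/bitops.h>
    #include <asm/processor.h>      /* cpu_relax() */

    #define MY_LOCK_BIT 0
    static unsigned long my_flags;

    static void my_lock(void)
    {
            /* ACQUIRE semantics apply only when the test-and-set
             * succeeds; a failed (unchanged-bit) RMW is unordered,
             * so we simply retry. */
            while (test_and_set_bit_lock(MY_LOCK_BIT, &my_flags))
                    cpu_relax();
    }

    static void my_unlock(void)
    {
            /* RELEASE semantics: critical-section accesses cannot
             * move below this store. */
            clear_bit_unlock(MY_LOCK_BIT, &my_flags);
    }
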
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644 (file)
index 0000000..2aaef56
--- /dev/null
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+  - compatible:                should be "nxp,mcr20a"
+  - spi-max-frequency: maximal bus speed; should be set to a frequency
+                       lower than 9000000, depending on sync or async
+                       operation mode
+  - reg:               the chipselect index
+  - interrupts:        the interrupt generated by the device. A non
+                       high-level interrupt can cause deadlocks while
+                       handling the ISR.
+
+Optional properties:
+  - rst_b-gpio:                GPIO spec for the RST_B pin
+
+Example:
+
+       mcr20a@0 {
+               compatible = "nxp,mcr20a";
+               spi-max-frequency = <9000000>;
+               reg = <0>;
+               interrupts = <17 2>;
+               interrupt-parent = <&gpio>;
+               rst_b-gpio = <&gpio 27 1>;
+       };
index f1c441b..929591d 100644 (file)
@@ -33,6 +33,10 @@ Optional Properties:
   Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
   high Tx rate. Must not be present for SFF modules
 
+- maximum-power-milliwatt : Maximum module power consumption
+  Specifies the maximum power consumption allowable by a module in the
+  slot, in milli-Watts.  Presently, modules can be up to 1W, 1.5W or 2W.
+
 Example #1: Direct serdes to SFP connection
 
 sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
        i2c-bus = <&sfp_1g_i2c>;
        los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
        mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+       maximum-power-milliwatt = <1000>;
        pinctrl-names = "default";
        pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
        tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
index 02c4353..9ef9338 100644 (file)
@@ -25,6 +25,8 @@ Optional property:
                                    software needs to take when this pin is
                                    strapped in these modes. See data manual
                                    for details.
+       - ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
+                                   for applicable values.
 
 Note: ti,min-output-impedance and ti,max-output-impedance are mutually
       exclusive. When both properties are present ti,max-output-impedance
diff --git a/Documentation/devicetree/bindings/power/mti,mips-cpc.txt b/Documentation/devicetree/bindings/power/mti,mips-cpc.txt
new file mode 100644 (file)
index 0000000..c6b8251
--- /dev/null
@@ -0,0 +1,8 @@
+Binding for MIPS Cluster Power Controller (CPC).
+
+This binding allows a system to specify where the CPC registers are
+located.
+
+Required properties:
+compatible : Should be "mti,mips-cpc".
+regs: Should describe the address & size of the CPC register region.
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
new file mode 100644 (file)
index 0000000..2c815a7
--- /dev/null
@@ -0,0 +1,62 @@
+#
+# Feature name:          membarrier-sync-core
+#         Kconfig:       ARCH_HAS_MEMBARRIER_SYNC_CORE
+#         description:   arch supports core serializing membarrier
+#
+# Architecture requirements
+#
+# * arm64
+#
+# Rely on eret context synchronization when returning from IPI handler, and
+# when returning to user-space.
+#
+# * x86
+#
+# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
+# instruction is core serializing, but not SYSEXIT.
+#
+# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it can return to user-space through either SYSRETL (compat code),
+# SYSRETQ, or IRET.
+#
+# Given that neither SYSRET{L,Q}, nor SYSEXIT, are core serializing, we rely
+# instead on write_cr3() performed by switch_mm() to provide core serialization
+# after changing the current mm, and deal with the special case of kthread ->
+# uthread (temporarily keeping current mm into active_mm) by issuing a
+# sync_core_before_usermode() in that specific case.
+#
+    -----------------------
+    |         arch |status|
+    -----------------------
+    |       alpha: | TODO |
+    |         arc: | TODO |
+    |         arm: | TODO |
+    |       arm64: |  ok  |
+    |    blackfin: | TODO |
+    |         c6x: | TODO |
+    |        cris: | TODO |
+    |         frv: | TODO |
+    |       h8300: | TODO |
+    |     hexagon: | TODO |
+    |        ia64: | TODO |
+    |        m32r: | TODO |
+    |        m68k: | TODO |
+    |       metag: | TODO |
+    |  microblaze: | TODO |
+    |        mips: | TODO |
+    |     mn10300: | TODO |
+    |       nios2: | TODO |
+    |    openrisc: | TODO |
+    |      parisc: | TODO |
+    |     powerpc: | TODO |
+    |        s390: | TODO |
+    |       score: | TODO |
+    |          sh: | TODO |
+    |       sparc: | TODO |
+    |        tile: | TODO |
+    |          um: | TODO |
+    |   unicore32: | TODO |
+    |         x86: |  ok  |
+    |      xtensa: | TODO |
+    -----------------------
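
For architectures marked "ok" in the table above, userspace can request a
core-serializing barrier through the membarrier() system call. A hedged
sketch follows (raw syscall(2) is used since glibc provides no wrapper;
error handling is minimal):

    #include <linux/membarrier.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdio.h>

    int main(void)
    {
            /* A process must register its intent before it may use the
             * expedited sync-core command. */
            if (syscall(__NR_membarrier,
                        MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0))
                    perror("register");
            if (syscall(__NR_membarrier,
                        MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0))
                    perror("membarrier sync-core");
            return 0;
    }
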
index 69b17b3..152ea93 100644 (file)
@@ -3,4 +3,4 @@
 ==================================
 
 .. kernel-doc:: drivers/gpu/drm/tve200/tve200_drv.c
-   :doc: Faraday TV Encoder 200
+   :doc: Faraday TV Encoder TVE200 DRM Driver
index d477024..65514c2 100644 (file)
@@ -28,8 +28,10 @@ Supported adapters:
   * Intel Wildcat Point (PCH)
   * Intel Wildcat Point-LP (PCH)
   * Intel BayTrail (SOC)
+  * Intel Braswell (SOC)
   * Intel Sunrise Point-H (PCH)
   * Intel Sunrise Point-LP (PCH)
+  * Intel Kaby Lake-H (PCH)
   * Intel DNV (SOC)
   * Intel Broxton (SOC)
   * Intel Lewisburg (PCH)
index 60c482d..818aca1 100644 (file)
@@ -21,37 +21,23 @@ Implementation
 --------------
 
 Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-         1: unlocked
-         0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime.  The owner
+field actually holds a 'struct task_struct *' pointing to the current lock
+owner, and is therefore NULL if the lock is not currently owned. Since
+task_struct pointers are aligned to at least L1_CACHE_BYTES, the three low
+bits are used to store extra state (e.g., whether the waiter list is
+non-empty).  In its most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
 
 When acquiring a mutex, there are three possible paths that can be
 taken, depending on the state of the lock:
 
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b                lock decl (%rbx)
-    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
-
-   the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07                lock incl (%rdi)
-    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
-
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+    the current task. This only works in the uncontended case (cmpxchg() checks
+    against 0UL, so all 3 state bits above have to be 0). If the lock is
+    contended it goes to the next possible path.
 
 (ii) midpath: aka optimistic spinning, tries to spin for acquisition
      while the lock owner is running and there are no other tasks ready
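
A conceptual sketch of the cmpxchg() fastpath described in (i) above
(simplified; the real implementation lives in kernel/locking/mutex.c and
the my_ name is invented, so this is not a drop-in replacement):

    #include <linux/mutex.h>
    #include <linux/sched.h>

    static bool my_mutex_trylock_fast(struct mutex *lock)
    {
            unsigned long curr = (unsigned long)current;

            /* Succeeds only if ->owner is 0UL, i.e. no owner and all
             * three low flag bits clear; otherwise the caller falls
             * back to the mid/slow paths. */
            return atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr) == 0UL;
    }
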
@@ -143,11 +129,10 @@ Test if the mutex is taken:
 Disadvantages
 -------------
 
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
-for the largest lock in the kernel. Larger structure sizes mean more
-CPU cache and memory footprint.
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g.: on x86-64 it is 32 bytes, whereas 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
 
 When to use mutexes
 -------------------
index a553d4e..783675a 100644 (file)
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
        FALSE: disabled
        Default: FALSE
 
+fib_multipath_hash_policy - INTEGER
+       Controls which hash policy to use for multipath routes.
+       Default: 0 (Layer 3)
+       Possible values:
+       0 - Layer 3 (source and destination addresses plus flow label)
+       1 - Layer 4 (standard 5-tuple)
+
 anycast_src_echo_reply - BOOLEAN
        Controls the use of anycast addresses as source addresses for ICMPv6
        echo reply
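
Returning to the fib_multipath_hash_policy knob documented above: it is a
plain integer sysctl, so switching a router to Layer-4 multipath hashing is
a one-line write. The sketch below assumes the IPv6 variant of the knob
lives at the usual /proc/sys path; adjust the path for IPv4 as needed.

    #include <stdio.h>

    int main(void)
    {
            /* Assumed path for the IPv6 knob described above. */
            FILE *f = fopen("/proc/sys/net/ipv6/fib_multipath_hash_policy", "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            fputs("1\n", f);        /* 1 = Layer 4 (standard 5-tuple) */
            return fclose(f) ? 1 : 0;
    }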
index 2f09455..d47480b 100644 (file)
@@ -13,6 +13,7 @@ The following technologies are described:
  * Generic Segmentation Offload - GSO
  * Generic Receive Offload - GRO
  * Partial Generic Segmentation Offload - GSO_PARTIAL
+ * SCTP acceleration with GSO - GSO_BY_FRAGS
 
 TCP Segmentation Offload
 ========================
@@ -49,6 +50,10 @@ datagram into multiple IPv4 fragments.  Many of the requirements for UDP
 fragmentation offload are the same as TSO.  However the IPv4 ID for
 fragments should not increment as a single IPv4 datagram is fragmented.
 
+UFO is deprecated: modern kernels will no longer generate UFO skbs, but can
+still receive them from tuntap and similar devices. Offload of UDP-based
+tunnel protocols is still supported.
+
 IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
 ========================================================
 
@@ -83,10 +88,10 @@ SKB_GSO_UDP_TUNNEL_CSUM.  These two additional tunnel types reflect the
 fact that the outer header also requests to have a non-zero checksum
 included in the outer header.
 
-Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header
-has requested a remote checksum offload.  In this case the inner headers
-will be left with a partial checksum and only the outer header checksum
-will be computed.
+Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel
+header has requested a remote checksum offload.  In this case the inner
+headers will be left with a partial checksum and only the outer header
+checksum will be computed.
 
 Generic Segmentation Offload
 ============================
@@ -128,3 +133,28 @@ values for if the header was simply duplicated.  The one exception to this
 is the outer IPv4 ID field.  It is up to the device drivers to guarantee
 that the IPv4 ID field is incremented in the case that a given header does
 not have the DF bit set.
+
+SCTP acceleration with GSO
+==========================
+
+SCTP - despite the lack of hardware support - can still take advantage of
+GSO to pass one large packet through the network stack, rather than
+multiple small packets.
+
+This requires a different approach from other offloads, as SCTP packets
+cannot simply be segmented to the (P)MTU. Rather, the chunks must be
+contained in IP segments, padding respected. So unlike regular GSO, SCTP
+can't just generate a big skb, set gso_size to the fragmentation point and
+deliver it to the IP layer.
+
+Instead, the SCTP protocol layer builds an skb with the segments correctly
+padded and stored as chained skbs, and skb_segment() splits based on those.
+To signal this, gso_size is set to the special value GSO_BY_FRAGS.
+
+Therefore, any code in the core networking stack must be aware of the
+possibility that gso_size will be GSO_BY_FRAGS and handle that case
+appropriately. (For size checks, the skb_gso_validate_*_len family of
+helpers does this automatically.)
+
+This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
+set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
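
A minimal sketch of the gso_size-awareness this section asks for is below.
The helper name toy_skb_fits is hypothetical; skb_is_gso() and
skb_gso_validate_network_len() are the in-tree helpers the text refers to,
assuming the skb_gso_validate_*_len naming used by this kernel.

    #include <linux/skbuff.h>

    /* Hypothetical core-stack check: does this (possibly GSO) skb fit mtu? */
    static bool toy_skb_fits(const struct sk_buff *skb, unsigned int mtu)
    {
            if (!skb_is_gso(skb))
                    return skb->len <= mtu;

            /*
             * For SCTP, gso_size holds GSO_BY_FRAGS rather than a real
             * segment size, so comparing gso_size against mtu directly
             * would be meaningless. The validate helper walks the chained
             * skbs in that case and checks each resulting segment.
             */
            return skb_gso_validate_network_len(skb, mtu);
    }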
index 3bdc260..e0b3900 100644 (file)
@@ -7909,7 +7909,6 @@ S:        Maintained
 F:     scripts/leaking_addresses.pl
 
 LED SUBSYSTEM
-M:     Richard Purdie <rpurdie@rpsys.net>
 M:     Jacek Anaszewski <jacek.anaszewski@gmail.com>
 M:     Pavel Machek <pavel@ucw.cz>
 L:     linux-leds@vger.kernel.org
@@ -8593,6 +8592,15 @@ S:       Maintained
 F:     Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
 F:     drivers/iio/potentiometer/mcp4531.c
 
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M:     Xue Liu <liuxuenetmail@gmail.com>
+L:     linux-wpan@vger.kernel.org
+W:     https://github.com/xueliu/mcr20a-linux
+S:     Maintained
+F:     drivers/net/ieee802154/mcr20a.c
+F:     drivers/net/ieee802154/mcr20a.h
+F:     Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
 MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
 M:     William Breathitt Gray <vilhelm.gray@gmail.com>
 L:     linux-iio@vger.kernel.org
@@ -9206,6 +9214,7 @@ MIPS GENERIC PLATFORM
 M:     Paul Burton <paul.burton@mips.com>
 L:     linux-mips@linux-mips.org
 S:     Supported
+F:     Documentation/devicetree/bindings/power/mti,mips-cpc.txt
 F:     arch/mips/generic/
 F:     arch/mips/tools/generic-board-config.sh
 
@@ -9945,6 +9954,7 @@ F:        drivers/nfc/nxp-nci
 
 OBJTOOL
 M:     Josh Poimboeuf <jpoimboe@redhat.com>
+M:     Peter Zijlstra <peterz@infradead.org>
 S:     Supported
 F:     tools/objtool/
 
index 79ad2bf..d9cf3a4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
index ea022d4..21ec824 100644 (file)
@@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address);
 
 #define BUG()  do {                                                            \
        pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
-       dump_stack();                                                           \
+       barrier_before_unreachable();                                           \
+       __builtin_trap();                                                       \
 } while (0)
 
 #define HAVE_ARCH_BUG
index 8b2fa9a..c28afb2 100644 (file)
@@ -56,6 +56,7 @@
 
 /dts-v1/;
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/gpio/gpio.h>
 #include "armada-370.dtsi"
 
                #address-cells = <1>;
                #size-cells = <0>;
                reg = <0x10>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
 
                ports {
                        #address-cells = <1>;
                                };
                        };
                };
+
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switchphy0: switchphy@0 {
+                               reg = <0>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy1: switchphy@1 {
+                               reg = <1>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy2: switchphy@2 {
+                               reg = <2>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy3: switchphy@3 {
+                               reg = <3>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+               };
        };
 };
 
index 57058ac..7e5d7a0 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
-#include <linux/perf/arm_pmu.h>
 #include <linux/regulator/machine.h>
 
 #include <asm/outercache.h>
@@ -112,37 +111,6 @@ static void ux500_restart(enum reboot_mode mode, const char *cmd)
        prcmu_system_reset(0);
 }
 
-/*
- * The PMU IRQ lines of two cores are wired together into a single interrupt.
- * Bounce the interrupt to the other core if it's not ours.
- */
-static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler)
-{
-       irqreturn_t ret = handler(irq, dev);
-       int other = !smp_processor_id();
-
-       if (ret == IRQ_NONE && cpu_online(other))
-               irq_set_affinity(irq, cpumask_of(other));
-
-       /*
-        * We should be able to get away with the amount of IRQ_NONEs we give,
-        * while still having the spurious IRQ detection code kick in if the
-        * interrupt really starts hitting spuriously.
-        */
-       return ret;
-}
-
-static struct arm_pmu_platdata db8500_pmu_platdata = {
-       .handle_irq             = db8500_pmu_handler,
-       .irq_flags              = IRQF_NOBALANCING | IRQF_NO_THREAD,
-};
-
-static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = {
-       /* Requires call-back bindings. */
-       OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata),
-       {},
-};
-
 static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = {
        OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL),
        {},
@@ -165,9 +133,6 @@ static void __init u8500_init_machine(void)
        if (of_machine_is_compatible("st-ericsson,u8540"))
                of_platform_populate(NULL, u8500_local_bus_nodes,
                                     u8540_auxdata_lookup, NULL);
-       else
-               of_platform_populate(NULL, u8500_local_bus_nodes,
-                                    u8500_auxdata_lookup, NULL);
 }
 
 static const char * stericsson_dt_platform_compat[] = {
index be7bd19..350c76a 100644 (file)
@@ -20,7 +20,7 @@
 
 #define MPIDR_UP_BITMASK       (0x1 << 30)
 #define MPIDR_MT_BITMASK       (0x1 << 24)
-#define MPIDR_HWID_BITMASK     0xff00ffffff
+#define MPIDR_HWID_BITMASK     UL(0xff00ffffff)
 
 #define MPIDR_LEVEL_BITS_SHIFT 3
 #define MPIDR_LEVEL_BITS       (1 << MPIDR_LEVEL_BITS_SHIFT)
index 1dca41b..e73f685 100644 (file)
@@ -22,7 +22,7 @@
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
-       return *ptep;
+       return READ_ONCE(*ptep);
 }
 
 
index 9679067..7faed6e 100644 (file)
@@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
        return pmd;
 }
 
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
+static inline void kvm_set_s2pte_readonly(pte_t *ptep)
 {
        pteval_t old_pteval, pteval;
 
-       pteval = READ_ONCE(pte_val(*pte));
+       pteval = READ_ONCE(pte_val(*ptep));
        do {
                old_pteval = pteval;
                pteval &= ~PTE_S2_RDWR;
                pteval |= PTE_S2_RDONLY;
-               pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+               pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
        } while (pteval != old_pteval);
 }
 
-static inline bool kvm_s2pte_readonly(pte_t *pte)
+static inline bool kvm_s2pte_readonly(pte_t *ptep)
 {
-       return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+       return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
 }
 
-static inline bool kvm_s2pte_exec(pte_t *pte)
+static inline bool kvm_s2pte_exec(pte_t *ptep)
 {
-       return !(pte_val(*pte) & PTE_S2_XN);
+       return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
 }
 
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
 {
-       kvm_set_s2pte_readonly((pte_t *)pmd);
+       kvm_set_s2pte_readonly((pte_t *)pmdp);
 }
 
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
 {
-       return kvm_s2pte_readonly((pte_t *)pmd);
+       return kvm_s2pte_readonly((pte_t *)pmdp);
 }
 
-static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
 {
-       return !(pmd_val(*pmd) & PMD_S2_XN);
+       return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
 }
 
 static inline bool kvm_page_empty(void *ptr)
index 8d33319..39ec0b8 100644 (file)
@@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void cpu_replace_ttbr1(pgd_t *pgd)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
 {
        typedef void (ttbr_replace_func)(phys_addr_t);
        extern ttbr_replace_func idmap_cpu_replace_ttbr1;
        ttbr_replace_func *replace_phys;
 
-       phys_addr_t pgd_phys = virt_to_phys(pgd);
+       phys_addr_t pgd_phys = virt_to_phys(pgdp);
 
        replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
 
index e9d9f1b..2e05bcd 100644 (file)
@@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
        return (pmd_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
 {
-       BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-       free_page((unsigned long)pmd);
+       BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+       free_page((unsigned long)pmdp);
 }
 
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
-       set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
+       set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
-       __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+       __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
 }
 #else
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
        BUILD_BUG();
 }
@@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
        return (pud_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
 {
-       BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-       free_page((unsigned long)pud);
+       BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
+       free_page((unsigned long)pudp);
 }
 
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
-       set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
+       set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
 {
-       __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+       __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
 }
 #else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
        BUILD_BUG();
 }
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 /*
  * Free a PTE table.
  */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
 {
-       if (pte)
-               free_page((unsigned long)pte);
+       if (ptep)
+               free_page((unsigned long)ptep);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
        __free_page(pte);
 }
 
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
                                  pmdval_t prot)
 {
-       set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
+       set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
 }
 
 /*
index 094374c..7e2c27e 100644 (file)
@@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
 
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
-       *ptep = pte;
+       WRITE_ONCE(*ptep, pte);
 
        /*
         * Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
+       pte_t old_pte;
+
        if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
                __sync_icache_dcache(pte, addr);
 
@@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
         * hardware updates of the pte (ptep_set_access_flags safely changes
         * valid ptes without going through an invalid entry).
         */
-       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+       old_pte = READ_ONCE(*ptep);
+       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
           (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
                VM_WARN_ONCE(!pte_young(pte),
                             "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
-                            __func__, pte_val(*ptep), pte_val(pte));
-               VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+                            __func__, pte_val(old_pte), pte_val(pte));
+               VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
                             "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
-                            __func__, pte_val(*ptep), pte_val(pte));
+                            __func__, pte_val(old_pte), pte_val(pte));
        }
 
        set_pte(ptep, pte);
@@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-       *pmdp = pmd;
+       WRITE_ONCE(*pmdp, pmd);
        dsb(ishst);
        isb();
 }
@@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
 {
-       *pudp = pud;
+       WRITE_ONCE(*pudp, pud);
        dsb(ishst);
        isb();
 }
@@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)                (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
-#define pmd_offset_phys(dir, addr)     (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset_phys(dir, addr)     (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
 #define pmd_offset(dir, addr)          ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
 #define pmd_set_fixmap(addr)           ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-       *pgdp = pgd;
+       WRITE_ONCE(*pgdp, pgd);
        dsb(ishst);
 }
 
@@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 /* Find an entry in the first-level page table. */
 #define pud_index(addr)                (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-#define pud_offset_phys(dir, addr)     (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset_phys(dir, addr)     (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
 #define pud_offset(dir, addr)          ((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
 #define pud_set_fixmap(addr)           ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
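
The conversions in the hunks above all follow one pattern: page-table
entries that the hardware walker or another CPU may update concurrently are
now accessed through READ_ONCE()/WRITE_ONCE(), so the compiler can neither
tear the 64-bit access nor silently re-read the entry. A simplified
user-space sketch of the idiom (toy_* names hypothetical, macros reduced to
their volatile-access core):

    #include <stdint.h>

    /* Reduced forms of the kernel macros: a volatile access forbids the
     * compiler from tearing, merging or re-issuing the load/store. */
    #define TOY_READ_ONCE(x)        (*(const volatile __typeof__(x) *)&(x))
    #define TOY_WRITE_ONCE(x, v)    (*(volatile __typeof__(x) *)&(x) = (v))

    typedef uint64_t toy_pte_t;

    /* Publish a new entry with a single store, never a byte-wise copy. */
    static void toy_set_pte(toy_pte_t *ptep, toy_pte_t pte)
    {
            TOY_WRITE_ONCE(*ptep, pte);
    }

    /* Snapshot the entry once; every later test sees the same value even
     * if a concurrent toy_set_pte() races with us. */
    static toy_pte_t toy_pte_snapshot(toy_pte_t *ptep)
    {
            return TOY_READ_ONCE(*ptep);
    }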
index 472ef94..902f9ed 100644 (file)
@@ -28,7 +28,7 @@ struct stackframe {
        unsigned long fp;
        unsigned long pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       unsigned int graph;
+       int graph;
 #endif
 };
 
index 543e11f..e66b0fc 100644 (file)
@@ -72,15 +72,15 @@ static inline void set_fs(mm_segment_t fs)
  * This is equivalent to the following test:
  * (u65)addr + (u65)size <= (u65)current->addr_limit + 1
  */
-static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
+static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
 {
-       unsigned long limit = current_thread_info()->addr_limit;
+       unsigned long ret, limit = current_thread_info()->addr_limit;
 
        __chk_user_ptr(addr);
        asm volatile(
        // A + B <= C + 1 for all A,B,C, in four easy steps:
        // 1: X = A + B; X' = X % 2^64
-       "       adds    %0, %0, %2\n"
+       "       adds    %0, %3, %2\n"
        // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
        "       csel    %1, xzr, %1, hi\n"
        // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
@@ -92,9 +92,9 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
        //    testing X' - C == 0, subject to the previous adjustments.
        "       sbcs    xzr, %0, %1\n"
        "       cset    %0, ls\n"
-       : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc");
+       : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc");
 
-       return addr;
+       return ret;
 }
 
 /*
@@ -104,7 +104,7 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
  */
 #define untagged_addr(addr)            sign_extend64(addr, 55)
 
-#define access_ok(type, addr, size)    __range_ok((unsigned long)(addr), size)
+#define access_ok(type, addr, size)    __range_ok(addr, size)
 #define user_addr_max                  get_fs
 
 #define _ASM_EXTABLE(from, to)                                         \
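
For reference, the 65-bit comparison that the rewritten assembly above
encodes can be stated directly in C with 128-bit arithmetic. The sketch
below is not the kernel code; it assumes a 64-bit gcc/clang target with
__int128 support and demonstrates only the arithmetic, not the
flag-register tricks.

    #include <stdint.h>
    #include <stdio.h>

    /* addr + size <= limit + 1, evaluated without 64-bit overflow. */
    static int toy_range_ok(uint64_t addr, uint64_t size, uint64_t limit)
    {
            unsigned __int128 sum = (unsigned __int128)addr + size;

            return sum <= (unsigned __int128)limit + 1;
    }

    int main(void)
    {
            /* 2^64 - 1 + 1 wraps to exactly limit + 1: still accepted. */
            printf("%d\n", toy_range_ok(UINT64_MAX, 1, UINT64_MAX));
            /* One byte more exceeds the 65-bit bound: rejected. */
            printf("%d\n", toy_range_ok(UINT64_MAX, 2, UINT64_MAX));
            return 0;
    }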
index c33b5e4..68450e9 100644 (file)
@@ -370,6 +370,7 @@ static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
 static int swp_handler(struct pt_regs *regs, u32 instr)
 {
        u32 destreg, data, type, address = 0;
+       const void __user *user_ptr;
        int rn, rt2, res = 0;
 
        perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
@@ -401,7 +402,8 @@ static int swp_handler(struct pt_regs *regs, u32 instr)
                aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data);
 
        /* Check access in reasonable access range for both SWP and SWPB */
-       if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+       user_ptr = (const void __user *)(unsigned long)(address & ~3);
+       if (!access_ok(VERIFY_WRITE, user_ptr, 4)) {
                pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n",
                        address);
                goto fault;
index 0782359..52f15cd 100644 (file)
@@ -406,6 +406,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
                MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
        },
+       {
+               .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+               MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+               .enable = qcom_enable_link_stack_sanitization,
+       },
+       {
+               .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+               MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+       },
        {
                .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
                MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
index 29b1f87..2985a06 100644 (file)
@@ -199,9 +199,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
 };
 
 static const struct arm64_ftr_bits ftr_ctr[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),   /* RAO */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),           /* RES1 */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1),      /* DIC */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1),      /* IDC */
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),     /* CWG */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),      /* ERG */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0),     /* ERG */
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),      /* DminLine */
        /*
         * Linux can handle differing I-cache policies. Userspace JITs will
index f85ac58..a8bf1c8 100644 (file)
@@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
                                  unsigned long addr, void *data)
 {
        efi_memory_desc_t *md = data;
-       pte_t pte = *ptep;
+       pte_t pte = READ_ONCE(*ptep);
 
        if (md->attribute & EFI_MEMORY_RO)
                pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
index f20cf7e..1ec5f28 100644 (file)
@@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
                                 gfp_t mask)
 {
        int rc = 0;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
        unsigned long dst = (unsigned long)allocator(mask);
 
        if (!dst) {
@@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
        memcpy((void *)dst, src_start, length);
        flush_icache_range(dst, dst + length);
 
-       pgd = pgd_offset_raw(allocator(mask), dst_addr);
-       if (pgd_none(*pgd)) {
-               pud = allocator(mask);
-               if (!pud) {
+       pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+       if (pgd_none(READ_ONCE(*pgdp))) {
+               pudp = allocator(mask);
+               if (!pudp) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pgd_populate(&init_mm, pgd, pud);
+               pgd_populate(&init_mm, pgdp, pudp);
        }
 
-       pud = pud_offset(pgd, dst_addr);
-       if (pud_none(*pud)) {
-               pmd = allocator(mask);
-               if (!pmd) {
+       pudp = pud_offset(pgdp, dst_addr);
+       if (pud_none(READ_ONCE(*pudp))) {
+               pmdp = allocator(mask);
+               if (!pmdp) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pud_populate(&init_mm, pud, pmd);
+               pud_populate(&init_mm, pudp, pmdp);
        }
 
-       pmd = pmd_offset(pud, dst_addr);
-       if (pmd_none(*pmd)) {
-               pte = allocator(mask);
-               if (!pte) {
+       pmdp = pmd_offset(pudp, dst_addr);
+       if (pmd_none(READ_ONCE(*pmdp))) {
+               ptep = allocator(mask);
+               if (!ptep) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pmd_populate_kernel(&init_mm, pmd, pte);
+               pmd_populate_kernel(&init_mm, pmdp, ptep);
        }
 
-       pte = pte_offset_kernel(pmd, dst_addr);
-       set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+       ptep = pte_offset_kernel(pmdp, dst_addr);
+       set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
 
        /*
         * Load our new page tables. A strict BBM approach requires that we
@@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
         */
        cpu_set_reserved_ttbr0();
        local_flush_tlb_all();
-       write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
+       write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
        isb();
 
        *phys_dst_addr = virt_to_phys((void *)dst);
@@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
        return ret;
 }
 
-static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 {
-       pte_t pte = *src_pte;
+       pte_t pte = READ_ONCE(*src_ptep);
 
        if (pte_valid(pte)) {
                /*
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
                 * read only (code, rodata). Clear the RDONLY bit from
                 * the temporary mappings we use during restore.
                 */
-               set_pte(dst_pte, pte_mkwrite(pte));
+               set_pte(dst_ptep, pte_mkwrite(pte));
        } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
                /*
                 * debug_pagealloc will have removed the PTE_VALID bit if
@@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
                 */
                BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-               set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
+               set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
        }
 }
 
-static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
                    unsigned long end)
 {
-       pte_t *src_pte;
-       pte_t *dst_pte;
+       pte_t *src_ptep;
+       pte_t *dst_ptep;
        unsigned long addr = start;
 
-       dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
-       if (!dst_pte)
+       dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+       if (!dst_ptep)
                return -ENOMEM;
-       pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
-       dst_pte = pte_offset_kernel(dst_pmd, start);
+       pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+       dst_ptep = pte_offset_kernel(dst_pmdp, start);
 
-       src_pte = pte_offset_kernel(src_pmd, start);
+       src_ptep = pte_offset_kernel(src_pmdp, start);
        do {
-               _copy_pte(dst_pte, src_pte, addr);
-       } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+               _copy_pte(dst_ptep, src_ptep, addr);
+       } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
 
        return 0;
 }
 
-static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
                    unsigned long end)
 {
-       pmd_t *src_pmd;
-       pmd_t *dst_pmd;
+       pmd_t *src_pmdp;
+       pmd_t *dst_pmdp;
        unsigned long next;
        unsigned long addr = start;
 
-       if (pud_none(*dst_pud)) {
-               dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pmd)
+       if (pud_none(READ_ONCE(*dst_pudp))) {
+               dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pmdp)
                        return -ENOMEM;
-               pud_populate(&init_mm, dst_pud, dst_pmd);
+               pud_populate(&init_mm, dst_pudp, dst_pmdp);
        }
-       dst_pmd = pmd_offset(dst_pud, start);
+       dst_pmdp = pmd_offset(dst_pudp, start);
 
-       src_pmd = pmd_offset(src_pud, start);
+       src_pmdp = pmd_offset(src_pudp, start);
        do {
+               pmd_t pmd = READ_ONCE(*src_pmdp);
+
                next = pmd_addr_end(addr, end);
-               if (pmd_none(*src_pmd))
+               if (pmd_none(pmd))
                        continue;
-               if (pmd_table(*src_pmd)) {
-                       if (copy_pte(dst_pmd, src_pmd, addr, next))
+               if (pmd_table(pmd)) {
+                       if (copy_pte(dst_pmdp, src_pmdp, addr, next))
                                return -ENOMEM;
                } else {
-                       set_pmd(dst_pmd,
-                               __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+                       set_pmd(dst_pmdp,
+                               __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
                }
-       } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+       } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
 
        return 0;
 }
 
-static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
                    unsigned long end)
 {
-       pud_t *dst_pud;
-       pud_t *src_pud;
+       pud_t *dst_pudp;
+       pud_t *src_pudp;
        unsigned long next;
        unsigned long addr = start;
 
-       if (pgd_none(*dst_pgd)) {
-               dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pud)
+       if (pgd_none(READ_ONCE(*dst_pgdp))) {
+               dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pudp)
                        return -ENOMEM;
-               pgd_populate(&init_mm, dst_pgd, dst_pud);
+               pgd_populate(&init_mm, dst_pgdp, dst_pudp);
        }
-       dst_pud = pud_offset(dst_pgd, start);
+       dst_pudp = pud_offset(dst_pgdp, start);
 
-       src_pud = pud_offset(src_pgd, start);
+       src_pudp = pud_offset(src_pgdp, start);
        do {
+               pud_t pud = READ_ONCE(*src_pudp);
+
                next = pud_addr_end(addr, end);
-               if (pud_none(*src_pud))
+               if (pud_none(pud))
                        continue;
-               if (pud_table(*(src_pud))) {
-                       if (copy_pmd(dst_pud, src_pud, addr, next))
+               if (pud_table(pud)) {
+                       if (copy_pmd(dst_pudp, src_pudp, addr, next))
                                return -ENOMEM;
                } else {
-                       set_pud(dst_pud,
-                               __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+                       set_pud(dst_pudp,
+                               __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
                }
-       } while (dst_pud++, src_pud++, addr = next, addr != end);
+       } while (dst_pudp++, src_pudp++, addr = next, addr != end);
 
        return 0;
 }
 
-static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
                            unsigned long end)
 {
        unsigned long next;
        unsigned long addr = start;
-       pgd_t *src_pgd = pgd_offset_k(start);
+       pgd_t *src_pgdp = pgd_offset_k(start);
 
-       dst_pgd = pgd_offset_raw(dst_pgd, start);
+       dst_pgdp = pgd_offset_raw(dst_pgdp, start);
        do {
                next = pgd_addr_end(addr, end);
-               if (pgd_none(*src_pgd))
+               if (pgd_none(READ_ONCE(*src_pgdp)))
                        continue;
-               if (copy_pud(dst_pgd, src_pgd, addr, next))
+               if (copy_pud(dst_pgdp, src_pgdp, addr, next))
                        return -ENOMEM;
-       } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+       } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
 
        return 0;
 }
index 75b220b..85a251b 100644 (file)
@@ -908,9 +908,9 @@ static void __armv8pmu_probe_pmu(void *info)
        int pmuver;
 
        dfr0 = read_sysreg(id_aa64dfr0_el1);
-       pmuver = cpuid_feature_extract_signed_field(dfr0,
+       pmuver = cpuid_feature_extract_unsigned_field(dfr0,
                        ID_AA64DFR0_PMUVER_SHIFT);
-       if (pmuver < 1)
+       if (pmuver == 0xf || pmuver == 0)
                return;
 
        probe->present = true;
index ad8aeb0..c0da6ef 100644 (file)
@@ -220,8 +220,15 @@ void __show_regs(struct pt_regs *regs)
 
        show_regs_print_info(KERN_DEFAULT);
        print_pstate(regs);
-       printk("pc : %pS\n", (void *)regs->pc);
-       printk("lr : %pS\n", (void *)lr);
+
+       if (!user_mode(regs)) {
+               printk("pc : %pS\n", (void *)regs->pc);
+               printk("lr : %pS\n", (void *)lr);
+       } else {
+               printk("pc : %016llx\n", regs->pc);
+               printk("lr : %016llx\n", lr);
+       }
+
        printk("sp : %016llx\n", sp);
 
        i = top_reg;
index 76809cc..d5718a0 100644 (file)
@@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        if (tsk->ret_stack &&
                        (frame->pc == (unsigned long)return_to_handler)) {
+               if (WARN_ON_ONCE(frame->graph == -1))
+                       return -EINVAL;
+               if (frame->graph < -1)
+                       frame->graph += FTRACE_NOTRACE_DEPTH;
+
                /*
                 * This is a case where function graph tracer has
                 * modified a return address (LR) in a stack frame
index 8b8bbd3..a382b2a 100644 (file)
@@ -57,7 +57,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
        if (end < start || flags)
                return -EINVAL;
 
-       if (!access_ok(VERIFY_READ, start, end - start))
+       if (!access_ok(VERIFY_READ, (const void __user *)start, end - start))
                return -EFAULT;
 
        return __do_compat_cache_op(start, end);
index a439128..f258636 100644 (file)
@@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs)
        frame.fp = regs->regs[29];
        frame.pc = regs->pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       frame.graph = -1; /* no task info */
+       frame.graph = current->curr_ret_stack;
 #endif
        do {
                int ret = unwind_frame(NULL, &frame);
index bbb0fde..eb2d151 100644 (file)
@@ -57,7 +57,7 @@ static const char *handler[]= {
        "Error"
 };
 
-int show_unhandled_signals = 1;
+int show_unhandled_signals = 0;
 
 static void dump_backtrace_entry(unsigned long where)
 {
@@ -526,14 +526,6 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)
        }
 #endif
 
-       if (show_unhandled_signals_ratelimited()) {
-               pr_info("%s[%d]: syscall %d\n", current->comm,
-                       task_pid_nr(current), regs->syscallno);
-               dump_instr("", regs);
-               if (user_mode(regs))
-                       __show_regs(regs);
-       }
-
        return sys_ni_syscall();
 }
 
index 116252a..870f4b1 100644 (file)
@@ -407,8 +407,10 @@ again:
                u32 midr = read_cpuid_id();
 
                /* Apply BTAC predictors mitigation to all Falkor chips */
-               if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+               if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+                   ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
                        __qcom_hyp_sanitize_btac_predictors();
+               }
        }
 
        fp_enabled = __fpsimd_enabled();
index 7b60d62..65dfc85 100644 (file)
@@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
 {
-       pte_t *pte = pte_offset_kernel(pmd, 0UL);
+       pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+       for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
                addr = start + i * PAGE_SIZE;
-               note_page(st, addr, 4, pte_val(*pte));
+               note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
        }
 }
 
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
 {
-       pmd_t *pmd = pmd_offset(pud, 0UL);
+       pmd_t *pmdp = pmd_offset(pudp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+       for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+               pmd_t pmd = READ_ONCE(*pmdp);
+
                addr = start + i * PMD_SIZE;
-               if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-                       note_page(st, addr, 3, pmd_val(*pmd));
+               if (pmd_none(pmd) || pmd_sect(pmd)) {
+                       note_page(st, addr, 3, pmd_val(pmd));
                } else {
-                       BUG_ON(pmd_bad(*pmd));
-                       walk_pte(st, pmd, addr);
+                       BUG_ON(pmd_bad(pmd));
+                       walk_pte(st, pmdp, addr);
                }
        }
 }
 
-static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
 {
-       pud_t *pud = pud_offset(pgd, 0UL);
+       pud_t *pudp = pud_offset(pgdp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+       for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+               pud_t pud = READ_ONCE(*pudp);
+
                addr = start + i * PUD_SIZE;
-               if (pud_none(*pud) || pud_sect(*pud)) {
-                       note_page(st, addr, 2, pud_val(*pud));
+               if (pud_none(pud) || pud_sect(pud)) {
+                       note_page(st, addr, 2, pud_val(pud));
                } else {
-                       BUG_ON(pud_bad(*pud));
-                       walk_pmd(st, pud, addr);
+                       BUG_ON(pud_bad(pud));
+                       walk_pmd(st, pudp, addr);
                }
        }
 }
@@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
                     unsigned long start)
 {
-       pgd_t *pgd = pgd_offset(mm, 0UL);
+       pgd_t *pgdp = pgd_offset(mm, 0UL);
        unsigned i;
        unsigned long addr;
 
-       for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+       for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+               pgd_t pgd = READ_ONCE(*pgdp);
+
                addr = start + i * PGDIR_SIZE;
-               if (pgd_none(*pgd)) {
-                       note_page(st, addr, 1, pgd_val(*pgd));
+               if (pgd_none(pgd)) {
+                       note_page(st, addr, 1, pgd_val(pgd));
                } else {
-                       BUG_ON(pgd_bad(*pgd));
-                       walk_pud(st, pgd, addr);
+                       BUG_ON(pgd_bad(pgd));
+                       walk_pud(st, pgdp, addr);
                }
        }
 }
index f76bb2c..bff1155 100644 (file)
@@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
 void show_pte(unsigned long addr)
 {
        struct mm_struct *mm;
-       pgd_t *pgd;
+       pgd_t *pgdp;
+       pgd_t pgd;
 
        if (addr < TASK_SIZE) {
                /* TTBR0 */
@@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
                return;
        }
 
-       pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+       pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
                 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
                 VA_BITS, mm->pgd);
-       pgd = pgd_offset(mm, addr);
-       pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
+       pgdp = pgd_offset(mm, addr);
+       pgd = READ_ONCE(*pgdp);
+       pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
 
        do {
-               pud_t *pud;
-               pmd_t *pmd;
-               pte_t *pte;
+               pud_t *pudp, pud;
+               pmd_t *pmdp, pmd;
+               pte_t *ptep, pte;
 
-               if (pgd_none(*pgd) || pgd_bad(*pgd))
+               if (pgd_none(pgd) || pgd_bad(pgd))
                        break;
 
-               pud = pud_offset(pgd, addr);
-               pr_cont(", *pud=%016llx", pud_val(*pud));
-               if (pud_none(*pud) || pud_bad(*pud))
+               pudp = pud_offset(pgdp, addr);
+               pud = READ_ONCE(*pudp);
+               pr_cont(", pud=%016llx", pud_val(pud));
+               if (pud_none(pud) || pud_bad(pud))
                        break;
 
-               pmd = pmd_offset(pud, addr);
-               pr_cont(", *pmd=%016llx", pmd_val(*pmd));
-               if (pmd_none(*pmd) || pmd_bad(*pmd))
+               pmdp = pmd_offset(pudp, addr);
+               pmd = READ_ONCE(*pmdp);
+               pr_cont(", pmd=%016llx", pmd_val(pmd));
+               if (pmd_none(pmd) || pmd_bad(pmd))
                        break;
 
-               pte = pte_offset_map(pmd, addr);
-               pr_cont(", *pte=%016llx", pte_val(*pte));
-               pte_unmap(pte);
+               ptep = pte_offset_map(pmdp, addr);
+               pte = READ_ONCE(*ptep);
+               pr_cont(", pte=%016llx", pte_val(pte));
+               pte_unmap(ptep);
        } while(0);
 
        pr_cont("\n");
@@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
                          pte_t entry, int dirty)
 {
        pteval_t old_pteval, pteval;
+       pte_t pte = READ_ONCE(*ptep);
 
-       if (pte_same(*ptep, entry))
+       if (pte_same(pte, entry))
                return 0;
 
        /* only preserve the access flags and write permission */
@@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
         * (calculated as: a & b == ~(~a | ~b)).
         */
        pte_val(entry) ^= PTE_RDONLY;
-       pteval = READ_ONCE(pte_val(*ptep));
+       pteval = pte_val(pte);
        do {
                old_pteval = pteval;
                pteval ^= PTE_RDONLY;
index 6cb0fa9..ecc6818 100644 (file)
@@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, size_t *pgsize)
 {
-       pgd_t *pgd = pgd_offset(mm, addr);
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp = pgd_offset(mm, addr);
+       pud_t *pudp;
+       pmd_t *pmdp;
 
        *pgsize = PAGE_SIZE;
-       pud = pud_offset(pgd, addr);
-       pmd = pmd_offset(pud, addr);
-       if ((pte_t *)pmd == ptep) {
+       pudp = pud_offset(pgdp, addr);
+       pmdp = pmd_offset(pudp, addr);
+       if ((pte_t *)pmdp == ptep) {
                *pgsize = PMD_SIZE;
                return CONT_PMDS;
        }
@@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 
        clear_flush(mm, addr, ptep, pgsize, ncontig);
 
-       for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
-               pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
-                        pte_val(pfn_pte(pfn, hugeprot)));
+       for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
-       }
 }
 
 void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pte_t *pte = NULL;
-
-       pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
-       pgd = pgd_offset(mm, addr);
-       pud = pud_alloc(mm, pgd, addr);
-       if (!pud)
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep = NULL;
+
+       pgdp = pgd_offset(mm, addr);
+       pudp = pud_alloc(mm, pgdp, addr);
+       if (!pudp)
                return NULL;
 
        if (sz == PUD_SIZE) {
-               pte = (pte_t *)pud;
+               ptep = (pte_t *)pudp;
        } else if (sz == (PAGE_SIZE * CONT_PTES)) {
-               pmd_t *pmd = pmd_alloc(mm, pud, addr);
+               pmdp = pmd_alloc(mm, pudp, addr);
 
                WARN_ON(addr & (sz - 1));
                /*
@@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                 * will be no pte_unmap() to correspond with this
                 * pte_alloc_map().
                 */
-               pte = pte_alloc_map(mm, pmd, addr);
+               ptep = pte_alloc_map(mm, pmdp, addr);
        } else if (sz == PMD_SIZE) {
                if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
-                   pud_none(*pud))
-                       pte = huge_pmd_share(mm, addr, pud);
+                   pud_none(READ_ONCE(*pudp)))
+                       ptep = huge_pmd_share(mm, addr, pudp);
                else
-                       pte = (pte_t *)pmd_alloc(mm, pud, addr);
+                       ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
        } else if (sz == (PMD_SIZE * CONT_PMDS)) {
-               pmd_t *pmd;
-
-               pmd = pmd_alloc(mm, pud, addr);
+               pmdp = pmd_alloc(mm, pudp, addr);
                WARN_ON(addr & (sz - 1));
-               return (pte_t *)pmd;
+               return (pte_t *)pmdp;
        }
 
-       pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
-              sz, pte, pte_val(*pte));
-       return pte;
+       return ptep;
 }
 
 pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
 
-       pgd = pgd_offset(mm, addr);
-       pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
-       if (!pgd_present(*pgd))
+       pgdp = pgd_offset(mm, addr);
+       if (!pgd_present(READ_ONCE(*pgdp)))
                return NULL;
 
-       pud = pud_offset(pgd, addr);
-       if (sz != PUD_SIZE && pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (sz != PUD_SIZE && pud_none(pud))
                return NULL;
        /* hugepage or swap? */
-       if (pud_huge(*pud) || !pud_present(*pud))
-               return (pte_t *)pud;
+       if (pud_huge(pud) || !pud_present(pud))
+               return (pte_t *)pudp;
        /* table; check the next level */
 
        if (sz == CONT_PMD_SIZE)
                addr &= CONT_PMD_MASK;
 
-       pmd = pmd_offset(pud, addr);
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
        if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
-           pmd_none(*pmd))
+           pmd_none(pmd))
                return NULL;
-       if (pmd_huge(*pmd) || !pmd_present(*pmd))
-               return (pte_t *)pmd;
+       if (pmd_huge(pmd) || !pmd_present(pmd))
+               return (pte_t *)pmdp;
 
-       if (sz == CONT_PTE_SIZE) {
-               pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
-               return pte;
-       }
+       if (sz == CONT_PTE_SIZE)
+               return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
 
        return NULL;
 }
@@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
        size_t pgsize;
        pte_t pte;
 
-       if (!pte_cont(*ptep)) {
+       if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_set_wrprotect(mm, addr, ptep);
                return;
        }
@@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
        size_t pgsize;
        int ncontig;
 
-       if (!pte_cont(*ptep)) {
+       if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_clear_flush(vma, addr, ptep);
                return;
        }
index 6e02e6f..dabfc1e 100644 (file)
@@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
        return __pa(p);
 }
 
-static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pmd_none(*pmd)) {
+       if (pmd_none(READ_ONCE(*pmdp))) {
                phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
                                             : kasan_alloc_zeroed_page(node);
-               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+               __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pte_offset_kimg(pmd, addr)
-                    : pte_offset_kernel(pmd, addr);
+       return early ? pte_offset_kimg(pmdp, addr)
+                    : pte_offset_kernel(pmdp, addr);
 }
 
-static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pud_none(*pud)) {
+       if (pud_none(READ_ONCE(*pudp))) {
                phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
                                             : kasan_alloc_zeroed_page(node);
-               __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
+               __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
+       return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
 }
 
-static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
+static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pgd_none(*pgd)) {
+       if (pgd_none(READ_ONCE(*pgdp))) {
                phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
                                             : kasan_alloc_zeroed_page(node);
-               __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
+               __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
+       return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
 }
 
-static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
+       pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
 
        do {
                phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
                                              : kasan_alloc_zeroed_page(node);
                next = addr + PAGE_SIZE;
-               set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
-       } while (pte++, addr = next, addr != end && pte_none(*pte));
+               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
 }
 
-static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
+       pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
 
        do {
                next = pmd_addr_end(addr, end);
-               kasan_pte_populate(pmd, addr, next, node, early);
-       } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+               kasan_pte_populate(pmdp, addr, next, node, early);
+       } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
 }
 
-static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
+       pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
 
        do {
                next = pud_addr_end(addr, end);
-               kasan_pmd_populate(pud, addr, next, node, early);
-       } while (pud++, addr = next, addr != end && pud_none(*pud));
+               kasan_pmd_populate(pudp, addr, next, node, early);
+       } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
 }
 
 static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
                                      int node, bool early)
 {
        unsigned long next;
-       pgd_t *pgd;
+       pgd_t *pgdp;
 
-       pgd = pgd_offset_k(addr);
+       pgdp = pgd_offset_k(addr);
        do {
                next = pgd_addr_end(addr, end);
-               kasan_pud_populate(pgd, addr, next, node, early);
-       } while (pgd++, addr = next, addr != end);
+               kasan_pud_populate(pgdp, addr, next, node, early);
+       } while (pgdp++, addr = next, addr != end);
 }
 
 /* The early shadow maps everything to a single page of zeroes */
@@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
  */
 void __init kasan_copy_shadow(pgd_t *pgdir)
 {
-       pgd_t *pgd, *pgd_new, *pgd_end;
+       pgd_t *pgdp, *pgdp_new, *pgdp_end;
 
-       pgd = pgd_offset_k(KASAN_SHADOW_START);
-       pgd_end = pgd_offset_k(KASAN_SHADOW_END);
-       pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+       pgdp = pgd_offset_k(KASAN_SHADOW_START);
+       pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
+       pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
        do {
-               set_pgd(pgd_new, *pgd);
-       } while (pgd++, pgd_new++, pgd != pgd_end);
+               set_pgd(pgdp_new, READ_ONCE(*pgdp));
+       } while (pgdp++, pgdp_new++, pgdp != pgdp_end);
 }
 
 static void __init clear_pgds(unsigned long start,
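The arm64 hunks in this series (kasan_init.c above, mmu.c and pageattr.c below) all apply the same idiom: a live page-table entry is loaded exactly once with READ_ONCE() into a local copy, and every subsequent test runs against that copy, so the compiler can neither reload nor tear the access while the hardware walker or another CPU updates the entry. A minimal userspace sketch of the pattern; pte_t and the two helpers are illustrative stand-ins, not the kernel definitions:

    #include <stdint.h>

    /* Volatile-cast load, mirroring the kernel's READ_ONCE() for
     * machine-word-sized objects. */
    #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

    typedef struct { uint64_t val; } pte_t;         /* stand-in type */

    static int pte_none(pte_t pte)  { return pte.val == 0; }
    static int pte_valid(pte_t pte) { return (pte.val & 1) != 0; }

    /* Snapshot the entry once; both tests then see the same value even
     * if another CPU rewrites *ptep concurrently. Testing *ptep twice
     * could pass pte_none() on one value and pte_valid() on another. */
    static int entry_usable(pte_t *ptep)
    {
            pte_t pte = READ_ONCE(*ptep);

            return !pte_none(pte) && pte_valid(pte);
    }
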
index 4694cda..84a019f 100644 (file)
@@ -125,45 +125,48 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
        return ((old ^ new) & ~mask) == 0;
 }
 
-static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot)
 {
-       pte_t *pte;
+       pte_t *ptep;
 
-       pte = pte_set_fixmap_offset(pmd, addr);
+       ptep = pte_set_fixmap_offset(pmdp, addr);
        do {
-               pte_t old_pte = *pte;
+               pte_t old_pte = READ_ONCE(*ptep);
 
-               set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
+               set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
 
                /*
                 * After the PTE entry has been populated once, we
                 * only allow updates to the permission attributes.
                 */
-               BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+               BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
+                                             READ_ONCE(pte_val(*ptep))));
 
                phys += PAGE_SIZE;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+       } while (ptep++, addr += PAGE_SIZE, addr != end);
 
        pte_clear_fixmap();
 }
 
-static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void),
                                int flags)
 {
        unsigned long next;
+       pmd_t pmd = READ_ONCE(*pmdp);
 
-       BUG_ON(pmd_sect(*pmd));
-       if (pmd_none(*pmd)) {
+       BUG_ON(pmd_sect(pmd));
+       if (pmd_none(pmd)) {
                phys_addr_t pte_phys;
                BUG_ON(!pgtable_alloc);
                pte_phys = pgtable_alloc();
-               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+               __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+               pmd = READ_ONCE(*pmdp);
        }
-       BUG_ON(pmd_bad(*pmd));
+       BUG_ON(pmd_bad(pmd));
 
        do {
                pgprot_t __prot = prot;
@@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-               init_pte(pmd, addr, next, phys, __prot);
+               init_pte(pmdp, addr, next, phys, __prot);
 
                phys += next - addr;
        } while (addr = next, addr != end);
 }
 
-static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot,
                     phys_addr_t (*pgtable_alloc)(void), int flags)
 {
        unsigned long next;
-       pmd_t *pmd;
+       pmd_t *pmdp;
 
-       pmd = pmd_set_fixmap_offset(pud, addr);
+       pmdp = pmd_set_fixmap_offset(pudp, addr);
        do {
-               pmd_t old_pmd = *pmd;
+               pmd_t old_pmd = READ_ONCE(*pmdp);
 
                next = pmd_addr_end(addr, end);
 
                /* try section mapping first */
                if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
-                       pmd_set_huge(pmd, phys, prot);
+                       pmd_set_huge(pmdp, phys, prot);
 
                        /*
                         * After the PMD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
-                                                     pmd_val(*pmd)));
+                                                     READ_ONCE(pmd_val(*pmdp))));
                } else {
-                       alloc_init_cont_pte(pmd, addr, next, phys, prot,
+                       alloc_init_cont_pte(pmdp, addr, next, phys, prot,
                                            pgtable_alloc, flags);
 
                        BUG_ON(pmd_val(old_pmd) != 0 &&
-                              pmd_val(old_pmd) != pmd_val(*pmd));
+                              pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
                }
                phys += next - addr;
-       } while (pmd++, addr = next, addr != end);
+       } while (pmdp++, addr = next, addr != end);
 
        pmd_clear_fixmap();
 }
 
-static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void), int flags)
 {
        unsigned long next;
+       pud_t pud = READ_ONCE(*pudp);
 
        /*
         * Check for initial section mappings in the pgd/pud.
         */
-       BUG_ON(pud_sect(*pud));
-       if (pud_none(*pud)) {
+       BUG_ON(pud_sect(pud));
+       if (pud_none(pud)) {
                phys_addr_t pmd_phys;
                BUG_ON(!pgtable_alloc);
                pmd_phys = pgtable_alloc();
-               __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+               __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+               pud = READ_ONCE(*pudp);
        }
-       BUG_ON(pud_bad(*pud));
+       BUG_ON(pud_bad(pud));
 
        do {
                pgprot_t __prot = prot;
@@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-               init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+               init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
 
                phys += next - addr;
        } while (addr = next, addr != end);
@@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
        return true;
 }
 
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
-                                 phys_addr_t phys, pgprot_t prot,
-                                 phys_addr_t (*pgtable_alloc)(void),
-                                 int flags)
+static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+                          phys_addr_t phys, pgprot_t prot,
+                          phys_addr_t (*pgtable_alloc)(void),
+                          int flags)
 {
-       pud_t *pud;
        unsigned long next;
+       pud_t *pudp;
+       pgd_t pgd = READ_ONCE(*pgdp);
 
-       if (pgd_none(*pgd)) {
+       if (pgd_none(pgd)) {
                phys_addr_t pud_phys;
                BUG_ON(!pgtable_alloc);
                pud_phys = pgtable_alloc();
-               __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+               __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
+               pgd = READ_ONCE(*pgdp);
        }
-       BUG_ON(pgd_bad(*pgd));
+       BUG_ON(pgd_bad(pgd));
 
-       pud = pud_set_fixmap_offset(pgd, addr);
+       pudp = pud_set_fixmap_offset(pgdp, addr);
        do {
-               pud_t old_pud = *pud;
+               pud_t old_pud = READ_ONCE(*pudp);
 
                next = pud_addr_end(addr, end);
 
@@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
                 */
                if (use_1G_block(addr, next, phys) &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
-                       pud_set_huge(pud, phys, prot);
+                       pud_set_huge(pudp, phys, prot);
 
                        /*
                         * After the PUD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
-                                                     pud_val(*pud)));
+                                                     READ_ONCE(pud_val(*pudp))));
                } else {
-                       alloc_init_cont_pmd(pud, addr, next, phys, prot,
+                       alloc_init_cont_pmd(pudp, addr, next, phys, prot,
                                            pgtable_alloc, flags);
 
                        BUG_ON(pud_val(old_pud) != 0 &&
-                              pud_val(old_pud) != pud_val(*pud));
+                              pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
                }
                phys += next - addr;
-       } while (pud++, addr = next, addr != end);
+       } while (pudp++, addr = next, addr != end);
 
        pud_clear_fixmap();
 }
@@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
                                 int flags)
 {
        unsigned long addr, length, end, next;
-       pgd_t *pgd = pgd_offset_raw(pgdir, virt);
+       pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
 
        /*
         * If the virtual and physical address don't have the same offset
@@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
        end = addr + length;
        do {
                next = pgd_addr_end(addr, end);
-               alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+               alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
                               flags);
                phys += next - addr;
-       } while (pgd++, addr = next, addr != end);
+       } while (pgdp++, addr = next, addr != end);
 }
 
 static phys_addr_t pgd_pgtable_alloc(void)
@@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
        flush_tlb_kernel_range(virt, virt + size);
 }
 
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
                                  phys_addr_t end, pgprot_t prot, int flags)
 {
-       __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+       __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
                             prot, early_pgtable_alloc, flags);
 }
 
@@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
                            PAGE_KERNEL_RO);
 }
 
-static void __init map_mem(pgd_t *pgd)
+static void __init map_mem(pgd_t *pgdp)
 {
        phys_addr_t kernel_start = __pa_symbol(_text);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
@@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
                if (memblock_is_nomap(reg))
                        continue;
 
-               __map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+               __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
        }
 
        /*
@@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
         * Note that contiguous mappings cannot be remapped in this way,
         * so we should avoid them here.
         */
-       __map_memblock(pgd, kernel_start, kernel_end,
+       __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
 
@@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
         * through /sys/kernel/kexec_crash_size interface.
         */
        if (crashk_res.end) {
-               __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+               __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
                               PAGE_KERNEL,
                               NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
                memblock_clear_nomap(crashk_res.start,
@@ -499,7 +506,7 @@ void mark_rodata_ro(void)
        debug_checkwx();
 }
 
-static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
                                      pgprot_t prot, struct vm_struct *vma,
                                      int flags, unsigned long vm_flags)
 {
@@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
        BUG_ON(!PAGE_ALIGNED(pa_start));
        BUG_ON(!PAGE_ALIGNED(size));
 
-       __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+       __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
                             early_pgtable_alloc, flags);
 
        if (!(vm_flags & VM_NO_GUARD))
@@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
 /*
  * Create fine-grained mappings for the kernel.
  */
-static void __init map_kernel(pgd_t *pgd)
+static void __init map_kernel(pgd_t *pgdp)
 {
        static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
                                vmlinux_initdata, vmlinux_data;
@@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
-       map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
+       map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
-       map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+       map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
-       map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+       map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
                           &vmlinux_inittext, 0, VM_NO_GUARD);
-       map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+       map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
                           &vmlinux_initdata, 0, VM_NO_GUARD);
-       map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
+       map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
 
-       if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+       if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
                /*
                 * The fixmap falls in a separate pgd to the kernel, and doesn't
                 * live in the carveout for the swapper_pg_dir. We can simply
                 * re-use the existing dir for the fixmap.
                 */
-               set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
-                       *pgd_offset_k(FIXADDR_START));
+               set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
+                       READ_ONCE(*pgd_offset_k(FIXADDR_START)));
        } else if (CONFIG_PGTABLE_LEVELS > 3) {
                /*
                 * The fixmap shares its top level pgd entry with the kernel
@@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
                 * entry instead.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-               pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+               pud_populate(&init_mm,
+                            pud_set_fixmap_offset(pgdp, FIXADDR_START),
                             lm_alias(bm_pmd));
                pud_clear_fixmap();
        } else {
                BUG();
        }
 
-       kasan_copy_shadow(pgd);
+       kasan_copy_shadow(pgdp);
 }
 
 /*
@@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
 void __init paging_init(void)
 {
        phys_addr_t pgd_phys = early_pgtable_alloc();
-       pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+       pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
 
-       map_kernel(pgd);
-       map_mem(pgd);
+       map_kernel(pgdp);
+       map_mem(pgdp);
 
        /*
         * We want to reuse the original swapper_pg_dir so we don't have to
@@ -635,7 +643,7 @@ void __init paging_init(void)
         * To do this we need to go via a temporary pgd.
         */
        cpu_replace_ttbr1(__va(pgd_phys));
-       memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+       memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
        cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 
        pgd_clear_fixmap();
@@ -655,37 +663,40 @@ void __init paging_init(void)
  */
 int kern_addr_valid(unsigned long addr)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep, pte;
 
        if ((((long)addr) >> VA_BITS) != -1UL)
                return 0;
 
-       pgd = pgd_offset_k(addr);
-       if (pgd_none(*pgd))
+       pgdp = pgd_offset_k(addr);
+       if (pgd_none(READ_ONCE(*pgdp)))
                return 0;
 
-       pud = pud_offset(pgd, addr);
-       if (pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (pud_none(pud))
                return 0;
 
-       if (pud_sect(*pud))
-               return pfn_valid(pud_pfn(*pud));
+       if (pud_sect(pud))
+               return pfn_valid(pud_pfn(pud));
 
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
+       if (pmd_none(pmd))
                return 0;
 
-       if (pmd_sect(*pmd))
-               return pfn_valid(pmd_pfn(*pmd));
+       if (pmd_sect(pmd))
+               return pfn_valid(pmd_pfn(pmd));
 
-       pte = pte_offset_kernel(pmd, addr);
-       if (pte_none(*pte))
+       ptep = pte_offset_kernel(pmdp, addr);
+       pte = READ_ONCE(*ptep);
+       if (pte_none(pte))
                return 0;
 
-       return pfn_valid(pte_pfn(*pte));
+       return pfn_valid(pte_pfn(pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 #if !ARM64_SWAPPER_USES_SECTION_MAPS
@@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 {
        unsigned long addr = start;
        unsigned long next;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
 
        do {
                next = pmd_addr_end(addr, end);
 
-               pgd = vmemmap_pgd_populate(addr, node);
-               if (!pgd)
+               pgdp = vmemmap_pgd_populate(addr, node);
+               if (!pgdp)
                        return -ENOMEM;
 
-               pud = vmemmap_pud_populate(pgd, addr, node);
-               if (!pud)
+               pudp = vmemmap_pud_populate(pgdp, addr, node);
+               if (!pudp)
                        return -ENOMEM;
 
-               pmd = pmd_offset(pud, addr);
-               if (pmd_none(*pmd)) {
+               pmdp = pmd_offset(pudp, addr);
+               if (pmd_none(READ_ONCE(*pmdp))) {
                        void *p = NULL;
 
                        p = vmemmap_alloc_block_buf(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;
 
-                       pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+                       pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
                } else
-                       vmemmap_verify((pte_t *)pmd, node, addr, next);
+                       vmemmap_verify((pte_t *)pmdp, node, addr, next);
        } while (addr = next, addr != end);
 
        return 0;
@@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
 
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
+       pgd_t *pgdp = pgd_offset_k(addr);
+       pgd_t pgd = READ_ONCE(*pgdp);
 
-       BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+       BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
 
-       return pud_offset_kimg(pgd, addr);
+       return pud_offset_kimg(pgdp, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
 {
-       pud_t *pud = fixmap_pud(addr);
+       pud_t *pudp = fixmap_pud(addr);
+       pud_t pud = READ_ONCE(*pudp);
 
-       BUG_ON(pud_none(*pud) || pud_bad(*pud));
+       BUG_ON(pud_none(pud) || pud_bad(pud));
 
-       return pmd_offset_kimg(pud, addr);
+       return pmd_offset_kimg(pudp, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
@@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
  */
 void __init early_fixmap_init(void)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp, pgd;
+       pud_t *pudp;
+       pmd_t *pmdp;
        unsigned long addr = FIXADDR_START;
 
-       pgd = pgd_offset_k(addr);
+       pgdp = pgd_offset_k(addr);
+       pgd = READ_ONCE(*pgdp);
        if (CONFIG_PGTABLE_LEVELS > 3 &&
-           !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
+           !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
                /*
                 * We only end up here if the kernel mapping and the fixmap
                 * share the top level pgd entry, which should only happen on
                 * 16k/4 levels configurations.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-               pud = pud_offset_kimg(pgd, addr);
+               pudp = pud_offset_kimg(pgdp, addr);
        } else {
-               if (pgd_none(*pgd))
-                       __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
-               pud = fixmap_pud(addr);
+               if (pgd_none(pgd))
+                       __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+               pudp = fixmap_pud(addr);
        }
-       if (pud_none(*pud))
-               __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
-       pmd = fixmap_pmd(addr);
-       __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
+       if (pud_none(READ_ONCE(*pudp)))
+               __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+       pmdp = fixmap_pmd(addr);
+       __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
 
        /*
         * The boot-ioremap range spans multiple pmds, for which
@@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
 
-       if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
-            || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+       if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+            || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
                WARN_ON(1);
-               pr_warn("pmd %p != %p, %p\n",
-                       pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+               pr_warn("pmdp %p != %p, %p\n",
+                       pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
                        fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
                pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
                        fix_to_virt(FIX_BTMAP_BEGIN));
@@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
                               phys_addr_t phys, pgprot_t flags)
 {
        unsigned long addr = __fix_to_virt(idx);
-       pte_t *pte;
+       pte_t *ptep;
 
        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-       pte = fixmap_pte(addr);
+       ptep = fixmap_pte(addr);
 
        if (pgprot_val(flags)) {
-               set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+               set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
        } else {
-               pte_clear(&init_mm, addr, pte);
+               pte_clear(&init_mm, addr, ptep);
                flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
        }
 }
@@ -915,36 +929,46 @@ int __init arch_ioremap_pmd_supported(void)
        return 1;
 }
 
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 {
        pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
+
+       /* ioremap_page_range doesn't honour BBM */
+       if (pud_present(READ_ONCE(*pudp)))
+               return 0;
+
        BUG_ON(phys & ~PUD_MASK);
-       set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
+       set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
        return 1;
 }
 
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 {
        pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
+
+       /* ioremap_page_range doesn't honour BBM */
+       if (pmd_present(READ_ONCE(*pmdp)))
+               return 0;
+
        BUG_ON(phys & ~PMD_MASK);
-       set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+       set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
        return 1;
 }
 
-int pud_clear_huge(pud_t *pud)
+int pud_clear_huge(pud_t *pudp)
 {
-       if (!pud_sect(*pud))
+       if (!pud_sect(READ_ONCE(*pudp)))
                return 0;
-       pud_clear(pud);
+       pud_clear(pudp);
        return 1;
 }
 
-int pmd_clear_huge(pmd_t *pmd)
+int pmd_clear_huge(pmd_t *pmdp)
 {
-       if (!pmd_sect(*pmd))
+       if (!pmd_sect(READ_ONCE(*pmdp)))
                return 0;
-       pmd_clear(pmd);
+       pmd_clear(pmdp);
        return 1;
 }
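Both set_huge() hunks above add the same guard: ioremap_page_range() may ask to install a block mapping in a slot that already holds a live entry, and rewriting a valid entry in place would skip the architecture's break-before-make (BBM) sequence and risk TLB conflict aborts, so the functions now return 0 and let the caller fall back to smaller mappings. A sketch of the guard's shape, assuming a simplified descriptor where bit 0 is the valid bit (names are illustrative, not the kernel API):

    #include <stdint.h>

    #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

    /* Returns 1 if the huge entry was installed, 0 if the slot was
     * already live and the caller must fall back rather than violate
     * break-before-make. */
    static int set_huge_entry(uint64_t *entryp, uint64_t new_entry)
    {
            if (READ_ONCE(*entryp) & 1)     /* valid bit already set */
                    return 0;

            *entryp = new_entry;            /* kernel: set_pud()/set_pmd() */
            return 1;
    }
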
index a682a0a..a563593 100644 (file)
@@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
                        void *data)
 {
        struct page_change_data *cdata = data;
-       pte_t pte = *ptep;
+       pte_t pte = READ_ONCE(*ptep);
 
        pte = clear_pte_bit(pte, cdata->clear_mask);
        pte = set_pte_bit(pte, cdata->set_mask);
@@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
  */
 bool kernel_page_present(struct page *page)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep;
        unsigned long addr = (unsigned long)page_address(page);
 
-       pgd = pgd_offset_k(addr);
-       if (pgd_none(*pgd))
+       pgdp = pgd_offset_k(addr);
+       if (pgd_none(READ_ONCE(*pgdp)))
                return false;
 
-       pud = pud_offset(pgd, addr);
-       if (pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (pud_none(pud))
                return false;
-       if (pud_sect(*pud))
+       if (pud_sect(pud))
                return true;
 
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
+       if (pmd_none(pmd))
                return false;
-       if (pmd_sect(*pmd))
+       if (pmd_sect(pmd))
                return true;
 
-       pte = pte_offset_kernel(pmd, addr);
-       return pte_valid(*pte);
+       ptep = pte_offset_kernel(pmdp, addr);
+       return pte_valid(READ_ONCE(*ptep));
 }
 #endif /* CONFIG_HIBERNATION */
 #endif /* CONFIG_DEBUG_PAGEALLOC */
index 71baed7..c0af476 100644 (file)
@@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
        dc      cvac, cur_\()\type\()p          // Ensure any existing dirty
        dmb     sy                              // lines are written back before
        ldr     \type, [cur_\()\type\()p]       // loading the entry
-       tbz     \type, #0, next_\()\type        // Skip invalid entries
+       tbz     \type, #0, skip_\()\type        // Skip invalid and
+       tbnz    \type, #11, skip_\()\type       // non-global entries
        .endm
 
        .macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
        add     end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
 do_pgd:        __idmap_kpti_get_pgtable_ent    pgd
        tbnz    pgd, #1, walk_puds
-       __idmap_kpti_put_pgtable_ent_ng pgd
 next_pgd:
+       __idmap_kpti_put_pgtable_ent_ng pgd
+skip_pgd:
        add     cur_pgdp, cur_pgdp, #8
        cmp     cur_pgdp, end_pgdp
        b.ne    do_pgd
@@ -294,8 +296,9 @@ walk_puds:
        add     end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
 do_pud:        __idmap_kpti_get_pgtable_ent    pud
        tbnz    pud, #1, walk_pmds
-       __idmap_kpti_put_pgtable_ent_ng pud
 next_pud:
+       __idmap_kpti_put_pgtable_ent_ng pud
+skip_pud:
        add     cur_pudp, cur_pudp, 8
        cmp     cur_pudp, end_pudp
        b.ne    do_pud
@@ -314,8 +317,9 @@ walk_pmds:
        add     end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
 do_pmd:        __idmap_kpti_get_pgtable_ent    pmd
        tbnz    pmd, #1, walk_ptes
-       __idmap_kpti_put_pgtable_ent_ng pmd
 next_pmd:
+       __idmap_kpti_put_pgtable_ent_ng pmd
+skip_pmd:
        add     cur_pmdp, cur_pmdp, #8
        cmp     cur_pmdp, end_pmdp
        b.ne    do_pmd
@@ -333,7 +337,7 @@ walk_ptes:
        add     end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
 do_pte:        __idmap_kpti_get_pgtable_ent    pte
        __idmap_kpti_put_pgtable_ent_ng pte
-next_pte:
+skip_pte:
        add     cur_ptep, cur_ptep, #8
        cmp     cur_ptep, end_ptep
        b.ne    do_pte
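The proc.S hunk above adjusts the KPTI walker that rewrites the swapper page tables: entries that are invalid (bit 0 clear) or already non-global (bit 11, nG, set) now branch to the skip_* labels past the store, while the next_* labels perform the __idmap_kpti_put_pgtable_ent_ng update. A C rendering of the per-descriptor decision, assuming the standard stage-1 descriptor bit positions:

    #include <stdint.h>

    #define DESC_VALID (UINT64_C(1) << 0)
    #define DESC_NG    (UINT64_C(1) << 11)  /* non-global (nG) bit */

    /* Return the descriptor to store back, leaving invalid and
     * already-non-global entries untouched (the skip_* path). */
    static uint64_t kpti_make_ng(uint64_t desc)
    {
            if (!(desc & DESC_VALID) || (desc & DESC_NG))
                    return desc;
            return desc | DESC_NG;          /* the next_* path */
    }
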
index 1d4f1da..a933504 100644 (file)
@@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
        off = offsetof(struct bpf_array, map.max_entries);
        emit_a64_mov_i64(tmp, off, ctx);
        emit(A64_LDR32(tmp, r2, tmp), ctx);
+       emit(A64_MOV(0, r3, r3), ctx);
        emit(A64_CMP(0, r3, tmp), ctx);
-       emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
+       emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
 
        /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
         *     goto out;
@@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
         */
        emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
        emit(A64_CMP(1, tcc, tmp), ctx);
-       emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
+       emit(A64_B_(A64_COND_HI, jmp_offset), ctx);
        emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
 
        /* prog = array->ptrs[index];
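The two condition-code changes above are the substance of this hunk: the tail-call index in r3 and the tail-call counter are unsigned, so the signed GE/GT conditions can be defeated by values with the top bit set, and the added A64_MOV(0, r3, r3) zero-extends the index to 32 bits before the compare. A self-contained C illustration of the failure mode:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t max_entries = 16;
            uint32_t index = 0x80000010u;           /* bit 31 set */

            /* Signed compare, as A64_COND_GE performed: the index reads
             * as negative, so the bounds check wrongly passes. */
            int oob_signed = (int32_t)index >= (int32_t)max_entries;

            /* Unsigned compare, as A64_COND_CS performs: correctly
             * flagged as out of bounds. */
            int oob_unsigned = index >= max_entries;

            printf("signed rejects: %d, unsigned rejects: %d\n",
                   oob_signed, oob_unsigned);       /* prints 0, then 1 */
            return 0;
    }
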
index 905afea..06da9d4 100644 (file)
@@ -44,18 +44,25 @@ struct bug_frame {
  * not be used like this with newer versions of gcc.
  */
 #define BUG()                                                          \
+do {                                                                   \
        __asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\
                              "movu.w " __stringify(__LINE__) ",$r0\n\t"\
                              "jump 0f\n\t"                             \
                              ".section .rodata\n"                      \
                              "0:\t.string \"" __FILE__ "\"\n\t"        \
-                             ".previous")
+                             ".previous");                             \
+       unreachable();                                                  \
+} while (0)
 #endif
 
 #else
 
 /* This just causes an oops. */
-#define BUG() (*(int *)0 = 0)
+#define BUG()                                                          \
+do {                                                                   \
+       barrier_before_unreachable();                                   \
+       __builtin_trap();                                               \
+} while (0)
 
 #endif
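The bug.h changes here and in the ia64/m68k hunks below pair a compiler barrier with the trap: barrier_before_unreachable() keeps GCC from reordering or discarding the code leading up to a __builtin_trap() it knows cannot return (the kernel's definition cites GCC PR82365). A userspace model of the resulting shape, approximating the barrier with an empty asm memory clobber:

    #include <stdio.h>

    /* Approximation of the kernel's barrier_before_unreachable(). */
    #define barrier_before_unreachable() __asm__ __volatile__("" ::: "memory")

    #define MY_BUG() do {                                                  \
            fprintf(stderr, "kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
            barrier_before_unreachable(); /* message stays before trap */  \
            __builtin_trap();             /* never returns */              \
    } while (0)

    int check(int x)
    {
            if (x < 0)
                    MY_BUG();       /* ends control flow on this path */
            return x;
    }

    int main(void) { return check(1); }
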
 
index bd3eeb8..66b37a5 100644 (file)
@@ -4,7 +4,11 @@
 
 #ifdef CONFIG_BUG
 #define ia64_abort()   __builtin_trap()
-#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
+#define BUG() do {                                             \
+       printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__);   \
+       barrier_before_unreachable();                           \
+       ia64_abort();                                           \
+} while (0)
 
 /* should this BUG be made generic? */
 #define HAVE_ARCH_BUG
index 0b4c65a..498f3da 100644 (file)
@@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
 obj-y                          += esi_stub.o   # must be in kernel proper
 endif
 obj-$(CONFIG_INTEL_IOMMU)      += pci-dma.o
-obj-$(CONFIG_SWIOTLB)          += pci-swiotlb.o
 
 obj-$(CONFIG_BINFMT_ELF)       += elfcore.o
 
index b7e2bf1..275dca1 100644 (file)
@@ -8,16 +8,19 @@
 #ifndef CONFIG_SUN3
 #define BUG() do { \
        pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+       barrier_before_unreachable(); \
        __builtin_trap(); \
 } while (0)
 #else
 #define BUG() do { \
        pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+       barrier_before_unreachable(); \
        panic("BUG!"); \
 } while (0)
 #endif
 #else
 #define BUG() do { \
+       barrier_before_unreachable(); \
        __builtin_trap(); \
 } while (0)
 #endif
index d3d4352..c73eb82 100644 (file)
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
            macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
                platform_device_register_simple("macsonic", -1, NULL, 0);
 
+       if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+           macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+               platform_device_register_simple("mac89x0", -1, NULL, 0);
+
        if (macintosh_config->ether_type == MAC_ETHER_MACE)
                platform_device_register_simple("macmace", -1, NULL, 0);
 
index 1bd5c4f..c22da16 100644 (file)
@@ -126,6 +126,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS
 
 quiet_cmd_cpp_its_S = ITS     $@
       cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \
+                       -D__ASSEMBLY__ \
                        -DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \
                        -DVMLINUX_BINARY="\"$(3)\"" \
                        -DVMLINUX_COMPRESSION="\"$(2)\"" \
index 946681d..9a0fa66 100644 (file)
@@ -86,7 +86,6 @@ struct compat_flock {
        compat_off_t    l_len;
        s32             l_sysid;
        compat_pid_t    l_pid;
-       short           __unused;
        s32             pad[4];
 };
 
index 19c88d7..fcf9af4 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/errno.h>
 #include <linux/percpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/spinlock.h>
 
 #include <asm/mips-cps.h>
@@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
 
 phys_addr_t __weak mips_cpc_default_phys_base(void)
 {
+       struct device_node *cpc_node;
+       struct resource res;
+       int err;
+
+       cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+       if (cpc_node) {
+               err = of_address_to_resource(cpc_node, 0, &res);
+               if (!err)
+                       return res.start;
+       }
+
        return 0;
 }
 
index 85bc601..5f8b0a9 100644 (file)
@@ -375,6 +375,7 @@ static void __init bootmem_init(void)
        unsigned long reserved_end;
        unsigned long mapstart = ~0UL;
        unsigned long bootmap_size;
+       phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
        bool bootmap_valid = false;
        int i;
 
@@ -395,7 +396,8 @@ static void __init bootmem_init(void)
        max_low_pfn = 0;
 
        /*
-        * Find the highest page frame number we have available.
+        * Find the highest page frame number we have available
+        * and the lowest used RAM address.
         */
        for (i = 0; i < boot_mem_map.nr_map; i++) {
                unsigned long start, end;
@@ -407,6 +409,8 @@ static void __init bootmem_init(void)
                end = PFN_DOWN(boot_mem_map.map[i].addr
                                + boot_mem_map.map[i].size);
 
+               ramstart = min(ramstart, boot_mem_map.map[i].addr);
+
 #ifndef CONFIG_HIGHMEM
                /*
                 * Skip highmem here so we get an accurate max_low_pfn if low
@@ -436,6 +440,13 @@ static void __init bootmem_init(void)
                mapstart = max(reserved_end, start);
        }
 
+       /*
+        * Reserve any memory between the start of RAM and PHYS_OFFSET
+        */
+       if (ramstart > PHYS_OFFSET)
+               add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
+                                 BOOT_MEM_RESERVED);
+
        if (min_low_pfn >= max_low_pfn)
                panic("Incorrect memory mapping !!!");
        if (min_low_pfn > ARCH_PFN_OFFSET) {
@@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
 
        add_memory_region(start, size, BOOT_MEM_RAM);
 
-       if (start && start > PHYS_OFFSET)
-               add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
-                               BOOT_MEM_RESERVED);
        return 0;
 }
 early_param("mem", early_parse_mem);
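Taken together, the setup.c hunks move the "reserve everything below RAM" logic out of the mem= parser and into bootmem_init(): ramstart is now the minimum start address over the entire boot memory map, and the range [PHYS_OFFSET, ramstart) is reserved once, regardless of how the map was populated. A compact model of the computation, with a toy region table standing in for boot_mem_map:

    #include <stdint.h>
    #include <stdio.h>

    struct region { uint64_t addr, size; };

    int main(void)
    {
            /* Toy boot memory map; PHYS_OFFSET assumed to be 0 here. */
            const struct region map[] = { { 0x80000000u, 0x10000000u },
                                          { 0x20000000u, 0x08000000u } };
            const uint64_t phys_offset = 0;
            uint64_t ramstart = UINT64_MAX;
            unsigned int i;

            for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
                    if (map[i].addr < ramstart)
                            ramstart = map[i].addr;

            /* Pages between PHYS_OFFSET and the first byte of RAM must
             * never be handed out as free memory. */
            if (ramstart > phys_offset)
                    printf("reserve [0x%llx, 0x%llx)\n",
                           (unsigned long long)phys_offset,
                           (unsigned long long)ramstart);
            return 0;
    }
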
index 87dcac2..9d41732 100644 (file)
@@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
         */
 }
 
-void __init bmips_cpu_setup(void)
+void bmips_cpu_setup(void)
 {
        void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
        u32 __maybe_unused cfg;
index 30a155c..c615abd 100644 (file)
@@ -16,6 +16,7 @@
 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX        PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
index 949d691..67c5475 100644 (file)
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
  * keeping the prototype consistent across the two formats.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-                       unsigned int subpg_index, unsigned long hidx)
+                                        unsigned int subpg_index, unsigned long hidx,
+                                        int offset)
 {
        return (hidx << H_PAGE_F_GIX_SHIFT) &
                (H_PAGE_F_SECOND | H_PAGE_F_GIX);
index 338b7da..3bcf269 100644 (file)
@@ -45,7 +45,7 @@
  * generic accessors and iterators here
  */
 #define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
 {
        real_pte_t rpte;
        unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
         */
        smp_rmb();
 
-       hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+       hidxp = (unsigned long *)(ptep + offset);
        rpte.hidx = *hidxp;
        return rpte;
 }
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
  * expected to modify the PTE bits accordingly and commit the PTE to memory.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-               unsigned int subpg_index, unsigned long hidx)
+                                        unsigned int subpg_index,
+                                        unsigned long hidx, int offset)
 {
-       unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+       unsigned long *hidxp = (unsigned long *)(ptep + offset);
 
        rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
        *hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
 }
 
 #define H_PTE_TABLE_SIZE       PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
 #define H_PMD_TABLE_SIZE       ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
                                 (sizeof(unsigned long) << PMD_INDEX_SIZE))
 #else
 #define H_PMD_TABLE_SIZE       (sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE       ((sizeof(pud_t) << PUD_INDEX_SIZE) +    \
+                                (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
 #define H_PUD_TABLE_SIZE       (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
 #define H_PGD_TABLE_SIZE       (sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
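The hash-64k.h hunks above make __real_pte() and pte_set_hidx() take an explicit offset instead of hard-coding PTRS_PER_PTE: the hash-slot nibbles (hidx) live in the second half of the page-table page, and once hugetlb PTEs can sit at the PMD or PUD level (the hugetlbpage-hash64.c hunk further down passes PTRS_PER_PMD or PTRS_PER_PUD), the distance from a PTE to its slot word depends on the table it is in. A sketch of the addressing; the 4-bits-per-subpage layout below is a simplification of the real HIDX_BITS encoding:

    #include <stdint.h>

    typedef struct { uint64_t pte; } pte_t;         /* stand-in type */

    /* The slot word for an entry lives 'offset' entries past it, in
     * the second half of the table page. */
    static uint64_t *hidx_word(pte_t *ptep, int offset)
    {
            return (uint64_t *)(ptep + offset);
    }

    /* Record a 4-bit hash slot for one subpage (simplified encoding). */
    static void set_hidx(pte_t *ptep, int offset,
                         unsigned int subpg_index, uint64_t hidx)
    {
            uint64_t *hidxp = hidx_word(ptep, offset);

            *hidxp &= ~(UINT64_C(0xf) << (subpg_index * 4));
            *hidxp |= (hidx & 0xf) << (subpg_index * 4);
    }
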
index 0920eff..935adcd 100644 (file)
@@ -23,7 +23,8 @@
                                 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE                (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+       defined(CONFIG_PPC_64K_PAGES)
 /*
  * only with hash 64k we need to use the second half of pmd page table
  * to store pointer to deposited pgtable_t
  */
 #define H_PMD_CACHE_INDEX      (H_PMD_INDEX_SIZE + 1)
 #else
 #define H_PMD_CACHE_INDEX      H_PMD_INDEX_SIZE
 #endif
+/*
+ * We store the slot details in the second half of the page table.
+ * Increase the pud-level table size so that hugetlb PTEs can be
+ * stored at the pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX      (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX      (H_PUD_INDEX_SIZE)
+#endif
 /*
  * Define the address range of the kernel non-linear virtual area
  */
index 1fcfa42..4746bc6 100644 (file)
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+       pgd_t *pgd;
+
        if (radix_enabled())
                return radix__pgd_alloc(mm);
-       return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-               pgtable_gfp_flags(mm, GFP_KERNEL));
+
+       pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+                              pgtable_gfp_flags(mm, GFP_KERNEL));
+       memset(pgd, 0, PGD_TABLE_SIZE);
+
+       return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+       return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
                pgtable_gfp_flags(mm, GFP_KERNEL));
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-       kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+       kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
         * ahead and flush the page walk cache
         */
        flush_tlb_pgtable(tlb, address);
-        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+       pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
index 5101772..a6b9f1d 100644 (file)
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
 extern unsigned long __pud_index_size;
 extern unsigned long __pgd_index_size;
 extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
 #define PTE_INDEX_SIZE  __pte_index_size
 #define PMD_INDEX_SIZE  __pmd_index_size
 #define PUD_INDEX_SIZE  __pud_index_size
 #define PGD_INDEX_SIZE  __pgd_index_size
 #define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
 /*
  * Because of use of pte fragments and THP, size of page table
  * are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
  */
 #ifndef __real_pte
 
-#define __real_pte(e,p)                ((real_pte_t){(e)})
+#define __real_pte(e, p, o)            ((real_pte_t){(e)})
 #define __rpte_to_pte(r)       ((r).pte)
 #define __rpte_to_hidx(r,index)        (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
 
index 176dfb7..471b227 100644 (file)
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
                                          EXC_HV, SOFTEN_TEST_HV, bitmask)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask)           \
-       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
        EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
index 88e5e8f..855e17d 100644 (file)
 #define PACA_IRQ_HMI           0x20
 #define PACA_IRQ_PMI           0x40
 
+/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK        (PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK        (PACA_IRQ_EE)
+#endif
+
 /*
  * flags for paca->irq_soft_mask
  */
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
 static inline void may_hard_irq_enable(void)
 {
        get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-       if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+       if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
                __hard_irq_enable();
 }
 
index 9dcbfa6..d8b1e8e 100644 (file)
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
        return false;
 }
 
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
 #endif /* CONFIG_KEXEC_CORE */
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */
index 504a3c3..03bbd11 100644 (file)
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX        PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
index abddf58..5c5f75d 100644 (file)
@@ -27,6 +27,7 @@
 #else
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
 #endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
 
 /*
  * Define the address range of the kernel non-linear virtual area
index 88187c2..9f42164 100644 (file)
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
 
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+       numa_cpu_lookup_table[cpu] = node;
+}
+
 static inline int early_cpu_to_node(int cpu)
 {
        int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
 {
        return 0;
 }
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
 extern int start_topology_update(void);
 extern int stop_topology_update(void);
 extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
 #else
 static inline int start_topology_update(void)
 {
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
 {
        return 0;
 }
+static inline int find_and_online_cpu_nid(int cpu)
+{
+       return 0;
+}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
index ee832d3..9b6e653 100644 (file)
@@ -943,6 +943,8 @@ kernel_dbg_exc:
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
  * accordingly and if the interrupt is level sensitive, we hard disable
+ * it. The hard-disable case (full_mask) corresponds to
+ * PACA_IRQ_MUST_HARD_MASK, so keep these in sync.
  */
 
 .macro masked_interrupt_book3e paca_irq full_mask
index 243d072..3ac87e5 100644 (file)
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
  *   triggered and won't automatically refire.
  * - If it was a HMI we return immediately since we handled it in realmode
  *   and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
  * This is called with r10 containing the value to OR to the paca field.
  */
 #define MASKED_INTERRUPT(_H)                           \
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt:                                   \
        ori     r10,r10,0xffff;                         \
        mtspr   SPRN_DEC,r10;                           \
        b       MASKED_DEC_HANDLER_LABEL;               \
-1:     andi.   r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI);  \
-       bne     2f;                                     \
+1:     andi.   r10,r10,PACA_IRQ_MUST_HARD_MASK;        \
+       beq     2f;                                     \
        mfspr   r10,SPRN_##_H##SRR1;                    \
        xori    r10,r10,MSR_EE; /* clear MSR_EE */      \
        mtspr   SPRN_##_H##SRR1,r10;                    \
index 5a8bfee..04d0bbd 100644 (file)
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_create_file(s, &dev_attr_pir);
 
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
+       if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+               !firmware_has_feature(FW_FEATURE_LPAR))
                device_create_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_remove_file(s, &dev_attr_pir);
 
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
+       if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+               !firmware_has_feature(FW_FEATURE_LPAR))
                device_remove_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
index 1604110..916844f 100644 (file)
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
        u32 i, n_lmbs;
 
        n_lmbs = of_read_number(prop++, 1);
+       if (n_lmbs == 0)
+               return;
 
        for (i = 0; i < n_lmbs; i++) {
                read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
        u32 i, j, lmb_sets;
 
        lmb_sets = of_read_number(prop++, 1);
+       if (lmb_sets == 0)
+               return;
 
        for (i = 0; i < lmb_sets; i++) {
                read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
        struct drmem_lmb *lmb;
 
        drmem_info->n_lmbs = of_read_number(prop++, 1);
+       if (drmem_info->n_lmbs == 0)
+               return;
 
        drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
                                   GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
        int lmb_index;
 
        lmb_sets = of_read_number(prop++, 1);
+       if (lmb_sets == 0)
+               return;
 
        /* first pass, calculate the number of LMBs */
        p = prop;
index 5a69b51..d573d7d 100644 (file)
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
         * need to add in 0x1 if it's a read-only user page
         */
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
                        return -1;
                }
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
index 2253bbc..e601d95 100644 (file)
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 
        subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
        vpn  = hpt_vpn(ea, vsid, ssize);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
        /*
         * None of the sub-4K pages is hashed
         */
@@ -214,7 +214,7 @@ repeat:
                return -1;
        }
 
-       new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+       new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
        new_pte |= H_PAGE_HASHPTE;
 
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
        } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
                }
 
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
index 7d07c7e..cf290d4 100644 (file)
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
        __pmd_index_size = H_PMD_INDEX_SIZE;
        __pud_index_size = H_PUD_INDEX_SIZE;
        __pgd_index_size = H_PGD_INDEX_SIZE;
+       __pud_cache_index = H_PUD_CACHE_INDEX;
        __pmd_cache_index = H_PMD_CACHE_INDEX;
        __pte_table_size = H_PTE_TABLE_SIZE;
        __pmd_table_size = H_PMD_TABLE_SIZE;
index 12511f5..b320f50 100644 (file)
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        unsigned long vpn;
        unsigned long old_pte, new_pte;
        unsigned long rflags, pa, sz;
-       long slot;
+       long slot, offset;
 
        BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       if (unlikely(mmu_psize == MMU_PAGE_16G))
+               offset = PTRS_PER_PUD;
+       else
+               offset = PTRS_PER_PMD;
+       rpte = __real_pte(__pte(old_pte), ptep, offset);
 
        sz = ((1UL) << shift);
        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
                        return -1;
                }
 
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
        }
 
        /*
index eb8c6c8..2b656e6 100644 (file)
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
         * same size as either the pgd or pmd index except with THP enabled
         * on book3s 64
         */
-       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
-               pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+       if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+               pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
 }
index 314d19a..edd8d0b 100644 (file)
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
                numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
-       numa_cpu_lookup_table[cpu] = node;
-}
-
 static void map_cpu_to_node(int cpu, int node)
 {
        update_numa_cpu_lookup_table(cpu, node);
index 573a9a2..2e10a96 100644 (file)
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
 #include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
 #include <asm/dma.h>
 #include <asm/machdep.h>
 #include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
                     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
        trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+       /*
+        * The init_mm context is given the first available (non-zero) PID,
+        * which is the "guard PID" and contains no page table. PIDR should
+        * never be set to zero because that duplicates the kernel address
+        * space at the 0x0... offset (quadrant 0)!
+        *
+        * An arbitrary PID that may later be allocated by the PID allocator
+        * for userspace processes must not be used either, because that
+        * would cause stale user mappings for that PID on CPUs outside of
+        * the TLB invalidation scheme (because it won't be in mm_cpumask).
+        *
+        * So permanently carve out one PID for the purpose of a guard PID.
+        */
+       init_mm.context.id = mmu_base_pid;
+       mmu_base_pid++;
 }
 
 static void __init radix_init_partition_table(void)
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
        __pmd_index_size = RADIX_PMD_INDEX_SIZE;
        __pud_index_size = RADIX_PUD_INDEX_SIZE;
        __pgd_index_size = RADIX_PGD_INDEX_SIZE;
+       __pud_cache_index = RADIX_PUD_INDEX_SIZE;
        __pmd_cache_index = RADIX_PMD_INDEX_SIZE;
        __pte_table_size = RADIX_PTE_TABLE_SIZE;
        __pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
 
        radix_init_iamr();
        radix_init_pgtable();
-
+       /* Switch to the guard PID before turning on MMU */
+       radix__switch_mmu_context(NULL, &init_mm);
        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
 }
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
        }
        radix_init_iamr();
 
+       radix__switch_mmu_context(NULL, &init_mm);
        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
 }
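For context on the guard PID: in the radix scheme the top effective-address bits select the translation context, and quadrant 0 (addresses starting at 0x0...) is translated through PIDR while kernel-quadrant accesses use PID 0. A PIDR of 0 would therefore replay the kernel page tables at user addresses, and borrowing a PID the allocator may later hand to userspace would leave stale translations uninvalidated, since init_mm never appears in that mm's mm_cpumask; permanently burning one PID as a guard avoids both problems.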
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
        pud_clear(pud);
 }
 
+struct change_mapping_params {
+       pte_t *pte;
+       unsigned long start;
+       unsigned long end;
+       unsigned long aligned_start;
+       unsigned long aligned_end;
+};
+
+static int stop_machine_change_mapping(void *data)
+{
+       struct change_mapping_params *params =
+                       (struct change_mapping_params *)data;
+
+       if (!data)
+               return -1;
+
+       spin_unlock(&init_mm.page_table_lock);
+       pte_clear(&init_mm, params->aligned_start, params->pte);
+       create_physical_mapping(params->aligned_start, params->start);
+       create_physical_mapping(params->end, params->aligned_end);
+       spin_lock(&init_mm.page_table_lock);
+       return 0;
+}
+
 static void remove_pte_table(pte_t *pte_start, unsigned long addr,
                             unsigned long end)
 {
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
        }
 }
 
+/*
+ * clear the pte and potentially split the mapping helper
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+                               unsigned long size, pte_t *pte)
+{
+       unsigned long mask = ~(size - 1);
+       unsigned long aligned_start = addr & mask;
+       unsigned long aligned_end = addr + size;
+       struct change_mapping_params params;
+       bool split_region = false;
+
+       if ((end - addr) < size) {
+               /*
+                * We're going to clear the PTE, but we have not
+                * flushed the mapping yet, so remap and flush
+                * now. If the effects are visible outside the
+                * processor, or if we are running in code close
+                * to the mapping we cleared, we are in trouble.
+                */
+               if (overlaps_kernel_text(aligned_start, addr) ||
+                       overlaps_kernel_text(end, aligned_end)) {
+                       /*
+                        * Hack, just return, don't pte_clear
+                        */
+                       WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+                                 "text, not splitting\n", addr, end);
+                       return;
+               }
+               split_region = true;
+       }
+
+       if (split_region) {
+               params.pte = pte;
+               params.start = addr;
+               params.end = end;
+               params.aligned_start = addr & ~(size - 1);
+               params.aligned_end = min_t(unsigned long, aligned_end,
+                               (unsigned long)__va(memblock_end_of_DRAM()));
+               stop_machine(stop_machine_change_mapping, &params, NULL);
+               return;
+       }
+
+       pte_clear(&init_mm, addr, pte);
+}
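The alignment arithmetic in split_kernel_mapping() is easy to sanity-check in isolation; a throwaway user-space sketch with made-up values:

        #include <stdio.h>

        int main(void)
        {
                unsigned long size = 1UL << 21;            /* 2 MiB, like PMD_SIZE */
                unsigned long addr = 0xc000000012200000UL; /* hypothetical start */
                unsigned long mask = ~(size - 1);

                /* Mirrors the expressions in the hunk above. */
                printf("aligned_start = %#lx\n", addr & mask); /* 0xc000000012200000 */
                printf("aligned_end   = %#lx\n", addr + size); /* 0xc000000012400000 */
                return 0;
        }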
+
 static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                             unsigned long end)
 {
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                        continue;
 
                if (pmd_huge(*pmd)) {
-                       if (!IS_ALIGNED(addr, PMD_SIZE) ||
-                           !IS_ALIGNED(next, PMD_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pmd);
+                       split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
                        continue;
                }
 
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
                        continue;
 
                if (pud_huge(*pud)) {
-                       if (!IS_ALIGNED(addr, PUD_SIZE) ||
-                           !IS_ALIGNED(next, PUD_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pud);
+                       split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
                        continue;
                }
 
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
                        continue;
 
                if (pgd_huge(*pgd)) {
-                       if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
-                           !IS_ALIGNED(next, PGDIR_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pgd);
+                       split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
                        continue;
                }
 
index c9a623c..28c980e 100644 (file)
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
 EXPORT_SYMBOL(__pgd_index_size);
 unsigned long __pmd_cache_index;
 EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
 unsigned long __pte_table_size;
 EXPORT_SYMBOL(__pte_table_size);
 unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
        if (old & PATB_HR) {
                asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
                             "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+               asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+                            "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
                trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
        } else {
                asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
index 881ebd5..9b23f12 100644 (file)
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
        unsigned int psize;
        int ssize;
        real_pte_t rpte;
-       int i;
+       int i, offset;
 
        i = batch->index;
 
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
                psize = get_slice_psize(mm, addr);
                /* Mask the address for the correct page size */
                addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+               if (unlikely(psize == MMU_PAGE_16G))
+                       offset = PTRS_PER_PUD;
+               else
+                       offset = PTRS_PER_PMD;
 #else
                BUG();
                psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
                 * support 64k pages, this might be different from the
                 * hardware page size encoded in the slice table. */
                addr &= PAGE_MASK;
+               offset = PTRS_PER_PTE;
        }
 
 
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
        }
        WARN_ON(vsid == 0);
        vpn = hpt_vpn(addr, vsid, ssize);
-       rpte = __real_pte(__pte(pte), ptep);
+       rpte = __real_pte(__pte(pte), ptep, offset);
 
        /*
         * Check if we have an active batch on this CPU. If not, just
index dd4c9b8..f6f55ab 100644 (file)
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
        const struct cpumask *l_cpumask;
 
        get_online_cpus();
-       for_each_online_node(nid) {
+       for_each_node_with_cpus(nid) {
                l_cpumask = cpumask_of_node(nid);
-               cpu = cpumask_first(l_cpumask);
+               cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+               if (cpu >= nr_cpu_ids)
+                       continue;
                opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
                                       get_hard_smp_processor_id(cpu));
        }
index 2b3eb01..b7c53a5 100644 (file)
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
                        rc = PTR_ERR(txwin->paste_kaddr);
                        goto free_window;
                }
+       } else {
+               /*
+                * A user mapping must ensure that context switch issues
+                * CP_ABORT for this thread.
+                */
+               rc = set_thread_uses_vas();
+               if (rc)
+                       goto free_window;
        }
 
-       /*
-        * Now that we have a send window, ensure context switch issues
-        * CP_ABORT for this thread.
-        */
-       rc = -EINVAL;
-       if (set_thread_uses_vas() < 0)
-               goto free_window;
-
        set_vinst_win(vinst, txwin);
 
        return txwin;
index dceb514..652d3e9 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/xics.h>
 #include <asm/xive.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
                        BUG_ON(cpu_online(cpu));
                        set_cpu_present(cpu, false);
                        set_hard_smp_processor_id(cpu, -1);
+                       update_numa_cpu_lookup_table(cpu, -1);
                        break;
                }
                if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
        cpu_maps_update_done();
 }
 
-extern int find_and_online_cpu_nid(int cpu);
-
 static int dlpar_online_cpu(struct device_node *dn)
 {
        int rc = 0;
index 81d8614..5e1ef91 100644 (file)
@@ -48,6 +48,28 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
 
+/*
+ * Enable the hotplug interrupt late because processing these events may
+ * touch other devices or systems (e.g. hugepages) that have not been
+ * initialized at the subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+       struct device_node *np;
+
+       /* Hotplug Events */
+       np = of_find_node_by_path("/event-sources/hot-plug-events");
+       if (np != NULL) {
+               if (dlpar_workqueue_init() == 0)
+                       request_event_sources_irqs(np, ras_hotplug_interrupt,
+                                                  "RAS_HOTPLUG");
+               of_node_put(np);
+       }
+
+       return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
 /*
  * Initialize handlers for the set of interrupts caused by hardware errors
  * and power system events.
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
                of_node_put(np);
        }
 
-       /* Hotplug Events */
-       np = of_find_node_by_path("/event-sources/hot-plug-events");
-       if (np != NULL) {
-               if (dlpar_workqueue_init() == 0)
-                       request_event_sources_irqs(np, ras_hotplug_interrupt,
-                                          "RAS_HOTPLUG");
-               of_node_put(np);
-       }
-
        /* EPOW Events */
        np = of_find_node_by_path("/event-sources/epow-events");
        if (np != NULL) {
index d9c4c93..091f1d0 100644 (file)
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
 
        rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
        if (rc) {
-               pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+               pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+                      target, prio);
                rc = -EIO;
                goto fail;
        }
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
        /* Configure and enable the queue in HW */
        rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
        if (rc) {
-               pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+               pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+                      target, prio);
                rc = -EIO;
        } else {
                q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
        if (IS_ERR(qpage))
                return PTR_ERR(qpage);
 
-       return xive_spapr_configure_queue(cpu, q, prio, qpage,
-                                         xive_queue_shift);
+       return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+                                         q, prio, qpage, xive_queue_shift);
 }
 
 static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
        struct xive_q *q = &xc->queue[prio];
        unsigned int alloc_order;
        long rc;
+       int hw_cpu = get_hard_smp_processor_id(cpu);
 
-       rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+       rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
        if (rc)
-               pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+               pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+                      hw_cpu, prio);
 
        alloc_order = xive_alloc_order(xive_queue_shift);
        free_pages((unsigned long)q->qpage, alloc_order);
index b6722c2..04807c7 100644 (file)
@@ -8,7 +8,6 @@ config RISCV
        select OF
        select OF_EARLY_FLATTREE
        select OF_IRQ
-       select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
        select ARCH_WANT_FRAME_POINTERS
        select CLONE_BACKWARDS
        select COMMON_CLK
@@ -20,7 +19,6 @@ config RISCV
        select GENERIC_STRNLEN_USER
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
-       select ARCH_WANT_OPTIONAL_GPIOLIB
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_DMA_API_DEBUG
@@ -34,7 +32,6 @@ config RISCV
        select HAVE_ARCH_TRACEHOOK
        select MODULES_USE_ELF_RELA if MODULES
        select THREAD_INFO_IN_TASK
-       select RISCV_IRQ_INTC
        select RISCV_TIMER
 
 config MMU
index 87fc045..56fa592 100644 (file)
@@ -172,6 +172,9 @@ ENTRY(handle_exception)
        move a1, sp /* pt_regs */
        tail do_IRQ
 1:
+       /* Exceptions run with interrupts enabled */
+       csrs sstatus, SR_SIE
+
        /* Handle syscalls */
        li t0, EXC_SYSCALL
        beq s4, t0, handle_syscall
@@ -198,8 +201,6 @@ handle_syscall:
         */
        addi s2, s2, 0x4
        REG_S s2, PT_SEPC(sp)
-       /* System calls run with interrupts enabled */
-       csrs sstatus, SR_SIE
        /* Trace syscalls, but only if requested by the user. */
        REG_L t0, TASK_TI_FLAGS(tp)
        andi t0, t0, _TIF_SYSCALL_TRACE
index 226eeb1..6e07ed3 100644 (file)
@@ -64,7 +64,7 @@ ENTRY(_start)
        /* Start the kernel */
        mv a0, s0
        mv a1, s1
-       call sbi_save
+       call parse_dtb
        tail start_kernel
 
 relocate:
index 09f7064..c11f40c 100644 (file)
@@ -144,7 +144,7 @@ asmlinkage void __init setup_vm(void)
 #endif
 }
 
-void __init sbi_save(unsigned int hartid, void *dtb)
+void __init parse_dtb(unsigned int hartid, void *dtb)
 {
        early_init_dt_scan(__va(dtb));
 }
index 6bf594a..8767e45 100644 (file)
@@ -430,6 +430,8 @@ config SPARC_LEON
        depends on SPARC32
        select USB_EHCI_BIG_ENDIAN_MMIO
        select USB_EHCI_BIG_ENDIAN_DESC
+       select USB_UHCI_BIG_ENDIAN_MMIO
+       select USB_UHCI_BIG_ENDIAN_DESC
        ---help---
          Say Y here if you are running on a SPARC-LEON processor.
          The LEON processor is a synthesizable VHDL model of the
index 6f17528..ea53e41 100644 (file)
@@ -9,10 +9,14 @@
 void do_BUG(const char *file, int line);
 #define BUG() do {                                     \
        do_BUG(__FILE__, __LINE__);                     \
+       barrier_before_unreachable();                   \
        __builtin_trap();                               \
 } while (0)
 #else
-#define BUG()          __builtin_trap()
+#define BUG() do {                                     \
+       barrier_before_unreachable();                   \
+       __builtin_trap();                               \
+} while (0)
 #endif
 
 #define HAVE_ARCH_BUG
index aff152c..5a82bac 100644 (file)
@@ -1,6 +1,7 @@
 boot/compressed/vmlinux
 tools/test_get_len
 tools/insn_sanity
+tools/insn_decoder_test
 purgatory/kexec-purgatory.c
 purgatory/purgatory.ro
 
index 63bf349..c1236b1 100644 (file)
@@ -423,12 +423,6 @@ config X86_MPPARSE
          For old smp systems that do not have proper acpi support. Newer systems
          (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
 
-config X86_BIGSMP
-       bool "Support for big SMP systems with more than 8 CPUs"
-       depends on X86_32 && SMP
-       ---help---
-         This option is needed for the systems that have more than 8 CPUs
-
 config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
@@ -460,6 +454,12 @@ config INTEL_RDT
          Say N if unsure.
 
 if X86_32
+config X86_BIGSMP
+       bool "Support for big SMP systems with more than 8 CPUs"
+       depends on SMP
+       ---help---
+         This option is needed for the systems that have more than 8 CPUs
+
 config X86_EXTENDED_PLATFORM
        bool "Support for extended (non-PC) x86 platforms"
        default y
@@ -949,25 +949,66 @@ config MAXSMP
          Enable maximum number of CPUS and NUMA Nodes for this architecture.
          If unsure, say N.
 
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+#   interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+       int
+       default NR_CPUS_RANGE_END if MAXSMP
+       default    1 if !SMP
+       default    2
+
+config NR_CPUS_RANGE_END
+       int
+       depends on X86_32
+       default   64 if  SMP &&  X86_BIGSMP
+       default    8 if  SMP && !X86_BIGSMP
+       default    1 if !SMP
+
+config NR_CPUS_RANGE_END
+       int
+       depends on X86_64
+       default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
+       default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+       default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+       int
+       depends on X86_32
+       default   32 if  X86_BIGSMP
+       default    8 if  SMP
+       default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+       int
+       depends on X86_64
+       default 8192 if  MAXSMP
+       default   64 if  SMP
+       default    1 if !SMP
+
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
-       range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 64 if SMP && X86_32 && X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
-       range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
-       default "1" if !SMP
-       default "8192" if MAXSMP
-       default "32" if SMP && X86_BIGSMP
-       default "8" if SMP && X86_32
-       default "64" if SMP
+       range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+       default NR_CPUS_DEFAULT
        ---help---
          This allows you to specify the maximum number of CPUs which this
          kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum
          supported value is 8192, otherwise the maximum value is 512.  The
          minimum value which makes sense is 2.
 
-         This is purely to save memory - each supported CPU adds
-         approximately eight kilobytes to the kernel image.
+         This is purely to save memory: each supported CPU adds about 8KB
+         to the kernel image.
 
 config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
@@ -1363,7 +1404,7 @@ config HIGHMEM4G
 
 config HIGHMEM64G
        bool "64GB"
-       depends on !M486
+       depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6
        select X86_PAE
        ---help---
          Select this if you have a 32-bit processor and more than 4
index 65a9a47..8b8d229 100644 (file)
@@ -374,7 +374,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
        def_bool y
-       depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+       depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
@@ -385,7 +385,7 @@ config X86_CMOV
 config X86_MINIMUM_CPU_FAMILY
        int
        default "64" if X86_64
-       default "6" if X86_32 && X86_P6_NOP
+       default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
        default "5" if X86_32 && X86_CMPXCHG64
        default "4"
 
index 36870b2..d088050 100644 (file)
@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
 {
        unsigned int j;
 
-       state->lens[0] = 0;
-       state->lens[1] = 1;
-       state->lens[2] = 2;
-       state->lens[3] = 3;
+       /* initially all lanes are unused */
+       state->lens[0] = 0xFFFFFFFF00000000;
+       state->lens[1] = 0xFFFFFFFF00000001;
+       state->lens[2] = 0xFFFFFFFF00000002;
+       state->lens[3] = 0xFFFFFFFF00000003;
+
        state->unused_lanes = 0xFF03020100;
        for (j = 0; j < 4; j++)
                state->ldata[j].job_in_lane = NULL;
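Each lens word packs a block count in the upper 32 bits above a lane number in the lower 32, and the flush path selects the lane with the minimum length; seeding unused lanes with an effectively infinite count (0xFFFFFFFF) keeps it from ever picking a lane that has no job queued, which the old 0..3 initialization allowed.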
index 3f48f69..dce7092 100644 (file)
@@ -97,80 +97,69 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS  21*8
 
-       .macro ALLOC_PT_GPREGS_ON_STACK
-       addq    $-(15*8), %rsp
-       .endm
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+       /*
+        * Push registers and sanitize registers of values that a
+        * speculation attack might otherwise want to exploit. The
+        * lower registers are likely clobbered well before they
+        * could be put to use in a speculative execution gadget.
+        * Interleave XOR with PUSH for better uop scheduling:
+        */
+       pushq   %rdi            /* pt_regs->di */
+       pushq   %rsi            /* pt_regs->si */
+       pushq   \rdx            /* pt_regs->dx */
+       pushq   %rcx            /* pt_regs->cx */
+       pushq   \rax            /* pt_regs->ax */
+       pushq   %r8             /* pt_regs->r8 */
+       xorq    %r8, %r8        /* nospec   r8 */
+       pushq   %r9             /* pt_regs->r9 */
+       xorq    %r9, %r9        /* nospec   r9 */
+       pushq   %r10            /* pt_regs->r10 */
+       xorq    %r10, %r10      /* nospec   r10 */
+       pushq   %r11            /* pt_regs->r11 */
+       xorq    %r11, %r11      /* nospec   r11 */
+       pushq   %rbx            /* pt_regs->rbx */
+       xorl    %ebx, %ebx      /* nospec   rbx */
+       pushq   %rbp            /* pt_regs->rbp */
+       xorl    %ebp, %ebp      /* nospec   rbp */
+       pushq   %r12            /* pt_regs->r12 */
+       xorq    %r12, %r12      /* nospec   r12 */
+       pushq   %r13            /* pt_regs->r13 */
+       xorq    %r13, %r13      /* nospec   r13 */
+       pushq   %r14            /* pt_regs->r14 */
+       xorq    %r14, %r14      /* nospec   r14 */
+       pushq   %r15            /* pt_regs->r15 */
+       xorq    %r15, %r15      /* nospec   r15 */
+       UNWIND_HINT_REGS
+.endm
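On the mix of xorl and xorq above: in 64-bit mode a write to a 32-bit register zero-extends into the full 64-bit register, so xorl %ebx, %ebx clears all of %rbx one byte shorter than the xorq form; %r8-%r15 need a REX prefix either way, so the 64-bit form costs nothing extra there.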
 
-       .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
-       .if \r11
-       movq %r11, 6*8+\offset(%rsp)
-       .endif
-       .if \r8910
-       movq %r10, 7*8+\offset(%rsp)
-       movq %r9,  8*8+\offset(%rsp)
-       movq %r8,  9*8+\offset(%rsp)
-       .endif
-       .if \rax
-       movq %rax, 10*8+\offset(%rsp)
-       .endif
-       .if \rcx
-       movq %rcx, 11*8+\offset(%rsp)
-       .endif
-       movq %rdx, 12*8+\offset(%rsp)
-       movq %rsi, 13*8+\offset(%rsp)
-       movq %rdi, 14*8+\offset(%rsp)
-       UNWIND_HINT_REGS offset=\offset extra=0
-       .endm
-       .macro SAVE_C_REGS offset=0
-       SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
-       SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_R891011
-       SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RCX_R891011
-       SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
-       SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
-       .endm
-
-       .macro SAVE_EXTRA_REGS offset=0
-       movq %r15, 0*8+\offset(%rsp)
-       movq %r14, 1*8+\offset(%rsp)
-       movq %r13, 2*8+\offset(%rsp)
-       movq %r12, 3*8+\offset(%rsp)
-       movq %rbp, 4*8+\offset(%rsp)
-       movq %rbx, 5*8+\offset(%rsp)
-       UNWIND_HINT_REGS offset=\offset
-       .endm
-
-       .macro POP_EXTRA_REGS
+.macro POP_REGS pop_rdi=1 skip_r11rcx=0
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
-       .endm
-
-       .macro POP_C_REGS
+       .if \skip_r11rcx
+       popq %rsi
+       .else
        popq %r11
+       .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
+       .if \skip_r11rcx
+       popq %rsi
+       .else
        popq %rcx
+       .endif
        popq %rdx
        popq %rsi
+       .if \pop_rdi
        popq %rdi
-       .endm
-
-       .macro icebp
-       .byte 0xf1
-       .endm
+       .endif
+.endm
 
 /*
  * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
@@ -178,7 +167,7 @@ For 32-bit we have the following conventions - kernel is built with
  * is just setting the LSB, which makes it an invalid stack address and is also
  * a signal to the unwinder that it's a pt_regs pointer in disguise.
  *
- * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
  * the original rbp.
  */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
index 30c8c53..8971bd6 100644 (file)
@@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)
 
        swapgs
        /*
-        * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+        * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
         * is not required to switch CR3.
         */
        movq    %rsp, PER_CPU_VAR(rsp_scratch)
@@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)
        pushq   %rcx                            /* pt_regs->ip */
 GLOBAL(entry_SYSCALL_64_after_hwframe)
        pushq   %rax                            /* pt_regs->orig_ax */
-       pushq   %rdi                            /* pt_regs->di */
-       pushq   %rsi                            /* pt_regs->si */
-       pushq   %rdx                            /* pt_regs->dx */
-       pushq   %rcx                            /* pt_regs->cx */
-       pushq   $-ENOSYS                        /* pt_regs->ax */
-       pushq   %r8                             /* pt_regs->r8 */
-       pushq   %r9                             /* pt_regs->r9 */
-       pushq   %r10                            /* pt_regs->r10 */
-       pushq   %r11                            /* pt_regs->r11 */
-       pushq   %rbx                            /* pt_regs->rbx */
-       pushq   %rbp                            /* pt_regs->rbp */
-       pushq   %r12                            /* pt_regs->r12 */
-       pushq   %r13                            /* pt_regs->r13 */
-       pushq   %r14                            /* pt_regs->r14 */
-       pushq   %r15                            /* pt_regs->r15 */
-       UNWIND_HINT_REGS
+
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 
        TRACE_IRQS_OFF
 
@@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 syscall_return_via_sysret:
        /* rcx and r11 are already restored (see code above) */
        UNWIND_HINT_EMPTY
-       POP_EXTRA_REGS
-       popq    %rsi    /* skip r11 */
-       popq    %r10
-       popq    %r9
-       popq    %r8
-       popq    %rax
-       popq    %rsi    /* skip rcx */
-       popq    %rdx
-       popq    %rsi
+       POP_REGS pop_rdi=0 skip_r11rcx=1
 
        /*
         * Now all regs are restored except RSP and RDI.
@@ -559,9 +537,7 @@ END(irq_entries_start)
        call    switch_to_thread_stack
 1:
 
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
+       PUSH_AND_CLEAR_REGS
        ENCODE_FRAME_POINTER
 
        testb   $3, CS(%rsp)
@@ -622,15 +598,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       POP_EXTRA_REGS
-       popq    %r11
-       popq    %r10
-       popq    %r9
-       popq    %r8
-       popq    %rax
-       popq    %rcx
-       popq    %rdx
-       popq    %rsi
+       POP_REGS pop_rdi=0
 
        /*
         * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
@@ -688,8 +656,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
        ud2
 1:
 #endif
-       POP_EXTRA_REGS
-       POP_C_REGS
+       POP_REGS
        addq    $8, %rsp        /* skip regs->orig_ax */
        /*
         * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -908,7 +875,9 @@ ENTRY(\sym)
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
        .endif
 
-       ALLOC_PT_GPREGS_ON_STACK
+       /* Save all registers in pt_regs */
+       PUSH_AND_CLEAR_REGS
+       ENCODE_FRAME_POINTER
 
        .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
@@ -1121,9 +1090,7 @@ ENTRY(xen_failsafe_callback)
        addq    $0x30, %rsp
        UNWIND_HINT_IRET_REGS
        pushq   $-1 /* orig_ax = -1 => not a system call */
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
+       PUSH_AND_CLEAR_REGS
        ENCODE_FRAME_POINTER
        jmp     error_exit
 END(xen_failsafe_callback)
@@ -1163,16 +1130,13 @@ idtentry machine_check          do_mce                  has_error_code=0        paranoid=1
 #endif
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
        UNWIND_HINT_FUNC
        cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -1211,21 +1175,18 @@ ENTRY(paranoid_exit)
        jmp     .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
        TRACE_IRQS_IRETQ_DEBUG
+       RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
        jmp restore_regs_and_return_to_kernel
 END(paranoid_exit)
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
-       UNWIND_HINT_FUNC
+       UNWIND_HINT_REGS offset=8
        cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-       ENCODE_FRAME_POINTER 8
-       xorl    %ebx, %ebx
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
 
@@ -1406,22 +1367,7 @@ ENTRY(nmi)
        pushq   1*8(%rdx)       /* pt_regs->rip */
        UNWIND_HINT_IRET_REGS
        pushq   $-1             /* pt_regs->orig_ax */
-       pushq   %rdi            /* pt_regs->di */
-       pushq   %rsi            /* pt_regs->si */
-       pushq   (%rdx)          /* pt_regs->dx */
-       pushq   %rcx            /* pt_regs->cx */
-       pushq   %rax            /* pt_regs->ax */
-       pushq   %r8             /* pt_regs->r8 */
-       pushq   %r9             /* pt_regs->r9 */
-       pushq   %r10            /* pt_regs->r10 */
-       pushq   %r11            /* pt_regs->r11 */
-       pushq   %rbx            /* pt_regs->rbx */
-       pushq   %rbp            /* pt_regs->rbp */
-       pushq   %r12            /* pt_regs->r12 */
-       pushq   %r13            /* pt_regs->r13 */
-       pushq   %r14            /* pt_regs->r14 */
-       pushq   %r15            /* pt_regs->r15 */
-       UNWIND_HINT_REGS
+       PUSH_AND_CLEAR_REGS rdx=(%rdx)
        ENCODE_FRAME_POINTER
 
        /*
@@ -1631,7 +1577,8 @@ end_repeat_nmi:
         * frame to point back to repeat_nmi.
         */
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-       ALLOC_PT_GPREGS_ON_STACK
+       PUSH_AND_CLEAR_REGS
+       ENCODE_FRAME_POINTER
 
        /*
         * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
@@ -1655,8 +1602,7 @@ end_repeat_nmi:
 nmi_swapgs:
        SWAPGS_UNSAFE_STACK
 nmi_restore:
-       POP_EXTRA_REGS
-       POP_C_REGS
+       POP_REGS
 
        /*
         * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
index 98d5358..fd65e01 100644 (file)
@@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
+       xorq    %r15, %r15              /* nospec   r15 */
        cld
 
        /*
@@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
        pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
+       xorq    %r15, %r15              /* nospec   r15 */
 
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
@@ -338,15 +358,25 @@ ENTRY(entry_INT80_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   %r12                    /* pt_regs->r12 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   %r13                    /* pt_regs->r13 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   %r14                    /* pt_regs->r14 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   %r15                    /* pt_regs->r15 */
+       xorq    %r15, %r15              /* nospec   r15 */
        cld
 
        /*
index 731153a..56457cb 100644 (file)
@@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu)
                break;
 
        case INTEL_FAM6_SANDYBRIDGE_X:
-               switch (cpu_data(cpu).x86_mask) {
+               switch (cpu_data(cpu).x86_stepping) {
                case 6: rev = 0x618; break;
                case 7: rev = 0x70c; break;
                }
index ae64d0b..cf372b9 100644 (file)
@@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void)
         * on PMU interrupt
         */
        if (boot_cpu_data.x86_model == 28
-           && boot_cpu_data.x86_mask < 10) {
+           && boot_cpu_data.x86_stepping < 10) {
                pr_cont("LBR disabled due to erratum");
                return;
        }
index a5604c3..408879b 100644 (file)
@@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = {
 
 static __init void p6_pmu_rdpmc_quirk(void)
 {
-       if (boot_cpu_data.x86_mask < 9) {
+       if (boot_cpu_data.x86_stepping < 9) {
                /*
                 * PPro erratum 26; fixed in stepping 9 and above.
                 */
index 44f5d79..1188172 100644 (file)
@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
        if (boot_cpu_data.x86 == 0x0F &&
            boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
            boot_cpu_data.x86_model <= 0x05 &&
-           boot_cpu_data.x86_mask < 0x0A)
+           boot_cpu_data.x86_stepping < 0x0A)
                return 1;
        else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
                return 1;
index 30d4061..e1259f0 100644 (file)
@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 
        asm ("cmp %1,%2; sbb %0,%0;"
                        :"=r" (mask)
-                       :"r"(size),"r" (index)
+                       :"g"(size),"r" (index)
                        :"cc");
        return mask;
 }
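The "g" constraint lets the compiler pass size as an immediate or memory operand instead of forcing it into a register; how callers consume the mask is unchanged. A sketch of the canonical pattern (array_index_nospec() in <linux/nospec.h> wraps exactly this; index, size, val and array are placeholders):

        /* Sketch: clamp index to 0 under mis-speculation before the load. */
        if (index < size) {
                index &= array_index_mask_nospec(index, size);
                val = array[index];
        }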
index 34d99af..6804d66 100644 (file)
@@ -5,23 +5,20 @@
 #include <linux/stringify.h>
 
 /*
- * Since some emulators terminate on UD2, we cannot use it for WARN.
- * Since various instruction decoders disagree on the length of UD1,
- * we cannot use it either. So use UD0 for WARN.
+ * Although some emulators terminate on UD2, we use it for WARN().
  *
- * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
- *  our kernel decoder thinks it takes a ModRM byte, which seems consistent
- *  with various things like the Intel SDM instruction encoding rules)
+ * We use UD2 because various instruction decoders/specs disagree on the
+ * encoding of UD0/UD1.
  */
 
-#define ASM_UD0                ".byte 0x0f, 0xff"
+#define ASM_UD0                ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
 #define ASM_UD1                ".byte 0x0f, 0xb9" /* + ModRM */
 #define ASM_UD2                ".byte 0x0f, 0x0b"
 
 #define INSN_UD0       0xff0f
 #define INSN_UD2       0x0b0f
 
-#define LEN_UD0                2
+#define LEN_UD2                2
 
 #ifdef CONFIG_GENERIC_BUG
 
@@ -77,7 +74,11 @@ do {                                                         \
        unreachable();                                          \
 } while (0)
 
-#define __WARN_FLAGS(flags)    _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
+#define __WARN_FLAGS(flags)                                    \
+do {                                                           \
+       _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags));           \
+       annotate_reachable();                                   \
+} while (0)
 
 #include <asm-generic/bug.h>
 
index 70eddb3..736771c 100644 (file)
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-               asm_volatile_goto("1: jmp 6f\n"
-                        "2:\n"
-                        ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-                                "((5f-4f) - (2b-1b)),0x90\n"
-                        "3:\n"
-                        ".section .altinstructions,\"a\"\n"
-                        " .long 1b - .\n"              /* src offset */
-                        " .long 4f - .\n"              /* repl offset */
-                        " .word %P1\n"                 /* always replace */
-                        " .byte 3b - 1b\n"             /* src len */
-                        " .byte 5f - 4f\n"             /* repl len */
-                        " .byte 3b - 2b\n"             /* pad len */
-                        ".previous\n"
-                        ".section .altinstr_replacement,\"ax\"\n"
-                        "4: jmp %l[t_no]\n"
-                        "5:\n"
-                        ".previous\n"
-                        ".section .altinstructions,\"a\"\n"
-                        " .long 1b - .\n"              /* src offset */
-                        " .long 0\n"                   /* no replacement */
-                        " .word %P0\n"                 /* feature bit */
-                        " .byte 3b - 1b\n"             /* src len */
-                        " .byte 0\n"                   /* repl len */
-                        " .byte 0\n"                   /* pad len */
-                        ".previous\n"
-                        ".section .altinstr_aux,\"ax\"\n"
-                        "6:\n"
-                        " testb %[bitnum],%[cap_byte]\n"
-                        " jnz %l[t_yes]\n"
-                        " jmp %l[t_no]\n"
-                        ".previous\n"
-                        : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
-                            [bitnum] "i" (1 << (bit & 7)),
-                            [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
-                        : : t_yes, t_no);
-       t_yes:
-               return true;
-       t_no:
-               return false;
+       asm_volatile_goto("1: jmp 6f\n"
+                "2:\n"
+                ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+                        "((5f-4f) - (2b-1b)),0x90\n"
+                "3:\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 4f - .\n"              /* repl offset */
+                " .word %P[always]\n"          /* always replace */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 5f - 4f\n"             /* repl len */
+                " .byte 3b - 2b\n"             /* pad len */
+                ".previous\n"
+                ".section .altinstr_replacement,\"ax\"\n"
+                "4: jmp %l[t_no]\n"
+                "5:\n"
+                ".previous\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 0\n"                   /* no replacement */
+                " .word %P[feature]\n"         /* feature bit */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 0\n"                   /* repl len */
+                " .byte 0\n"                   /* pad len */
+                ".previous\n"
+                ".section .altinstr_aux,\"ax\"\n"
+                "6:\n"
+                " testb %[bitnum],%[cap_byte]\n"
+                " jnz %l[t_yes]\n"
+                " jmp %l[t_no]\n"
+                ".previous\n"
+                : : [feature]  "i" (bit),
+                    [always]   "i" (X86_FEATURE_ALWAYS),
+                    [bitnum]   "i" (1 << (bit & 7)),
+                    [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+                : : t_yes, t_no);
+t_yes:
+       return true;
+t_no:
+       return false;
 }
 
 #define static_cpu_has(bit)                                    \
index 4d57894..81a1be3 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
 
 #ifdef __ASSEMBLY__
 
@@ -164,11 +165,53 @@ static inline void vmexit_fill_RSB(void)
 
 static inline void indirect_branch_prediction_barrier(void)
 {
-       alternative_input("",
-                         "call __ibp_barrier",
-                         X86_FEATURE_USE_IBPB,
-                         ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
+       asm volatile(ALTERNATIVE("",
+                                "movl %[msr], %%ecx\n\t"
+                                "movl %[val], %%eax\n\t"
+                                "movl $0, %%edx\n\t"
+                                "wrmsr",
+                                X86_FEATURE_USE_IBPB)
+                    : : [msr] "i" (MSR_IA32_PRED_CMD),
+                        [val] "i" (PRED_CMD_IBPB)
+                    : "eax", "ecx", "edx", "memory");
 }
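The ALTERNATIVE body writes PRED_CMD_IBPB to MSR_IA32_PRED_CMD with %edx cleared; as a sketch, the C-level equivalent of the patched-in instructions is simply:

        wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);   /* high half zero, as above */

Open-coding the MSR write in the alternative removes the call to the old __ibp_barrier() helper, whose declaration the processor.h hunk below drops.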
 
 #endif /* __ASSEMBLY__ */
+
+/*
+ * Below is used in the eBPF JIT compiler and emits the byte sequence
+ * for the following assembly:
+ *
+ * With retpolines configured:
+ *
+ *    callq do_rop
+ *  spec_trap:
+ *    pause
+ *    lfence
+ *    jmp spec_trap
+ *  do_rop:
+ *    mov %rax,(%rsp)
+ *    retq
+ *
+ * Without retpolines configured:
+ *
+ *    jmp *%rax
+ */
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_RAX_BPF_JIT_SIZE    17
+# define RETPOLINE_RAX_BPF_JIT()                               \
+       EMIT1_off32(0xE8, 7);    /* callq do_rop */             \
+       /* spec_trap: */                                        \
+       EMIT2(0xF3, 0x90);       /* pause */                    \
+       EMIT3(0x0F, 0xAE, 0xE8); /* lfence */                   \
+       EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
+       /* do_rop: */                                           \
+       EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */    \
+       EMIT1(0xC3);             /* retq */
+#else
+# define RETPOLINE_RAX_BPF_JIT_SIZE    2
+# define RETPOLINE_RAX_BPF_JIT()                               \
+       EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+#endif
+
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
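The advertised size adds up: call rel32 (5 bytes) + pause (2) + lfence (3) + jmp rel8 (2) + mov %rax,(%rsp) (4) + retq (1) = 17 = RETPOLINE_RAX_BPF_JIT_SIZE, and the call's displacement of 7 skips exactly the pause/lfence/jmp trap (2 + 3 + 2 bytes) to land on do_rop.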
index 4baa6bc..d652a38 100644 (file)
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
index 892df37..554841f 100644 (file)
@@ -297,9 +297,9 @@ static inline void __flush_tlb_global(void)
 {
        PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
 }
-static inline void __flush_tlb_single(unsigned long addr)
+static inline void __flush_tlb_one_user(unsigned long addr)
 {
-       PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
+       PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
 }
 
 static inline void flush_tlb_others(const struct cpumask *cpumask,
index 6ec54d0..f624f1f 100644 (file)
@@ -217,7 +217,7 @@ struct pv_mmu_ops {
        /* TLB operations */
        void (*flush_tlb_user)(void);
        void (*flush_tlb_kernel)(void);
-       void (*flush_tlb_single)(unsigned long addr);
+       void (*flush_tlb_one_user)(unsigned long addr);
        void (*flush_tlb_others)(const struct cpumask *cpus,
                                 const struct flush_tlb_info *info);
 
index e67c062..e554667 100644 (file)
@@ -61,7 +61,7 @@ void paging_init(void);
 #define kpte_clear_flush(ptep, vaddr)          \
 do {                                           \
        pte_clear(&init_mm, (vaddr), (ptep));   \
-       __flush_tlb_one((vaddr));               \
+       __flush_tlb_one_kernel((vaddr));                \
 } while (0)
 
 #endif /* !__ASSEMBLY__ */
index 793bae7..1bd9ed8 100644 (file)
@@ -91,7 +91,7 @@ struct cpuinfo_x86 {
        __u8                    x86;            /* CPU family */
        __u8                    x86_vendor;     /* CPU vendor */
        __u8                    x86_model;
-       __u8                    x86_mask;
+       __u8                    x86_stepping;
 #ifdef CONFIG_X86_64
        /* Number of 4K pages in DTLB/ITLB combined(in pages): */
        int                     x86_tlbsize;
@@ -109,7 +109,7 @@ struct cpuinfo_x86 {
        char                    x86_vendor_id[16];
        char                    x86_model_id[64];
        /* in KB - valid for CPUS which support this call: */
-       int                     x86_cache_size;
+       unsigned int            x86_cache_size;
        int                     x86_cache_alignment;    /* In bytes */
        /* Cache QoS architectural values: */
        int                     x86_cache_max_rmid;     /* max index */
@@ -977,7 +977,4 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
-
-void __ibp_barrier(void);
-
 #endif /* _ASM_X86_PROCESSOR_H */
index 461f53d..a418976 100644 (file)
@@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
+void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
 void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
index 2b8f18c..84137c2 100644 (file)
@@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 #else
 #define __flush_tlb() __native_flush_tlb()
 #define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
 #endif
 
 static inline bool tlb_defer_switch_to_init_mm(void)
@@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void)
 /*
  * flush one page in the user mapping
  */
-static inline void __native_flush_tlb_single(unsigned long addr)
+static inline void __native_flush_tlb_one_user(unsigned long addr)
 {
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
 
@@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void)
 /*
  * flush one page in the kernel mapping
  */
-static inline void __flush_tlb_one(unsigned long addr)
+static inline void __flush_tlb_one_kernel(unsigned long addr)
 {
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
-       __flush_tlb_single(addr);
+
+       /*
+        * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
+        * paravirt equivalent.  Even with PCID, this is sufficient: we only
+        * use PCID if we also use global PTEs for the kernel mapping, and
+        * INVLPG flushes global translations across all address spaces.
+        *
+        * If PTI is on, then the kernel is mapped with non-global PTEs, and
+        * __flush_tlb_one_user() will flush the given address for the current
+        * kernel address space and for its usermode counterpart, but it does
+        * not flush it for other address spaces.
+        */
+       __flush_tlb_one_user(addr);
 
        if (!static_cpu_has(X86_FEATURE_PTI))
                return;
 
        /*
-        * __flush_tlb_single() will have cleared the TLB entry for this ASID,
-        * but since kernel space is replicated across all, we must also
-        * invalidate all others.
+        * See above.  We need to propagate the flush to all other address
+        * spaces.  In principle, we only need to propagate it to kernelmode
+        * address spaces, but the extra bookkeeping we would need is not
+        * worth it.
         */
        invalidate_other_asid();
 }
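A sketch of the split the new names make explicit (the kernel-side pattern matches the kpte_clear_flush() hunk earlier in this series; vaddr, uaddr and kptep are placeholders):

        /* Kernel mapping changed: under PTI the kernel uses non-global PTEs,
         * so the flush must be propagated to every address space. */
        pte_clear(&init_mm, vaddr, kptep);
        __flush_tlb_one_kernel(vaddr);

        /* User mapping changed: flushing the current mm (and its usermode
         * counterpart) is sufficient. */
        __flush_tlb_one_user(uaddr);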
index 6db28f1..c88e0b1 100644 (file)
@@ -235,7 +235,7 @@ int amd_cache_northbridges(void)
        if (boot_cpu_data.x86 == 0x10 &&
            boot_cpu_data.x86_model >= 0x8 &&
            (boot_cpu_data.x86_model > 0x9 ||
-            boot_cpu_data.x86_mask >= 0x1))
+            boot_cpu_data.x86_stepping >= 0x1))
                amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
 
        if (boot_cpu_data.x86 == 0x15)
index 25ddf02..b203af0 100644 (file)
@@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
 static u32 hsx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x3a; /* EP */
        case 0x04: return 0x0f; /* EX */
        }
@@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void)
 
 static u32 bdx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x00000011;
        case 0x03: return 0x0700000e;
        case 0x04: return 0x0f00000c;
@@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void)
 
 static u32 skx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x03: return 0x01000136;
        case 0x04: return 0x02000014;
        }
index 46b675a..f11910b 100644 (file)
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 
        uv_gre_table = gre;
        for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+               unsigned long size = ((unsigned long)(gre->limit - lgre)
+                                       << UV_GAM_RANGE_SHFT);
+               int order = 0;
+               char suffix[] = " KMGTPE";
+
+               while (size > 9999 && order < sizeof(suffix)) {
+                       size /= 1024;
+                       order++;
+               }
+
                if (!index) {
                        pr_info("UV: GAM Range Table...\n");
                        pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
                }
-               pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
+               pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",
                        index++,
                        (unsigned long)lgre << UV_GAM_RANGE_SHFT,
                        (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
-                       ((unsigned long)(gre->limit - lgre)) >>
-                               (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+                       size, suffix[order],
                        gre->type, gre->nasid, gre->sockid, gre->pnode);
 
                lgre = gre->limit;
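
As a standalone illustration of the suffix loop added above, here is a compilable userspace rendering (my own sketch, not the patch's code; it bounds the index one step earlier so suffix[order] can never reach the terminating NUL):

#include <stdio.h>

static void print_human(unsigned long size)
{
        static const char suffix[] = " KMGTPE";
        int order = 0;

        /* Divide down until the value fits in four digits, tracking
         * which unit letter applies.
         */
        while (size > 9999 && order < (int)sizeof(suffix) - 2) {
                size /= 1024;
                order++;
        }
        printf("%5lu%c\n", size, suffix[order]);
}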
index fa1261e..f91ba53 100644 (file)
@@ -18,7 +18,7 @@ void foo(void)
        OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
        OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
        OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
-       OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+       OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
        OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
        OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
        OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
index 5bddbdc..f0e6456 100644 (file)
@@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                return;
        }
 
-       if (c->x86_model == 6 && c->x86_mask == 1) {
+       if (c->x86_model == 6 && c->x86_stepping == 1) {
                const int K6_BUG_LOOP = 1000000;
                int n;
                void (*f_vide)(void);
@@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
 
        /* K6 with old style WHCR */
        if (c->x86_model < 8 ||
-          (c->x86_model == 8 && c->x86_mask < 8)) {
+          (c->x86_model == 8 && c->x86_stepping < 8)) {
                /* We can only write allocate on the low 508Mb */
                if (mbytes > 508)
                        mbytes = 508;
@@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                return;
        }
 
-       if ((c->x86_model == 8 && c->x86_mask > 7) ||
+       if ((c->x86_model == 8 && c->x86_stepping > 7) ||
             c->x86_model == 9 || c->x86_model == 13) {
                /* The more serious chips .. */
 
@@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
         * As per AMD technical note 27212 0.2
         */
-       if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+       if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
                rdmsr(MSR_K7_CLK_CTL, l, h);
                if ((l & 0xfff00000) != 0x20000000) {
                        pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
@@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * but they are not certified as MP capable.
         */
        /* Athlon 660/661 is valid. */
-       if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
-           (c->x86_mask == 1)))
+       if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
+           (c->x86_stepping == 1)))
                return;
 
        /* Duron 670 is valid */
-       if ((c->x86_model == 7) && (c->x86_mask == 0))
+       if ((c->x86_model == 7) && (c->x86_stepping == 0))
                return;
 
        /*
@@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
         * more.
         */
-       if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
-           ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+       if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
+           ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
             (c->x86_model > 7))
                if (cpu_has(c, X86_FEATURE_MP))
                        return;
@@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
        /*  Set MTRR capability flag if appropriate */
        if (c->x86 == 5)
                if (c->x86_model == 13 || c->x86_model == 9 ||
-                   (c->x86_model == 8 && c->x86_mask >= 8))
+                   (c->x86_model == 8 && c->x86_stepping >= 8))
                        set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
         * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
         * all up to and including B1.
         */
-       if (c->x86_model <= 1 && c->x86_mask <= 1)
+       if (c->x86_model <= 1 && c->x86_stepping <= 1)
                set_cpu_cap(c, X86_FEATURE_CPB);
 }
 
@@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
        /* AMD errata T13 (order #21922) */
        if ((c->x86 == 6)) {
                /* Duron Rev A0 */
-               if (c->x86_model == 3 && c->x86_mask == 0)
+               if (c->x86_model == 3 && c->x86_stepping == 0)
                        size = 64;
                /* Tbird rev A1/A2 */
                if (c->x86_model == 4 &&
-                       (c->x86_mask == 0 || c->x86_mask == 1))
+                       (c->x86_stepping == 0 || c->x86_stepping == 1))
                        size = 256;
        }
        return size;
@@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
        }
 
        /* OSVW unavailable or ID unknown, match family-model-stepping range */
-       ms = (cpu->x86_model << 4) | cpu->x86_mask;
+       ms = (cpu->x86_model << 4) | cpu->x86_stepping;
        while ((range = *erratum++))
                if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
                    (ms >= AMD_MODEL_RANGE_START(range)) &&
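
The 'ms' packing above lets a whole family/model/stepping window be matched with two integer comparisons. A hedged sketch with the AMD_MODEL_RANGE_* macros expanded to an assumed layout (start and end each encode model << 4 | stepping):

struct erratum_range {
        unsigned char  family;
        unsigned short start_ms, end_ms;   /* 0xMS: model nibble, stepping nibble */
};

static int in_fms_range(const struct erratum_range *r,
                        unsigned char family, unsigned char model,
                        unsigned char stepping)
{
        unsigned int ms = (model << 4) | stepping;

        return family == r->family && ms >= r->start_ms && ms <= r->end_ms;
}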
index 71949bf..d71c8b5 100644 (file)
@@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
        if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
                return SPECTRE_V2_CMD_NONE;
        else {
-               ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
-                                         sizeof(arg));
+               ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
                if (ret < 0)
                        return SPECTRE_V2_CMD_AUTO;
 
@@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
                }
 
                if (i >= ARRAY_SIZE(mitigation_options)) {
-                       pr_err("unknown option (%s). Switching to AUTO select\n",
-                              mitigation_options[i].option);
+                       pr_err("unknown option (%s). Switching to AUTO select\n", arg);
                        return SPECTRE_V2_CMD_AUTO;
                }
        }
@@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
             cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
             cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
            !IS_ENABLED(CONFIG_RETPOLINE)) {
-               pr_err("%s selected but not compiled in. Switching to AUTO select\n",
-                      mitigation_options[i].option);
+               pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
                return SPECTRE_V2_CMD_AUTO;
        }
 
@@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)
                        goto retpoline_auto;
                break;
        }
-       pr_err("kernel not compiled with retpoline; no mitigation available!");
+       pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
        return;
 
 retpoline_auto:
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
        retpoline_amd:
                if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
-                       pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+                       pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
                        goto retpoline_generic;
                }
                mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@@ -281,7 +278,7 @@ retpoline_auto:
        pr_info("%s\n", spectre_v2_strings[mode]);
 
        /*
-        * If neither SMEP or KPTI are available, there is a risk of
+        * If neither SMEP nor PTI are available, there is a risk of
         * hitting userspace addresses in the RSB after a context switch
         * from a shallow call stack to a deeper one. To prevent this fill
         * the entire RSB, even when using IBRS.
@@ -295,21 +292,20 @@ retpoline_auto:
        if ((!boot_cpu_has(X86_FEATURE_PTI) &&
             !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
                setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-               pr_info("Filling RSB on context switch\n");
+               pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
        }
 
        /* Initialize Indirect Branch Prediction Barrier if supported */
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
-               pr_info("Enabling Indirect Branch Prediction Barrier\n");
+               pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
        }
 }
 
 #undef pr_fmt
 
 #ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev,
-                         struct device_attribute *attr, char *buf)
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                return sprintf(buf, "Not affected\n");
@@ -318,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev,
        return sprintf(buf, "Vulnerable\n");
 }
 
-ssize_t cpu_show_spectre_v1(struct device *dev,
-                           struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
                return sprintf(buf, "Not affected\n");
        return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 }
 
-ssize_t cpu_show_spectre_v2(struct device *dev,
-                           struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
                return sprintf(buf, "Not affected\n");
@@ -337,9 +331,3 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
                       spectre_v2_module_string());
 }
 #endif
-
-void __ibp_barrier(void)
-{
-       __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
-}
-EXPORT_SYMBOL_GPL(__ibp_barrier);
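
For context, each cpu_show_*() handler above backs one read-only file under /sys/devices/system/cpu/vulnerabilities/ (a hedged sketch of the generic wiring in drivers/base/cpu.c, simplified):

static DEVICE_ATTR(meltdown,   0444, cpu_show_meltdown,   NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);

so, for example, 'cat /sys/devices/system/cpu/vulnerabilities/spectre_v2' prints the string assembled above.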
index c578cd2..e5ec0f1 100644 (file)
@@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
                        clear_cpu_cap(c, X86_FEATURE_TSC);
                        break;
                case 8:
-                       switch (c->x86_mask) {
+                       switch (c->x86_stepping) {
                        default:
                        name = "2";
                                break;
@@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
         *  - Note, it seems this may only be in engineering samples.
         */
        if ((c->x86 == 6) && (c->x86_model == 9) &&
-                               (c->x86_mask == 1) && (size == 65))
+                               (c->x86_stepping == 1) && (size == 65))
                size -= 1;
        return size;
 }
index d63f4b5..824aee0 100644 (file)
@@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
                cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                c->x86          = x86_family(tfms);
                c->x86_model    = x86_model(tfms);
-               c->x86_mask     = x86_stepping(tfms);
+               c->x86_stepping = x86_stepping(tfms);
 
                if (cap0 & (1<<19)) {
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
        int i;
 
        c->loops_per_jiffy = loops_per_jiffy;
-       c->x86_cache_size = -1;
+       c->x86_cache_size = 0;
        c->x86_vendor = X86_VENDOR_UNKNOWN;
-       c->x86_model = c->x86_mask = 0; /* So far unknown... */
+       c->x86_model = c->x86_stepping = 0;     /* So far unknown... */
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_max_cores = 1;
@@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
 
        pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
-       if (c->x86_mask || c->cpuid_level >= 0)
-               pr_cont(", stepping: 0x%x)\n", c->x86_mask);
+       if (c->x86_stepping || c->cpuid_level >= 0)
+               pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
        else
                pr_cont(")\n");
 }
index 6b4bb33..8949b7a 100644 (file)
@@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 
        /* common case step number/rev -- exceptions handled below */
        c->x86_model = (dir1 >> 4) + 1;
-       c->x86_mask = dir1 & 0xf;
+       c->x86_stepping = dir1 & 0xf;
 
        /* Now cook; the original recipe is by Channing Corn, from Cyrix.
         * We do the same thing for each generation: we work out
index 319bf98..d19e903 100644 (file)
@@ -116,14 +116,13 @@ struct sku_microcode {
        u32 microcode;
 };
 static const struct sku_microcode spectre_bad_microcodes[] = {
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x84 },
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x84 },
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x84 },
-       { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x84 },
-       { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x84 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x80 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x80 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x80 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x80 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x80 },
        { INTEL_FAM6_SKYLAKE_X,         0x03,   0x0100013e },
        { INTEL_FAM6_SKYLAKE_X,         0x04,   0x0200003c },
-       { INTEL_FAM6_SKYLAKE_MOBILE,    0x03,   0xc2 },
        { INTEL_FAM6_SKYLAKE_DESKTOP,   0x03,   0xc2 },
        { INTEL_FAM6_BROADWELL_CORE,    0x04,   0x28 },
        { INTEL_FAM6_BROADWELL_GT3E,    0x01,   0x1b },
@@ -136,8 +135,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
        { INTEL_FAM6_HASWELL_X,         0x02,   0x3b },
        { INTEL_FAM6_HASWELL_X,         0x04,   0x10 },
        { INTEL_FAM6_IVYBRIDGE_X,       0x04,   0x42a },
-       /* Updated in the 20180108 release; blacklist until we know otherwise */
-       { INTEL_FAM6_ATOM_GEMINI_LAKE,  0x01,   0x22 },
        /* Observed in the wild */
        { INTEL_FAM6_SANDYBRIDGE_X,     0x06,   0x61b },
        { INTEL_FAM6_SANDYBRIDGE_X,     0x07,   0x712 },
@@ -149,7 +146,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 
        for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
                if (c->x86_model == spectre_bad_microcodes[i].model &&
-                   c->x86_mask == spectre_bad_microcodes[i].stepping)
+                   c->x86_stepping == spectre_bad_microcodes[i].stepping)
                        return (c->microcode <= spectre_bad_microcodes[i].microcode);
        }
        return false;
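
A self-contained sketch of the table-driven check above (struct and function names are mine):

struct sku {
        unsigned char model, stepping;
        unsigned int  microcode;
};

static int ucode_is_listed(const struct sku *tbl, unsigned int n,
                           unsigned char model, unsigned char stepping,
                           unsigned int rev)
{
        unsigned int i;

        for (i = 0; i < n; i++)
                if (tbl[i].model == model && tbl[i].stepping == stepping)
                        return rev <= tbl[i].microcode;  /* at or below: bad */
        return 0;
}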
@@ -196,7 +193,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
         * need the microcode to have already been loaded... so if it is
         * not, recommend a BIOS update and disable large pages.
         */
-       if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+       if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
            c->microcode < 0x20e) {
                pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
                clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -212,7 +209,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 
        /* CPUID workaround for 0F33/0F34 CPU */
        if (c->x86 == 0xF && c->x86_model == 0x3
-           && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+           && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
                c->x86_phys_bits = 36;
 
        /*
@@ -310,7 +307,7 @@ int ppro_with_ram_bug(void)
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == 1 &&
-           boot_cpu_data.x86_mask < 8) {
+           boot_cpu_data.x86_stepping < 8) {
                pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
                return 1;
        }
@@ -327,7 +324,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
         * Mask B, Pentium, but not Pentium MMX
         */
        if (c->x86 == 5 &&
-           c->x86_mask >= 1 && c->x86_mask <= 4 &&
+           c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
            c->x86_model <= 3) {
                /*
                 * Remember we have B step Pentia with bugs
@@ -370,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
         * model 3 mask 3
         */
-       if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+       if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
                clear_cpu_cap(c, X86_FEATURE_SEP);
 
        /*
@@ -388,7 +385,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * P4 Xeon erratum 037 workaround.
         * Hardware prefetcher may cause stale data to be loaded into the cache.
         */
-       if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+       if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
                if (msr_set_bit(MSR_IA32_MISC_ENABLE,
                                MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
                        pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@@ -403,7 +400,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * Specification Update").
         */
        if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
-           (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+           (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
                set_cpu_bug(c, X86_BUG_11AP);
 
 
@@ -650,7 +647,7 @@ static void init_intel(struct cpuinfo_x86 *c)
                case 6:
                        if (l2 == 128)
                                p = "Celeron (Mendocino)";
-                       else if (c->x86_mask == 0 || c->x86_mask == 5)
+                       else if (c->x86_stepping == 0 || c->x86_stepping == 5)
                                p = "Celeron-A";
                        break;
 
index 410629f..589b948 100644 (file)
@@ -819,7 +819,7 @@ static __init void rdt_quirks(void)
                        cache_alloc_hsw_probe();
                break;
        case INTEL_FAM6_SKYLAKE_X:
-               if (boot_cpu_data.x86_mask <= 4)
+               if (boot_cpu_data.x86_stepping <= 4)
                        set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
        }
 }
index aa0d5df..e956eb2 100644 (file)
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb)        { }
 
 extern struct mca_config mca_cfg;
 
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM, which
+ *    is only mapped into the kernel as needed.
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ *    recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
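
The '#define mce_unmap_kpfn mce_unmap_kpfn' line does double duty: the macro's only purpose is to be defined, so the '#ifndef mce_unmap_kpfn' in mce.c (next hunk) compiles the real implementation only when no stub was provided. The idiom, reduced to a sketch:

/* header: optional per-config override */
#ifdef NO_REAL_IMPLEMENTATION
static inline void op(unsigned long pfn) { }    /* stub */
#define op op                                   /* mark as provided */
#endif

/* consumer .c file: default, compiled only when unmarked */
#ifndef op
static void op(unsigned long pfn)
{
        /* ... real work ... */
}
#endif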
index 3a8e88a..8ff94d1 100644 (file)
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
                        m->cs, m->ip);
 
                if (m->cs == __KERNEL_CS)
-                       pr_cont("{%pS}", (void *)m->ip);
+                       pr_cont("{%pS}", (void *)(unsigned long)m->ip);
                pr_cont("\n");
        }
 
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 
        if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
                pfn = mce->addr >> PAGE_SHIFT;
-               memory_failure(pfn, 0);
+               if (!memory_failure(pfn, 0))
+                       mce_unmap_kpfn(pfn);
        }
 
        return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
        ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
        if (ret)
                pr_err("Memory error not recovered");
+       else
+               mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
        return ret;
 }
 
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
 {
        unsigned long decoy_addr;
 
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
         * We would like to just call:
         *      set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
         * but doing that would radically increase the odds of a
-        * speculative access to the posion page because we'd have
+        * speculative access to the poison page because we'd have
         * the virtual address of the kernel 1:1 mapping sitting
         * around in registers.
         * Instead we get tricky.  We create a non-canonical address
@@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
 
        if (set_memory_np(decoy_addr, 1))
                pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
 }
 #endif
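
A hedged sketch of the "tricky" step the comment describes: derive a non-canonical alias of the page's 1:1 address by flipping bit 63, so no canonical pointer to the poison page is ever live in a register (illustrative; the real code arranges the same effect with its own constants):

static unsigned long decoy_of(unsigned long pfn)
{
        /* pfn_to_kaddr() yields an address with bit 63 set on x86-64
         * kernels; clearing it produces a non-canonical address that
         * still identifies the mapping to set_memory_np().
         */
        return (unsigned long)pfn_to_kaddr(pfn) ^ (1UL << 63);
}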
 
index f7c55b0..a15db2b 100644 (file)
@@ -921,7 +921,7 @@ static bool is_blacklisted(unsigned int cpu)
         */
        if (c->x86 == 6 &&
            c->x86_model == INTEL_FAM6_BROADWELL_X &&
-           c->x86_mask == 0x01 &&
+           c->x86_stepping == 0x01 &&
            llc_size_per_core > 2621440 &&
            c->microcode < 0x0b000021) {
                pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
@@ -944,7 +944,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
                return UCODE_NFOUND;
 
        sprintf(name, "intel-ucode/%02x-%02x-%02x",
-               c->x86, c->x86_model, c->x86_mask);
+               c->x86, c->x86_model, c->x86_stepping);
 
        if (request_firmware_direct(&firmware, name, device)) {
                pr_debug("data file %s load failed\n", name);
@@ -982,7 +982,7 @@ static struct microcode_ops microcode_intel_ops = {
 
 static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
 {
-       u64 llc_size = c->x86_cache_size * 1024;
+       u64 llc_size = c->x86_cache_size * 1024ULL;
 
        do_div(llc_size, c->x86_max_cores);
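
The ULL suffix is the entire fix: with x86_cache_size now an unsigned int, the old '* 1024' multiplied in 32 bits and could wrap before being widened to u64. A two-line demonstration:

unsigned int kb = 4u * 1024 * 1024;        /* a 4 GiB cache, in KB */
unsigned long long bad  = kb * 1024;       /* 32-bit multiply: wraps to 0 */
unsigned long long good = kb * 1024ULL;    /* 64-bit multiply: 4294967296 */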
 
index fdc5521..e12ee86 100644 (file)
@@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
         */
        if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == 1 &&
-           boot_cpu_data.x86_mask <= 7) {
+           boot_cpu_data.x86_stepping <= 7) {
                if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
                        pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
                        return -EINVAL;
index 40d5a8a..7468de4 100644 (file)
@@ -711,8 +711,8 @@ void __init mtrr_bp_init(void)
                        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
                            boot_cpu_data.x86 == 0xF &&
                            boot_cpu_data.x86_model == 0x3 &&
-                           (boot_cpu_data.x86_mask == 0x3 ||
-                            boot_cpu_data.x86_mask == 0x4))
+                           (boot_cpu_data.x86_stepping == 0x3 ||
+                            boot_cpu_data.x86_stepping == 0x4))
                                phys_addr = 36;
 
                        size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
index e7eceda..2c8522a 100644 (file)
@@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                   c->x86_model,
                   c->x86_model_id[0] ? c->x86_model_id : "unknown");
 
-       if (c->x86_mask || c->cpuid_level >= 0)
-               seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+       if (c->x86_stepping || c->cpuid_level >= 0)
+               seq_printf(m, "stepping\t: %d\n", c->x86_stepping);
        else
                seq_puts(m, "stepping\t: unknown\n");
        if (c->microcode)
@@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        }
 
        /* Cache size */
-       if (c->x86_cache_size >= 0)
-               seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+       if (c->x86_cache_size)
+               seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size);
 
        show_cpuinfo_core(m, c, cpu);
        show_cpuinfo_misc(m, c);
index c290209..b59e4fb 100644 (file)
@@ -37,7 +37,7 @@
 #define X86            new_cpu_data+CPUINFO_x86
 #define X86_VENDOR     new_cpu_data+CPUINFO_x86_vendor
 #define X86_MODEL      new_cpu_data+CPUINFO_x86_model
-#define X86_MASK       new_cpu_data+CPUINFO_x86_mask
+#define X86_STEPPING   new_cpu_data+CPUINFO_x86_stepping
 #define X86_HARD_MATH  new_cpu_data+CPUINFO_hard_math
 #define X86_CPUID      new_cpu_data+CPUINFO_cpuid_level
 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
@@ -332,7 +332,7 @@ ENTRY(startup_32_smp)
        shrb $4,%al
        movb %al,X86_MODEL
        andb $0x0f,%cl          # mask mask revision
-       movb %cl,X86_MASK
+       movb %cl,X86_STEPPING
        movl %edx,X86_CAPABILITY
 
 .Lis486:
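
The assembly above unpacks CPUID leaf 1's EAX; the equivalent decode in C, for reference (base fields only; the extended family/model adjustments performed by x86_family()/x86_model() are omitted):

unsigned int stepping =  eax       & 0x0f;   /* bits  3:0 */
unsigned int model    = (eax >> 4) & 0x0f;   /* bits  7:4 */
unsigned int family   = (eax >> 8) & 0x0f;   /* bits 11:8 */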
index 1f790cf..3b7427a 100644 (file)
@@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
                                goto overflow;
                        break;
                case R_X86_64_PC32:
+               case R_X86_64_PLT32:
                        value -= (u64)address;
                        *(u32 *)location = value;
                        break;
index da0c160..f58336a 100644 (file)
@@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
                                goto overflow;
                        break;
                case R_X86_64_PC32:
+               case R_X86_64_PLT32:
                        if (*(u32 *)loc != 0)
                                goto invalid_relocation;
                        val -= (u64)loc;
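
Both relocation types can share this code because a final kernel link has no PLT: a PLT32 reference to a directly reachable symbol resolves exactly like PC32, i.e. S + A - P. Schematically (variable names follow the surrounding hunk):

val  = sym->st_value + rel->r_addend;   /* S + A */
val -= (u64)loc;                        /* - P   */
*(u32 *)loc = (u32)val;                 /* 32-bit PC-relative store */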
index 27d0a17..f1c5eb9 100644 (file)
@@ -410,7 +410,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
        processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
        processor.cpuflag = CPU_ENABLED;
        processor.cpufeature = (boot_cpu_data.x86 << 8) |
-           (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+           (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;
        processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
        processor.reserved[0] = 0;
        processor.reserved[1] = 0;
index 041096b..99dc79e 100644 (file)
@@ -200,9 +200,9 @@ static void native_flush_tlb_global(void)
        __native_flush_tlb_global();
 }
 
-static void native_flush_tlb_single(unsigned long addr)
+static void native_flush_tlb_one_user(unsigned long addr)
 {
-       __native_flush_tlb_single(addr);
+       __native_flush_tlb_one_user(addr);
 }
 
 struct static_key paravirt_steal_enabled;
@@ -401,7 +401,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 
        .flush_tlb_user = native_flush_tlb,
        .flush_tlb_kernel = native_flush_tlb_global,
-       .flush_tlb_single = native_flush_tlb_single,
+       .flush_tlb_one_user = native_flush_tlb_one_user,
        .flush_tlb_others = native_flush_tlb_others,
 
        .pgd_alloc = __paravirt_pgd_alloc,
index 6f27fac..9eee25d 100644 (file)
@@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void)
        cpu_set_state_online(me);
 }
 
-void __init native_smp_cpus_done(unsigned int max_cpus)
+void __init calculate_max_logical_packages(void)
 {
        int ncpus;
 
-       pr_debug("Boot done\n");
        /*
         * Today neither Intel nor AMD support heterogeneous systems, so
         * extrapolate the boot cpu's data to all packages.
@@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
        ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
        __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
        pr_info("Max logical packages: %u\n", __max_logical_packages);
+}
+
+void __init native_smp_cpus_done(unsigned int max_cpus)
+{
+       pr_debug("Boot done\n");
+
+       calculate_max_logical_packages();
 
        if (x86_has_numa_in_package)
                set_sched_topology(x86_numa_in_package_topology);
@@ -1430,7 +1436,6 @@ static void remove_siblinginfo(int cpu)
        cpumask_clear(cpu_llc_shared_mask(cpu));
        cpumask_clear(topology_sibling_cpumask(cpu));
        cpumask_clear(topology_core_cpumask(cpu));
-       c->phys_proc_id = 0;
        c->cpu_core_id = 0;
        cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
        recompute_smt_state();
index 446c9ef..3d9b230 100644 (file)
@@ -181,7 +181,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr)
                break;
 
        case BUG_TRAP_TYPE_WARN:
-               regs->ip += LEN_UD0;
+               regs->ip += LEN_UD2;
                return 1;
        }
 
index 8eca1d0..46ff304 100644 (file)
@@ -5080,7 +5080,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
 typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
 
 /* The caller should hold mmu-lock before calling this function. */
-static bool
+static __always_inline bool
 slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
                        slot_level_handler fn, int start_level, int end_level,
                        gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
@@ -5110,7 +5110,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
        return flush;
 }
 
-static bool
+static __always_inline bool
 slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
                  slot_level_handler fn, int start_level, int end_level,
                  bool lock_flush_tlb)
@@ -5121,7 +5121,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
                        lock_flush_tlb);
 }
 
-static bool
+static __always_inline bool
 slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
                      slot_level_handler fn, bool lock_flush_tlb)
 {
@@ -5129,7 +5129,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
                                 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
 }
 
-static bool
+static __always_inline bool
 slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
                        slot_level_handler fn, bool lock_flush_tlb)
 {
@@ -5137,7 +5137,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
                                 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
 }
 
-static bool
+static __always_inline bool
 slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
                 slot_level_handler fn, bool lock_flush_tlb)
 {
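
Why these wrappers are forced inline: each one passes 'fn' as a compile-time constant, and once the wrapper body is inlined into its caller the compiler can replace the indirect call with a direct one, which matters when indirect branches pay the retpoline cost. A schematic of the effect:

static __always_inline int apply(int (*fn)(int), int x)
{
        return fn(x);               /* direct call after inlining */
}

static int twice(int x) { return 2 * x; }

int user(int x)
{
        return apply(twice, x);     /* compiles to "call twice" */
}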
index f427723..3dec126 100644 (file)
@@ -10136,7 +10136,10 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
                        (unsigned long)(vmcs12->posted_intr_desc_addr &
                        (PAGE_SIZE - 1)));
        }
-       if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
+       if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
+               vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
+                             CPU_BASED_USE_MSR_BITMAPS);
+       else
                vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
                                CPU_BASED_USE_MSR_BITMAPS);
 }
@@ -10224,8 +10227,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
         *    updated to reflect this when L1 (or its L2s) actually write to
         *    the MSR.
         */
-       bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
-       bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+       bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+       bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
 
        /* Nothing to do if the MSR bitmap is not in use.  */
        if (!cpu_has_vmx_msr_bitmap() ||
index d6f848d..2dd1fe1 100644 (file)
@@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig)
 {
        unsigned int fam, model;
 
-        fam = x86_family(sig);
+       fam = x86_family(sig);
 
        model = (sig >> 4) & 0xf;
 
index 7b881d0..3cdf061 100644 (file)
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
 
 asm(
        ".type just_return_func, @function\n"
+       ".globl just_return_func\n"
        "just_return_func:\n"
        "       ret\n"
        ".size just_return_func, .-just_return_func\n"
index 1ab42c8..8b72923 100644 (file)
@@ -256,7 +256,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
-       __flush_tlb_one(vaddr);
+       __flush_tlb_one_kernel(vaddr);
 }
 
 void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
@@ -1193,8 +1193,8 @@ void __init mem_init(void)
        register_page_bootmem_info();
 
        /* Register memory areas for /proc/kcore */
-       kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
-                        PAGE_SIZE, KCORE_OTHER);
+       if (get_gate_vma(&init_mm))
+               kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
 
        mem_init_print_info(NULL);
 }
index c45b6ec..e2db83b 100644 (file)
@@ -820,5 +820,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
                set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
        else
                pte_clear(&init_mm, addr, pte);
-       __flush_tlb_one(addr);
+       __flush_tlb_one_kernel(addr);
 }
index 58477ec..7c86867 100644 (file)
@@ -168,7 +168,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
                return -1;
        }
 
-       __flush_tlb_one(f->addr);
+       __flush_tlb_one_kernel(f->addr);
        return 0;
 }
 
index c3c5274..9bb7f0a 100644 (file)
@@ -63,7 +63,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
-       __flush_tlb_one(vaddr);
+       __flush_tlb_one_kernel(vaddr);
 }
 
 unsigned long __FIXADDR_TOP = 0xfffff000;
index 8dcc060..7f1a513 100644 (file)
@@ -498,7 +498,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
         *    flush that changes context.tlb_gen from 2 to 3.  If they get
         *    processed on this CPU in reverse order, we'll see
         *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
-        *    If we were to use __flush_tlb_single() and set local_tlb_gen to
+        *    If we were to use __flush_tlb_one_user() and set local_tlb_gen to
         *    3, we'd break the invariant: we'd update local_tlb_gen above
         *    1 without the full flush that's needed for tlb_gen 2.
         *
@@ -519,7 +519,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 
                addr = f->start;
                while (addr < f->end) {
-                       __flush_tlb_single(addr);
+                       __flush_tlb_one_user(addr);
                        addr += PAGE_SIZE;
                }
                if (local)
@@ -666,7 +666,7 @@ static void do_kernel_range_flush(void *info)
 
        /* flush the range one page at a time with 'invlpg' */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
-               __flush_tlb_one(addr);
+               __flush_tlb_one_kernel(addr);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
index 4923d92..eb661ff 100644 (file)
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
 #include <linux/bpf.h>
 
+#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
+
 /*
  * assembly code in arch/x86/net/bpf_jit.S
  */
@@ -60,7 +61,12 @@ static bool is_imm8(int value)
 
 static bool is_simm32(s64 value)
 {
-       return value == (s64) (s32) value;
+       return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+       return value == (u64)(u32)value;
 }
 
 /* mov dst, src */
@@ -97,16 +103,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 #define X86_JLE 0x7E
 #define X86_JG  0x7F
 
-static void bpf_flush_icache(void *start, void *end)
-{
-       mm_segment_t old_fs = get_fs();
-
-       set_fs(KERNEL_DS);
-       smp_wmb();
-       flush_icache_range((unsigned long)start, (unsigned long)end);
-       set_fs(old_fs);
-}
-
 #define CHOOSE_LOAD_FUNC(K, func) \
        ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
@@ -211,7 +207,7 @@ struct jit_context {
 /* emit x64 prologue code for BPF program and check its size.
  * bpf_tail_call helper will skip it while jumping into another program
  */
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 {
        u8 *prog = *pprog;
        int cnt = 0;
@@ -246,18 +242,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
        /* mov qword ptr [rbp+24],r15 */
        EMIT4(0x4C, 0x89, 0x7D, 24);
 
-       /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
-        * we need to reset the counter to 0. It's done in two instructions,
-        * resetting rax register to 0 (xor on eax gets 0 extended), and
-        * moving it to the counter location.
-        */
+       if (!ebpf_from_cbpf) {
+               /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+                * calls we need to reset the counter to 0. It's done in two
+                * instructions, resetting rax register to 0, and moving it
+                * to the counter location.
+                */
 
-       /* xor eax, eax */
-       EMIT2(0x31, 0xc0);
-       /* mov qword ptr [rbp+32], rax */
-       EMIT4(0x48, 0x89, 0x45, 32);
+               /* xor eax, eax */
+               EMIT2(0x31, 0xc0);
+               /* mov qword ptr [rbp+32], rax */
+               EMIT4(0x48, 0x89, 0x45, 32);
+
+               BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+       }
 
-       BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
        *pprog = prog;
 }
 
@@ -290,7 +289,7 @@ static void emit_bpf_tail_call(u8 **pprog)
        EMIT2(0x89, 0xD2);                        /* mov edx, edx */
        EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
              offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
        EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
        label1 = cnt;
 
@@ -299,7 +298,7 @@ static void emit_bpf_tail_call(u8 **pprog)
         */
        EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
        EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 32
+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
        EMIT2(X86_JA, OFFSET2);                   /* ja out */
        label2 = cnt;
        EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
@@ -313,7 +312,7 @@ static void emit_bpf_tail_call(u8 **pprog)
         *   goto out;
         */
        EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
        EMIT2(X86_JE, OFFSET3);                   /* je out */
        label3 = cnt;
 
@@ -326,7 +325,7 @@ static void emit_bpf_tail_call(u8 **pprog)
         * rdi == ctx (1st arg)
         * rax == prog->bpf_func + prologue_size
         */
-       EMIT2(0xFF, 0xE0);                        /* jmp rax */
+       RETPOLINE_RAX_BPF_JIT();
 
        /* out: */
        BUILD_BUG_ON(cnt - label1 != OFFSET1);
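
RETPOLINE_RAX_BPF_JIT() stands in for the old two-byte 'jmp rax', which is why OFFSET1/2/3 now grow by RETPOLINE_RAX_BPF_JIT_SIZE. Its body (as I read asm/nospec-branch.h in this series; abbreviated, retpoline-enabled path only) emits a return trampoline that parks the speculative path in a pause/lfence loop:

EMIT1_off32(0xE8, 7);             /* call do_rop          */
/* spec_trap: */
EMIT2(0xF3, 0x90);                /* pause                */
EMIT3(0x0F, 0xAE, 0xE8);          /* lfence               */
EMIT2(0xEB, 0xF9);                /* jmp spec_trap        */
/* do_rop: */
EMIT4(0x48, 0x89, 0x04, 0x24);    /* mov %rax, (%rsp)     */
EMIT1(0xC3);                      /* ret, lands at *%rax  */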
@@ -355,6 +354,86 @@ static void emit_load_skb_data_hlen(u8 **pprog)
        *pprog = prog;
 }
 
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+                          u32 dst_reg, const u32 imm32)
+{
+       u8 *prog = *pprog;
+       u8 b1, b2, b3;
+       int cnt = 0;
+
+       /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+        * (which zero-extends imm32) to save 2 bytes.
+        */
+       if (sign_propagate && (s32)imm32 < 0) {
+               /* 'mov %rax, imm32' sign extends imm32 */
+               b1 = add_1mod(0x48, dst_reg);
+               b2 = 0xC7;
+               b3 = 0xC0;
+               EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+               goto done;
+       }
+
+       /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+        * to save 3 bytes.
+        */
+       if (imm32 == 0) {
+               if (is_ereg(dst_reg))
+                       EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+               b2 = 0x31; /* xor */
+               b3 = 0xC0;
+               EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+               goto done;
+       }
+
+       /* mov %eax, imm32 */
+       if (is_ereg(dst_reg))
+               EMIT1(add_1mod(0x40, dst_reg));
+       EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+       *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+                          const u32 imm32_hi, const u32 imm32_lo)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+               /* For emitting a plain u32, where the sign bit must not
+                * be propagated, LLVM tends to load imm64 over mov32
+                * directly, so save a couple of bytes by just doing
+                * 'mov %eax, imm32' instead.
+                */
+               emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+       } else {
+               /* movabsq %rax, imm64 */
+               EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+               EMIT(imm32_lo, 4);
+               EMIT(imm32_hi, 4);
+       }
+
+       *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (is64) {
+               /* mov dst, src */
+               EMIT_mov(dst_reg, src_reg);
+       } else {
+               /* mov32 dst, src */
+               if (is_ereg(dst_reg) || is_ereg(src_reg))
+                       EMIT1(add_2mod(0x40, dst_reg, src_reg));
+               EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+       }
+
+       *pprog = prog;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
 {
@@ -368,7 +447,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
        int proglen = 0;
        u8 *prog = temp;
 
-       emit_prologue(&prog, bpf_prog->aux->stack_depth);
+       emit_prologue(&prog, bpf_prog->aux->stack_depth,
+                     bpf_prog_was_classic(bpf_prog));
 
        if (seen_ld_abs)
                emit_load_skb_data_hlen(&prog);
@@ -377,7 +457,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                const s32 imm32 = insn->imm;
                u32 dst_reg = insn->dst_reg;
                u32 src_reg = insn->src_reg;
-               u8 b1 = 0, b2 = 0, b3 = 0;
+               u8 b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
                bool reload_skb_data;
@@ -413,16 +493,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
                        break;
 
-                       /* mov dst, src */
                case BPF_ALU64 | BPF_MOV | BPF_X:
-                       EMIT_mov(dst_reg, src_reg);
-                       break;
-
-                       /* mov32 dst, src */
                case BPF_ALU | BPF_MOV | BPF_X:
-                       if (is_ereg(dst_reg) || is_ereg(src_reg))
-                               EMIT1(add_2mod(0x40, dst_reg, src_reg));
-                       EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+                       emit_mov_reg(&prog,
+                                    BPF_CLASS(insn->code) == BPF_ALU64,
+                                    dst_reg, src_reg);
                        break;
 
                        /* neg dst */
@@ -485,58 +560,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        break;
 
                case BPF_ALU64 | BPF_MOV | BPF_K:
-                       /* optimization: if imm32 is positive,
-                        * use 'mov eax, imm32' (which zero-extends imm32)
-                        * to save 2 bytes
-                        */
-                       if (imm32 < 0) {
-                               /* 'mov rax, imm32' sign extends imm32 */
-                               b1 = add_1mod(0x48, dst_reg);
-                               b2 = 0xC7;
-                               b3 = 0xC0;
-                               EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
-                               break;
-                       }
-
                case BPF_ALU | BPF_MOV | BPF_K:
-                       /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
-                        * to save 3 bytes.
-                        */
-                       if (imm32 == 0) {
-                               if (is_ereg(dst_reg))
-                                       EMIT1(add_2mod(0x40, dst_reg, dst_reg));
-                               b2 = 0x31; /* xor */
-                               b3 = 0xC0;
-                               EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
-                               break;
-                       }
-
-                       /* mov %eax, imm32 */
-                       if (is_ereg(dst_reg))
-                               EMIT1(add_1mod(0x40, dst_reg));
-                       EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+                       emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+                                      dst_reg, imm32);
                        break;
 
                case BPF_LD | BPF_IMM | BPF_DW:
-                       /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
-                        * to save 7 bytes.
-                        */
-                       if (insn[0].imm == 0 && insn[1].imm == 0) {
-                               b1 = add_2mod(0x48, dst_reg, dst_reg);
-                               b2 = 0x31; /* xor */
-                               b3 = 0xC0;
-                               EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
-                               insn++;
-                               i++;
-                               break;
-                       }
-
-                       /* movabsq %rax, imm64 */
-                       EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
-                       EMIT(insn[0].imm, 4);
-                       EMIT(insn[1].imm, 4);
-
+                       emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
                        insn++;
                        i++;
                        break;
@@ -593,36 +623,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                case BPF_ALU | BPF_MUL | BPF_X:
                case BPF_ALU64 | BPF_MUL | BPF_K:
                case BPF_ALU64 | BPF_MUL | BPF_X:
-                       EMIT1(0x50); /* push rax */
-                       EMIT1(0x52); /* push rdx */
+               {
+                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+                       if (dst_reg != BPF_REG_0)
+                               EMIT1(0x50); /* push rax */
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x52); /* push rdx */
 
                        /* mov r11, dst_reg */
                        EMIT_mov(AUX_REG, dst_reg);
 
                        if (BPF_SRC(insn->code) == BPF_X)
-                               /* mov rax, src_reg */
-                               EMIT_mov(BPF_REG_0, src_reg);
+                               emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
                        else
-                               /* mov rax, imm32 */
-                               EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+                               emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
 
-                       if (BPF_CLASS(insn->code) == BPF_ALU64)
+                       if (is64)
                                EMIT1(add_1mod(0x48, AUX_REG));
                        else if (is_ereg(AUX_REG))
                                EMIT1(add_1mod(0x40, AUX_REG));
                        /* mul(q) r11 */
                        EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
 
-                       /* mov r11, rax */
-                       EMIT_mov(AUX_REG, BPF_REG_0);
-
-                       EMIT1(0x5A); /* pop rdx */
-                       EMIT1(0x58); /* pop rax */
-
-                       /* mov dst_reg, r11 */
-                       EMIT_mov(dst_reg, AUX_REG);
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x5A); /* pop rdx */
+                       if (dst_reg != BPF_REG_0) {
+                               /* mov dst_reg, rax */
+                               EMIT_mov(dst_reg, BPF_REG_0);
+                               EMIT1(0x58); /* pop rax */
+                       }
                        break;
-
+               }
                        /* shifts */
                case BPF_ALU | BPF_LSH | BPF_K:
                case BPF_ALU | BPF_RSH | BPF_K:
@@ -640,7 +672,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        case BPF_RSH: b3 = 0xE8; break;
                        case BPF_ARSH: b3 = 0xF8; break;
                        }
-                       EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+                       if (imm32 == 1)
+                               EMIT2(0xD1, add_1reg(b3, dst_reg));
+                       else
+                               EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
                        break;
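
The new branch uses x86's dedicated shift-by-one encoding, which drops the immediate byte:

/* C1 /4 ib : shl $imm8, %eax  ->  0xC1 0xE0 0x01  (3 bytes)
 * D1 /4    : shl $1,    %eax  ->  0xD1 0xE0       (2 bytes)
 */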
 
                case BPF_ALU | BPF_LSH | BPF_X:
@@ -1220,7 +1256,6 @@ skip_init_addrs:
                bpf_jit_dump(prog->len, proglen, pass + 1, image);
 
        if (image) {
-               bpf_flush_icache(header, image + proglen);
                if (!prog->is_func || extra_pass) {
                        bpf_jit_binary_lock_ro(header);
                } else {
index c2e9285..db77e08 100644 (file)
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
                local_flush_tlb();
                stat->d_alltlb++;
        } else {
-               __flush_tlb_single(msg->address);
+               __flush_tlb_one_user(msg->address);
                stat->d_onetlb++;
        }
        stat->d_requestee++;
index 5d73c44..220e978 100644 (file)
@@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
                break;
 
        case R_X86_64_PC32:
+       case R_X86_64_PLT32:
                /*
                 * PC relative relocations don't need to be adjusted unless
                 * referencing a percpu symbol.
+                *
+                * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
                 */
                if (is_percpu_sym(sym, symname))
                        add_reloc(&relocs32neg, offset);
index d850762..aae88fe 100644 (file)
@@ -1300,12 +1300,12 @@ static void xen_flush_tlb(void)
        preempt_enable();
 }
 
-static void xen_flush_tlb_single(unsigned long addr)
+static void xen_flush_tlb_one_user(unsigned long addr)
 {
        struct mmuext_op *op;
        struct multicall_space mcs;
 
-       trace_xen_mmu_flush_tlb_single(addr);
+       trace_xen_mmu_flush_tlb_one_user(addr);
 
        preempt_disable();
 
@@ -2370,7 +2370,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 
        .flush_tlb_user = xen_flush_tlb,
        .flush_tlb_kernel = xen_flush_tlb,
-       .flush_tlb_single = xen_flush_tlb_single,
+       .flush_tlb_one_user = xen_flush_tlb_one_user,
        .flush_tlb_others = xen_flush_tlb_others,
 
        .pgd_alloc = xen_pgd_alloc,
index 77c959c..7a43b2a 100644 (file)
@@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
 
        if (xen_hvm_domain())
                native_smp_cpus_done(max_cpus);
+       else
+               calculate_max_logical_packages();
 
        if (xen_have_vcpu_info_placement)
                return;
index df93102..3574927 100644 (file)
@@ -3164,6 +3164,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
                cpu_relax();
        }
 
+       __set_current_state(TASK_RUNNING);
        return false;
 }
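
The added line restores the standard wait-loop discipline: code that puts the task into a sleeping state must return it to TASK_RUNNING on every exit path, or a later schedule() can leave it stalled. The canonical shape:

while (!done) {
        set_current_state(TASK_INTERRUPTIBLE);
        if (done)
                break;
        schedule();
}
__set_current_state(TASK_RUNNING);    /* required on the break path */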
 
index 73fd990..753b703 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "blacklist.h"
 
-const char __initdata *const blacklist_hashes[] = {
+const char __initconst *const blacklist_hashes[] = {
        NULL
 };
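
The distinction being fixed: __initconst is the init-section annotation for const objects (.init.rodata), while __initdata is for writable ones (.init.data); annotating const data with __initdata can trigger section-attribute conflicts. In the usual form:

static const int coeffs[] __initconst = { 1, 2, 3 };   /* read-only init data */
static int scratch __initdata;                         /* writable init data  */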
index 1f4e25f..598906b 100644 (file)
@@ -106,6 +106,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7,
                pr_devel("sinfo %u: Direct signer is key %x\n",
                         sinfo->index, key_serial(key));
                x509 = NULL;
+               sig = sinfo->sig;
                goto matched;
        }
        if (PTR_ERR(key) != -ENOKEY)
index 39e6de0..97c77f6 100644 (file)
@@ -270,7 +270,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
                                sinfo->index);
                        return 0;
                }
-               ret = public_key_verify_signature(p->pub, p->sig);
+               ret = public_key_verify_signature(p->pub, x509->sig);
                if (ret < 0)
                        return ret;
                x509->signer = p;
@@ -366,8 +366,7 @@ static int pkcs7_verify_one(struct pkcs7_message *pkcs7,
  *
  *  (*) -EBADMSG if some part of the message was invalid, or:
  *
- *  (*) 0 if no signature chains were found to be blacklisted or to contain
- *     unsupported crypto, or:
+ *  (*) 0 if a signature chain passed verification, or:
  *
  *  (*) -EKEYREJECTED if a blacklisted key was encountered, or:
  *
@@ -423,8 +422,11 @@ int pkcs7_verify(struct pkcs7_message *pkcs7,
 
        for (sinfo = pkcs7->signed_infos; sinfo; sinfo = sinfo->next) {
                ret = pkcs7_verify_one(pkcs7, sinfo);
-               if (sinfo->blacklisted && actual_ret == -ENOPKG)
-                       actual_ret = -EKEYREJECTED;
+               if (sinfo->blacklisted) {
+                       if (actual_ret == -ENOPKG)
+                               actual_ret = -EKEYREJECTED;
+                       continue;
+               }
                if (ret < 0) {
                        if (ret == -ENOPKG) {
                                sinfo->unsupported_crypto = true;
index de99658..e929fe1 100644 (file)
@@ -79,9 +79,11 @@ int public_key_verify_signature(const struct public_key *pkey,
 
        BUG_ON(!pkey);
        BUG_ON(!sig);
-       BUG_ON(!sig->digest);
        BUG_ON(!sig->s);
 
+       if (!sig->digest)
+               return -ENOPKG;
+
        alg_name = sig->pkey_algo;
        if (strcmp(sig->pkey_algo, "rsa") == 0) {
                /* The data wangled by the RSA algorithm is typically padded
index 86fb685..7c93c77 100644 (file)
@@ -67,8 +67,9 @@ __setup("ca_keys=", ca_keys_setup);
  *
  * Returns 0 if the new certificate was accepted, -ENOKEY if we couldn't find a
  * matching parent certificate in the trusted list, -EKEYREJECTED if the
- * signature check fails or the key is blacklisted and some other error if
- * there is a matching certificate but the signature check cannot be performed.
+ * signature check fails or the key is blacklisted, -ENOPKG if the signature
+ * uses unsupported crypto, or some other error if there is a matching
+ * certificate but the signature check cannot be performed.
  */
 int restrict_link_by_signature(struct key *dest_keyring,
                               const struct key_type *type,
@@ -88,6 +89,8 @@ int restrict_link_by_signature(struct key *dest_keyring,
                return -EOPNOTSUPP;
 
        sig = payload->data[asym_auth];
+       if (!sig)
+               return -ENOPKG;
        if (!sig->auth_ids[0] && !sig->auth_ids[1])
                return -ENOKEY;
 
@@ -139,6 +142,8 @@ static int key_or_keyring_common(struct key *dest_keyring,
                return -EOPNOTSUPP;
 
        sig = payload->data[asym_auth];
+       if (!sig)
+               return -ENOPKG;
        if (!sig->auth_ids[0] && !sig->auth_ids[1])
                return -ENOKEY;
 
@@ -222,9 +227,9 @@ static int key_or_keyring_common(struct key *dest_keyring,
  *
  * Returns 0 if the new certificate was accepted, -ENOKEY if we
  * couldn't find a matching parent certificate in the trusted list,
- * -EKEYREJECTED if the signature check fails, and some other error if
- * there is a matching certificate but the signature check cannot be
- * performed.
+ * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses
+ * unsupported crypto, or some other error if there is a matching certificate
+ * but the signature check cannot be performed.
  */
 int restrict_link_by_key_or_keyring(struct key *dest_keyring,
                                    const struct key_type *type,
@@ -249,9 +254,9 @@ int restrict_link_by_key_or_keyring(struct key *dest_keyring,
  *
  * Returns 0 if the new certificate was accepted, -ENOKEY if we
  * couldn't find a matching parent certificate in the trusted list,
- * -EKEYREJECTED if the signature check fails, and some other error if
- * there is a matching certificate but the signature check cannot be
- * performed.
+ * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses
+ * unsupported crypto, or some other error if there is a matching certificate
+ * but the signature check cannot be performed.
  */
 int restrict_link_by_key_or_keyring_chain(struct key *dest_keyring,
                                          const struct key_type *type,
index a965b9d..ded1487 100644 (file)
 #include <crypto/sha3.h>
 #include <asm/unaligned.h>
 
+/*
+ * On some 32-bit architectures (mn10300 and h8300), GCC ends up using
+ * over 1 KB of stack if we inline the round calculation into the loop
+ * in keccakf(). On the other hand, on 64-bit architectures with plenty
+ * of [64-bit wide] general purpose registers, not inlining it severely
+ * hurts performance. So let's use 64-bitness as a heuristic to decide
+ * whether to inline or not.
+ */
+#ifdef CONFIG_64BIT
+#define SHA3_INLINE    inline
+#else
+#define SHA3_INLINE    noinline
+#endif
+
 #define KECCAK_ROUNDS 24
 
 static const u64 keccakf_rndc[24] = {
@@ -35,111 +49,115 @@ static const u64 keccakf_rndc[24] = {
 
 /* update the state with given number of rounds */
 
-static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25])
+static SHA3_INLINE void keccakf_round(u64 st[25])
 {
        u64 t[5], tt, bc[5];
-       int round;
 
-       for (round = 0; round < KECCAK_ROUNDS; round++) {
+       /* Theta */
+       bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
+       bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
+       bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
+       bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
+       bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
+
+       t[0] = bc[4] ^ rol64(bc[1], 1);
+       t[1] = bc[0] ^ rol64(bc[2], 1);
+       t[2] = bc[1] ^ rol64(bc[3], 1);
+       t[3] = bc[2] ^ rol64(bc[4], 1);
+       t[4] = bc[3] ^ rol64(bc[0], 1);
+
+       st[0] ^= t[0];
+
+       /* Rho Pi */
+       tt = st[1];
+       st[ 1] = rol64(st[ 6] ^ t[1], 44);
+       st[ 6] = rol64(st[ 9] ^ t[4], 20);
+       st[ 9] = rol64(st[22] ^ t[2], 61);
+       st[22] = rol64(st[14] ^ t[4], 39);
+       st[14] = rol64(st[20] ^ t[0], 18);
+       st[20] = rol64(st[ 2] ^ t[2], 62);
+       st[ 2] = rol64(st[12] ^ t[2], 43);
+       st[12] = rol64(st[13] ^ t[3], 25);
+       st[13] = rol64(st[19] ^ t[4],  8);
+       st[19] = rol64(st[23] ^ t[3], 56);
+       st[23] = rol64(st[15] ^ t[0], 41);
+       st[15] = rol64(st[ 4] ^ t[4], 27);
+       st[ 4] = rol64(st[24] ^ t[4], 14);
+       st[24] = rol64(st[21] ^ t[1],  2);
+       st[21] = rol64(st[ 8] ^ t[3], 55);
+       st[ 8] = rol64(st[16] ^ t[1], 45);
+       st[16] = rol64(st[ 5] ^ t[0], 36);
+       st[ 5] = rol64(st[ 3] ^ t[3], 28);
+       st[ 3] = rol64(st[18] ^ t[3], 21);
+       st[18] = rol64(st[17] ^ t[2], 15);
+       st[17] = rol64(st[11] ^ t[1], 10);
+       st[11] = rol64(st[ 7] ^ t[2],  6);
+       st[ 7] = rol64(st[10] ^ t[0],  3);
+       st[10] = rol64(    tt ^ t[1],  1);
+
+       /* Chi */
+       bc[ 0] = ~st[ 1] & st[ 2];
+       bc[ 1] = ~st[ 2] & st[ 3];
+       bc[ 2] = ~st[ 3] & st[ 4];
+       bc[ 3] = ~st[ 4] & st[ 0];
+       bc[ 4] = ~st[ 0] & st[ 1];
+       st[ 0] ^= bc[ 0];
+       st[ 1] ^= bc[ 1];
+       st[ 2] ^= bc[ 2];
+       st[ 3] ^= bc[ 3];
+       st[ 4] ^= bc[ 4];
+
+       bc[ 0] = ~st[ 6] & st[ 7];
+       bc[ 1] = ~st[ 7] & st[ 8];
+       bc[ 2] = ~st[ 8] & st[ 9];
+       bc[ 3] = ~st[ 9] & st[ 5];
+       bc[ 4] = ~st[ 5] & st[ 6];
+       st[ 5] ^= bc[ 0];
+       st[ 6] ^= bc[ 1];
+       st[ 7] ^= bc[ 2];
+       st[ 8] ^= bc[ 3];
+       st[ 9] ^= bc[ 4];
+
+       bc[ 0] = ~st[11] & st[12];
+       bc[ 1] = ~st[12] & st[13];
+       bc[ 2] = ~st[13] & st[14];
+       bc[ 3] = ~st[14] & st[10];
+       bc[ 4] = ~st[10] & st[11];
+       st[10] ^= bc[ 0];
+       st[11] ^= bc[ 1];
+       st[12] ^= bc[ 2];
+       st[13] ^= bc[ 3];
+       st[14] ^= bc[ 4];
+
+       bc[ 0] = ~st[16] & st[17];
+       bc[ 1] = ~st[17] & st[18];
+       bc[ 2] = ~st[18] & st[19];
+       bc[ 3] = ~st[19] & st[15];
+       bc[ 4] = ~st[15] & st[16];
+       st[15] ^= bc[ 0];
+       st[16] ^= bc[ 1];
+       st[17] ^= bc[ 2];
+       st[18] ^= bc[ 3];
+       st[19] ^= bc[ 4];
+
+       bc[ 0] = ~st[21] & st[22];
+       bc[ 1] = ~st[22] & st[23];
+       bc[ 2] = ~st[23] & st[24];
+       bc[ 3] = ~st[24] & st[20];
+       bc[ 4] = ~st[20] & st[21];
+       st[20] ^= bc[ 0];
+       st[21] ^= bc[ 1];
+       st[22] ^= bc[ 2];
+       st[23] ^= bc[ 3];
+       st[24] ^= bc[ 4];
+}
 
-               /* Theta */
-               bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
-               bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
-               bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
-               bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
-               bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
-
-               t[0] = bc[4] ^ rol64(bc[1], 1);
-               t[1] = bc[0] ^ rol64(bc[2], 1);
-               t[2] = bc[1] ^ rol64(bc[3], 1);
-               t[3] = bc[2] ^ rol64(bc[4], 1);
-               t[4] = bc[3] ^ rol64(bc[0], 1);
-
-               st[0] ^= t[0];
-
-               /* Rho Pi */
-               tt = st[1];
-               st[ 1] = rol64(st[ 6] ^ t[1], 44);
-               st[ 6] = rol64(st[ 9] ^ t[4], 20);
-               st[ 9] = rol64(st[22] ^ t[2], 61);
-               st[22] = rol64(st[14] ^ t[4], 39);
-               st[14] = rol64(st[20] ^ t[0], 18);
-               st[20] = rol64(st[ 2] ^ t[2], 62);
-               st[ 2] = rol64(st[12] ^ t[2], 43);
-               st[12] = rol64(st[13] ^ t[3], 25);
-               st[13] = rol64(st[19] ^ t[4],  8);
-               st[19] = rol64(st[23] ^ t[3], 56);
-               st[23] = rol64(st[15] ^ t[0], 41);
-               st[15] = rol64(st[ 4] ^ t[4], 27);
-               st[ 4] = rol64(st[24] ^ t[4], 14);
-               st[24] = rol64(st[21] ^ t[1],  2);
-               st[21] = rol64(st[ 8] ^ t[3], 55);
-               st[ 8] = rol64(st[16] ^ t[1], 45);
-               st[16] = rol64(st[ 5] ^ t[0], 36);
-               st[ 5] = rol64(st[ 3] ^ t[3], 28);
-               st[ 3] = rol64(st[18] ^ t[3], 21);
-               st[18] = rol64(st[17] ^ t[2], 15);
-               st[17] = rol64(st[11] ^ t[1], 10);
-               st[11] = rol64(st[ 7] ^ t[2],  6);
-               st[ 7] = rol64(st[10] ^ t[0],  3);
-               st[10] = rol64(    tt ^ t[1],  1);
-
-               /* Chi */
-               bc[ 0] = ~st[ 1] & st[ 2];
-               bc[ 1] = ~st[ 2] & st[ 3];
-               bc[ 2] = ~st[ 3] & st[ 4];
-               bc[ 3] = ~st[ 4] & st[ 0];
-               bc[ 4] = ~st[ 0] & st[ 1];
-               st[ 0] ^= bc[ 0];
-               st[ 1] ^= bc[ 1];
-               st[ 2] ^= bc[ 2];
-               st[ 3] ^= bc[ 3];
-               st[ 4] ^= bc[ 4];
-
-               bc[ 0] = ~st[ 6] & st[ 7];
-               bc[ 1] = ~st[ 7] & st[ 8];
-               bc[ 2] = ~st[ 8] & st[ 9];
-               bc[ 3] = ~st[ 9] & st[ 5];
-               bc[ 4] = ~st[ 5] & st[ 6];
-               st[ 5] ^= bc[ 0];
-               st[ 6] ^= bc[ 1];
-               st[ 7] ^= bc[ 2];
-               st[ 8] ^= bc[ 3];
-               st[ 9] ^= bc[ 4];
-
-               bc[ 0] = ~st[11] & st[12];
-               bc[ 1] = ~st[12] & st[13];
-               bc[ 2] = ~st[13] & st[14];
-               bc[ 3] = ~st[14] & st[10];
-               bc[ 4] = ~st[10] & st[11];
-               st[10] ^= bc[ 0];
-               st[11] ^= bc[ 1];
-               st[12] ^= bc[ 2];
-               st[13] ^= bc[ 3];
-               st[14] ^= bc[ 4];
-
-               bc[ 0] = ~st[16] & st[17];
-               bc[ 1] = ~st[17] & st[18];
-               bc[ 2] = ~st[18] & st[19];
-               bc[ 3] = ~st[19] & st[15];
-               bc[ 4] = ~st[15] & st[16];
-               st[15] ^= bc[ 0];
-               st[16] ^= bc[ 1];
-               st[17] ^= bc[ 2];
-               st[18] ^= bc[ 3];
-               st[19] ^= bc[ 4];
-
-               bc[ 0] = ~st[21] & st[22];
-               bc[ 1] = ~st[22] & st[23];
-               bc[ 2] = ~st[23] & st[24];
-               bc[ 3] = ~st[24] & st[20];
-               bc[ 4] = ~st[20] & st[21];
-               st[20] ^= bc[ 0];
-               st[21] ^= bc[ 1];
-               st[22] ^= bc[ 2];
-               st[23] ^= bc[ 3];
-               st[24] ^= bc[ 4];
+static void __optimize("O3") keccakf(u64 st[25])
+{
+       int round;
 
+       for (round = 0; round < KECCAK_ROUNDS; round++) {
+               keccakf_round(st);
                /* Iota */
                st[0] ^= keccakf_rndc[round];
        }
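
Two changes land above: the round body moves into keccakf_round() so 32-bit builds keep its large working set off keccakf()'s stack, and the open-coded GCC attribute is replaced by __optimize("O3"), the kernel's wrapper for the same attribute. The selection heuristic, reduced to a skeleton with illustrative names:

	#ifdef CONFIG_64BIT
	#define ROUND_INLINE	inline		/* plenty of 64-bit GPRs: inlining wins */
	#else
	#define ROUND_INLINE	noinline	/* keep the round's locals out of the caller's frame */
	#endif

	static ROUND_INLINE void round_step(u64 st[25])
	{
		/* one permutation round */
	}

	static void __optimize("O3") permute(u64 st[25])
	{
		int i;

		for (i = 0; i < 24; i++)
			round_step(st);		/* Iota XOR with the round constant elided */
	}
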
index 676c978..0dad0bd 100644 (file)
@@ -660,13 +660,15 @@ struct acpi_device *acpi_companion_match(const struct device *dev)
  * acpi_of_match_device - Match device object using the "compatible" property.
  * @adev: ACPI device object to match.
  * @of_match_table: List of device IDs to match against.
+ * @of_id: OF ID if matched
  *
  * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of
  * identifiers and a _DSD object with the "compatible" property, use that
  * property to match against the given list of identifiers.
  */
 static bool acpi_of_match_device(struct acpi_device *adev,
-                                const struct of_device_id *of_match_table)
+                                const struct of_device_id *of_match_table,
+                                const struct of_device_id **of_id)
 {
        const union acpi_object *of_compatible, *obj;
        int i, nval;
@@ -690,8 +692,11 @@ static bool acpi_of_match_device(struct acpi_device *adev,
                const struct of_device_id *id;
 
                for (id = of_match_table; id->compatible[0]; id++)
-                       if (!strcasecmp(obj->string.pointer, id->compatible))
+                       if (!strcasecmp(obj->string.pointer, id->compatible)) {
+                               if (of_id)
+                                       *of_id = id;
                                return true;
+                       }
        }
 
        return false;
@@ -762,10 +767,11 @@ static bool __acpi_match_device_cls(const struct acpi_device_id *id,
        return true;
 }
 
-static const struct acpi_device_id *__acpi_match_device(
-       struct acpi_device *device,
-       const struct acpi_device_id *ids,
-       const struct of_device_id *of_ids)
+static bool __acpi_match_device(struct acpi_device *device,
+                               const struct acpi_device_id *acpi_ids,
+                               const struct of_device_id *of_ids,
+                               const struct acpi_device_id **acpi_id,
+                               const struct of_device_id **of_id)
 {
        const struct acpi_device_id *id;
        struct acpi_hardware_id *hwid;
@@ -775,30 +781,32 @@ static const struct acpi_device_id *__acpi_match_device(
         * driver for it.
         */
        if (!device || !device->status.present)
-               return NULL;
+               return false;
 
        list_for_each_entry(hwid, &device->pnp.ids, list) {
                /* First, check the ACPI/PNP IDs provided by the caller. */
-               for (id = ids; id->id[0] || id->cls; id++) {
-                       if (id->id[0] && !strcmp((char *) id->id, hwid->id))
-                               return id;
-                       else if (id->cls && __acpi_match_device_cls(id, hwid))
-                               return id;
+               if (acpi_ids) {
+                       for (id = acpi_ids; id->id[0] || id->cls; id++) {
+                               if (id->id[0] && !strcmp((char *)id->id, hwid->id))
+                                       goto out_acpi_match;
+                               if (id->cls && __acpi_match_device_cls(id, hwid))
+                                       goto out_acpi_match;
+                       }
                }
 
                /*
                 * Next, check ACPI_DT_NAMESPACE_HID and try to match the
                 * "compatible" property if found.
-                *
-                * The id returned by the below is not valid, but the only
-                * caller passing non-NULL of_ids here is only interested in
-                * whether or not the return value is NULL.
                 */
-               if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id)
-                   && acpi_of_match_device(device, of_ids))
-                       return id;
+               if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id))
+                       return acpi_of_match_device(device, of_ids, of_id);
        }
-       return NULL;
+       return false;
+
+out_acpi_match:
+       if (acpi_id)
+               *acpi_id = id;
+       return true;
 }
 
 /**
@@ -815,32 +823,29 @@ static const struct acpi_device_id *__acpi_match_device(
 const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
                                               const struct device *dev)
 {
-       return __acpi_match_device(acpi_companion_match(dev), ids, NULL);
+       const struct acpi_device_id *id = NULL;
+
+       __acpi_match_device(acpi_companion_match(dev), ids, NULL, &id, NULL);
+       return id;
 }
 EXPORT_SYMBOL_GPL(acpi_match_device);
 
-void *acpi_get_match_data(const struct device *dev)
+const void *acpi_device_get_match_data(const struct device *dev)
 {
        const struct acpi_device_id *match;
 
-       if (!dev->driver)
-               return NULL;
-
-       if (!dev->driver->acpi_match_table)
-               return NULL;
-
        match = acpi_match_device(dev->driver->acpi_match_table, dev);
        if (!match)
                return NULL;
 
-       return (void *)match->driver_data;
+       return (const void *)match->driver_data;
 }
-EXPORT_SYMBOL_GPL(acpi_get_match_data);
+EXPORT_SYMBOL_GPL(acpi_device_get_match_data);
 
 int acpi_match_device_ids(struct acpi_device *device,
                          const struct acpi_device_id *ids)
 {
-       return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT;
+       return __acpi_match_device(device, ids, NULL, NULL, NULL) ? 0 : -ENOENT;
 }
 EXPORT_SYMBOL(acpi_match_device_ids);
 
@@ -849,10 +854,12 @@ bool acpi_driver_match_device(struct device *dev,
 {
        if (!drv->acpi_match_table)
                return acpi_of_match_device(ACPI_COMPANION(dev),
-                                           drv->of_match_table);
+                                           drv->of_match_table,
+                                           NULL);
 
-       return !!__acpi_match_device(acpi_companion_match(dev),
-                                    drv->acpi_match_table, drv->of_match_table);
+       return __acpi_match_device(acpi_companion_match(dev),
+                                  drv->acpi_match_table, drv->of_match_table,
+                                  NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(acpi_driver_match_device);
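
The rework stops __acpi_match_device() from returning a possibly-invalid acpi_device_id pointer (the deleted comment even warned about it) and reports success as a bool, handing the matched ACPI or OF id back through optional out-parameters; callers that only want yes/no pass NULL. The convention in miniature, with hypothetical types:

	static bool my_match(const struct my_id *ids, const char *hid,
			     const struct my_id **out_id)
	{
		const struct my_id *id;

		for (id = ids; id->name[0]; id++) {
			if (strcmp(id->name, hid))
				continue;
			if (out_id)	/* yes/no callers pass NULL here */
				*out_id = id;
			return true;
		}
		return false;
	}
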
 
index d9f38c6..30a5729 100644 (file)
@@ -1927,6 +1927,9 @@ static int acpi_ec_suspend_noirq(struct device *dev)
            ec->reference_count >= 1)
                acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_DISABLE);
 
+       if (acpi_sleep_no_ec_events())
+               acpi_ec_enter_noirq(ec);
+
        return 0;
 }
 
@@ -1934,6 +1937,9 @@ static int acpi_ec_resume_noirq(struct device *dev)
 {
        struct acpi_ec *ec = acpi_driver_data(to_acpi_device(dev));
 
+       if (acpi_sleep_no_ec_events())
+               acpi_ec_leave_noirq(ec);
+
        if (ec_no_wakeup && test_bit(EC_FLAGS_STARTED, &ec->flags) &&
            ec->reference_count >= 1)
                acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_ENABLE);
index 466d150..5815356 100644 (file)
@@ -1271,11 +1271,11 @@ static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
        return 0;
 }
 
-static void *
+static const void *
 acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
                                  const struct device *dev)
 {
-       return acpi_get_match_data(dev);
+       return acpi_device_get_match_data(dev);
 }
 
 #define DECLARE_ACPI_FWNODE_OPS(ops) \
index 89e97d2..9d52743 100644 (file)
@@ -115,6 +115,7 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console)
                        table->serial_port.access_width))) {
                default:
                        pr_err("Unexpected SPCR Access Width.  Defaulting to byte size\n");
+                       /* fall through */
                case 8:
                        iotype = "mmio";
                        break;
index 15e3d3c..764b63a 100644 (file)
@@ -1991,8 +1991,14 @@ static void binder_send_failed_reply(struct binder_transaction *t,
                                        &target_thread->reply_error.work);
                                wake_up_interruptible(&target_thread->wait);
                        } else {
-                               WARN(1, "Unexpected reply error: %u\n",
-                                               target_thread->reply_error.cmd);
+                               /*
+                                * Cannot get here for normal operation, but
+                                * we can if multiple synchronous transactions
+                                * are sent without blocking for responses.
+                                * Just ignore the 2nd error in this case.
+                                */
+                               pr_warn("Unexpected reply error: %u\n",
+                                       target_thread->reply_error.cmd);
                        }
                        binder_inner_proc_unlock(target_thread->proc);
                        binder_thread_dec_tmpref(target_thread);
@@ -2193,7 +2199,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
        int debug_id = buffer->debug_id;
 
        binder_debug(BINDER_DEBUG_TRANSACTION,
-                    "%d buffer release %d, size %zd-%zd, failed at %p\n",
+                    "%d buffer release %d, size %zd-%zd, failed at %pK\n",
                     proc->pid, buffer->debug_id,
                     buffer->data_size, buffer->offsets_size, failed_at);
 
@@ -3705,7 +3711,7 @@ static int binder_thread_write(struct binder_proc *proc,
                                }
                        }
                        binder_debug(BINDER_DEBUG_DEAD_BINDER,
-                                    "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n",
+                                    "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n",
                                     proc->pid, thread->pid, (u64)cookie,
                                     death);
                        if (death == NULL) {
@@ -4376,6 +4382,15 @@ static int binder_thread_release(struct binder_proc *proc,
 
        binder_inner_proc_unlock(thread->proc);
 
+       /*
+        * This is needed to avoid races between wake_up_poll() above and
+        * ep_remove_waitqueue() called for other reasons (e.g. the epoll file
+        * descriptor being closed); ep_remove_waitqueue() holds an RCU read
+        * lock, so we can be sure it's done after calling synchronize_rcu().
+        */
+       if (thread->looper & BINDER_LOOPER_STATE_POLL)
+               synchronize_rcu();
+
        if (send_reply)
                binder_send_failed_reply(send_reply, BR_DEAD_REPLY);
        binder_release_work(proc, &thread->todo);
@@ -4391,6 +4406,8 @@ static __poll_t binder_poll(struct file *filp,
        bool wait_for_proc_work;
 
        thread = binder_get_thread(proc);
+       if (!thread)
+               return POLLERR;
 
        binder_inner_proc_lock(thread->proc);
        thread->looper |= BINDER_LOOPER_STATE_POLL;
@@ -5034,7 +5051,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
        spin_lock(&t->lock);
        to_proc = t->to_proc;
        seq_printf(m,
-                  "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d",
+                  "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d",
                   prefix, t->debug_id, t,
                   t->from ? t->from->proc->pid : 0,
                   t->from ? t->from->pid : 0,
@@ -5058,7 +5075,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
        }
        if (buffer->target_node)
                seq_printf(m, " node %d", buffer->target_node->debug_id);
-       seq_printf(m, " size %zd:%zd data %p\n",
+       seq_printf(m, " size %zd:%zd data %pK\n",
                   buffer->data_size, buffer->offsets_size,
                   buffer->data);
 }
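
Three independent hardening fixes: binder_poll() now fails with POLLERR when binder_get_thread() returns NULL instead of dereferencing it, synchronize_rcu() makes sure an epoll waitqueue removal has finished before a polling thread is torn down, and the debug output moves from %p to %pK so printed pointers honour kptr_restrict. The specifier difference in isolation:

	pr_info("buffer at %p\n", buf);		/* hashed, but not subject to kptr_restrict */
	pr_info("buffer at %pK\n", buf);	/* additionally censored to zeros when kptr_restrict requires it */
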
index 0277f36..6e73714 100644 (file)
@@ -3173,14 +3173,10 @@ static void init_sram(struct idt77252_dev *card)
                                    (u32) 0xffffffff);
        }
 
-       writel((SAR_FBQ0_LOW << 28) | 0x00000000 | 0x00000000 |
-              (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
-       writel((SAR_FBQ1_LOW << 28) | 0x00000000 | 0x00000000 |
-              (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
-       writel((SAR_FBQ2_LOW << 28) | 0x00000000 | 0x00000000 |
-              (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
-       writel((SAR_FBQ3_LOW << 28) | 0x00000000 | 0x00000000 |
-              (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
+       writel((SAR_FBQ0_LOW << 28) | (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
+       writel((SAR_FBQ1_LOW << 28) | (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
+       writel((SAR_FBQ2_LOW << 28) | (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
+       writel((SAR_FBQ3_LOW << 28) | (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
 
        /* Initialize rate table  */
        for (i = 0; i < 256; i++) {
index b2261f9..5847364 100644 (file)
@@ -310,6 +310,9 @@ static void __device_link_del(struct device_link *link)
        dev_info(link->consumer, "Dropping the link to %s\n",
                 dev_name(link->supplier));
 
+       if (link->flags & DL_FLAG_PM_RUNTIME)
+               pm_runtime_drop_link(link->consumer);
+
        list_del(&link->s_node);
        list_del(&link->c_node);
        device_link_free(link);
index a8ac86e..6637fc3 100644 (file)
@@ -321,7 +321,8 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq)
                return;
 
        if (device_may_wakeup(wirq->dev)) {
-               if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+               if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+                   !pm_runtime_status_suspended(wirq->dev))
                        enable_irq(wirq->irq);
 
                enable_irq_wake(wirq->irq);
@@ -343,7 +344,8 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq)
        if (device_may_wakeup(wirq->dev)) {
                disable_irq_wake(wirq->irq);
 
-               if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+               if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+                   !pm_runtime_status_suspended(wirq->dev))
                        disable_irq_nosync(wirq->irq);
        }
 }
index 3022362..8f205f6 100644 (file)
@@ -1410,9 +1410,8 @@ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 }
 EXPORT_SYMBOL(fwnode_graph_parse_endpoint);
 
-void *device_get_match_data(struct device *dev)
+const void *device_get_match_data(struct device *dev)
 {
-       return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data,
-                                 dev);
+       return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev);
 }
 EXPORT_SYMBOL_GPL(device_get_match_data);
index 204afe6..3d7a5c1 100644 (file)
@@ -203,6 +203,12 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
        { }     /* Terminating entry */
 };
 
+static inline void ath3k_log_failed_loading(int err, int len, int size)
+{
+       BT_ERR("Error in firmware loading err = %d, len = %d, size = %d",
+                       err, len, size);
+}
+
 #define USB_REQ_DFU_DNLOAD     1
 #define BULK_SIZE              4096
 #define FW_HDR_SIZE            20
@@ -227,15 +233,16 @@ static int ath3k_load_firmware(struct usb_device *udev,
                return -ENOMEM;
        }
 
-       memcpy(send_buf, firmware->data, 20);
+       memcpy(send_buf, firmware->data, FW_HDR_SIZE);
        err = usb_control_msg(udev, pipe, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR,
-                             0, 0, send_buf, 20, USB_CTRL_SET_TIMEOUT);
+                             0, 0, send_buf, FW_HDR_SIZE,
+                             USB_CTRL_SET_TIMEOUT);
        if (err < 0) {
                BT_ERR("Can't change to loading configuration err");
                goto error;
        }
-       sent += 20;
-       count -= 20;
+       sent += FW_HDR_SIZE;
+       count -= FW_HDR_SIZE;
 
        pipe = usb_sndbulkpipe(udev, 0x02);
 
@@ -250,8 +257,7 @@ static int ath3k_load_firmware(struct usb_device *udev,
                                        &len, 3000);
 
                if (err || (len != size)) {
-                       BT_ERR("Error in firmware loading err = %d,"
-                               "len = %d, size = %d", err, len, size);
+                       ath3k_log_failed_loading(err, len, size);
                        goto error;
                }
 
@@ -350,8 +356,7 @@ static int ath3k_load_fwfile(struct usb_device *udev,
                err = usb_bulk_msg(udev, pipe, send_buf, size,
                                        &len, 3000);
                if (err || (len != size)) {
-                       BT_ERR("Error in firmware loading err = %d,"
-                               "len = %d, size = %d", err, len, size);
+                       ath3k_log_failed_loading(err, len, size);
                        kfree(send_buf);
                        return err;
                }
@@ -398,7 +403,7 @@ static int ath3k_set_normal_mode(struct usb_device *udev)
 static int ath3k_load_patch(struct usb_device *udev)
 {
        unsigned char fw_state;
-       char filename[ATH3K_NAME_LEN] = {0};
+       char filename[ATH3K_NAME_LEN];
        const struct firmware *firmware;
        struct ath3k_version fw_version;
        __u32 pt_rom_version, pt_build_version;
@@ -451,7 +456,7 @@ static int ath3k_load_patch(struct usb_device *udev)
 static int ath3k_load_syscfg(struct usb_device *udev)
 {
        unsigned char fw_state;
-       char filename[ATH3K_NAME_LEN] = {0};
+       char filename[ATH3K_NAME_LEN];
        const struct firmware *firmware;
        struct ath3k_version fw_version;
        int clk_value, ret;
@@ -522,7 +527,6 @@ static int ath3k_probe(struct usb_interface *intf,
 
        /* load patch and sysconfig files for AR3012 */
        if (id->driver_info & BTUSB_ATH3012) {
-
                /* New firmware with patch and sysconfig files already loaded */
                if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001)
                        return -ENODEV;
@@ -565,7 +569,7 @@ static int ath3k_probe(struct usb_interface *intf,
 
 static void ath3k_disconnect(struct usb_interface *intf)
 {
-       BT_DBG("ath3k_disconnect intf %p", intf);
+       BT_DBG("%s intf %p", __func__, intf);
 }
 
 static struct usb_driver ath3k_driver = {
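
The ath3k changes are cleanup: the duplicated BT_ERR becomes the ath3k_log_failed_loading() helper, the magic 20 is spelled FW_HDR_SIZE, and the redundant zero-initialisers on filename go away (snprintf() fills the buffer before any use). The helper also quietly repairs the old message, where adjacent string literals concatenated without a separator (values below are illustrative):

	BT_ERR("Error in firmware loading err = %d,"
	       "len = %d, size = %d", err, len, size);
	/* printed e.g.: "... err = -71,len = 0, size = 4096" -- no space after the comma */
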
index b280d46..f6c694a 100644 (file)
@@ -183,7 +183,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode,
                return -EFAULT;
        }
 
-       skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_ATOMIC);
+       skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_KERNEL);
        if (!skb) {
                BT_ERR("No free skb");
                return -ENOMEM;
index 6e2ad74..437f080 100644 (file)
 #define RTL_ROM_LMP_8761A      0x8761
 #define RTL_ROM_LMP_8822B      0x8822
 
+#define IC_MATCH_FL_LMPSUBV    (1 << 0)
+#define IC_MATCH_FL_HCIREV     (1 << 1)
+#define IC_INFO(lmps, hcir) \
+       .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV, \
+       .lmp_subver = (lmps), \
+       .hci_rev = (hcir)
+
+struct id_table {
+       __u16 match_flags;
+       __u16 lmp_subver;
+       __u16 hci_rev;
+       bool config_needed;
+       char *fw_name;
+       char *cfg_name;
+};
+
+static const struct id_table ic_id_table[] = {
+       /* 8723B */
+       { IC_INFO(RTL_ROM_LMP_8723B, 0xb),
+         .config_needed = false,
+         .fw_name  = "rtl_bt/rtl8723b_fw.bin",
+         .cfg_name = "rtl_bt/rtl8723b_config.bin" },
+
+       /* 8723D */
+       { IC_INFO(RTL_ROM_LMP_8723B, 0xd),
+         .config_needed = true,
+         .fw_name  = "rtl_bt/rtl8723d_fw.bin",
+         .cfg_name = "rtl_bt/rtl8723d_config.bin" },
+
+       /* 8821A */
+       { IC_INFO(RTL_ROM_LMP_8821A, 0xa),
+         .config_needed = false,
+         .fw_name  = "rtl_bt/rtl8821a_fw.bin",
+         .cfg_name = "rtl_bt/rtl8821a_config.bin" },
+
+       /* 8821C */
+       { IC_INFO(RTL_ROM_LMP_8821A, 0xc),
+         .config_needed = false,
+         .fw_name  = "rtl_bt/rtl8821c_fw.bin",
+         .cfg_name = "rtl_bt/rtl8821c_config.bin" },
+
+       /* 8761A */
+       { IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0,
+         .config_needed = false,
+         .fw_name  = "rtl_bt/rtl8761a_fw.bin",
+         .cfg_name = "rtl_bt/rtl8761a_config.bin" },
+
+       /* 8822B */
+       { IC_INFO(RTL_ROM_LMP_8822B, 0xb),
+         .config_needed = true,
+         .fw_name  = "rtl_bt/rtl8822b_fw.bin",
+         .cfg_name = "rtl_bt/rtl8822b_config.bin" },
+       };
+
 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
 {
        struct rtl_rom_version_evt *rom_version;
@@ -64,9 +118,9 @@ static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
        return 0;
 }
 
-static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
-                                  const struct firmware *fw,
-                                  unsigned char **_buf)
+static int rtlbt_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
+                               const struct firmware *fw,
+                               unsigned char **_buf)
 {
        const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 };
        struct rtl_epatch_header *epatch_info;
@@ -88,6 +142,8 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
                { RTL_ROM_LMP_8821A, 2 },
                { RTL_ROM_LMP_8761A, 3 },
                { RTL_ROM_LMP_8822B, 8 },
+               { RTL_ROM_LMP_8723B, 9 },       /* 8723D */
+               { RTL_ROM_LMP_8821A, 10 },      /* 8821C */
        };
 
        ret = rtl_read_rom_version(hdev, &rom_version);
@@ -320,8 +376,8 @@ out:
        return ret;
 }
 
-static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
-                               const char *fw_name)
+static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 hci_rev,
+                               u16 lmp_subver)
 {
        unsigned char *fw_data = NULL;
        const struct firmware *fw;
@@ -330,39 +386,40 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
        u8 *cfg_buff = NULL;
        u8 *tbuff;
        char *cfg_name = NULL;
-       bool config_needed = false;
+       char *fw_name = NULL;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ic_id_table); i++) {
+               if ((ic_id_table[i].match_flags & IC_MATCH_FL_LMPSUBV) &&
+                   (ic_id_table[i].lmp_subver != lmp_subver))
+                       continue;
+               if ((ic_id_table[i].match_flags & IC_MATCH_FL_HCIREV) &&
+                   (ic_id_table[i].hci_rev != hci_rev))
+                       continue;
 
-       switch (lmp_subver) {
-       case RTL_ROM_LMP_8723B:
-               cfg_name = "rtl_bt/rtl8723b_config.bin";
-               break;
-       case RTL_ROM_LMP_8821A:
-               cfg_name = "rtl_bt/rtl8821a_config.bin";
-               break;
-       case RTL_ROM_LMP_8761A:
-               cfg_name = "rtl_bt/rtl8761a_config.bin";
-               break;
-       case RTL_ROM_LMP_8822B:
-               cfg_name = "rtl_bt/rtl8822b_config.bin";
-               config_needed = true;
-               break;
-       default:
-               BT_ERR("%s: rtl: no config according to lmp_subver %04x",
-                      hdev->name, lmp_subver);
                break;
        }
 
+       if (i >= ARRAY_SIZE(ic_id_table)) {
+               BT_ERR("%s: unknown IC info, lmp subver %04x, hci rev %04x",
+                      hdev->name, lmp_subver, hci_rev);
+               return -EINVAL;
+       }
+
+       cfg_name = ic_id_table[i].cfg_name;
+
        if (cfg_name) {
                cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff);
                if (cfg_sz < 0) {
                        cfg_sz = 0;
-                       if (config_needed)
+                       if (ic_id_table[i].config_needed)
                                BT_ERR("Necessary config file %s not found\n",
                                       cfg_name);
                }
        } else
                cfg_sz = 0;
 
+       fw_name = ic_id_table[i].fw_name;
        bt_dev_info(hdev, "rtl: loading %s", fw_name);
        ret = request_firmware(&fw, fw_name, &hdev->dev);
        if (ret < 0) {
@@ -370,7 +427,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
                goto err_req_fw;
        }
 
-       ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
+       ret = rtlbt_parse_firmware(hdev, lmp_subver, fw, &fw_data);
        if (ret < 0)
                goto out;
 
@@ -429,7 +486,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 {
        struct sk_buff *skb;
        struct hci_rp_read_local_version *resp;
-       u16 lmp_subver;
+       u16 hci_rev, lmp_subver;
 
        skb = btrtl_read_local_version(hdev);
        if (IS_ERR(skb))
@@ -441,6 +498,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
                    resp->hci_ver, resp->hci_rev,
                    resp->lmp_ver, resp->lmp_subver);
 
+       hci_rev = le16_to_cpu(resp->hci_rev);
        lmp_subver = le16_to_cpu(resp->lmp_subver);
        kfree_skb(skb);
 
@@ -455,17 +513,10 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
        case RTL_ROM_LMP_3499:
                return btrtl_setup_rtl8723a(hdev);
        case RTL_ROM_LMP_8723B:
-               return btrtl_setup_rtl8723b(hdev, lmp_subver,
-                                           "rtl_bt/rtl8723b_fw.bin");
        case RTL_ROM_LMP_8821A:
-               return btrtl_setup_rtl8723b(hdev, lmp_subver,
-                                           "rtl_bt/rtl8821a_fw.bin");
        case RTL_ROM_LMP_8761A:
-               return btrtl_setup_rtl8723b(hdev, lmp_subver,
-                                           "rtl_bt/rtl8761a_fw.bin");
        case RTL_ROM_LMP_8822B:
-               return btrtl_setup_rtl8723b(hdev, lmp_subver,
-                                           "rtl_bt/rtl8822b_fw.bin");
+               return btrtl_setup_rtl8723b(hdev, hci_rev, lmp_subver);
        default:
                bt_dev_info(hdev, "rtl: assuming no firmware upload needed");
                return 0;
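
The new ic_id_table turns a growing switch on lmp_subver into data-driven matching: match_flags records which of lmp_subver/hci_rev is significant per entry, which is what distinguishes two chips sharing an LMP subversion (8723B vs 8723D, 8821A vs 8821C) by HCI revision. The lookup, stripped to a sketch with hypothetical names:

	struct fw_id {
		u16 match_flags;
	#define MATCH_LMP	BIT(0)
	#define MATCH_HCIREV	BIT(1)
		u16 lmp_subver;
		u16 hci_rev;
		const char *fw_name;
	};

	static const struct fw_id *find_fw(const struct fw_id *tbl, size_t n,
					   u16 lmp_subver, u16 hci_rev)
	{
		size_t i;

		for (i = 0; i < n; i++) {
			if ((tbl[i].match_flags & MATCH_LMP) &&
			    tbl[i].lmp_subver != lmp_subver)
				continue;
			if ((tbl[i].match_flags & MATCH_HCIREV) &&
			    tbl[i].hci_rev != hci_rev)
				continue;
			return &tbl[i];	/* first entry whose required fields all match */
		}
		return NULL;
	}
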
index 2a55380..c8e9ae6 100644 (file)
@@ -339,6 +339,7 @@ static const struct usb_device_id blacklist_table[] = {
 
        /* Intel Bluetooth devices */
        { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW },
+       { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW },
        { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
        { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
        { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
@@ -373,6 +374,9 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
        { USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
 
+       /* Additional Realtek 8822BE Bluetooth devices */
+       { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
        /* Silicon Wave based devices */
        { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
 
@@ -2057,6 +2061,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
        case 0x0c:      /* WsP */
        case 0x11:      /* JfP */
        case 0x12:      /* ThP */
+       case 0x13:      /* HrP */
+       case 0x14:      /* QnJ, IcP */
                break;
        default:
                BT_ERR("%s: Unsupported Intel hardware variant (%u)",
@@ -2149,6 +2155,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
                break;
        case 0x11:      /* JfP */
        case 0x12:      /* ThP */
+       case 0x13:      /* HrP */
+       case 0x14:      /* QnJ, IcP */
                snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
                         le16_to_cpu(ver.hw_variant),
                         le16_to_cpu(ver.hw_revision),
@@ -2180,6 +2188,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
                break;
        case 0x11:      /* JfP */
        case 0x12:      /* ThP */
+       case 0x13:      /* HrP */
+       case 0x14:      /* QnJ, IcP */
                snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
                         le16_to_cpu(ver.hw_variant),
                         le16_to_cpu(ver.hw_revision),
index 14ae7ee..d568fbd 100644 (file)
@@ -71,12 +71,12 @@ static int ath_wakeup_ar3k(struct tty_struct *tty)
        /* Clear RTS first */
        tty->driver->ops->tiocmget(tty);
        tty->driver->ops->tiocmset(tty, 0x00, TIOCM_RTS);
-       mdelay(20);
+       msleep(20);
 
        /* Set RTS, wake up board */
        tty->driver->ops->tiocmget(tty);
        tty->driver->ops->tiocmset(tty, TIOCM_RTS, 0x00);
-       mdelay(20);
+       msleep(20);
 
        status = tty->driver->ops->tiocmget(tty);
        return status;
index 1b4417a..2f30dca 100644 (file)
@@ -650,7 +650,7 @@ static int download_firmware(struct ll_device *lldev)
                        break;
                case ACTION_DELAY:      /* sleep */
                        bt_dev_info(lldev->hu.hdev, "sleep command in scr");
-                       mdelay(((struct bts_action_delay *)action_ptr)->msec);
+                       msleep(((struct bts_action_delay *)action_ptr)->msec);
                        break;
                }
                len -= (sizeof(struct bts_action) +
index d1f5bb5..6e9df55 100644 (file)
@@ -162,7 +162,7 @@ static int via_rng_init(struct hwrng *rng)
        /* Enable secondary noise source on CPUs where it is present. */
 
        /* Nehemiah stepping 8 and higher */
-       if ((c->x86_model == 9) && (c->x86_mask > 7))
+       if ((c->x86_model == 9) && (c->x86_stepping > 7))
                lo |= VIA_NOISESRC2;
 
        /* Esther */
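
This is the first of many hunks below from the tree-wide rename of cpuinfo_x86.x86_mask to x86_stepping; the field holds the CPU stepping, so the old name was a misnomer. The cpufreq, EDAC and padlock hunks that follow are the same mechanical substitution, e.g. the packed errata key in the p4-clockmod hunk:

	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping;
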
index 3a2ca0f..d0c34df 100644 (file)
@@ -629,7 +629,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
        if (c->x86_vendor == X86_VENDOR_INTEL) {
                if ((c->x86 == 15) &&
                    (c->x86_model == 6) &&
-                   (c->x86_mask == 8)) {
+                   (c->x86_stepping == 8)) {
                        pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
                        return -ENODEV;
                    }
index 942632a..f730b65 100644 (file)
@@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
                break;
 
        case 7:
-               switch (c->x86_mask) {
+               switch (c->x86_stepping) {
                case 0:
                        longhaul_version = TYPE_LONGHAUL_V1;
                        cpu_model = CPU_SAMUEL2;
@@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
                        break;
                case 1 ... 15:
                        longhaul_version = TYPE_LONGHAUL_V2;
-                       if (c->x86_mask < 8) {
+                       if (c->x86_stepping < 8) {
                                cpu_model = CPU_SAMUEL2;
                                cpuname = "C3 'Samuel 2' [C5B]";
                        } else {
@@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
                numscales = 32;
                memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
                memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
-               switch (c->x86_mask) {
+               switch (c->x86_stepping) {
                case 0 ... 1:
                        cpu_model = CPU_NEHEMIAH;
                        cpuname = "C3 'Nehemiah A' [C5XLOE]";
index fd77812..a25741b 100644 (file)
@@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 #endif
 
        /* Errata workaround */
-       cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+       cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping;
        switch (cpuid) {
        case 0x0f07:
        case 0x0f0a:
index 80ac313..302e9ce 100644 (file)
@@ -131,7 +131,7 @@ static int check_powernow(void)
                return 0;
        }
 
-       if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+       if ((c->x86_model == 6) && (c->x86_stepping == 0)) {
                pr_info("K7 660[A0] core detected, enabling errata workarounds\n");
                have_a0 = 1;
        }
index 41bc539..4fa5adf 100644 (file)
@@ -37,7 +37,7 @@ struct cpu_id
 {
        __u8    x86;            /* CPU family */
        __u8    x86_model;      /* model */
-       __u8    x86_mask;       /* stepping */
+       __u8    x86_stepping;   /* stepping */
 };
 
 enum {
@@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
 {
        if ((c->x86 == x->x86) &&
            (c->x86_model == x->x86_model) &&
-           (c->x86_mask == x->x86_mask))
+           (c->x86_stepping == x->x86_stepping))
                return 1;
        return 0;
 }
index 8085ec9..e3a9962 100644 (file)
@@ -272,9 +272,9 @@ unsigned int speedstep_detect_processor(void)
                ebx = cpuid_ebx(0x00000001);
                ebx &= 0x000000FF;
 
-               pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+               pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping);
 
-               switch (c->x86_mask) {
+               switch (c->x86_stepping) {
                case 4:
                        /*
                         * B-stepping [M-P4-M]
@@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void)
                                msr_lo, msr_hi);
                if ((msr_hi & (1<<18)) &&
                    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
-                       if (c->x86_mask == 0x01) {
+                       if (c->x86_stepping == 0x01) {
                                pr_debug("early PIII version\n");
                                return SPEEDSTEP_CPU_PIII_C_EARLY;
                        } else
index 75d280c..e843cf4 100644 (file)
@@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
                 * without any error (HW optimizations for later
                 * CAAM eras), then try again.
                 */
+               if (ret)
+                       break;
+
                rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
                if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
-                   !(rdsta_val & (1 << sh_idx)))
+                   !(rdsta_val & (1 << sh_idx))) {
                        ret = -EAGAIN;
-               if (ret)
                        break;
+               }
+
                dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx);
                /* Clear the contents before recreating the descriptor */
                memset(desc, 0x00, CAAM_CMD_SZ * 7);
index 4b6642a..1c6cbda 100644 (file)
@@ -512,7 +512,7 @@ static int __init padlock_init(void)
 
        printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
 
-       if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
+       if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) {
                ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
                cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
                printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
index 188f44b..5d64c08 100644 (file)
@@ -1922,15 +1922,21 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
        uint32_t aes_control;
        unsigned long flags;
        int err;
+       u8 *iv;
 
        aes_control = SSS_AES_KEY_CHANGE_MODE;
        if (mode & FLAGS_AES_DECRYPT)
                aes_control |= SSS_AES_MODE_DECRYPT;
 
-       if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC)
+       if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) {
                aes_control |= SSS_AES_CHAIN_MODE_CBC;
-       else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR)
+               iv = req->info;
+       } else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) {
                aes_control |= SSS_AES_CHAIN_MODE_CTR;
+               iv = req->info;
+       } else {
+               iv = NULL; /* AES_ECB */
+       }
 
        if (dev->ctx->keylen == AES_KEYSIZE_192)
                aes_control |= SSS_AES_KEY_SIZE_192;
@@ -1961,7 +1967,7 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
                goto outdata_error;
 
        SSS_AES_WRITE(dev, AES_CONTROL, aes_control);
-       s5p_set_aes(dev, dev->ctx->aes_key, req->info, dev->ctx->keylen);
+       s5p_set_aes(dev, dev->ctx->aes_key, iv, dev->ctx->keylen);
 
        s5p_set_dma_indata(dev,  dev->sg_src);
        s5p_set_dma_outdata(dev, dev->sg_dst);
index 0d01d16..63d6364 100644 (file)
@@ -28,7 +28,7 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
        algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
        ss = algt->ss;
 
-       spin_lock(&ss->slock);
+       spin_lock_bh(&ss->slock);
 
        writel(mode, ss->base + SS_CTL);
 
@@ -51,6 +51,6 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
        }
 
        writel(0, ss->base + SS_CTL);
-       spin_unlock(&ss->slock);
-       return dlen;
+       spin_unlock_bh(&ss->slock);
+       return 0;
 }
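
Two fixes share this hunk: spin_lock_bh() masks bottom halves across the critical section, which is required once the same lock can also be taken from softirq context, and returning 0 instead of dlen matches the crypto_rng generate() contract (0 on success, negative errno on failure). The shape after the change:

	spin_lock_bh(&ss->slock);	/* also excludes softirq users of ss->slock */
	writel(mode, ss->base + SS_CTL);
	/* ... read out dlen bytes of random data ... */
	writel(0, ss->base + SS_CTL);
	spin_unlock_bh(&ss->slock);
	return 0;			/* success is 0, not the byte count */
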
index 9c80e0c..6882fa2 100644 (file)
@@ -1138,6 +1138,10 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
        struct talitos_private *priv = dev_get_drvdata(dev);
        bool is_sec1 = has_ftr_sec1(priv);
 
+       if (!src) {
+               to_talitos_ptr(ptr, 0, 0, is_sec1);
+               return 1;
+       }
        if (sg_count == 1) {
                to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
                return sg_count;
index 8b16ec5..329cb96 100644 (file)
@@ -3147,7 +3147,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
        struct amd64_family_type *fam_type = NULL;
 
        pvt->ext_model  = boot_cpu_data.x86_model >> 4;
-       pvt->stepping   = boot_cpu_data.x86_mask;
+       pvt->stepping   = boot_cpu_data.x86_stepping;
        pvt->model      = boot_cpu_data.x86_model;
        pvt->fam        = boot_cpu_data.x86;
 
index 0a44d43..3ec4c71 100644 (file)
@@ -1,7 +1,6 @@
 /*
  * extcon-axp288.c - X-Power AXP288 PMIC extcon cable detection driver
  *
- * Copyright (C) 2016-2017 Hans de Goede <hdegoede@redhat.com>
  * Copyright (C) 2015 Intel Corporation
  * Author: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
  *
@@ -98,15 +97,13 @@ struct axp288_extcon_info {
        struct device *dev;
        struct regmap *regmap;
        struct regmap_irq_chip_data *regmap_irqc;
-       struct delayed_work det_work;
        int irq[EXTCON_IRQ_END];
        struct extcon_dev *edev;
        unsigned int previous_cable;
-       bool first_detect_done;
 };
 
 /* Power up/down reason string array */
-static char *axp288_pwr_up_down_info[] = {
+static const char * const axp288_pwr_up_down_info[] = {
        "Last wake caused by user pressing the power button",
        "Last wake caused by a charger insertion",
        "Last wake caused by a battery insertion",
@@ -124,7 +121,7 @@ static char *axp288_pwr_up_down_info[] = {
  */
 static void axp288_extcon_log_rsi(struct axp288_extcon_info *info)
 {
-       char **rsi;
+       const char * const *rsi;
        unsigned int val, i, clear_mask = 0;
        int ret;
 
@@ -140,25 +137,6 @@ static void axp288_extcon_log_rsi(struct axp288_extcon_info *info)
        regmap_write(info->regmap, AXP288_PS_BOOT_REASON_REG, clear_mask);
 }
 
-static void axp288_chrg_detect_complete(struct axp288_extcon_info *info)
-{
-       /*
-        * We depend on other drivers to do things like mux the data lines,
-        * enable/disable vbus based on the id-pin, etc. Sometimes the BIOS has
-        * not set these things up correctly resulting in the initial charger
-        * cable type detection giving a wrong result and we end up not charging
-        * or charging at only 0.5A.
-        *
-        * So we schedule a second cable type detection after 2 seconds to
-        * give the other drivers time to load and do their thing.
-        */
-       if (!info->first_detect_done) {
-               queue_delayed_work(system_wq, &info->det_work,
-                                  msecs_to_jiffies(2000));
-               info->first_detect_done = true;
-       }
-}
-
 static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info)
 {
        int ret, stat, cfg, pwr_stat;
@@ -223,8 +201,6 @@ no_vbus:
                info->previous_cable = cable;
        }
 
-       axp288_chrg_detect_complete(info);
-
        return 0;
 
 dev_det_ret:
@@ -246,11 +222,8 @@ static irqreturn_t axp288_extcon_isr(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static void axp288_extcon_det_work(struct work_struct *work)
+static void axp288_extcon_enable(struct axp288_extcon_info *info)
 {
-       struct axp288_extcon_info *info =
-               container_of(work, struct axp288_extcon_info, det_work.work);
-
        regmap_update_bits(info->regmap, AXP288_BC_GLOBAL_REG,
                                                BC_GLOBAL_RUN, 0);
        /* Enable the charger detection logic */
@@ -272,7 +245,6 @@ static int axp288_extcon_probe(struct platform_device *pdev)
        info->regmap = axp20x->regmap;
        info->regmap_irqc = axp20x->regmap_irqc;
        info->previous_cable = EXTCON_NONE;
-       INIT_DELAYED_WORK(&info->det_work, axp288_extcon_det_work);
 
        platform_set_drvdata(pdev, info);
 
@@ -318,7 +290,7 @@ static int axp288_extcon_probe(struct platform_device *pdev)
        }
 
        /* Start charger cable type detection */
-       queue_delayed_work(system_wq, &info->det_work, 0);
+       axp288_extcon_enable(info);
 
        return 0;
 }
index c8691b5..191e99f 100644 (file)
@@ -153,8 +153,9 @@ static int int3496_probe(struct platform_device *pdev)
                return ret;
        }
 
-       /* queue initial processing of id-pin */
+       /* process id-pin so that we start with the right status */
        queue_delayed_work(system_wq, &data->work, 0);
+       flush_delayed_work(&data->work);
 
        platform_set_drvdata(pdev, data);
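
The queue-then-flush pair makes the first id-pin evaluation synchronous while keeping one code path for later IRQ-driven runs: probe() does not return until the handler has set the initial extcon state.

	queue_delayed_work(system_wq, &data->work, 0);	/* run the handler immediately */
	flush_delayed_work(&data->work);		/* wait until it has completed */
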
 
index e2c3c5e..c53095b 100644 (file)
@@ -568,6 +568,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
        /* HG _PR3 doesn't seem to work on this A+A weston board */
        { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
        { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+       { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
        { 0, 0, 0, 0, 0 },
 };
 
index 8ca3783..74d2efa 100644 (file)
@@ -736,9 +736,11 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
        enum drm_connector_status ret = connector_status_disconnected;
        int r;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        if (encoder) {
                struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -757,8 +759,12 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
        /* check acpi lid status ??? */
 
        amdgpu_connector_update_scratch_regs(connector, ret);
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
+
        return ret;
 }
 
@@ -868,9 +874,11 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
        enum drm_connector_status ret = connector_status_disconnected;
        int r;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        encoder = amdgpu_connector_best_single_encoder(connector);
        if (!encoder)
@@ -924,8 +932,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
        amdgpu_connector_update_scratch_regs(connector, ret);
 
 out:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
 
        return ret;
 }
@@ -988,9 +998,11 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
        enum drm_connector_status ret = connector_status_disconnected;
        bool dret = false, broken_edid = false;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
                ret = connector->status;
@@ -1115,8 +1127,10 @@ out:
        amdgpu_connector_update_scratch_regs(connector, ret);
 
 exit:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
 
        return ret;
 }
@@ -1359,9 +1373,11 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
        struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
        int r;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
                ret = connector->status;
@@ -1429,8 +1445,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 
        amdgpu_connector_update_scratch_regs(connector, ret);
 out:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
 
        return ret;
 }
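The guard pattern above repeats in every connector ->detect path touched by this series (amdgpu here, nouveau and radeon further down): take a runtime-PM reference only when not running inside the output poll worker, because the poll worker already implies an awake device, and taking the reference there can deadlock against the autosuspend worker. A minimal sketch with a hypothetical my_detect() hook (assuming drm_kms_helper_is_poll_worker() is declared in <drm/drm_crtc_helper.h>, as this series does):

    #include <drm/drm_crtc_helper.h>
    #include <linux/pm_runtime.h>

    static enum drm_connector_status
    my_detect(struct drm_connector *connector, bool force)
    {
            enum drm_connector_status status = connector_status_disconnected;
            int r;

            if (!drm_kms_helper_is_poll_worker()) {
                    r = pm_runtime_get_sync(connector->dev->dev);
                    if (r < 0)
                            return connector_status_disconnected;
            }

            /* ... probe the hardware and set status ... */

            if (!drm_kms_helper_is_poll_worker()) {
                    pm_runtime_mark_last_busy(connector->dev->dev);
                    pm_runtime_put_autosuspend(connector->dev->dev);
            }

            return status;
    }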
index cd23b1b..c91b9b0 100644 (file)
@@ -294,22 +294,7 @@ static void cirrus_crtc_prepare(struct drm_crtc *crtc)
 {
 }
 
-/*
- * This is called after a mode is programmed. It should reverse anything done
- * by the prepare function
- */
-static void cirrus_crtc_commit(struct drm_crtc *crtc)
-{
-}
-
-/*
- * The core can pass us a set of gamma values to program. We actually only
- * use this for 8-bit mode so can't perform smooth fades on deeper modes,
- * but it's a requirement that we provide the function
- */
-static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-                                u16 *blue, uint32_t size,
-                                struct drm_modeset_acquire_ctx *ctx)
+static void cirrus_crtc_load_lut(struct drm_crtc *crtc)
 {
        struct drm_device *dev = crtc->dev;
        struct cirrus_device *cdev = dev->dev_private;
@@ -317,7 +302,7 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
        int i;
 
        if (!crtc->enabled)
-               return 0;
+               return;
 
        r = crtc->gamma_store;
        g = r + crtc->gamma_size;
@@ -330,6 +315,27 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
                WREG8(PALETTE_DATA, *g++ >> 8);
                WREG8(PALETTE_DATA, *b++ >> 8);
        }
+}
+
+/*
+ * This is called after a mode is programmed. It should reverse anything done
+ * by the prepare function
+ */
+static void cirrus_crtc_commit(struct drm_crtc *crtc)
+{
+       cirrus_crtc_load_lut(crtc);
+}
+
+/*
+ * The core can pass us a set of gamma values to program. We actually only
+ * use this for 8-bit mode so can't perform smooth fades on deeper modes,
+ * but it's a requirement that we provide the function
+ */
+static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+                                u16 *blue, uint32_t size,
+                                struct drm_modeset_acquire_ctx *ctx)
+{
+       cirrus_crtc_load_lut(crtc);
 
        return 0;
 }
index ab40321..ae3cbfe 100644 (file)
@@ -1878,6 +1878,8 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state,
                new_crtc_state->event->base.completion = &commit->flip_done;
                new_crtc_state->event->base.completion_release = release_crtc_commit;
                drm_crtc_commit_get(commit);
+
+               commit->abort_completion = true;
        }
 
        for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) {
@@ -3421,8 +3423,21 @@ EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state);
 void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state)
 {
        if (state->commit) {
+               /*
+                * In the event that a non-blocking commit returns
+                * -ERESTARTSYS before the commit_tail work is queued, we will
+                * have an extra reference to the commit object. Release it, if
+                * the event has not been consumed by the worker.
+                *
+                * state->event may be freed, so we can't directly look at
+                * state->event->base.completion.
+                */
+               if (state->event && state->commit->abort_completion)
+                       drm_crtc_commit_put(state->commit);
+
                kfree(state->commit->event);
                state->commit->event = NULL;
+
                drm_crtc_commit_put(state->commit);
        }
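These two hunks balance a reference count across an asynchronous hand-off: setup takes an extra reference on the commit when it wires the event to the commit's flip_done completion, and the destroy path drops that reference again if the worker never consumed the event. The shape of the idiom, reduced to a plain kref with hypothetical names:

    #include <linux/kref.h>
    #include <linux/slab.h>

    struct my_commit {
            struct kref ref;
            bool abort_completion;  /* extra ref handed to the worker? */
    };

    static void my_commit_release(struct kref *ref)
    {
            kfree(container_of(ref, struct my_commit, ref));
    }

    static void my_setup(struct my_commit *c)
    {
            kref_get(&c->ref);              /* now owned by the event */
            c->abort_completion = true;     /* remember to undo on abort */
    }

    static void my_destroy(struct my_commit *c, bool event_unconsumed)
    {
            if (event_unconsumed && c->abort_completion)
                    kref_put(&c->ref, my_commit_release);   /* undo my_setup() */
            kref_put(&c->ref, my_commit_release);           /* drop our own ref */
    }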
 
index ddd5379..4f751a9 100644 (file)
@@ -113,6 +113,9 @@ static const struct edid_quirk {
        /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
        { "AEO", 0, EDID_QUIRK_FORCE_6BPC },
 
+       /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */
+       { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC },
+
        /* Belinea 10 15 55 */
        { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
        { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
@@ -162,6 +165,24 @@ static const struct edid_quirk {
 
        /* HTC Vive VR Headset */
        { "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP },
+
+       /* Oculus Rift DK1, DK2, and CV1 VR Headsets */
+       { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP },
+       { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP },
+       { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP },
+
+       /* Windows Mixed Reality Headsets */
+       { "ACR", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+       { "HPN", 0x3515, EDID_QUIRK_NON_DESKTOP },
+       { "LEN", 0x0408, EDID_QUIRK_NON_DESKTOP },
+       { "LEN", 0xb800, EDID_QUIRK_NON_DESKTOP },
+       { "FUJ", 0x1970, EDID_QUIRK_NON_DESKTOP },
+       { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+       { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP },
+       { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP },
+
+       /* Sony PlayStation VR Headset */
+       { "SNY", 0x0704, EDID_QUIRK_NON_DESKTOP },
 };
 
 /*
index 186c4e9..89eef1b 100644 (file)
@@ -836,9 +836,24 @@ struct drm_mm_node *drm_mm_scan_color_evict(struct drm_mm_scan *scan)
        if (!mm->color_adjust)
                return NULL;
 
-       hole = list_first_entry(&mm->hole_stack, typeof(*hole), hole_stack);
-       hole_start = __drm_mm_hole_node_start(hole);
-       hole_end = hole_start + hole->hole_size;
+       /*
+        * The hole found during scanning should ideally be the first element
+        * in the hole_stack list, but due to side-effects in the driver it
+        * may not be.
+        */
+       list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
+               hole_start = __drm_mm_hole_node_start(hole);
+               hole_end = hole_start + hole->hole_size;
+
+               if (hole_start <= scan->hit_start &&
+                   hole_end >= scan->hit_end)
+                       break;
+       }
+
+       /* We should only be called after we found the hole previously */
+       DRM_MM_BUG_ON(&hole->hole_stack == &mm->hole_stack);
+       if (unlikely(&hole->hole_stack == &mm->hole_stack))
+               return NULL;
 
        DRM_MM_BUG_ON(hole_start > scan->hit_start);
        DRM_MM_BUG_ON(hole_end < scan->hit_end);
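The replacement loop relies on a property of list_for_each_entry(): when the loop finishes without a break, the cursor ends up addressing the list head itself, so comparing &hole->hole_stack against &mm->hole_stack is the standard "not found" test. A self-contained sketch with hypothetical item/find_ge() names (assuming <linux/list.h>):

    #include <linux/list.h>

    struct item {
            int val;
            struct list_head node;
    };

    /* Return the first item with val >= key, or NULL if none matches. */
    static struct item *find_ge(struct list_head *head, int key)
    {
            struct item *it;

            list_for_each_entry(it, head, node) {
                    if (it->val >= key)
                            break;
            }

            /* Without a break, 'it' wraps around to the list head. */
            if (&it->node == head)
                    return NULL;

            return it;
    }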
index 555fbe5..00b8445 100644 (file)
@@ -653,6 +653,26 @@ out:
                schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD);
 }
 
+/**
+ * drm_kms_helper_is_poll_worker - is %current task an output poll worker?
+ *
+ * Determine if %current task is an output poll worker.  This can be used
+ * to select distinct code paths for output polling versus other contexts.
+ *
+ * One use case is to avoid a deadlock between the output poll worker and
+ * the autosuspend worker wherein the latter waits for polling to finish
+ * upon calling drm_kms_helper_poll_disable(), while the former waits for
+ * runtime suspend to finish upon calling pm_runtime_get_sync() in a
+ * connector ->detect hook.
+ */
+bool drm_kms_helper_is_poll_worker(void)
+{
+       struct work_struct *work = current_work();
+
+       return work && work->func == output_poll_execute;
+}
+EXPORT_SYMBOL(drm_kms_helper_is_poll_worker);
+
 /**
  * drm_kms_helper_poll_disable - disable output polling
  * @dev: drm_device
index 2b8bf2d..f68ef1b 100644 (file)
@@ -286,7 +286,6 @@ static int g2d_init_cmdlist(struct g2d_data *g2d)
 
        node = kcalloc(G2D_CMDLIST_NUM, sizeof(*node), GFP_KERNEL);
        if (!node) {
-               dev_err(dev, "failed to allocate memory\n");
                ret = -ENOMEM;
                goto err;
        }
@@ -926,7 +925,7 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
        struct drm_device *drm_dev = g2d->subdrv.drm_dev;
        struct g2d_runqueue_node *runqueue_node = g2d->runqueue_node;
        struct drm_exynos_pending_g2d_event *e;
-       struct timeval now;
+       struct timespec64 now;
 
        if (list_empty(&runqueue_node->event_list))
                return;
@@ -934,9 +933,9 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
        e = list_first_entry(&runqueue_node->event_list,
                             struct drm_exynos_pending_g2d_event, base.link);
 
-       do_gettimeofday(&now);
+       ktime_get_ts64(&now);
        e->event.tv_sec = now.tv_sec;
-       e->event.tv_usec = now.tv_usec;
+       e->event.tv_usec = now.tv_nsec / NSEC_PER_USEC;
        e->event.cmdlist_no = cmdlist_no;
 
        drm_send_event(drm_dev, &e->base);
@@ -1358,10 +1357,9 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data,
                return -EFAULT;
 
        runqueue_node = kmem_cache_alloc(g2d->runqueue_slab, GFP_KERNEL);
-       if (!runqueue_node) {
-               dev_err(dev, "failed to allocate memory\n");
+       if (!runqueue_node)
                return -ENOMEM;
-       }
+
        run_cmdlist = &runqueue_node->run_cmdlist;
        event_list = &runqueue_node->event_list;
        INIT_LIST_HEAD(run_cmdlist);
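The g2d conversion above is the standard y2038 recipe: replace struct timeval and do_gettimeofday() with struct timespec64 and ktime_get_ts64(), then derive microseconds from tv_nsec. The core of it, as a sketch with a hypothetical fill_event() helper:

    #include <linux/time64.h>
    #include <linux/timekeeping.h>
    #include <linux/types.h>

    static void fill_event(u32 *tv_sec, u32 *tv_usec)
    {
            struct timespec64 now;

            ktime_get_ts64(&now);                   /* 64-bit safe timestamp */
            *tv_sec = now.tv_sec;
            *tv_usec = now.tv_nsec / NSEC_PER_USEC; /* ns -> us */
    }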
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.h b/drivers/gpu/drm/exynos/exynos_drm_rotator.h
deleted file mode 100644 (file)
index 71a0b4c..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- *     YoungJun Cho <yj44.cho@samsung.com>
- *     Eunchul Kim <chulspro.kim@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef        _EXYNOS_DRM_ROTATOR_H_
-#define        _EXYNOS_DRM_ROTATOR_H_
-
-/* TODO */
-
-#endif
index a4b75a4..abd84cb 100644 (file)
@@ -1068,10 +1068,13 @@ static void hdmi_audio_config(struct hdmi_context *hdata)
        /* Configuration I2S input ports. Configure I2S_PIN_SEL_0~4 */
        hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_0, HDMI_I2S_SEL_SCLK(5)
                        | HDMI_I2S_SEL_LRCK(6));
-       hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(1)
-                       | HDMI_I2S_SEL_SDATA2(4));
+
+       hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(3)
+                       | HDMI_I2S_SEL_SDATA0(4));
+
        hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_2, HDMI_I2S_SEL_SDATA3(1)
                        | HDMI_I2S_SEL_SDATA2(2));
+
        hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_3, HDMI_I2S_SEL_DSD(0));
 
        /* I2S_CON_1 & 2 */
index 3049613..d7cbe53 100644 (file)
 #define EXYNOS_CIIMGEFF_FIN_EMBOSSING          (4 << 26)
 #define EXYNOS_CIIMGEFF_FIN_SILHOUETTE         (5 << 26)
 #define EXYNOS_CIIMGEFF_FIN_MASK                       (7 << 26)
-#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK          ((0xff < 13) | (0xff < 0))
+#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK          ((0xff << 13) | (0xff << 0))
 
 /* Real input DMA size register */
 #define EXYNOS_CIREAL_ISIZE_AUTOLOAD_ENABLE    (1 << 31)
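The one-character fix above matters because '<' is a comparison, not a shift: both (0xff < 13) and (0xff < 0) evaluate to 0, so the broken macro expanded to a mask of 0 and cleared nothing. A runnable illustration in plain C:

    #include <stdio.h>

    int main(void)
    {
            unsigned int broken  = (0xff < 13) | (0xff < 0);   /* 0 | 0 = 0 */
            unsigned int correct = (0xff << 13) | (0xff << 0); /* real mask */

            printf("broken  = %#x\n", broken);   /* 0x0      */
            printf("correct = %#x\n", correct);  /* 0x1fe0ff */
            return 0;
    }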
index 04be0f7..4420c20 100644 (file)
 
 /* I2S_PIN_SEL_1 */
 #define HDMI_I2S_SEL_SDATA1(x)         (((x) & 0x7) << 4)
-#define HDMI_I2S_SEL_SDATA2(x)         ((x) & 0x7)
+#define HDMI_I2S_SEL_SDATA0(x)         ((x) & 0x7)
 
 /* I2S_PIN_SEL_2 */
 #define HDMI_I2S_SEL_SDATA3(x)         (((x) & 0x7) << 4)
index 909499b..021f722 100644 (file)
@@ -733,6 +733,25 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
        return ret == 0 ? count : ret;
 }
 
+static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
+{
+       struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+       struct intel_gvt *gvt = vgpu->gvt;
+       int offset;
+
+       /* Only allow MMIO GGTT entry access */
+       if (index != PCI_BASE_ADDRESS_0)
+               return false;
+
+       offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+               intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+
+       return (offset >= gvt->device_info.gtt_start_offset &&
+               offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+                       true : false;
+}
+
 static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
                        size_t count, loff_t *ppos)
 {
@@ -742,7 +761,21 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
        while (count) {
                size_t filled;
 
-               if (count >= 4 && !(*ppos % 4)) {
+               /* Only support 8-byte reads of GGTT entries */
+               if (count >= 8 && !(*ppos % 8) &&
+                       gtt_entry(mdev, ppos)) {
+                       u64 val;
+
+                       ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+                                       ppos, false);
+                       if (ret <= 0)
+                               goto read_err;
+
+                       if (copy_to_user(buf, &val, sizeof(val)))
+                               goto read_err;
+
+                       filled = 8;
+               } else if (count >= 4 && !(*ppos % 4)) {
                        u32 val;
 
                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
@@ -802,7 +835,21 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev,
        while (count) {
                size_t filled;
 
-               if (count >= 4 && !(*ppos % 4)) {
+               /* Only support 8-byte writes of GGTT entries */
+               if (count >= 8 && !(*ppos % 8) &&
+                       gtt_entry(mdev, ppos)) {
+                       u64 val;
+
+                       if (copy_from_user(&val, buf, sizeof(val)))
+                               goto write_err;
+
+                       ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+                                       ppos, true);
+                       if (ret <= 0)
+                               goto write_err;
+
+                       filled = 8;
+               } else if (count >= 4 && !(*ppos % 4)) {
                        u32 val;
 
                        if (copy_from_user(&val, buf, sizeof(val)))
index 73ad6e9..256f1bb 100644 (file)
@@ -118,6 +118,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
        {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
        {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
        {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+       {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
        {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
        {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
        {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
index 7a25115..736bd2b 100644 (file)
@@ -333,7 +333,7 @@ TRACE_EVENT(render_mmio,
        TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
                 unsigned int old_val, unsigned int new_val),
 
-       TP_ARGS(old_id, new_id, action, reg, new_val, old_val),
+       TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
 
        TP_STRUCT__entry(
                __field(int, old_id)
index 173d009..2f5209d 100644 (file)
@@ -1433,19 +1433,7 @@ void i915_driver_unload(struct drm_device *dev)
 
        intel_modeset_cleanup(dev);
 
-       /*
-        * free the memory space allocated for the child device
-        * config parsed from VBT
-        */
-       if (dev_priv->vbt.child_dev && dev_priv->vbt.child_dev_num) {
-               kfree(dev_priv->vbt.child_dev);
-               dev_priv->vbt.child_dev = NULL;
-               dev_priv->vbt.child_dev_num = 0;
-       }
-       kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
-       dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
-       kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
-       dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
+       intel_bios_cleanup(dev_priv);
 
        vga_switcheroo_unregister_client(pdev);
        vga_client_register(pdev, NULL, NULL, NULL);
index a42deeb..d307429 100644 (file)
@@ -1349,6 +1349,7 @@ struct intel_vbt_data {
                u32 size;
                u8 *data;
                const u8 *sequence[MIPI_SEQ_MAX];
+               u8 *deassert_seq; /* Used by fixup_mipi_sequences() */
        } dsi;
 
        int crt_ddc_pin;
@@ -3657,6 +3658,7 @@ extern void intel_i2c_reset(struct drm_i915_private *dev_priv);
 
 /* intel_bios.c */
 void intel_bios_init(struct drm_i915_private *dev_priv);
+void intel_bios_cleanup(struct drm_i915_private *dev_priv);
 bool intel_bios_is_valid_vbt(const void *buf, size_t size);
 bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
 bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
index 648e753..0c963fc 100644 (file)
@@ -803,7 +803,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 
        case I915_CONTEXT_PARAM_PRIORITY:
                {
-                       int priority = args->value;
+                       s64 priority = args->value;
 
                        if (args->size)
                                ret = -EINVAL;
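args->value in this ioctl is a 64-bit field, so storing it in a plain int truncated it before the range checks ran; widening the local to s64 lets out-of-range priorities be rejected rather than silently wrapped. A runnable illustration of the truncation in plain C, with made-up values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            int64_t value  = 0x100000000LL; /* out-of-range input */
            int     narrow = value;         /* truncates (to 0 on common ABIs) */
            int64_t wide   = value;         /* preserved */

            printf("narrow = %d  (a bounds check would wrongly pass)\n", narrow);
            printf("wide   = %lld (a bounds check correctly fails)\n",
                   (long long)wide);
            return 0;
    }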
index 42ff06f..792facd 100644 (file)
@@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
 void
 i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv)
 {
-       strncpy(dev_priv->perf.oa.test_config.uuid,
+       strlcpy(dev_priv->perf.oa.test_config.uuid,
                "577e8e2c-3fa0-4875-8743-3538d585e3b0",
-               UUID_STRING_LEN);
+               sizeof(dev_priv->perf.oa.test_config.uuid));
        dev_priv->perf.oa.test_config.id = 1;
 
        dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
index ff0ac36..ba9140c 100644 (file)
@@ -96,9 +96,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
 void
 i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv)
 {
-       strncpy(dev_priv->perf.oa.test_config.uuid,
+       strlcpy(dev_priv->perf.oa.test_config.uuid,
                "db41edd4-d8e7-4730-ad11-b9a2d6833503",
-               UUID_STRING_LEN);
+               sizeof(dev_priv->perf.oa.test_config.uuid));
        dev_priv->perf.oa.test_config.id = 1;
 
        dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
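Both strlcpy conversions above fix the same latent bug: strncpy() with a size equal to the source length copies no terminating NUL, so the 36-character UUID could leave the field unterminated, while strlcpy() with the full buffer size always terminates. A runnable contrast in plain C (strlcpy is emulated with snprintf, since glibc does not provide it):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char *uuid = "577e8e2c-3fa0-4875-8743-3538d585e3b0"; /* 36 chars */
            char buf[40];

            memset(buf, 'X', sizeof(buf));
            strncpy(buf, uuid, 36);  /* 36 bytes copied, no NUL appended */
            /* buf is not a valid C string here: buf[36] is still 'X' */

            snprintf(buf, sizeof(buf), "%s", uuid); /* strlcpy-like: always NUL */
            printf("%s\n", buf);
            return 0;
    }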
index 55a8a1e..0e9b98c 100644 (file)
@@ -285,26 +285,41 @@ static u64 count_interrupts(struct drm_i915_private *i915)
        return sum;
 }
 
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
 {
-       WARN_ON(event->parent);
+       struct drm_i915_private *i915 =
+               container_of(event->pmu, typeof(*i915), pmu.base);
+       struct intel_engine_cs *engine;
+
+       engine = intel_engine_lookup_user(i915,
+                                         engine_event_class(event),
+                                         engine_event_instance(event));
+       if (WARN_ON_ONCE(!engine))
+               return;
+
+       if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+           intel_engine_supports_stats(engine))
+               intel_disable_engine_stats(engine);
 }
 
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
-       struct drm_i915_private *i915 =
-               container_of(event->pmu, typeof(*i915), pmu.base);
+       WARN_ON(event->parent);
 
-       if (!intel_engine_lookup_user(i915, engine_event_class(event),
-                                     engine_event_instance(event)))
-               return -ENODEV;
+       if (is_engine_event(event))
+               engine_event_destroy(event);
+}
 
-       switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+                   enum drm_i915_pmu_engine_sample sample)
+{
+       switch (sample) {
        case I915_SAMPLE_BUSY:
        case I915_SAMPLE_WAIT:
                break;
        case I915_SAMPLE_SEMA:
-               if (INTEL_GEN(i915) < 6)
+               if (INTEL_GEN(engine->i915) < 6)
                        return -ENODEV;
                break;
        default:
@@ -314,6 +329,30 @@ static int engine_event_init(struct perf_event *event)
        return 0;
 }
 
+static int engine_event_init(struct perf_event *event)
+{
+       struct drm_i915_private *i915 =
+               container_of(event->pmu, typeof(*i915), pmu.base);
+       struct intel_engine_cs *engine;
+       u8 sample;
+       int ret;
+
+       engine = intel_engine_lookup_user(i915, engine_event_class(event),
+                                         engine_event_instance(event));
+       if (!engine)
+               return -ENODEV;
+
+       sample = engine_event_sample(event);
+       ret = engine_event_status(engine, sample);
+       if (ret)
+               return ret;
+
+       if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+               ret = intel_enable_engine_stats(engine);
+
+       return ret;
+}
+
 static int i915_pmu_event_init(struct perf_event *event)
 {
        struct drm_i915_private *i915 =
@@ -370,7 +409,94 @@ static int i915_pmu_event_init(struct perf_event *event)
        return 0;
 }
 
-static u64 __i915_pmu_event_read(struct perf_event *event)
+static u64 __get_rc6(struct drm_i915_private *i915)
+{
+       u64 val;
+
+       val = intel_rc6_residency_ns(i915,
+                                    IS_VALLEYVIEW(i915) ?
+                                    VLV_GT_RENDER_RC6 :
+                                    GEN6_GT_GFX_RC6);
+
+       if (HAS_RC6p(i915))
+               val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+       if (HAS_RC6pp(i915))
+               val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+       return val;
+}
+
+static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+{
+#if IS_ENABLED(CONFIG_PM)
+       unsigned long flags;
+       u64 val;
+
+       if (intel_runtime_pm_get_if_in_use(i915)) {
+               val = __get_rc6(i915);
+               intel_runtime_pm_put(i915);
+
+               /*
+                * If we are coming back from being runtime suspended we must
+                * be careful not to report a smaller value than returned
+                * previously, so that the counter stays monotonic.
+                */
+
+               if (!locked)
+                       spin_lock_irqsave(&i915->pmu.lock, flags);
+
+               if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+                       i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+                       i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+               } else {
+                       val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+               }
+
+               if (!locked)
+                       spin_unlock_irqrestore(&i915->pmu.lock, flags);
+       } else {
+               struct pci_dev *pdev = i915->drm.pdev;
+               struct device *kdev = &pdev->dev;
+               unsigned long flags2;
+
+               /*
+                * We are runtime suspended.
+                *
+                * Report the delta from when the device was suspended to now,
+                * on top of the last known real value, as the approximated RC6
+                * counter value.
+                */
+               if (!locked)
+                       spin_lock_irqsave(&i915->pmu.lock, flags);
+
+               spin_lock_irqsave(&kdev->power.lock, flags2);
+
+               if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+                       i915->pmu.suspended_jiffies_last =
+                                               kdev->power.suspended_jiffies;
+
+               val = kdev->power.suspended_jiffies -
+                     i915->pmu.suspended_jiffies_last;
+               val += jiffies - kdev->power.accounting_timestamp;
+
+               spin_unlock_irqrestore(&kdev->power.lock, flags2);
+
+               val = jiffies_to_nsecs(val);
+               val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+               i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+               if (!locked)
+                       spin_unlock_irqrestore(&i915->pmu.lock, flags);
+       }
+
+       return val;
+#else
+       return __get_rc6(i915);
+#endif
+}
+
+static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
 {
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
@@ -387,7 +513,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
                if (WARN_ON_ONCE(!engine)) {
                        /* Do nothing */
                } else if (sample == I915_SAMPLE_BUSY &&
-                          engine->pmu.busy_stats) {
+                          intel_engine_supports_stats(engine)) {
                        val = ktime_to_ns(intel_engine_get_busy_time(engine));
                } else {
                        val = engine->pmu.sample[sample].cur;
@@ -408,18 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
                        val = count_interrupts(i915);
                        break;
                case I915_PMU_RC6_RESIDENCY:
-                       intel_runtime_pm_get(i915);
-                       val = intel_rc6_residency_ns(i915,
-                                                    IS_VALLEYVIEW(i915) ?
-                                                    VLV_GT_RENDER_RC6 :
-                                                    GEN6_GT_GFX_RC6);
-                       if (HAS_RC6p(i915))
-                               val += intel_rc6_residency_ns(i915,
-                                                             GEN6_GT_GFX_RC6p);
-                       if (HAS_RC6pp(i915))
-                               val += intel_rc6_residency_ns(i915,
-                                                             GEN6_GT_GFX_RC6pp);
-                       intel_runtime_pm_put(i915);
+                       val = get_rc6(i915, locked);
                        break;
                }
        }
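The get_rc6() rework keeps the exported counter monotonic: while the device is runtime suspended it reports an estimate (last real value plus suspended time), and after resume it never returns less than the largest estimate already handed out. The clamping step in isolation, as a sketch with hypothetical names:

    #include <stdint.h>

    /* Return a counter value that never goes backwards, given a fresh
     * hardware reading and the largest estimate reported while asleep. */
    static uint64_t clamp_monotonic(uint64_t hw, uint64_t *estimated)
    {
            if (hw >= *estimated) {
                    *estimated = 0; /* hardware caught up; drop the estimate */
                    return hw;
            }
            return *estimated;      /* hold until the hardware catches up */
    }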
@@ -434,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event)
 
 again:
        prev = local64_read(&hwc->prev_count);
-       new = __i915_pmu_event_read(event);
+       new = __i915_pmu_event_read(event, false);
 
        if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
                goto again;
@@ -442,12 +557,6 @@ again:
        local64_add(new - prev, &event->count);
 }
 
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
-       return intel_engine_supports_stats(engine) &&
-              (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
 static void i915_pmu_enable(struct perf_event *event)
 {
        struct drm_i915_private *i915 =
@@ -487,21 +596,7 @@ static void i915_pmu_enable(struct perf_event *event)
 
                GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-               if (engine->pmu.enable_count[sample]++ == 0) {
-                       /*
-                        * Enable engine busy stats tracking if needed or
-                        * alternatively cancel the scheduled disable.
-                        *
-                        * If the delayed disable was pending, cancel it and
-                        * in this case do not enable since it already is.
-                        */
-                       if (engine_needs_busy_stats(engine) &&
-                           !engine->pmu.busy_stats) {
-                               engine->pmu.busy_stats = true;
-                               if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-                                       intel_enable_engine_stats(engine);
-                       }
-               }
+               engine->pmu.enable_count[sample]++;
        }
 
        /*
@@ -509,19 +604,11 @@ static void i915_pmu_enable(struct perf_event *event)
         * for all listeners. Even when the event was already enabled and has
         * an existing non-zero value.
         */
-       local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+       local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
 
        spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
-static void __disable_busy_stats(struct work_struct *work)
-{
-       struct intel_engine_cs *engine =
-              container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
-       intel_disable_engine_stats(engine);
-}
-
 static void i915_pmu_disable(struct perf_event *event)
 {
        struct drm_i915_private *i915 =
@@ -545,26 +632,8 @@ static void i915_pmu_disable(struct perf_event *event)
                 * Decrement the reference count and clear the enabled
                 * bitmask when the last listener on an event goes away.
                 */
-               if (--engine->pmu.enable_count[sample] == 0) {
+               if (--engine->pmu.enable_count[sample] == 0)
                        engine->pmu.enable &= ~BIT(sample);
-                       if (!engine_needs_busy_stats(engine) &&
-                           engine->pmu.busy_stats) {
-                               engine->pmu.busy_stats = false;
-                               /*
-                                * We request a delayed disable to handle the
-                                * rapid on/off cycles on events, which can
-                                * happen when tools like perf stat start, in a
-                                * nicer way.
-                                *
-                                * In addition, this also helps with busy stats
-                                * accuracy with background CPU offline/online
-                                * migration events.
-                                */
-                               queue_delayed_work(system_wq,
-                                                  &engine->pmu.disable_busy_stats,
-                                                  round_jiffies_up_relative(HZ));
-                       }
-               }
        }
 
        GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -797,8 +866,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
        int ret;
 
        if (INTEL_GEN(i915) <= 2) {
@@ -820,10 +887,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
        hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        i915->pmu.timer.function = i915_sample;
 
-       for_each_engine(engine, i915, id)
-               INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
-                                 __disable_busy_stats);
-
        ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
        if (ret)
                goto err;
@@ -843,9 +906,6 @@ err:
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
        if (!i915->pmu.base.event_init)
                return;
 
@@ -853,11 +913,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
        hrtimer_cancel(&i915->pmu.timer);
 
-       for_each_engine(engine, i915, id) {
-               GEM_BUG_ON(engine->pmu.busy_stats);
-               flush_delayed_work(&engine->pmu.disable_busy_stats);
-       }
-
        i915_pmu_unregister_cpuhp_state(i915);
 
        perf_pmu_unregister(&i915->pmu.base);
index 40c154d..bb62df1 100644 (file)
@@ -27,6 +27,8 @@
 enum {
        __I915_SAMPLE_FREQ_ACT = 0,
        __I915_SAMPLE_FREQ_REQ,
+       __I915_SAMPLE_RC6,
+       __I915_SAMPLE_RC6_ESTIMATED,
        __I915_NUM_PMU_SAMPLERS
 };
 
@@ -94,6 +96,10 @@ struct i915_pmu {
         * struct intel_engine_cs.
         */
        struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
+       /**
+        * @suspended_jiffies_last: Cached suspend time from PM core.
+        */
+       unsigned long suspended_jiffies_last;
 };
 
 #ifdef CONFIG_PERF_EVENTS
index f7f7717..b49a2df 100644 (file)
@@ -947,6 +947,86 @@ static int goto_next_sequence_v3(const u8 *data, int index, int total)
        return 0;
 }
 
+/*
+ * Get len of pre-fixed deassert fragment from a v1 init OTP sequence,
+ * skip all delay + gpio operands and stop at the first DSI packet op.
+ */
+static int get_init_otp_deassert_fragment_len(struct drm_i915_private *dev_priv)
+{
+       const u8 *data = dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
+       int index, len;
+
+       if (WARN_ON(!data || dev_priv->vbt.dsi.seq_version != 1))
+               return 0;
+
+       /* index = 1 to skip sequence byte */
+       for (index = 1; data[index] != MIPI_SEQ_ELEM_END; index += len) {
+               switch (data[index]) {
+               case MIPI_SEQ_ELEM_SEND_PKT:
+                       return index == 1 ? 0 : index;
+               case MIPI_SEQ_ELEM_DELAY:
+                       len = 5; /* 1 byte for operand + uint32 */
+                       break;
+               case MIPI_SEQ_ELEM_GPIO:
+                       len = 3; /* 1 byte for op, 1 for gpio_nr, 1 for value */
+                       break;
+               default:
+                       return 0;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence.
+ * The deassert must be done before calling intel_dsi_device_ready, so for
+ * these devices we split the init OTP sequence into a deassert sequence and
+ * the actual init OTP part.
+ */
+static void fixup_mipi_sequences(struct drm_i915_private *dev_priv)
+{
+       u8 *init_otp;
+       int len;
+
+       /* Limit this to VLV for now. */
+       if (!IS_VALLEYVIEW(dev_priv))
+               return;
+
+       /* Limit this to v1 vid-mode sequences */
+       if (dev_priv->vbt.dsi.config->is_cmd_mode ||
+           dev_priv->vbt.dsi.seq_version != 1)
+               return;
+
+       /* Only do this if there are otp and assert seqs and no deassert seq */
+       if (!dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] ||
+           !dev_priv->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET] ||
+           dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET])
+               return;
+
+       /* The deassert-sequence ends at the first DSI packet */
+       len = get_init_otp_deassert_fragment_len(dev_priv);
+       if (!len)
+               return;
+
+       DRM_DEBUG_KMS("Using init OTP fragment to deassert reset\n");
+
+       /* Copy the fragment, update seq byte and terminate it */
+       init_otp = (u8 *)dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
+       dev_priv->vbt.dsi.deassert_seq = kmemdup(init_otp, len + 1, GFP_KERNEL);
+       if (!dev_priv->vbt.dsi.deassert_seq)
+               return;
+       dev_priv->vbt.dsi.deassert_seq[0] = MIPI_SEQ_DEASSERT_RESET;
+       dev_priv->vbt.dsi.deassert_seq[len] = MIPI_SEQ_ELEM_END;
+       /* Use the copy for deassert */
+       dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET] =
+               dev_priv->vbt.dsi.deassert_seq;
+       /* Replace the last byte of the fragment with init OTP seq byte */
+       init_otp[len - 1] = MIPI_SEQ_INIT_OTP;
+       /* And make MIPI_SEQ_INIT_OTP point to it */
+       dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
+}
+
 static void
 parse_mipi_sequence(struct drm_i915_private *dev_priv,
                    const struct bdb_header *bdb)
@@ -1016,6 +1096,8 @@ parse_mipi_sequence(struct drm_i915_private *dev_priv,
        dev_priv->vbt.dsi.size = seq_size;
        dev_priv->vbt.dsi.seq_version = sequence->version;
 
+       fixup_mipi_sequences(dev_priv);
+
        DRM_DEBUG_DRIVER("MIPI related VBT parsing complete\n");
        return;
 
@@ -1588,6 +1670,29 @@ out:
                pci_unmap_rom(pdev, bios);
 }
 
+/**
+ * intel_bios_cleanup - Free any resources allocated by intel_bios_init()
+ * @dev_priv: i915 device instance
+ */
+void intel_bios_cleanup(struct drm_i915_private *dev_priv)
+{
+       kfree(dev_priv->vbt.child_dev);
+       dev_priv->vbt.child_dev = NULL;
+       dev_priv->vbt.child_dev_num = 0;
+       kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
+       dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
+       kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
+       dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
+       kfree(dev_priv->vbt.dsi.data);
+       dev_priv->vbt.dsi.data = NULL;
+       kfree(dev_priv->vbt.dsi.pps);
+       dev_priv->vbt.dsi.pps = NULL;
+       kfree(dev_priv->vbt.dsi.config);
+       dev_priv->vbt.dsi.config = NULL;
+       kfree(dev_priv->vbt.dsi.deassert_seq);
+       dev_priv->vbt.dsi.deassert_seq = NULL;
+}
+
 /**
  * intel_bios_is_tv_present - is integrated TV present in VBT
  * @dev_priv:  i915 device instance
index bd40fea..f54ddda 100644 (file)
@@ -594,29 +594,16 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
        spin_unlock_irq(&b->rb_lock);
 }
 
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
-       return intel_wait_check_request(&request->signaling.wait, request);
-}
-
 static bool signal_complete(const struct drm_i915_gem_request *request)
 {
        if (!request)
                return false;
 
-       /* If another process served as the bottom-half it may have already
-        * signalled that this wait is already completed.
-        */
-       if (intel_wait_complete(&request->signaling.wait))
-               return signal_valid(request);
-
-       /* Carefully check if the request is complete, giving time for the
+       /*
+        * Carefully check if the request is complete, giving time for the
         * seqno to be visible or if the GPU hung.
         */
-       if (__i915_request_irq_complete(request))
-               return true;
-
-       return false;
+       return __i915_request_irq_complete(request);
 }
 
 static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
@@ -659,9 +646,13 @@ static int intel_breadcrumbs_signaler(void *arg)
                        request = i915_gem_request_get_rcu(request);
                rcu_read_unlock();
                if (signal_complete(request)) {
-                       local_bh_disable();
-                       dma_fence_signal(&request->fence);
-                       local_bh_enable(); /* kick start the tasklets */
+                       if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                     &request->fence.flags)) {
+                               local_bh_disable();
+                               dma_fence_signal(&request->fence);
+                               GEM_BUG_ON(!i915_gem_request_completed(request));
+                               local_bh_enable(); /* kick start the tasklets */
+                       }
 
                        spin_lock_irq(&b->rb_lock);
 
index 5dc118f..1704c88 100644 (file)
@@ -1952,6 +1952,14 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
        if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9)
                min_cdclk = max(2 * 96000, min_cdclk);
 
+       /*
+        * On Valleyview some DSI panels lose (v|h)sync when the clock is lower
+        * than 320000 kHz.
+        */
+       if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) &&
+           IS_VALLEYVIEW(dev_priv))
+               min_cdclk = max(320000, min_cdclk);
+
        if (min_cdclk > dev_priv->max_cdclk_freq) {
                DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n",
                              min_cdclk, dev_priv->max_cdclk_freq);
index d790bdc..fa960cf 100644 (file)
@@ -1458,7 +1458,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
        struct drm_i915_private *dev_priv = engine->i915;
        bool idle = true;
 
-       intel_runtime_pm_get(dev_priv);
+       /* If the whole device is asleep, the engine must be idle */
+       if (!intel_runtime_pm_get_if_in_use(dev_priv))
+               return true;
 
        /* First check that no commands are left in the ring */
        if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
@@ -1943,16 +1945,22 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
  */
 int intel_enable_engine_stats(struct intel_engine_cs *engine)
 {
+       struct intel_engine_execlists *execlists = &engine->execlists;
        unsigned long flags;
+       int err = 0;
 
        if (!intel_engine_supports_stats(engine))
                return -ENODEV;
 
+       tasklet_disable(&execlists->tasklet);
        spin_lock_irqsave(&engine->stats.lock, flags);
-       if (engine->stats.enabled == ~0)
-               goto busy;
+
+       if (unlikely(engine->stats.enabled == ~0)) {
+               err = -EBUSY;
+               goto unlock;
+       }
+
        if (engine->stats.enabled++ == 0) {
-               struct intel_engine_execlists *execlists = &engine->execlists;
                const struct execlist_port *port = execlists->port;
                unsigned int num_ports = execlists_num_ports(execlists);
 
@@ -1967,14 +1975,12 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
                if (engine->stats.active)
                        engine->stats.start = engine->stats.enabled_at;
        }
-       spin_unlock_irqrestore(&engine->stats.lock, flags);
-
-       return 0;
 
-busy:
+unlock:
        spin_unlock_irqrestore(&engine->stats.lock, flags);
+       tasklet_enable(&execlists->tasklet);
 
-       return -EBUSY;
+       return err;
 }
 
 static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
index c5ff203..a0e7a6c 100644 (file)
@@ -366,20 +366,6 @@ struct intel_engine_cs {
                 */
 #define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
                struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
-               /**
-                * @busy_stats: Has enablement of engine stats tracking been
-                *              requested.
-                */
-               bool busy_stats;
-               /**
-                * @disable_busy_stats: Work item for busy stats disabling.
-                *
-                * Same as with @enable_busy_stats action, with the difference
-                * that we delay it in case there are rapid enable-disable
-                * actions, which can happen during tool startup (like perf
-                * stat).
-                */
-               struct delayed_work disable_busy_stats;
        } pmu;
 
        /*
index 5155f01..0552020 100644 (file)
@@ -36,6 +36,7 @@
 #include "meson_venc.h"
 #include "meson_vpp.h"
 #include "meson_viu.h"
+#include "meson_canvas.h"
 #include "meson_registers.h"
 
 /* CRTC definition */
@@ -192,6 +193,11 @@ void meson_crtc_irq(struct meson_drm *priv)
                } else
                        meson_vpp_disable_interlace_vscaler_osd1(priv);
 
+               meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1,
+                          priv->viu.osd1_addr, priv->viu.osd1_stride,
+                          priv->viu.osd1_height, MESON_CANVAS_WRAP_NONE,
+                          MESON_CANVAS_BLKMODE_LINEAR);
+
                /* Enable OSD1 */
                writel_bits_relaxed(VPP_OSD1_POSTBLEND, VPP_OSD1_POSTBLEND,
                                    priv->io_base + _REG(VPP_MISC));
index 5e8b392..8450d6a 100644 (file)
@@ -43,6 +43,9 @@ struct meson_drm {
                bool osd1_commit;
                uint32_t osd1_ctrl_stat;
                uint32_t osd1_blk0_cfg[5];
+               uint32_t osd1_addr;
+               uint32_t osd1_stride;
+               uint32_t osd1_height;
        } viu;
 
        struct {
index d0a6ac8..27bd350 100644 (file)
@@ -164,10 +164,9 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
        /* Update Canvas with buffer address */
        gem = drm_fb_cma_get_gem_obj(fb, 0);
 
-       meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1,
-                          gem->paddr, fb->pitches[0],
-                          fb->height, MESON_CANVAS_WRAP_NONE,
-                          MESON_CANVAS_BLKMODE_LINEAR);
+       priv->viu.osd1_addr = gem->paddr;
+       priv->viu.osd1_stride = fb->pitches[0];
+       priv->viu.osd1_height = fb->height;
 
        spin_unlock_irqrestore(&priv->drm->event_lock, flags);
 }
index 69d6e61..6ed9cb0 100644 (file)
@@ -570,9 +570,15 @@ nouveau_connector_detect(struct drm_connector *connector, bool force)
                nv_connector->edid = NULL;
        }
 
-       ret = pm_runtime_get_sync(connector->dev->dev);
-       if (ret < 0 && ret != -EACCES)
-               return conn_status;
+       /* Outputs are only polled while runtime active, so acquiring a
+        * runtime PM ref here is unnecessary (and would deadlock upon
+        * runtime suspend because it waits for polling to finish).
+        */
+       if (!drm_kms_helper_is_poll_worker()) {
+               ret = pm_runtime_get_sync(connector->dev->dev);
+               if (ret < 0 && ret != -EACCES)
+                       return conn_status;
+       }
 
        nv_encoder = nouveau_connector_ddc_detect(connector);
        if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) {
@@ -647,8 +653,10 @@ detect_analog:
 
  out:
 
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
 
        return conn_status;
 }
index bf62303..3695cde 100644 (file)
@@ -301,7 +301,7 @@ nvkm_therm_attr_set(struct nvkm_therm *therm,
 void
 nvkm_therm_clkgate_enable(struct nvkm_therm *therm)
 {
-       if (!therm->func->clkgate_enable || !therm->clkgating_enabled)
+       if (!therm || !therm->func->clkgate_enable || !therm->clkgating_enabled)
                return;
 
        nvkm_debug(&therm->subdev,
@@ -312,7 +312,7 @@ nvkm_therm_clkgate_enable(struct nvkm_therm *therm)
 void
 nvkm_therm_clkgate_fini(struct nvkm_therm *therm, bool suspend)
 {
-       if (!therm->func->clkgate_fini || !therm->clkgating_enabled)
+       if (!therm || !therm->func->clkgate_fini || !therm->clkgating_enabled)
                return;
 
        nvkm_debug(&therm->subdev,
@@ -395,7 +395,7 @@ void
 nvkm_therm_clkgate_init(struct nvkm_therm *therm,
                        const struct nvkm_therm_clkgate_pack *p)
 {
-       if (!therm->func->clkgate_init || !therm->clkgating_enabled)
+       if (!therm || !therm->func->clkgate_init || !therm->clkgating_enabled)
                return;
 
        therm->func->clkgate_init(therm, p);
index 5012f5e..2e2ca3c 100644 (file)
@@ -899,9 +899,11 @@ radeon_lvds_detect(struct drm_connector *connector, bool force)
        enum drm_connector_status ret = connector_status_disconnected;
        int r;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        if (encoder) {
                struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
@@ -924,8 +926,12 @@ radeon_lvds_detect(struct drm_connector *connector, bool force)
        /* check acpi lid status ??? */
 
        radeon_connector_update_scratch_regs(connector, ret);
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
+
        return ret;
 }
 
@@ -1039,9 +1045,11 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
        enum drm_connector_status ret = connector_status_disconnected;
        int r;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        encoder = radeon_best_single_encoder(connector);
        if (!encoder)
@@ -1108,8 +1116,10 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
        radeon_connector_update_scratch_regs(connector, ret);
 
 out:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
 
        return ret;
 }
@@ -1173,9 +1183,11 @@ radeon_tv_detect(struct drm_connector *connector, bool force)
        if (!radeon_connector->dac_load_detect)
                return ret;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        encoder = radeon_best_single_encoder(connector);
        if (!encoder)
@@ -1187,8 +1199,12 @@ radeon_tv_detect(struct drm_connector *connector, bool force)
        if (ret == connector_status_connected)
                ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, false);
        radeon_connector_update_scratch_regs(connector, ret);
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
+
        return ret;
 }
 
@@ -1251,9 +1267,11 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
        enum drm_connector_status ret = connector_status_disconnected;
        bool dret = false, broken_edid = false;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        if (radeon_connector->detected_hpd_without_ddc) {
                force = true;
@@ -1436,8 +1454,10 @@ out:
        }
 
 exit:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
 
        return ret;
 }
@@ -1688,9 +1708,11 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
        if (radeon_dig_connector->is_mst)
                return connector_status_disconnected;
 
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
 
        if (!force && radeon_check_hpd_status_unchanged(connector)) {
                ret = connector->status;
@@ -1777,8 +1799,10 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
        }
 
 out:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
 
        return ret;
 }
index 658fa2d..48685cd 100644 (file)
@@ -1089,7 +1089,7 @@ static void ipu_irq_handler(struct irq_desc *desc)
 {
        struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
        struct irq_chip *chip = irq_desc_get_chip(desc);
-       const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
+       static const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
 
        chained_irq_enter(chip, desc);
 
@@ -1102,7 +1102,7 @@ static void ipu_err_irq_handler(struct irq_desc *desc)
 {
        struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
        struct irq_chip *chip = irq_desc_get_chip(desc);
-       const int int_reg[] = { 4, 5, 8, 9};
+       static const int int_reg[] = { 4, 5, 8, 9};
 
        chained_irq_enter(chip, desc);
 
index bb9c087..9f2d9ec 100644 (file)
@@ -788,12 +788,14 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image)
        case V4L2_PIX_FMT_SGBRG8:
        case V4L2_PIX_FMT_SGRBG8:
        case V4L2_PIX_FMT_SRGGB8:
+       case V4L2_PIX_FMT_GREY:
                offset = image->rect.left + image->rect.top * pix->bytesperline;
                break;
        case V4L2_PIX_FMT_SBGGR16:
        case V4L2_PIX_FMT_SGBRG16:
        case V4L2_PIX_FMT_SGRBG16:
        case V4L2_PIX_FMT_SRGGB16:
+       case V4L2_PIX_FMT_Y16:
                offset = image->rect.left * 2 +
                         image->rect.top * pix->bytesperline;
                break;
index 24e12b8..caa05b0 100644 (file)
@@ -288,6 +288,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code)
        case MEDIA_BUS_FMT_SGBRG10_1X10:
        case MEDIA_BUS_FMT_SGRBG10_1X10:
        case MEDIA_BUS_FMT_SRGGB10_1X10:
+       case MEDIA_BUS_FMT_Y10_1X10:
                cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER;
                cfg->mipi_dt = MIPI_DT_RAW10;
                cfg->data_width = IPU_CSI_DATA_WIDTH_10;
@@ -296,6 +297,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code)
        case MEDIA_BUS_FMT_SGBRG12_1X12:
        case MEDIA_BUS_FMT_SGRBG12_1X12:
        case MEDIA_BUS_FMT_SRGGB12_1X12:
+       case MEDIA_BUS_FMT_Y12_1X12:
                cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER;
                cfg->mipi_dt = MIPI_DT_RAW12;
                cfg->data_width = IPU_CSI_DATA_WIDTH_12;
index f1cec3d..0f70e88 100644 (file)
@@ -129,11 +129,14 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index)
                if (pre_node == pre->dev->of_node) {
                        mutex_unlock(&ipu_pre_list_mutex);
                        device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE);
+                       of_node_put(pre_node);
                        return pre;
                }
        }
        mutex_unlock(&ipu_pre_list_mutex);
 
+       of_node_put(pre_node);
+
        return NULL;
 }
 
index 067365c..97b9950 100644 (file)
@@ -102,11 +102,14 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id)
                        mutex_unlock(&ipu_prg_list_mutex);
                        device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE);
                        prg->id = ipu_id;
+                       of_node_put(prg_node);
                        return prg;
                }
        }
        mutex_unlock(&ipu_prg_list_mutex);
 
+       of_node_put(prg_node);
+
        return NULL;
 }
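
Both lookup fixes enforce the same rule: of_parse_phandle() returns a device node with its reference count raised, so the caller must drop that reference on every exit path, the match and the miss alike. A user-space sketch of the invariant (the refcounted type and helpers are stand-ins, not the OF API):

#include <stdio.h>

struct node { int refs; };

static struct node *node_get(struct node *n) { n->refs++; return n; }
static void node_put(struct node *n) { n->refs--; }

static int lookup(struct node *target, int match)
{
        struct node *n = node_get(target);  /* like of_parse_phandle() */
        int found = match && n == target;

        node_put(n);                        /* dropped on both paths */
        return found;
}

int main(void)
{
        struct node t = { 1 };

        lookup(&t, 1);
        lookup(&t, 0);
        printf("refs balanced: %s\n", t.refs == 1 ? "yes" : "no");
        return 0;
}
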
 
index 43ddcdf..9454ac1 100644 (file)
 #define USB_DEVICE_ID_LD_MICROCASSYTIME                0x1033
 #define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035
 #define USB_DEVICE_ID_LD_MICROCASSYPH          0x1038
+#define USB_DEVICE_ID_LD_POWERANALYSERCASSY    0x1040
+#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY      0x1042
+#define USB_DEVICE_ID_LD_MACHINETESTCASSY      0x1043
 #define USB_DEVICE_ID_LD_JWM           0x1080
 #define USB_DEVICE_ID_LD_DMMP          0x1081
 #define USB_DEVICE_ID_LD_UMIP          0x1090
index 5f6035a..e92b77f 100644 (file)
@@ -809,6 +809,9 @@ static const struct hid_device_id hid_ignore_list[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
index 4bdbf77..72c338e 100644 (file)
@@ -269,13 +269,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
        for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) {
                const struct tjmax_model *tm = &tjmax_model_table[i];
                if (c->x86_model == tm->model &&
-                   (tm->mask == ANY || c->x86_mask == tm->mask))
+                   (tm->mask == ANY || c->x86_stepping == tm->mask))
                        return tm->tjmax;
        }
 
        /* Early chips have no MSR for TjMax */
 
-       if (c->x86_model == 0xf && c->x86_mask < 4)
+       if (c->x86_model == 0xf && c->x86_stepping < 4)
                usemsr_ee = 0;
 
        if (c->x86_model > 0xe && usemsr_ee) {
@@ -426,7 +426,7 @@ static int chk_ucode_version(unsigned int cpu)
         * Readings might stop update when processor visited too deep sleep,
         * fixed for stepping D0 (6EC).
         */
-       if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) {
+       if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) {
                pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n");
                return -ENODEV;
        }
index ef91b8a..84e9128 100644 (file)
@@ -293,7 +293,7 @@ u8 vid_which_vrm(void)
        if (c->x86 < 6)         /* Any CPU with family lower than 6 */
                return 0;       /* doesn't have VID */
 
-       vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor);
+       vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor);
        if (vrm_ret == 134)
                vrm_ret = get_via_model_d_vrm();
        if (vrm_ret == 0)
index 06b4e1c..051a72e 100644 (file)
@@ -129,7 +129,10 @@ static ssize_t temp1_input_show(struct device *dev,
 
        data->read_tempreg(data->pdev, &regval);
        temp = (regval >> 21) * 125;
-       temp -= data->temp_offset;
+       if (temp > data->temp_offset)
+               temp -= data->temp_offset;
+       else
+               temp = 0;
 
        return sprintf(buf, "%u\n", temp);
 }
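
The clamp matters because `temp` is unsigned: subtracting an offset larger than the reading would wrap around to a huge bogus value rather than going negative. Illustrative numbers:

#include <stdio.h>

int main(void)
{
        unsigned int temp = 1000;       /* raw reading */
        unsigned int offset = 49000;    /* per-board offset */

        unsigned int wrapped = temp - offset;  /* 4294919296: nonsense */
        unsigned int clamped = temp > offset ? temp - offset : 0;

        printf("wrapped=%u clamped=%u\n", wrapped, clamped);
        return 0;
}
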
@@ -227,7 +230,7 @@ static bool has_erratum_319(struct pci_dev *pdev)
         * and AM3 formats, but that's the best we can do.
         */
        return boot_cpu_data.x86_model < 4 ||
-              (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2);
+              (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
 }
 
 static int k10temp_probe(struct pci_dev *pdev,
index 5a632bc..e59f911 100644 (file)
@@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev,
                return -ENOMEM;
 
        model = boot_cpu_data.x86_model;
-       stepping = boot_cpu_data.x86_mask;
+       stepping = boot_cpu_data.x86_stepping;
 
        /* feature available since SH-C0, exclude older revisions */
        if ((model == 4 && stepping == 0) ||
index a9805c7..e2954fb 100644 (file)
@@ -123,8 +123,10 @@ config I2C_I801
            Wildcat Point (PCH)
            Wildcat Point-LP (PCH)
            BayTrail (SOC)
+           Braswell (SOC)
            Sunrise Point-H (PCH)
            Sunrise Point-LP (PCH)
+           Kaby Lake-H (PCH)
            DNV (SOC)
            Broxton (SOC)
            Lewisburg (PCH)
index cd07a69..44deae7 100644 (file)
@@ -50,6 +50,9 @@
 #define BCM2835_I2C_S_CLKT     BIT(9)
 #define BCM2835_I2C_S_LEN      BIT(10) /* Fake bit for SW error reporting */
 
+#define BCM2835_I2C_FEDL_SHIFT 16
+#define BCM2835_I2C_REDL_SHIFT 0
+
 #define BCM2835_I2C_CDIV_MIN   0x0002
 #define BCM2835_I2C_CDIV_MAX   0xFFFE
 
@@ -81,7 +84,7 @@ static inline u32 bcm2835_i2c_readl(struct bcm2835_i2c_dev *i2c_dev, u32 reg)
 
 static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev)
 {
-       u32 divider;
+       u32 divider, redl, fedl;
 
        divider = DIV_ROUND_UP(clk_get_rate(i2c_dev->clk),
                               i2c_dev->bus_clk_rate);
@@ -100,6 +103,22 @@ static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev)
 
        bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DIV, divider);
 
+       /*
+        * Number of core clocks to wait after falling edge before
+        * outputting the next data bit.  Note that both FEDL and REDL
+        * can't be greater than CDIV/2.
+        */
+       fedl = max(divider / 16, 1u);
+
+       /*
+        * Number of core clocks to wait after rising edge before
+        * sampling the next incoming data bit.
+        */
+       redl = max(divider / 4, 1u);
+
+       bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DEL,
+                          (fedl << BCM2835_I2C_FEDL_SHIFT) |
+                          (redl << BCM2835_I2C_REDL_SHIFT));
        return 0;
 }
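
The register packs both delays into a single write. For a feel of the numbers (clock rates assumed, not from a real board): with a 150 MHz core clock and a 100 kHz bus, the divider is 1500, giving FEDL = 1500/16 = 93 and REDL = 1500/4 = 375, both comfortably below the CDIV/2 = 750 limit the comment mentions:

#include <stdio.h>

#define FEDL_SHIFT 16
#define REDL_SHIFT 0

int main(void)
{
        unsigned int divider = 150000000u / 100000u;          /* 1500 */
        unsigned int fedl = divider / 16 ? divider / 16 : 1;  /* max(d/16, 1) */
        unsigned int redl = divider / 4 ? divider / 4 : 1;    /* max(d/4, 1) */

        printf("DEL = 0x%08x (fedl=%u redl=%u)\n",
               (fedl << FEDL_SHIFT) | (redl << REDL_SHIFT), fedl, redl);
        return 0;
}
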
 
index ae69188..0573253 100644 (file)
@@ -209,7 +209,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
        i2c_dw_disable_int(dev);
 
        /* Enable the adapter */
-       __i2c_dw_enable(dev, true);
+       __i2c_dw_enable_and_wait(dev, true);
 
        /* Clear and enable interrupts */
        dw_readl(dev, DW_IC_CLR_INTR);
@@ -644,7 +644,7 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev)
        gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH);
        if (IS_ERR(gpio)) {
                r = PTR_ERR(gpio);
-               if (r == -ENOENT)
+               if (r == -ENOENT || r == -ENOSYS)
                        return 0;
                return r;
        }
index 8eac00e..692b341 100644 (file)
@@ -58,6 +58,7 @@
  * Wildcat Point (PCH)         0x8ca2  32      hard    yes     yes     yes
  * Wildcat Point-LP (PCH)      0x9ca2  32      hard    yes     yes     yes
  * BayTrail (SOC)              0x0f12  32      hard    yes     yes     yes
+ * Braswell (SOC)              0x2292  32      hard    yes     yes     yes
  * Sunrise Point-H (PCH)       0xa123  32      hard    yes     yes     yes
  * Sunrise Point-LP (PCH)      0x9d23  32      hard    yes     yes     yes
  * DNV (SOC)                   0x19df  32      hard    yes     yes     yes
index 2fd8b6d..87197ec 100644 (file)
@@ -341,7 +341,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, adap);
        init_completion(&siic->done);
 
-       /* Controller Initalisation */
+       /* Controller initialisation */
 
        writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL);
        while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET)
@@ -369,7 +369,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev)
         * but they start to affect the speed when clock is set to faster
         * frequencies.
         * Through the actual tests, use the different user_div value(which
-        * in the divider formular 'Fio / (Fi2c * user_div)') to adapt
+        * in the divider formula 'Fio / (Fi2c * user_div)') to adapt
         * the different ranges of i2c bus clock frequency, to make the SCL
         * more accurate.
         */
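
For a feel of the corrected formula (frequencies assumed, not taken from the driver): with Fio = 124 MHz, a 400 kHz bus, and user_div = 2, the divider works out to 124000000 / (400000 * 2) = 155:

#include <stdio.h>

int main(void)
{
        unsigned long fio = 124000000;  /* controller input clock, Hz */
        unsigned long fi2c = 400000;    /* target SCL rate, Hz */
        unsigned long user_div = 2;     /* per-range tuning value */

        printf("divider = %lu\n", fio / (fi2c * user_div)); /* 155 */
        return 0;
}
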
index 327a49b..9515ca1 100644 (file)
@@ -243,7 +243,7 @@ static int aspeed_adc_probe(struct platform_device *pdev)
                                         ASPEED_ADC_INIT_POLLING_TIME,
                                         ASPEED_ADC_INIT_TIMEOUT);
                if (ret)
-                       goto scaler_error;
+                       goto poll_timeout_error;
        }
 
        /* Start all channels in normal mode. */
@@ -274,9 +274,10 @@ iio_register_error:
        writel(ASPEED_OPERATION_MODE_POWER_DOWN,
                data->base + ASPEED_REG_ENGINE_CONTROL);
        clk_disable_unprepare(data->clk_scaler->clk);
-reset_error:
-       reset_control_assert(data->rst);
 clk_enable_error:
+poll_timeout_error:
+       reset_control_assert(data->rst);
+reset_error:
        clk_hw_unregister_divider(data->clk_scaler);
 scaler_error:
        clk_hw_unregister_divider(data->clk_prescaler);
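
The relabeling restores the usual goto-unwind discipline: each error label undoes exactly the steps that completed before the failure, in reverse order, and a step that itself failed is never undone. A compact sketch of the shape:

#include <stdio.h>

static int probe(int fail_step)
{
        /* step A */
        if (fail_step == 1)
                return -1;              /* nothing completed yet */
        /* step B */
        if (fail_step == 2)
                goto err_a;             /* undo A only; B never completed */
        /* step C */
        if (fail_step == 3)
                goto err_b;             /* undo B, then A */
        return 0;

err_b:
        printf("undo B\n");
err_a:
        printf("undo A\n");
        return -1;
}

int main(void)
{
        return probe(3) ? 1 : 0;
}
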
index 7f5def4..9a2583c 100644 (file)
@@ -722,8 +722,6 @@ static int stm32h7_adc_enable(struct stm32_adc *adc)
        int ret;
        u32 val;
 
-       /* Clear ADRDY by writing one, then enable ADC */
-       stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY);
        stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN);
 
        /* Poll for ADRDY to be set (after adc startup time) */
@@ -731,8 +729,11 @@ static int stm32h7_adc_enable(struct stm32_adc *adc)
                                           val & STM32H7_ADRDY,
                                           100, STM32_ADC_TIMEOUT_US);
        if (ret) {
-               stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN);
+               stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADDIS);
                dev_err(&indio_dev->dev, "Failed to enable ADC\n");
+       } else {
+               /* Clear ADRDY by writing one */
+               stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY);
        }
 
        return ret;
index 0dd5a38..457372f 100644 (file)
@@ -46,6 +46,10 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
        if (adis->trig == NULL)
                return -ENOMEM;
 
+       adis->trig->dev.parent = &adis->spi->dev;
+       adis->trig->ops = &adis_trigger_ops;
+       iio_trigger_set_drvdata(adis->trig, adis);
+
        ret = request_irq(adis->spi->irq,
                          &iio_trigger_generic_data_rdy_poll,
                          IRQF_TRIGGER_RISING,
@@ -54,9 +58,6 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
        if (ret)
                goto error_free_trig;
 
-       adis->trig->dev.parent = &adis->spi->dev;
-       adis->trig->ops = &adis_trigger_ops;
-       iio_trigger_set_drvdata(adis->trig, adis);
        ret = iio_trigger_register(adis->trig);
 
        indio_dev->trig = iio_trigger_get(adis->trig);
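
The reorder closes a probe-time race: request_irq() arms the line immediately, so every field the trigger handler dereferences must already be initialized. In miniature (plain user-space C, with a direct call standing in for the interrupt firing):

#include <stdio.h>

struct trigger {
        const char *name;
        void (*handler)(struct trigger *);
};

static void on_irq(struct trigger *t)
{
        printf("irq on %s\n", t->name);  /* would crash if name were unset */
}

static void fire(struct trigger *t)
{
        t->handler(t);  /* stands in for the IRQ arriving right after request_irq() */
}

int main(void)
{
        struct trigger t;

        /* initialize everything the handler touches *before* arming it */
        t.name = "adis-dev0";
        t.handler = on_irq;
        fire(&t);
        return 0;
}
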
index 79abf70..cd5bfe3 100644 (file)
@@ -175,7 +175,7 @@ __poll_t iio_buffer_poll(struct file *filp,
        struct iio_dev *indio_dev = filp->private_data;
        struct iio_buffer *rb = indio_dev->buffer;
 
-       if (!indio_dev->info)
+       if (!indio_dev->info || rb == NULL)
                return 0;
 
        poll_wait(filp, &rb->pollq, wait);
index fcb1c4b..f726f94 100644 (file)
@@ -68,6 +68,8 @@ config SX9500
 
 config SRF08
        tristate "Devantech SRF02/SRF08/SRF10 ultrasonic ranger sensor"
+       select IIO_BUFFER
+       select IIO_TRIGGERED_BUFFER
        depends on I2C
        help
          Say Y here to build a driver for Devantech SRF02/SRF08/SRF10
index e66963c..915bbd8 100644 (file)
@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
                           IPV6_ADDR_LINKLOCAL;
        struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
                                         &src_addr->sin6_addr, net_dev->ifindex,
-                                        strict);
+                                        NULL, strict);
        bool ret;
 
        if (!rt)
@@ -4549,6 +4549,7 @@ static struct pernet_operations cma_pernet_operations = {
        .exit = cma_exit_net,
        .id = &cma_pernet_id,
        .size = sizeof(struct cma_pernet),
+       .async = true,
 };
 
 static int __init cma_init(void)
index c4560d8..25bb178 100644 (file)
@@ -305,16 +305,21 @@ void nldev_exit(void);
 static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
                                          struct ib_pd *pd,
                                          struct ib_qp_init_attr *attr,
-                                         struct ib_udata *udata)
+                                         struct ib_udata *udata,
+                                         struct ib_uobject *uobj)
 {
        struct ib_qp *qp;
 
+       if (!dev->create_qp)
+               return ERR_PTR(-EOPNOTSUPP);
+
        qp = dev->create_qp(pd, attr, udata);
        if (IS_ERR(qp))
                return qp;
 
        qp->device = dev;
        qp->pd = pd;
+       qp->uobject = uobj;
        /*
         * We don't track XRC QPs for now, because they don't have PD
         * and more importantly they are created internally by the driver,
index 85b5ee4..d8eead5 100644 (file)
@@ -141,7 +141,12 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
         */
        uobj->context = context;
        uobj->type = type;
-       atomic_set(&uobj->usecnt, 0);
+       /*
+        * Allocated objects start out as write locked to deny any other
+        * syscalls from accessing them until they are committed. See
+        * rdma_alloc_commit_uobject
+        */
+       atomic_set(&uobj->usecnt, -1);
        kref_init(&uobj->ref);
 
        return uobj;
@@ -196,7 +201,15 @@ static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *t
                goto free;
        }
 
-       uverbs_uobject_get(uobj);
+       /*
+        * The idr_find is guaranteed to return a pointer to something that
+        * isn't freed yet, or NULL, as the free after idr_remove goes through
+        * kfree_rcu(). However the object may still have been released and
+        * kfree() could be called at any time.
+        */
+       if (!kref_get_unless_zero(&uobj->ref))
+               uobj = ERR_PTR(-ENOENT);
+
 free:
        rcu_read_unlock();
        return uobj;
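
The lookup switches to the get-unless-zero idiom: under RCU the memory is guaranteed to still be addressable, but the object may already be on its way out, so a reference is taken only if the count is still positive. A user-space rendering with C11 atomics (simplified, no RCU):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj { atomic_int refs; };

static bool get_unless_zero(struct obj *o)
{
        int c = atomic_load(&o->refs);

        while (c != 0)
                if (atomic_compare_exchange_weak(&o->refs, &c, c + 1))
                        return true;    /* reference taken */
        return false;                   /* already dying: treat as -ENOENT */
}

int main(void)
{
        struct obj live = { 1 }, dying = { 0 };

        printf("live=%d dying=%d\n",
               get_unless_zero(&live), get_unless_zero(&dying));
        return 0;
}
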
@@ -399,13 +412,13 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
        return ret;
 }
 
-static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
 {
 #ifdef CONFIG_LOCKDEP
        if (exclusive)
-               WARN_ON(atomic_read(&uobj->usecnt) > 0);
+               WARN_ON(atomic_read(&uobj->usecnt) != -1);
        else
-               WARN_ON(atomic_read(&uobj->usecnt) == -1);
+               WARN_ON(atomic_read(&uobj->usecnt) <= 0);
 #endif
 }
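
The rename also documents the counter's locking encoding: -1 means write-locked (exclusive), 0 means idle, and N > 0 means N concurrent readers; allocation now starts at -1 so nothing can touch an uncommitted object. The assertions restated on their own:

#include <assert.h>
#include <stdio.h>

static void assert_usecnt(int usecnt, int exclusive)
{
        if (exclusive)
                assert(usecnt == -1);   /* must hold the write lock */
        else
                assert(usecnt > 0);     /* must hold at least one read lock */
}

int main(void)
{
        assert_usecnt(-1, 1);  /* writer path */
        assert_usecnt(2, 0);   /* reader path */
        puts("encoding ok");
        return 0;
}
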
 
@@ -444,7 +457,7 @@ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
                WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
                return 0;
        }
-       lockdep_check(uobj, true);
+       assert_uverbs_usecnt(uobj, true);
        ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
 
        up_read(&ucontext->cleanup_rwsem);
@@ -474,16 +487,17 @@ int rdma_explicit_destroy(struct ib_uobject *uobject)
                WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
                return 0;
        }
-       lockdep_check(uobject, true);
+       assert_uverbs_usecnt(uobject, true);
        ret = uobject->type->type_class->remove_commit(uobject,
                                                       RDMA_REMOVE_DESTROY);
        if (ret)
-               return ret;
+               goto out;
 
        uobject->type = &null_obj_type;
 
+out:
        up_read(&ucontext->cleanup_rwsem);
-       return 0;
+       return ret;
 }
 
 static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
@@ -527,6 +541,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
                return ret;
        }
 
+       /* matches atomic_set(-1) in alloc_uobj */
+       assert_uverbs_usecnt(uobj, true);
+       atomic_set(&uobj->usecnt, 0);
+
        uobj->type->type_class->alloc_commit(uobj);
        up_read(&uobj->context->cleanup_rwsem);
 
@@ -561,7 +579,7 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
 
 void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
 {
-       lockdep_check(uobj, exclusive);
+       assert_uverbs_usecnt(uobj, exclusive);
        uobj->type->type_class->lookup_put(uobj, exclusive);
        /*
         * In order to unlock an object, either decrease its usecnt for
index 857637b..3dbc4e4 100644 (file)
@@ -7,7 +7,6 @@
 #include <rdma/restrack.h>
 #include <linux/mutex.h>
 #include <linux/sched/task.h>
-#include <linux/uaccess.h>
 #include <linux/pid_namespace.h>
 
 void rdma_restrack_init(struct rdma_restrack_root *res)
@@ -63,7 +62,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 {
        enum rdma_restrack_type type = res->type;
        struct ib_device *dev;
-       struct ib_xrcd *xrcd;
        struct ib_pd *pd;
        struct ib_cq *cq;
        struct ib_qp *qp;
@@ -81,10 +79,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
                qp = container_of(res, struct ib_qp, res);
                dev = qp->device;
                break;
-       case RDMA_RESTRACK_XRCD:
-               xrcd = container_of(res, struct ib_xrcd, res);
-               dev = xrcd->device;
-               break;
        default:
                WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
                return NULL;
@@ -93,6 +87,21 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
        return dev;
 }
 
+static bool res_is_user(struct rdma_restrack_entry *res)
+{
+       switch (res->type) {
+       case RDMA_RESTRACK_PD:
+               return container_of(res, struct ib_pd, res)->uobject;
+       case RDMA_RESTRACK_CQ:
+               return container_of(res, struct ib_cq, res)->uobject;
+       case RDMA_RESTRACK_QP:
+               return container_of(res, struct ib_qp, res)->uobject;
+       default:
+               WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
+               return false;
+       }
+}
+
 void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
        struct ib_device *dev = res_to_dev(res);
@@ -100,7 +109,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
        if (!dev)
                return;
 
-       if (!uaccess_kernel()) {
+       if (res_is_user(res)) {
                get_task_struct(current);
                res->task = current;
                res->kern_name = NULL;
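
The new predicate replaces the uaccess_kernel() heuristic with direct ownership: a resource created on behalf of userspace carries a uobject, a kernel-created one does not. Schematically (types reduced to the one field that matters):

#include <stdbool.h>
#include <stdio.h>

struct pd { void *uobject; };

static bool res_is_user(const struct pd *pd)
{
        return pd->uobject != NULL;
}

int main(void)
{
        struct pd user_pd = { (void *)1 }, kern_pd = { 0 };

        printf("user=%d kern=%d\n", res_is_user(&user_pd), res_is_user(&kern_pd));
        return 0;
}
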
index 256934d..a148de3 100644 (file)
@@ -562,9 +562,10 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
        if (f.file)
                fdput(f);
 
+       mutex_unlock(&file->device->xrcd_tree_mutex);
+
        uobj_alloc_commit(&obj->uobject);
 
-       mutex_unlock(&file->device->xrcd_tree_mutex);
        return in_len;
 
 err_copy:
@@ -603,10 +604,8 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
 
        uobj  = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
                               file->ucontext);
-       if (IS_ERR(uobj)) {
-               mutex_unlock(&file->device->xrcd_tree_mutex);
+       if (IS_ERR(uobj))
                return PTR_ERR(uobj);
-       }
 
        ret = uobj_remove_commit(uobj);
        return ret ?: in_len;
@@ -979,6 +978,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
        struct ib_uverbs_ex_create_cq_resp resp;
        struct ib_cq_init_attr attr = {};
 
+       if (!ib_dev->create_cq)
+               return ERR_PTR(-EOPNOTSUPP);
+
        if (cmd->comp_vector >= file->device->num_comp_vectors)
                return ERR_PTR(-EINVAL);
 
@@ -1030,14 +1032,14 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
        resp.response_length = offsetof(typeof(resp), response_length) +
                sizeof(resp.response_length);
 
+       cq->res.type = RDMA_RESTRACK_CQ;
+       rdma_restrack_add(&cq->res);
+
        ret = cb(file, obj, &resp, ucore, context);
        if (ret)
                goto err_cb;
 
        uobj_alloc_commit(&obj->uobject);
-       cq->res.type = RDMA_RESTRACK_CQ;
-       rdma_restrack_add(&cq->res);
-
        return obj;
 
 err_cb:
@@ -1518,7 +1520,8 @@ static int create_qp(struct ib_uverbs_file *file,
        if (cmd->qp_type == IB_QPT_XRC_TGT)
                qp = ib_create_qp(pd, &attr);
        else
-               qp = _ib_create_qp(device, pd, &attr, uhw);
+               qp = _ib_create_qp(device, pd, &attr, uhw,
+                                  &obj->uevent.uobject);
 
        if (IS_ERR(qp)) {
                ret = PTR_ERR(qp);
@@ -1550,8 +1553,10 @@ static int create_qp(struct ib_uverbs_file *file,
                        atomic_inc(&attr.srq->usecnt);
                if (ind_tbl)
                        atomic_inc(&ind_tbl->usecnt);
+       } else {
+               /* It is done in _ib_create_qp for other QP types */
+               qp->uobject = &obj->uevent.uobject;
        }
-       qp->uobject = &obj->uevent.uobject;
 
        obj->uevent.uobject.object = qp;
 
@@ -1971,8 +1976,15 @@ static int modify_qp(struct ib_uverbs_file *file,
                goto release_qp;
        }
 
+       if ((cmd->base.attr_mask & IB_QP_AV) &&
+           !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
+               ret = -EINVAL;
+               goto release_qp;
+       }
+
        if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
-           !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) {
+           (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
+           !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) {
                ret = -EINVAL;
                goto release_qp;
        }
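
The extra checks apply the same rule to every user-supplied port number before it reaches the driver. A sketch of the range test (port count assumed, numbering assumed 1-based as is typical for IB):

#include <stdbool.h>
#include <stdio.h>

static bool port_valid(unsigned int port, unsigned int num_ports)
{
        return port >= 1 && port <= num_ports;
}

int main(void)
{
        printf("%d %d\n", port_valid(1, 2), port_valid(3, 2)); /* 1 0 */
        return 0;
}
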
@@ -2941,6 +2953,11 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
                wq_init_attr.create_flags = cmd.create_flags;
        obj->uevent.events_reported = 0;
        INIT_LIST_HEAD(&obj->uevent.event_list);
+
+       if (!pd->device->create_wq) {
+               err = -EOPNOTSUPP;
+               goto err_put_cq;
+       }
        wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
        if (IS_ERR(wq)) {
                err = PTR_ERR(wq);
@@ -3084,7 +3101,12 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
                wq_attr.flags = cmd.flags;
                wq_attr.flags_mask = cmd.flags_mask;
        }
+       if (!wq->device->modify_wq) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
        ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
+out:
        uobj_put_obj_read(wq);
        return ret;
 }
@@ -3181,6 +3203,11 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 
        init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
        init_attr.ind_tbl = wqs;
+
+       if (!ib_dev->create_rwq_ind_table) {
+               err = -EOPNOTSUPP;
+               goto err_uobj;
+       }
        rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
 
        if (IS_ERR(rwq_ind_tbl)) {
@@ -3770,6 +3797,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
        struct ib_device_attr attr = {0};
        int err;
 
+       if (!ib_dev->query_device)
+               return -EOPNOTSUPP;
+
        if (ucore->inlen < sizeof(cmd))
                return -EINVAL;
 
index d96dc1d..339b851 100644 (file)
@@ -59,6 +59,9 @@ static int uverbs_process_attr(struct ib_device *ibdev,
                        return 0;
        }
 
+       if (test_bit(attr_id, attr_bundle_h->valid_bitmap))
+               return -EINVAL;
+
        spec = &attr_spec_bucket->attrs[attr_id];
        e = &elements[attr_id];
        e->uattr = uattr_ptr;
index 062485f..62e1eb1 100644 (file)
@@ -114,6 +114,7 @@ static size_t get_elements_above_id(const void **iters,
        short min = SHRT_MAX;
        const void *elem;
        int i, j, last_stored = -1;
+       unsigned int equal_min = 0;
 
        for_each_element(elem, i, j, elements, num_elements, num_offset,
                         data_offset) {
@@ -136,6 +137,10 @@ static size_t get_elements_above_id(const void **iters,
                 */
                iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
                last_stored = i;
+               if (min == GET_ID(id))
+                       equal_min++;
+               else
+                       equal_min = 1;
                min = GET_ID(id);
        }
 
@@ -146,15 +151,10 @@ static size_t get_elements_above_id(const void **iters,
         * Therefore, we need to clean the beginning of the array to make sure
         * all ids of final elements are equal to min.
         */
-       for (i = num_iters - 1; i >= 0 &&
-            GET_ID(*(u16 *)(iters[i] + id_offset)) == min; i--)
-               ;
-
-       num_iters -= i + 1;
-       memmove(iters, iters + i + 1, sizeof(*iters) * num_iters);
+       memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min);
 
        *min_id = min;
-       return num_iters;
+       return equal_min;
 }
 
 #define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
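
The rewrite replaces the backwards re-scan with bookkeeping: `equal_min` counts how many of the stored iterators carry the current minimum id, and since those are always the most recently stored, the final memmove keeps exactly that tail. The tail-keep step in isolation (data illustrative):

#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *iters[] = { "id7", "id5", "id3", "id3" };
        size_t num_iters = 4, equal_min = 2;   /* two entries share min id 3 */

        memmove(iters, iters + num_iters - equal_min,
                sizeof(*iters) * equal_min);

        for (size_t i = 0; i < equal_min; i++)
                printf("%s\n", iters[i]);
        return 0;
}
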
@@ -322,7 +322,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
                hash = kzalloc(sizeof(*hash) +
                               ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
                                     sizeof(long)) +
-                              BITS_TO_LONGS(attr_max_bucket) * sizeof(long),
+                              BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long),
                               GFP_KERNEL);
                if (!hash) {
                        res = -ENOMEM;
@@ -509,7 +509,7 @@ static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_
                         * first handler which != NULL. This also defines the
                         * set of flags used for this handler.
                         */
-                       for (i = num_object_defs - 1;
+                       for (i = num_method_defs - 1;
                             i >= 0 && !method_defs[i]->handler; i--)
                                ;
                        hash->methods[min_id++] = method;
index 395a3b0..b1ca223 100644 (file)
@@ -650,12 +650,21 @@ static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
        return -1;
 }
 
+static bool verify_command_idx(u32 command, bool extended)
+{
+       if (extended)
+               return command < ARRAY_SIZE(uverbs_ex_cmd_table);
+
+       return command < ARRAY_SIZE(uverbs_cmd_table);
+}
+
 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                             size_t count, loff_t *pos)
 {
        struct ib_uverbs_file *file = filp->private_data;
        struct ib_device *ib_dev;
        struct ib_uverbs_cmd_hdr hdr;
+       bool extended_command;
        __u32 command;
        __u32 flags;
        int srcu_key;
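
Hoisting the bounds check means the command index is validated against the right table up front, before it is used for the capability-mask check or any dispatch-table dereference, instead of piecemeal on each branch. The check in miniature (table sizes assumed):

#include <stdbool.h>
#include <stdio.h>

#define N_CMDS 4
#define N_EX_CMDS 2

static bool verify_command_idx(unsigned int command, bool extended)
{
        return command < (extended ? N_EX_CMDS : N_CMDS);
}

int main(void)
{
        printf("%d %d\n", verify_command_idx(3, false),
               verify_command_idx(3, true));   /* 1 0 */
        return 0;
}
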
@@ -688,6 +697,15 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        }
 
        command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+       flags = (hdr.command &
+                IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
+
+       extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED;
+       if (!verify_command_idx(command, extended_command)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        if (verify_command_mask(ib_dev, command)) {
                ret = -EOPNOTSUPP;
                goto out;
@@ -699,12 +717,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                goto out;
        }
 
-       flags = (hdr.command &
-                IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
        if (!flags) {
-               if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
-                   !uverbs_cmd_table[command]) {
+               if (!uverbs_cmd_table[command]) {
                        ret = -EINVAL;
                        goto out;
                }
@@ -725,8 +739,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                struct ib_udata uhw;
                size_t written_count = count;
 
-               if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
-                   !uverbs_ex_cmd_table[command]) {
+               if (!uverbs_ex_cmd_table[command]) {
                        ret = -ENOSYS;
                        goto out;
                }
@@ -942,6 +955,7 @@ static const struct file_operations uverbs_fops = {
        .llseek  = no_llseek,
 #if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
        .unlocked_ioctl = ib_uverbs_ioctl,
+       .compat_ioctl = ib_uverbs_ioctl,
 #endif
 };
 
@@ -954,6 +968,7 @@ static const struct file_operations uverbs_mmap_fops = {
        .llseek  = no_llseek,
 #if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
        .unlocked_ioctl = ib_uverbs_ioctl,
+       .compat_ioctl = ib_uverbs_ioctl,
 #endif
 };
 
index cab0ac3..df1360e 100644 (file)
@@ -234,15 +234,18 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
                uverbs_attr_get(ctx, UVERBS_UHW_OUT);
 
        if (!IS_ERR(uhw_in)) {
-               udata->inbuf = uhw_in->ptr_attr.ptr;
                udata->inlen = uhw_in->ptr_attr.len;
+               if (uverbs_attr_ptr_is_inline(uhw_in))
+                       udata->inbuf = &uhw_in->uattr->data;
+               else
+                       udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
        } else {
                udata->inbuf = NULL;
                udata->inlen = 0;
        }
 
        if (!IS_ERR(uhw_out)) {
-               udata->outbuf = uhw_out->ptr_attr.ptr;
+               udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
                udata->outlen = uhw_out->ptr_attr.len;
        } else {
                udata->outbuf = NULL;
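
The udata fix distinguishes the two ways an attribute payload can travel: small values are stored inline in the 64-bit data word itself, larger ones as a user pointer to a buffer of `len` bytes, so an unconditional pointer read corrupts inline values. A sketch of the decode rule (layout simplified and assumed):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ptr_attr { uint64_t data; uint16_t len; };

static int attr_is_inline(const struct ptr_attr *a)
{
        return a->len <= sizeof(a->data);   /* fits in the word itself */
}

int main(void)
{
        struct ptr_attr a = { 0, 4 };
        uint32_t v = 42;

        memcpy(&a.data, &v, sizeof(v));     /* inline encoding */
        printf("inline=%d value=%u\n", attr_is_inline(&a), (uint32_t)a.data);
        return 0;
}
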
@@ -323,7 +326,8 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev,
        cq->res.type = RDMA_RESTRACK_CQ;
        rdma_restrack_add(&cq->res);
 
-       ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
+       ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe,
+                            sizeof(cq->cqe));
        if (ret)
                goto err_cq;
 
@@ -375,7 +379,7 @@ static int uverbs_destroy_cq_handler(struct ib_device *ib_dev,
        resp.comp_events_reported  = obj->comp_events_reported;
        resp.async_events_reported = obj->async_events_reported;
 
-       return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp);
+       return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp));
 }
 
 static DECLARE_UVERBS_METHOD(
index 16ebc63..93025d2 100644 (file)
@@ -887,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
        if (qp_init_attr->cap.max_rdma_ctxs)
                rdma_rw_init_qp(device, qp_init_attr);
 
-       qp = _ib_create_qp(device, pd, qp_init_attr, NULL);
+       qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
        if (IS_ERR(qp))
                return qp;
 
@@ -898,7 +898,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
        }
 
        qp->real_qp    = qp;
-       qp->uobject    = NULL;
        qp->qp_type    = qp_init_attr->qp_type;
        qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
 
index ca32057..3eb7a83 100644 (file)
@@ -120,7 +120,6 @@ struct bnxt_re_dev {
 #define BNXT_RE_FLAG_HAVE_L2_REF               3
 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN           4
 #define BNXT_RE_FLAG_QOS_WORK_REG              5
-#define BNXT_RE_FLAG_TASK_IN_PROG              6
 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS          29
        struct net_device               *netdev;
        unsigned int                    version, major, minor;
@@ -158,6 +157,7 @@ struct bnxt_re_dev {
        atomic_t                        srq_count;
        atomic_t                        mr_count;
        atomic_t                        mw_count;
+       atomic_t                        sched_count;
        /* Max of 2 lossless traffic class supported per port */
        u16                             cosq[2];
 
index ae9e9ff..643174d 100644 (file)
@@ -174,10 +174,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
        ib_attr->max_pd = dev_attr->max_pd;
        ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
        ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
-       if (dev_attr->is_atomic) {
-               ib_attr->atomic_cap = IB_ATOMIC_HCA;
-               ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
-       }
+       ib_attr->atomic_cap = IB_ATOMIC_NONE;
+       ib_attr->masked_atomic_cap = IB_ATOMIC_NONE;
 
        ib_attr->max_ee_rd_atom = 0;
        ib_attr->max_res_rd_atom = 0;
@@ -787,20 +785,51 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
        return 0;
 }
 
+static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+       __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&qp->scq->cq_lock, flags);
+       if (qp->rcq != qp->scq)
+               spin_lock(&qp->rcq->cq_lock);
+       else
+               __acquire(&qp->rcq->cq_lock);
+
+       return flags;
+}
+
+static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+                              unsigned long flags)
+       __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
+{
+       if (qp->rcq != qp->scq)
+               spin_unlock(&qp->rcq->cq_lock);
+       else
+               __release(&qp->rcq->cq_lock);
+       spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
+}
+
 /* Queue Pairs */
 int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
 {
        struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
        struct bnxt_re_dev *rdev = qp->rdev;
        int rc;
+       unsigned int flags;
 
        bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
-       bnxt_qplib_del_flush_qp(&qp->qplib_qp);
        rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
        if (rc) {
                dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
                return rc;
        }
+
+       flags = bnxt_re_lock_cqs(qp);
+       bnxt_qplib_clean_qp(&qp->qplib_qp);
+       bnxt_re_unlock_cqs(qp, flags);
+       bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
+
        if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
                rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
                                           &rdev->sqp_ah->qplib_ah);
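
bnxt_re_lock_cqs()/bnxt_re_unlock_cqs() above encode the usual two-lock pattern: take the send-CQ lock, take the receive-CQ lock only when it is a distinct object, and release in reverse order; the __acquire/__release annotations keep sparse's lock-balance checking happy on the shared-CQ branch. A user-space rendering with pthreads (structure only, no IRQ flags):

#include <pthread.h>
#include <stdio.h>

struct cq { pthread_mutex_t lock; };

static void lock_cqs(struct cq *scq, struct cq *rcq)
{
        pthread_mutex_lock(&scq->lock);
        if (rcq != scq)                 /* avoid self-deadlock on a shared CQ */
                pthread_mutex_lock(&rcq->lock);
}

static void unlock_cqs(struct cq *scq, struct cq *rcq)
{
        if (rcq != scq)
                pthread_mutex_unlock(&rcq->lock);
        pthread_mutex_unlock(&scq->lock);
}

int main(void)
{
        struct cq shared = { PTHREAD_MUTEX_INITIALIZER };

        lock_cqs(&shared, &shared);     /* scq == rcq: single lock taken */
        unlock_cqs(&shared, &shared);
        puts("ok");
        return 0;
}
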
@@ -810,7 +839,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
                        return rc;
                }
 
-               bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+               bnxt_qplib_clean_qp(&qp->qplib_qp);
                rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
                                           &rdev->qp1_sqp->qplib_qp);
                if (rc) {
@@ -1069,6 +1098,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
                        goto fail;
                }
                qp->qplib_qp.scq = &cq->qplib_cq;
+               qp->scq = cq;
        }
 
        if (qp_init_attr->recv_cq) {
@@ -1080,6 +1110,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
                        goto fail;
                }
                qp->qplib_qp.rcq = &cq->qplib_cq;
+               qp->rcq = cq;
        }
 
        if (qp_init_attr->srq) {
@@ -1185,7 +1216,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
                rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
                if (rc) {
                        dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
-                       goto fail;
+                       goto free_umem;
                }
        }
 
@@ -1213,6 +1244,13 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
        return &qp->ib_qp;
 qp_destroy:
        bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+free_umem:
+       if (udata) {
+               if (qp->rumem)
+                       ib_umem_release(qp->rumem);
+               if (qp->sumem)
+                       ib_umem_release(qp->sumem);
+       }
 fail:
        kfree(qp);
        return ERR_PTR(rc);
@@ -1603,7 +1641,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                        dev_dbg(rdev_to_dev(rdev),
                                "Move QP = %p out of flush list\n",
                                qp);
-                       bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+                       bnxt_qplib_clean_qp(&qp->qplib_qp);
                }
        }
        if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
index 423ebe0..b88a48d 100644 (file)
@@ -89,6 +89,8 @@ struct bnxt_re_qp {
        /* QP1 */
        u32                     send_psn;
        struct ib_ud_header     qp1_hdr;
+       struct bnxt_re_cq       *scq;
+       struct bnxt_re_cq       *rcq;
 };
 
 struct bnxt_re_cq {
index 508d00a..33a4480 100644 (file)
@@ -656,7 +656,6 @@ static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
        mutex_unlock(&bnxt_re_dev_lock);
 
        synchronize_rcu();
-       flush_workqueue(bnxt_re_wq);
 
        ib_dealloc_device(&rdev->ibdev);
        /* rdev is gone */
@@ -1441,7 +1440,7 @@ static void bnxt_re_task(struct work_struct *work)
                break;
        }
        smp_mb__before_atomic();
-       clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+       atomic_dec(&rdev->sched_count);
        kfree(re_work);
 }
 
@@ -1503,7 +1502,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
                /* netdev notifier will call NETDEV_UNREGISTER again later since
                 * we are still holding the reference to the netdev
                 */
-               if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags))
+               if (atomic_read(&rdev->sched_count) > 0)
                        goto exit;
                bnxt_re_ib_unreg(rdev, false);
                bnxt_re_remove_one(rdev);
@@ -1523,7 +1522,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
                        re_work->vlan_dev = (real_dev == netdev ?
                                             NULL : netdev);
                        INIT_WORK(&re_work->work, bnxt_re_task);
-                       set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+                       atomic_inc(&rdev->sched_count);
                        queue_work(bnxt_re_wq, &re_work->work);
                }
        }
@@ -1578,6 +1577,11 @@ static void __exit bnxt_re_mod_exit(void)
        */
        list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
                dev_info(rdev_to_dev(rdev), "Unregistering Device");
+               /*
+                * Flush out any scheduled tasks before destroying the
+                * resources
+                */
+               flush_workqueue(bnxt_re_wq);
                bnxt_re_dev_stop(rdev);
                bnxt_re_ib_unreg(rdev, true);
                bnxt_re_remove_one(rdev);
index 1b0e946..3ea5b96 100644 (file)
@@ -173,7 +173,7 @@ static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
        }
 }
 
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
 {
        unsigned long flags;
 
@@ -1419,7 +1419,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_destroy_qp req;
        struct creq_destroy_qp_resp resp;
-       unsigned long flags;
        u16 cmd_flags = 0;
        int rc;
 
@@ -1437,19 +1436,12 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
                return rc;
        }
 
-       /* Must walk the associated CQs to nullified the QP ptr */
-       spin_lock_irqsave(&qp->scq->hwq.lock, flags);
-
-       __clean_cq(qp->scq, (u64)(unsigned long)qp);
-
-       if (qp->rcq && qp->rcq != qp->scq) {
-               spin_lock(&qp->rcq->hwq.lock);
-               __clean_cq(qp->rcq, (u64)(unsigned long)qp);
-               spin_unlock(&qp->rcq->hwq.lock);
-       }
-
-       spin_unlock_irqrestore(&qp->scq->hwq.lock, flags);
+       return 0;
+}
 
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+                           struct bnxt_qplib_qp *qp)
+{
        bnxt_qplib_free_qp_hdr_buf(res, qp);
        bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
        kfree(qp->sq.swq);
@@ -1462,7 +1454,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
        if (qp->orrq.max_elements)
                bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
 
-       return 0;
 }
 
 void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
index 211b27a..ca0a2ff 100644 (file)
@@ -478,6 +478,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp);
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+                           struct bnxt_qplib_qp *qp);
 void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
                                struct bnxt_qplib_sge *sge);
 void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
@@ -500,7 +503,6 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
 int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
 void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp);
 void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
                                 unsigned long *flags);
 void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
index c015c18..0305798 100644 (file)
@@ -52,18 +52,6 @@ const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
 
 /* Device */
 
-static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
-{
-       int rc;
-       u16 pcie_ctl2;
-
-       rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2,
-                                      &pcie_ctl2);
-       if (rc)
-               return false;
-       return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
-}
-
 static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
                                     char *fw_ver)
 {
@@ -165,7 +153,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
                attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
        }
 
-       attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
+       attr->is_atomic = 0;
 bail:
        bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
        return rc;
index bc62996..d42b922 100644 (file)
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND)   += mlx5_ib.o
 
 mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
index 5b974fb..c4c7b82 100644 (file)
@@ -64,14 +64,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
        }
 }
 
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
-       return mlx5_buf_offset(&buf->buf, n * size);
-}
-
 static void *get_cqe(struct mlx5_ib_cq *cq, int n)
 {
-       return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+       return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
 }
 
 static u8 sw_ownership_bit(int n, int nent)
@@ -403,7 +398,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
 
 static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
 {
-       mlx5_buf_free(dev->mdev, &buf->buf);
+       mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
 }
 
 static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -724,12 +719,25 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
        return ret;
 }
 
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
-                       int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+                            struct mlx5_ib_cq_buf *buf,
+                            int nent,
+                            int cqe_size)
 {
+       struct mlx5_frag_buf_ctrl *c = &buf->fbc;
+       struct mlx5_frag_buf *frag_buf = &c->frag_buf;
+       u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
        int err;
 
-       err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+       MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
+       MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
+
+       mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
+
+       err = mlx5_frag_buf_alloc_node(dev->mdev,
+                                      nent * cqe_size,
+                                      frag_buf,
+                                      dev->mdev->priv.numa_node);
        if (err)
                return err;
 
@@ -862,14 +870,15 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
        ib_umem_release(cq->buf.umem);
 }
 
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+                            struct mlx5_ib_cq_buf *buf)
 {
        int i;
        void *cqe;
        struct mlx5_cqe64 *cqe64;
 
        for (i = 0; i < buf->nent; i++) {
-               cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+               cqe = get_cqe(cq, i);
                cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
                cqe64->op_own = MLX5_CQE_INVALID << 4;
        }
@@ -891,14 +900,15 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
        cq->mcq.arm_db     = cq->db.db + 1;
        cq->mcq.cqe_sz = cqe_size;
 
-       err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+       err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
        if (err)
                goto err_db;
 
-       init_cq_buf(cq, &cq->buf);
+       init_cq_frag_buf(cq, &cq->buf);
 
        *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
+                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+                cq->buf.fbc.frag_buf.npages;
        *cqb = kvzalloc(*inlen, GFP_KERNEL);
        if (!*cqb) {
                err = -ENOMEM;
@@ -906,11 +916,12 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
        }
 
        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
-       mlx5_fill_page_array(&cq->buf.buf, pas);
+       mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
 
        cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
        MLX5_SET(cqc, cqc, log_page_size,
-                cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+                cq->buf.fbc.frag_buf.page_shift -
+                MLX5_ADAPTER_PAGE_SHIFT);
 
        *index = dev->mdev->priv.uar->index;
 
@@ -1207,11 +1218,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
        if (!cq->resize_buf)
                return -ENOMEM;
 
-       err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+       err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
        if (err)
                goto ex;
 
-       init_cq_buf(cq, cq->resize_buf);
+       init_cq_frag_buf(cq, cq->resize_buf);
 
        return 0;
 
@@ -1256,9 +1267,8 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
        }
 
        while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
-               dcqe = get_cqe_from_buf(cq->resize_buf,
-                                       (i + 1) & (cq->resize_buf->nent),
-                                       dsize);
+               dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+                                            (i + 1) & cq->resize_buf->nent);
                dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
                sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
                memcpy(dcqe, scqe, dsize);
@@ -1324,8 +1334,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
                cqe_size = 64;
                err = resize_kernel(dev, cq, entries, cqe_size);
                if (!err) {
-                       npas = cq->resize_buf->buf.npages;
-                       page_shift = cq->resize_buf->buf.page_shift;
+                       struct mlx5_frag_buf_ctrl *c;
+
+                       c = &cq->resize_buf->fbc;
+                       npas = c->frag_buf.npages;
+                       page_shift = c->frag_buf.page_shift;
                }
        }
 
@@ -1346,7 +1359,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
                mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
                                     pas, 0);
        else
-               mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+               mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
+                                         pas);
 
        MLX5_SET(modify_cq_in, in,
                 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644 (file)
index 0000000..61cc3d7
--- /dev/null
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+       STAGE_CREATE(MLX5_IB_STAGE_INIT,
+                    mlx5_ib_stage_init_init,
+                    mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_rep_flow_db_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+                    mlx5_ib_stage_caps_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_rep_non_default_cb,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+                    mlx5_ib_stage_rep_roce_init,
+                    mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+                    mlx5_ib_stage_dev_res_init,
+                    mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+                    mlx5_ib_stage_counters_init,
+                    mlx5_ib_stage_counters_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+                    mlx5_ib_stage_bfrag_init,
+                    mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+                    mlx5_ib_stage_ib_reg_init,
+                    mlx5_ib_stage_ib_reg_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+                    mlx5_ib_stage_umr_res_init,
+                    mlx5_ib_stage_umr_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+                    mlx5_ib_stage_class_attr_init,
+                    NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+       return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+       rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+       if (!ibdev)
+               return -ENOMEM;
+
+       ibdev->rep = rep;
+       ibdev->mdev = dev;
+       ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+                              MLX5_CAP_GEN(dev, num_vhca_ports));
+       if (!__mlx5_ib_add(ibdev, &rep_profile))
+               return -EINVAL;
+
+       rep->rep_if[REP_IB].priv = ibdev;
+
+       return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5_ib_dev *dev;
+
+       if (!rep->rep_if[REP_IB].priv)
+               return;
+
+       dev = mlx5_ib_rep_to_dev(rep);
+       __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+       rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+       return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+       int vport;
+
+       for (vport = 1; vport < total_vfs; vport++) {
+               struct mlx5_eswitch_rep_if rep_if = {};
+
+               rep_if.load = mlx5_ib_vport_rep_load;
+               rep_if.unload = mlx5_ib_vport_rep_unload;
+               rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+               mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+       }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+       int vport;
+
+       for (vport = 1; vport < total_vfs; vport++)
+               mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+       struct mlx5_eswitch_rep_if rep_if = {};
+
+       rep_if.load = mlx5_ib_nic_rep_load;
+       rep_if.unload = mlx5_ib_nic_rep_unload;
+       rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+       rep_if.priv = dev;
+
+       mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+       mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+
+       mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+       mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+       return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+       return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                             struct mlx5_ib_sq *sq)
+{
+       struct mlx5_flow_handle *flow_rule;
+       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+       if (!dev->rep)
+               return 0;
+
+       flow_rule =
+               mlx5_eswitch_add_send_to_vport_rule(esw,
+                                                   dev->rep->vport,
+                                                   sq->base.mqp.qpn);
+       if (IS_ERR(flow_rule))
+               return PTR_ERR(flow_rule);
+       sq->flow_rule = flow_rule;
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644 (file)
index 0000000..046fd94
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+                                          int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                             struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+       return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+                                          int vport_index)
+{
+       return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                                           struct mlx5_ib_sq *sq)
+{
+       return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+       return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
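
The #else branch above is the standard kernel stub pattern: with CONFIG_MLX5_ESWITCH disabled, every helper degenerates to a constant static inline, so callers stay free of #ifdef clutter. An illustrative (hypothetical) caller:

static bool example_is_switchdev_mode(struct mlx5_ib_dev *dev)
{
        /* With CONFIG_MLX5_ESWITCH=n the stub returns SRIOV_NONE,
         * so the compiler folds this whole test to "false".
         */
        return mlx5_ib_eswitch_mode(dev->mdev->priv.eswitch) ==
               SRIOV_OFFLOADS;
}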
index 4236c80..ee55d7d 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/in.h>
 #include <linux/etherdevice.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 #include "cmd.h"
 
 #define DRIVER_NAME "mlx5_ib"
@@ -130,7 +131,7 @@ static int get_port_state(struct ib_device *ibdev,
        int ret;
 
        memset(&attr, 0, sizeof(attr));
-       ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+       ret = ibdev->query_port(ibdev, port_num, &attr);
        if (!ret)
                *state = attr.state;
        return ret;
@@ -154,10 +155,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
        case NETDEV_REGISTER:
        case NETDEV_UNREGISTER:
                write_lock(&roce->netdev_lock);
-
-               if (ndev->dev.parent == &mdev->pdev->dev)
-                       roce->netdev = (event == NETDEV_UNREGISTER) ?
+               if (ibdev->rep) {
+                       struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+                       struct net_device *rep_ndev;
+
+                       rep_ndev = mlx5_ib_get_rep_netdev(esw,
+                                                         ibdev->rep->vport);
+                       if (rep_ndev == ndev)
+                               roce->netdev = (event == NETDEV_UNREGISTER) ?
                                        NULL : ndev;
+               } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+                       roce->netdev = (event == NETDEV_UNREGISTER) ?
+                               NULL : ndev;
+               }
                write_unlock(&roce->netdev_lock);
                break;
 
@@ -1268,6 +1278,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
        return ret;
 }
 
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+                                 struct ib_port_attr *props)
+{
+       int ret;
+
+       /* Only link layer == ethernet is valid for representors */
+       ret = mlx5_query_port_roce(ibdev, port, props);
+       if (ret || !props)
+               return ret;
+
+       /* We don't support GIDs */
+       props->gid_tbl_len = 0;
+
+       return ret;
+}
+
 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
 {
@@ -2631,7 +2657,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
                                                          ibflow);
        struct mlx5_ib_flow_handler *iter, *tmp;
 
-       mutex_lock(&dev->flow_db.lock);
+       mutex_lock(&dev->flow_db->lock);
 
        list_for_each_entry_safe(iter, tmp, &handler->list, list) {
                mlx5_del_flow_rules(iter->rule);
@@ -2642,7 +2668,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 
        mlx5_del_flow_rules(handler->rule);
        put_flow_table(dev, handler->prio, true);
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
 
        kfree(handler);
 
@@ -2691,7 +2717,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                             MLX5_FLOW_NAMESPACE_BYPASS);
                num_entries = MLX5_FS_MAX_ENTRIES;
                num_groups = MLX5_FS_MAX_TYPES;
-               prio = &dev->flow_db.prios[priority];
+               prio = &dev->flow_db->prios[priority];
        } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
                   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
                ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2699,7 +2725,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                build_leftovers_ft_param(&priority,
                                         &num_entries,
                                         &num_groups);
-               prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+               prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
        } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
                if (!MLX5_CAP_FLOWTABLE(dev->mdev,
                                        allow_sniffer_and_nic_rx_shared_tir))
@@ -2709,7 +2735,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                             MLX5_FLOW_NAMESPACE_SNIFFER_RX :
                                             MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 
-               prio = &dev->flow_db.sniffer[ft_type];
+               prio = &dev->flow_db->sniffer[ft_type];
                priority = 0;
                num_entries = 1;
                num_groups = 1;
@@ -2802,6 +2828,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
        if (!flow_is_multicast_only(flow_attr))
                set_underlay_qp(dev, spec, underlay_qpn);
 
+       if (dev->rep) {
+               void *misc;
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters);
+               MLX5_SET(fte_match_set_misc, misc, source_port,
+                        dev->rep->vport);
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       }
+
        spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
        if (is_drop) {
                flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
@@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
        if (!dst)
                return ERR_PTR(-ENOMEM);
 
-       mutex_lock(&dev->flow_db.lock);
+       mutex_lock(&dev->flow_db->lock);
 
        ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
        if (IS_ERR(ft_prio)) {
@@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                goto destroy_ft;
        }
 
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
 
        return &handler->ibflow;
@@ -3058,7 +3096,7 @@ destroy_ft:
        if (ft_prio_tx)
                put_flow_table(dev, ft_prio_tx, false);
 unlock:
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
        kfree(handler);
        return ERR_PTR(err);
@@ -3772,6 +3810,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
        return 0;
 }
 
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+                                  struct ib_port_immutable *immutable)
+{
+       struct ib_port_attr attr;
+       int err;
+
+       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+       err = ib_query_port(ibdev, port_num, &attr);
+       if (err)
+               return err;
+
+       immutable->pkey_tbl_len = attr.pkey_tbl_len;
+       immutable->gid_tbl_len = attr.gid_tbl_len;
+       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+       return 0;
+}
+
 static void get_dev_fw_str(struct ib_device *ibdev, char *str)
 {
        struct mlx5_ib_dev *dev =
@@ -3802,7 +3859,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
                goto err_destroy_vport_lag;
        }
 
-       dev->flow_db.lag_demux_ft = ft;
+       dev->flow_db->lag_demux_ft = ft;
        return 0;
 
 err_destroy_vport_lag:
@@ -3814,9 +3871,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
 
-       if (dev->flow_db.lag_demux_ft) {
-               mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
-               dev->flow_db.lag_demux_ft = NULL;
+       if (dev->flow_db->lag_demux_ft) {
+               mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+               dev->flow_db->lag_demux_ft = NULL;
 
                mlx5_cmd_destroy_vport_lag(mdev);
        }
@@ -3848,14 +3905,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
 {
        int err;
 
-       err = mlx5_add_netdev_notifier(dev, port_num);
-       if (err)
-               return err;
-
        if (MLX5_CAP_GEN(dev->mdev, roce)) {
                err = mlx5_nic_vport_enable_roce(dev->mdev);
                if (err)
-                       goto err_unregister_netdevice_notifier;
+                       return err;
        }
 
        err = mlx5_eth_lag_init(dev);
@@ -3868,8 +3921,6 @@ err_disable_roce:
        if (MLX5_CAP_GEN(dev->mdev, roce))
                mlx5_nic_vport_disable_roce(dev->mdev);
 
-err_unregister_netdevice_notifier:
-       mlx5_remove_netdev_notifier(dev, port_num);
        return err;
 }
 
@@ -4503,7 +4554,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
        mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
        mlx5_ib_cleanup_multiport_master(dev);
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4512,7 +4563,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
        kfree(dev->port);
 }
 
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
        const char *name;
@@ -4564,7 +4615,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
                dev->mdev->priv.eq_table.num_comp_vectors;
        dev->ib_dev.dev.parent          = &mdev->pdev->dev;
 
-       mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
        INIT_LIST_HEAD(&dev->qp_list);
        spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4585,7 +4635,38 @@ err_free_port:
        return -ENOMEM;
 }
 
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+       dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+       if (!dev->flow_db)
+               return -ENOMEM;
+
+       mutex_init(&dev->flow_db->lock);
+
+       return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_ib_dev *nic_dev;
+
+       nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+       if (!nic_dev)
+               return -EINVAL;
+
+       dev->flow_db = nic_dev->flow_db;
+
+       return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+       kfree(dev->flow_db);
+}
+
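
Note the asymmetry between the two flow_db init variants: mlx5_ib_stage_flow_db_init() allocates the table and pairs with the kfree() in mlx5_ib_stage_flow_db_cleanup(), while mlx5_ib_stage_rep_flow_db_init() only points a representor at the uplink's table. A representor profile must therefore leave the cleanup hook NULL, or the shared table would be freed out from under the uplink. A VF-rep profile entry would presumably look like this (sketch, not part of this hunk):

        STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
                     mlx5_ib_stage_rep_flow_db_init,
                     NULL),     /* flow_db is borrowed from the uplink */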
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
        int err;
@@ -4626,7 +4707,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
                (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
 
        dev->ib_dev.query_device        = mlx5_ib_query_device;
-       dev->ib_dev.query_port          = mlx5_ib_query_port;
        dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
        dev->ib_dev.query_gid           = mlx5_ib_query_gid;
        dev->ib_dev.add_gid             = mlx5_ib_add_gid;
@@ -4669,7 +4749,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
        dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
-       dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
        dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
        dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
        if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4720,6 +4799,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+       dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+       dev->ib_dev.query_port          = mlx5_ib_query_port;
+
+       return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+       dev->ib_dev.get_port_immutable  = mlx5_port_rep_immutable;
+       dev->ib_dev.query_port          = mlx5_ib_rep_query_port;
+
+       return 0;
+}
+
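
This stage split is also why get_port_state() earlier in the file now dispatches through ibdev->query_port rather than calling mlx5_ib_query_port() directly: MLX5_IB_STAGE_NON_DEFAULT_CB lets each profile install its own verbs callbacks before registration, and everything else goes through the installed pointer. A profile needing different behaviour only supplies another callback stage, e.g. (hypothetical names):

static int example_non_default_cb(struct mlx5_ib_dev *dev)
{
        /* Any ib_device callback assigned here overrides the default
         * installed by the generic caps/callback stages.
         */
        dev->ib_dev.get_port_immutable = example_port_immutable;
        dev->ib_dev.query_port         = example_query_port;

        return 0;
}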
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+                                         u8 port_num)
+{
+       int i;
+
+       for (i = 0; i < dev->num_ports; i++) {
+               dev->roce[i].dev = dev;
+               dev->roce[i].native_port_num = i + 1;
+               dev->roce[i].last_port_state = IB_PORT_DOWN;
+       }
+
+       dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
+       dev->ib_dev.create_wq    = mlx5_ib_create_wq;
+       dev->ib_dev.modify_wq    = mlx5_ib_modify_wq;
+       dev->ib_dev.destroy_wq   = mlx5_ib_destroy_wq;
+       dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+       dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+       dev->ib_dev.uverbs_ex_cmd_mask |=
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+       return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+       u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+       mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_core_dev *mdev = dev->mdev;
+       enum rdma_link_layer ll;
+       int port_type_cap;
+       int err = 0;
+       u8 port_num;
+
+       port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+       port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+       ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+       return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
 static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
@@ -4727,37 +4880,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
        int port_type_cap;
        u8 port_num;
        int err;
-       int i;
 
        port_num = mlx5_core_native_port_num(dev->mdev) - 1;
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
-               for (i = 0; i < dev->num_ports; i++) {
-                       dev->roce[i].dev = dev;
-                       dev->roce[i].native_port_num = i + 1;
-                       dev->roce[i].last_port_state = IB_PORT_DOWN;
-               }
+               err = mlx5_ib_stage_common_roce_init(dev, port_num);
+               if (err)
+                       return err;
 
-               dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
-               dev->ib_dev.create_wq    = mlx5_ib_create_wq;
-               dev->ib_dev.modify_wq    = mlx5_ib_modify_wq;
-               dev->ib_dev.destroy_wq   = mlx5_ib_destroy_wq;
-               dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
-               dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
-               dev->ib_dev.uverbs_ex_cmd_mask |=
-                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
-                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
                err = mlx5_enable_eth(dev, port_num);
                if (err)
-                       return err;
+                       goto cleanup;
        }
 
        return 0;
+cleanup:
+       mlx5_ib_stage_common_roce_cleanup(dev);
+
+       return err;
 }
 
 static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4773,16 +4915,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
                mlx5_disable_eth(dev);
-               mlx5_remove_netdev_notifier(dev, port_num);
+               mlx5_ib_stage_common_roce_cleanup(dev);
        }
 }
 
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
 {
        return create_dev_resources(&dev->devr);
 }
 
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
 {
        destroy_dev_resources(&dev->devr);
 }
@@ -4794,7 +4936,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
        return mlx5_ib_odp_init_one(dev);
 }
 
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
                dev->ib_dev.get_hw_stats        = mlx5_ib_get_hw_stats;
@@ -4806,7 +4948,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
                mlx5_ib_dealloc_counters(dev);
@@ -4837,7 +4979,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
        mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 }
 
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
 {
        int err;
 
@@ -4852,28 +4994,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
        return err;
 }
 
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
 {
        mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 }
 
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
        return ib_register_device(&dev->ib_dev, NULL);
 }
 
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
 {
        ib_unregister_device(&dev->ib_dev);
 }
 
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
 {
        return create_umr_res(dev);
 }
 
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
 {
        destroy_umrc_res(dev);
 }
@@ -4890,7 +5032,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
        cancel_delay_drop(dev);
 }
 
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
 {
        int err;
        int i;
@@ -4905,9 +5047,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
-                            const struct mlx5_ib_profile *profile,
-                            int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_register_vport_reps(dev);
+
+       return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+                     const struct mlx5_ib_profile *profile,
+                     int stage)
 {
        /* Number of stages to cleanup */
        while (stage) {
@@ -4921,23 +5075,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
 
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
-                          const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+                   const struct mlx5_ib_profile *profile)
 {
-       struct mlx5_ib_dev *dev;
        int err;
        int i;
 
        printk_once(KERN_INFO "%s", mlx5_version);
 
-       dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
-       if (!dev)
-               return NULL;
-
-       dev->mdev = mdev;
-       dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
-                            MLX5_CAP_GEN(mdev, num_vhca_ports));
-
        for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
                if (profile->stage[i].init) {
                        err = profile->stage[i].init(dev);
@@ -4961,9 +5106,15 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_INIT,
                     mlx5_ib_stage_init_init,
                     mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_flow_db_init,
+                    mlx5_ib_stage_flow_db_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_CAPS,
                     mlx5_ib_stage_caps_init,
                     NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_non_default_cb,
+                    NULL),
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_roce_init,
                     mlx5_ib_stage_roce_cleanup),
@@ -4999,6 +5150,48 @@ static const struct mlx5_ib_profile pf_profile = {
                     NULL),
 };
 
+static const struct mlx5_ib_profile nic_rep_profile = {
+       STAGE_CREATE(MLX5_IB_STAGE_INIT,
+                    mlx5_ib_stage_init_init,
+                    mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_flow_db_init,
+                    mlx5_ib_stage_flow_db_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+                    mlx5_ib_stage_caps_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_rep_non_default_cb,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+                    mlx5_ib_stage_rep_roce_init,
+                    mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+                    mlx5_ib_stage_dev_res_init,
+                    mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+                    mlx5_ib_stage_counters_init,
+                    mlx5_ib_stage_counters_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_UAR,
+                    mlx5_ib_stage_uar_init,
+                    mlx5_ib_stage_uar_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+                    mlx5_ib_stage_bfrag_init,
+                    mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+                    mlx5_ib_stage_ib_reg_init,
+                    mlx5_ib_stage_ib_reg_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+                    mlx5_ib_stage_umr_res_init,
+                    mlx5_ib_stage_umr_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+                    mlx5_ib_stage_class_attr_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+                    mlx5_ib_stage_rep_reg_init,
+                    mlx5_ib_stage_rep_reg_cleanup),
+};
+
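
The profile tables are the composition mechanism introduced by this series: __mlx5_ib_add() walks stage[0..MLX5_IB_STAGE_MAX) calling each non-NULL init in order, and __mlx5_ib_remove() unwinds from a given stage back to zero, so a failure mid-initialization tears down exactly the stages that ran. Reduced to its core (consistent with the loops above):

        for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
                if (profile->stage[i].init) {
                        err = profile->stage[i].init(dev);
                        if (err)
                                goto err_out;
                }
        }
        return dev;

err_out:
        __mlx5_ib_remove(dev, profile, i);      /* cleans stages i-1..0 */
        return NULL;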
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 {
        struct mlx5_ib_multiport_info *mpi;
@@ -5044,8 +5237,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
        enum rdma_link_layer ll;
+       struct mlx5_ib_dev *dev;
        int port_type_cap;
 
+       printk_once(KERN_INFO "%s", mlx5_version);
+
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
@@ -5055,7 +5251,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                return mlx5_ib_add_slave_port(mdev, port_num);
        }
 
-       return __mlx5_ib_add(mdev, &pf_profile);
+       dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+       if (!dev)
+               return NULL;
+
+       dev->mdev = mdev;
+       dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+                            MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+       if (MLX5_VPORT_MANAGER(mdev) &&
+           mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+               dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+               return __mlx5_ib_add(dev, &nic_rep_profile);
+       }
+
+       return __mlx5_ib_add(dev, &pf_profile);
 }
 
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
index 1393851..e0bad28 100644 (file)
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
        struct mlx5_ib_wq       *sq;
        struct mlx5_ib_ubuffer  ubuffer;
        struct mlx5_db          *doorbell;
+       struct mlx5_flow_handle *flow_rule;
        u32                     tisn;
        u8                      state;
 };
@@ -371,7 +372,7 @@ struct mlx5_ib_qp {
                struct mlx5_ib_rss_qp rss_qp;
                struct mlx5_ib_dct dct;
        };
-       struct mlx5_buf         buf;
+       struct mlx5_frag_buf    buf;
 
        struct mlx5_db          db;
        struct mlx5_ib_wq       rq;
@@ -413,7 +414,7 @@ struct mlx5_ib_qp {
 };
 
 struct mlx5_ib_cq_buf {
-       struct mlx5_buf         buf;
+       struct mlx5_frag_buf_ctrl fbc;
        struct ib_umem          *umem;
        int                     cqe_size;
        int                     nent;
@@ -495,7 +496,7 @@ struct mlx5_ib_wc {
 struct mlx5_ib_srq {
        struct ib_srq           ibsrq;
        struct mlx5_core_srq    msrq;
-       struct mlx5_buf         buf;
+       struct mlx5_frag_buf    buf;
        struct mlx5_db          db;
        u64                    *wrid;
        /* protect SRQ handling
@@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
 
 enum mlx5_ib_stages {
        MLX5_IB_STAGE_INIT,
+       MLX5_IB_STAGE_FLOW_DB,
        MLX5_IB_STAGE_CAPS,
+       MLX5_IB_STAGE_NON_DEFAULT_CB,
        MLX5_IB_STAGE_ROCE,
        MLX5_IB_STAGE_DEVICE_RESOURCES,
        MLX5_IB_STAGE_ODP,
@@ -743,6 +746,7 @@ enum mlx5_ib_stages {
        MLX5_IB_STAGE_UMR_RESOURCES,
        MLX5_IB_STAGE_DELAY_DROP,
        MLX5_IB_STAGE_CLASS_ATTR,
+       MLX5_IB_STAGE_REP_REG,
        MLX5_IB_STAGE_MAX,
 };
 
@@ -797,7 +801,7 @@ struct mlx5_ib_dev {
        struct srcu_struct      mr_srcu;
        u32                     null_mkey;
 #endif
-       struct mlx5_ib_flow_db  flow_db;
+       struct mlx5_ib_flow_db  *flow_db;
        /* protect resources needed as part of reset flow */
        spinlock_t              reset_flow_resource_lock;
        struct list_head        qp_list;
@@ -807,6 +811,7 @@ struct mlx5_ib_dev {
        struct mlx5_sq_bfreg    fp_bfreg;
        struct mlx5_ib_delay_drop       delay_drop;
        const struct mlx5_ib_profile    *profile;
+       struct mlx5_eswitch_rep         *rep;
 
        /* protect the user_td */
        struct mutex            lb_mutex;
@@ -1049,6 +1054,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+                     const struct mlx5_ib_profile *profile,
+                     int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+                   const struct mlx5_ib_profile *profile);
+
 int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
                          u8 port, struct ifla_vf_info *info);
 int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
index 556e015..a5fad3e 100644 (file)
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 {
-       if (!mlx5_debugfs_root)
+       if (!mlx5_debugfs_root || dev->rep)
                return;
 
        debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
        struct mlx5_cache_ent *ent;
        int i;
 
-       if (!mlx5_debugfs_root)
+       if (!mlx5_debugfs_root || dev->rep)
                return 0;
 
        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                           MLX5_IB_UMR_OCTOWORD;
                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+                   !dev->rep &&
                    mlx5_core_is_pf(dev->mdev))
                        ent->limit = dev->mdev->profile->mr_cache[i].limit;
                else
index 39d24bf..5663530 100644 (file)
@@ -36,6 +36,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 
 /* not supported currently */
 static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
        mlx5_core_destroy_tis(dev->mdev, sq->tisn);
 }
 
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                                      struct mlx5_ib_sq *sq)
+{
+       if (sq->flow_rule)
+               mlx5_del_flow_rules(sq->flow_rule);
+}
+
 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
                                   struct mlx5_ib_sq *sq, void *qpin,
                                   struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
        if (err)
                goto err_umem;
 
+       err = create_flow_rule_vport_sq(dev, sq);
+       if (err)
+               goto err_flow;
+
        return 0;
 
+err_flow:
+       mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
 err_umem:
        ib_umem_release(sq->ubuffer.umem);
        sq->ubuffer.umem = NULL;
@@ -1157,6 +1172,7 @@ err_umem:
 static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
                                     struct mlx5_ib_sq *sq)
 {
+       destroy_flow_rule_vport_sq(dev, sq);
        mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
        ib_umem_release(sq->ubuffer.umem);
 }
@@ -1263,6 +1279,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
        if (tunnel_offload_en)
                MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
 
+       if (dev->rep)
+               MLX5_SET(tirc, tirc, self_lb_block,
+                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
 
        kvfree(in);
@@ -1554,6 +1574,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
 
 create_tir:
+       if (dev->rep)
+               MLX5_SET(tirc, tirc, self_lb_block,
+                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
 
        if (err)
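
Both TIR-creation paths get the same tweak because, in switchdev mode, the uplink and all representors share one physical port: blocking unicast self-loopback in the TIR context stops a representor from receiving copies of frames it transmitted itself. For reference, the ifc enum presumably also defines a multicast variant, so blocking both would OR the flags (illustrative, not what this patch does):

        MLX5_SET(tirc, tirc, self_lb_block,
                 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ |
                 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_);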
index de31838..67de943 100644 (file)
@@ -174,14 +174,13 @@ void usnic_transport_put_socket(struct socket *sock)
 int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
                                        uint32_t *addr, uint16_t *port)
 {
-       int len;
        int err;
        struct sockaddr_in sock_addr;
 
        err = sock->ops->getname(sock,
                                (struct sockaddr *)&sock_addr,
-                               &len, 0);
-       if (err)
+                               0);
+       if (err < 0)
                return err;
 
        if (sock_addr.sin_family != AF_INET)
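
This hunk (and the mISDN data_sock_getname() one further down) follows a tree-wide change to the in-kernel socket API: sock->ops->getname() lost its int *sockaddr_len out-parameter and now returns the populated address length on success or a negative errno, hence the switch from if (err) to if (err < 0). The new shape, as a minimal sketch:

static int example_getname(struct socket *sock, struct sockaddr *uaddr,
                           int peer)
{
        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

        sin->sin_family      = AF_INET;
        sin->sin_port        = 0;
        sin->sin_addr.s_addr = htonl(INADDR_ANY);

        return sizeof(*sin);    /* length is now the return value */
}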
index faa9478..f95b976 100644 (file)
@@ -114,6 +114,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
        struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
+       struct pvrdma_create_cq_resp cq_resp = {0};
        struct pvrdma_create_cq ucmd;
 
        BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
@@ -197,6 +198,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
 
        cq->ibcq.cqe = resp->cqe;
        cq->cq_handle = resp->cq_handle;
+       cq_resp.cqn = resp->cq_handle;
        spin_lock_irqsave(&dev->cq_tbl_lock, flags);
        dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
        spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
@@ -205,7 +207,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
                cq->uar = &(to_vucontext(context)->uar);
 
                /* Copy udata back. */
-               if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) {
+               if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
                        dev_warn(&dev->pdev->dev,
                                 "failed to copy back udata\n");
                        pvrdma_destroy_cq(&cq->ibcq);
index 5acebb1..af23596 100644 (file)
@@ -113,6 +113,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_srq *cmd = &req.create_srq;
        struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp;
+       struct pvrdma_create_srq_resp srq_resp = {0};
        struct pvrdma_create_srq ucmd;
        unsigned long flags;
        int ret;
@@ -204,12 +205,13 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
        }
 
        srq->srq_handle = resp->srqn;
+       srq_resp.srqn = resp->srqn;
        spin_lock_irqsave(&dev->srq_tbl_lock, flags);
        dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq;
        spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
 
        /* Copy udata back. */
-       if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) {
+       if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) {
                dev_warn(&dev->pdev->dev, "failed to copy back udata\n");
                pvrdma_destroy_srq(&srq->ibsrq);
                return ERR_PTR(-EINVAL);
index 16b9661..a51463c 100644 (file)
@@ -447,6 +447,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
        struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
+       struct pvrdma_alloc_pd_resp pd_resp = {0};
        int ret;
        void *ptr;
 
@@ -475,9 +476,10 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
        pd->privileged = !context;
        pd->pd_handle = resp->pd_handle;
        pd->pdn = resp->pd_handle;
+       pd_resp.pdn = resp->pd_handle;
 
        if (context) {
-               if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+               if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) {
                        dev_warn(&dev->pdev->dev,
                                 "failed to copy back protection domain\n");
                        pvrdma_dealloc_pd(&pd->ibpd);
index 11f74cb..ea302b0 100644 (file)
@@ -281,8 +281,6 @@ void ipoib_delete_debug_files(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
-       WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
-       WARN_ONCE(!priv->path_dentry, "null path debug file\n");
        debugfs_remove(priv->mcg_dentry);
        debugfs_remove(priv->path_dentry);
        priv->mcg_dentry = priv->path_dentry = NULL;
index 55cfb98..faf734f 100644 (file)
@@ -339,9 +339,6 @@ int __init bcm7038_l1_of_init(struct device_node *dn,
                goto out_unmap;
        }
 
-       pr_info("registered BCM7038 L1 intc (mem: 0x%p, IRQs: %d)\n",
-               intc->cpus[0]->map_base, IRQS_PER_WORD * intc->n_words);
-
        return 0;
 
 out_unmap:
index 983640e..8968e5e 100644 (file)
@@ -318,9 +318,6 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
                }
        }
 
-       pr_info("registered %s intc (mem: 0x%p, parent IRQ(s): %d)\n",
-                       intc_name, data->map_base[0], data->num_parent_irqs);
-
        return 0;
 
 out_free_domain:
index 691d20e..0e65f60 100644 (file)
@@ -262,9 +262,6 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
                ct->chip.irq_set_wake = irq_gc_set_wake;
        }
 
-       pr_info("registered L2 intc (mem: 0x%p, parent irq: %d)\n",
-                       base, parent_irq);
-
        return 0;
 
 out_free_domain:
index 993a842..1ff38af 100644 (file)
@@ -94,7 +94,7 @@ static struct irq_chip gicv2m_msi_irq_chip = {
 
 static struct msi_domain_info gicv2m_msi_domain_info = {
        .flags  = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
-                  MSI_FLAG_PCI_MSIX),
+                  MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
        .chip   = &gicv2m_msi_irq_chip,
 };
 
@@ -155,18 +155,12 @@ static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain,
        return 0;
 }
 
-static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq)
+static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq,
+                              int nr_irqs)
 {
-       int pos;
-
-       pos = hwirq - v2m->spi_start;
-       if (pos < 0 || pos >= v2m->nr_spis) {
-               pr_err("Failed to teardown msi. Invalid hwirq %d\n", hwirq);
-               return;
-       }
-
        spin_lock(&v2m_lock);
-       __clear_bit(pos, v2m->bm);
+       bitmap_release_region(v2m->bm, hwirq - v2m->spi_start,
+                             get_count_order(nr_irqs));
        spin_unlock(&v2m_lock);
 }
 
@@ -174,13 +168,13 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
                                   unsigned int nr_irqs, void *args)
 {
        struct v2m_data *v2m = NULL, *tmp;
-       int hwirq, offset, err = 0;
+       int hwirq, offset, i, err = 0;
 
        spin_lock(&v2m_lock);
        list_for_each_entry(tmp, &v2m_nodes, entry) {
-               offset = find_first_zero_bit(tmp->bm, tmp->nr_spis);
-               if (offset < tmp->nr_spis) {
-                       __set_bit(offset, tmp->bm);
+               offset = bitmap_find_free_region(tmp->bm, tmp->nr_spis,
+                                                get_count_order(nr_irqs));
+               if (offset >= 0) {
                        v2m = tmp;
                        break;
                }
@@ -192,16 +186,21 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 
        hwirq = v2m->spi_start + offset;
 
-       err = gicv2m_irq_gic_domain_alloc(domain, virq, hwirq);
-       if (err) {
-               gicv2m_unalloc_msi(v2m, hwirq);
-               return err;
-       }
+       for (i = 0; i < nr_irqs; i++) {
+               err = gicv2m_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
+               if (err)
+                       goto fail;
 
-       irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-                                     &gicv2m_irq_chip, v2m);
+               irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+                                             &gicv2m_irq_chip, v2m);
+       }
 
        return 0;
+
+fail:
+       irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+       gicv2m_unalloc_msi(v2m, hwirq, nr_irqs);
+       return err;
 }
 
 static void gicv2m_irq_domain_free(struct irq_domain *domain,
@@ -210,8 +209,7 @@ static void gicv2m_irq_domain_free(struct irq_domain *domain,
        struct irq_data *d = irq_domain_get_irq_data(domain, virq);
        struct v2m_data *v2m = irq_data_get_irq_chip_data(d);
 
-       BUG_ON(nr_irqs != 1);
-       gicv2m_unalloc_msi(v2m, d->hwirq);
+       gicv2m_unalloc_msi(v2m, d->hwirq, nr_irqs);
        irq_domain_free_irqs_parent(domain, virq, nr_irqs);
 }
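
The bitmap_find_free_region()/bitmap_release_region() pair is what makes Multi-MSI possible here: PCI Multi-MSI gives a device one base vector plus a power-of-two count, so the allocator must hand back a naturally aligned 2^order block rather than scattered bits. The order arithmetic, in isolation (standalone sketch):

#include <linux/bitmap.h>
#include <linux/bitops.h>       /* get_count_order() */

static int example_alloc(unsigned long *bm, int nr_spis, int nr_irqs)
{
        /* e.g. nr_irqs == 3: get_count_order(3) == 2, so a 4-bit,
         * 4-aligned region is reserved; the return is its first bit,
         * or negative if no such region is free.
         */
        return bitmap_find_free_region(bm, nr_spis,
                                       get_count_order(nr_irqs));
}

static void example_free(unsigned long *bm, int offset, int nr_irqs)
{
        bitmap_release_region(bm, offset, get_count_order(nr_irqs));
}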
 
index 14a8c0a..25a98de 100644 (file)
@@ -132,6 +132,8 @@ static int __init its_pci_of_msi_init(void)
 
        for (np = of_find_matching_node(NULL, its_device_id); np;
             np = of_find_matching_node(np, its_device_id)) {
+               if (!of_device_is_available(np))
+                       continue;
                if (!of_property_read_bool(np, "msi-controller"))
                        continue;
 
index 833a90f..8881a05 100644 (file)
@@ -154,6 +154,8 @@ static void __init its_pmsi_of_init(void)
 
        for (np = of_find_matching_node(NULL, its_device_id); np;
             np = of_find_matching_node(np, its_device_id)) {
+               if (!of_device_is_available(np))
+                       continue;
                if (!of_property_read_bool(np, "msi-controller"))
                        continue;
 
index 06f025f..1d3056f 100644 (file)
@@ -3314,6 +3314,8 @@ static int __init its_of_probe(struct device_node *node)
 
        for (np = of_find_matching_node(node, its_device_id); np;
             np = of_find_matching_node(np, its_device_id)) {
+               if (!of_device_is_available(np))
+                       continue;
                if (!of_property_read_bool(np, "msi-controller")) {
                        pr_warn("%pOF: no msi-controller property, ITS ignored\n",
                                np);
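
Three ITS-related node walks gain the same guard in this series. of_device_is_available() is the canonical test of the devicetree status property: it returns true when status is "okay"/"ok" or absent, so nodes that firmware marked status = "disabled" are skipped before any ITS/MSI setup is attempted. The resulting pattern, in isolation:

#include <linux/of.h>

static void example_walk(const struct of_device_id *its_device_id)
{
        struct device_node *np;

        for_each_matching_node(np, its_device_id) {
                if (!of_device_is_available(np))
                        continue;       /* honour status = "disabled" */
                /* ... probe np here ... */
        }
}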
index a57c0fb..d99cc07 100644 (file)
@@ -673,7 +673,7 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
               MPIDR_TO_SGI_RS(cluster_id)              |
               tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
 
-       pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
+       pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
        gic_write_sgi1r(val);
 }
 
@@ -688,7 +688,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
         * Ensure that stores to Normal memory are visible to the
         * other CPUs before issuing the IPI.
         */
-       smp_wmb();
+       wmb();
 
        for_each_cpu(cpu, mask) {
                u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
index ef92a4d..d32268c 100644 (file)
@@ -424,8 +424,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
        spin_lock_irqsave(&gic_lock, flags);
        write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
        write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
-       gic_clear_pcpu_masks(intr);
-       set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
        irq_data_update_effective_affinity(data, cpumask_of(cpu));
        spin_unlock_irqrestore(&gic_lock, flags);
 
index c5603d1..1f8f489 100644 (file)
@@ -560,7 +560,7 @@ done:
 
 static int
 data_sock_getname(struct socket *sock, struct sockaddr *addr,
-                 int *addr_len, int peer)
+                 int peer)
 {
        struct sockaddr_mISDN   *maddr = (struct sockaddr_mISDN *) addr;
        struct sock             *sk = sock->sk;
@@ -570,14 +570,13 @@ data_sock_getname(struct socket *sock, struct sockaddr *addr,
 
        lock_sock(sk);
 
-       *addr_len = sizeof(*maddr);
        maddr->family = AF_ISDN;
        maddr->dev = _pms(sk)->dev->id;
        maddr->channel = _pms(sk)->ch.nr;
        maddr->sapi = _pms(sk)->ch.addr & 0xff;
        maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff;
        release_sock(sk);
-       return 0;
+       return sizeof(*maddr);
 }
 
 static const struct proto_ops data_sock_ops = {
index 62f541f..0707482 100644 (file)
@@ -375,6 +375,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
        dev->ofdev.dev.of_node = np;
        dev->ofdev.archdata.dma_mask = 0xffffffffUL;
        dev->ofdev.dev.dma_mask = &dev->ofdev.archdata.dma_mask;
+       dev->ofdev.dev.coherent_dma_mask = dev->ofdev.archdata.dma_mask;
        dev->ofdev.dev.parent = parent;
        dev->ofdev.dev.bus = &macio_bus_type;
        dev->ofdev.dev.release = macio_release_dev;
index d6de00f..6813680 100644 (file)
@@ -903,7 +903,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
                        queue_io(md, bio);
                } else {
                        /* done with normal IO or empty flush */
-                       bio->bi_status = io_error;
+                       if (io_error)
+                               bio->bi_status = io_error;
                        bio_endio(bio);
                }
        }
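
BLK_STS_OK is numerically zero, which is what makes the new guard above a correctness fix rather than cosmetics: a clone's completion may already have recorded an error in bio->bi_status, and an unconditional store of io_error could overwrite that error with success. Spelled out (illustrative):

        /* Suppose an error was recorded earlier: */
        bio->bi_status = BLK_STS_IOERR;

        /* ...and this completion computed no new error: */
        io_error = BLK_STS_OK;                  /* == 0 */

        bio->bi_status = io_error;      /* unguarded: the error is lost */

        if (io_error)                   /* guarded: assignment skipped, */
                bio->bi_status = io_error;      /* the error survives */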
index 8d12017..4470630 100644 (file)
@@ -2687,6 +2687,8 @@ mptctl_hp_targetinfo(unsigned long arg)
                                __FILE__, __LINE__, iocnum);
                return -ENODEV;
        }
+       if (karg.hdr.id >= MPT_MAX_FC_DEVICES)
+               return -EINVAL;
        dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n",
            ioc->name));
 
index 3e5eabd..772d029 100644 (file)
@@ -548,12 +548,6 @@ int mei_cldev_disable(struct mei_cl_device *cldev)
                goto out;
        }
 
-       if (bus->dev_state == MEI_DEV_POWER_DOWN) {
-               dev_dbg(bus->dev, "Device is powering down, don't bother with disconnection\n");
-               err = 0;
-               goto out;
-       }
-
        err = mei_cl_disconnect(cl);
        if (err < 0)
                dev_err(bus->dev, "Could not disconnect from the ME client\n");
index be64969..7e60c18 100644 (file)
@@ -945,6 +945,12 @@ int mei_cl_disconnect(struct mei_cl *cl)
                return 0;
        }
 
+       if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+               cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n");
+               mei_cl_set_disconnected(cl);
+               return 0;
+       }
+
        rets = pm_runtime_get(dev->dev);
        if (rets < 0 && rets != -EINPROGRESS) {
                pm_runtime_put_noidle(dev->dev);
index 0ccccba..e4b10b2 100644 (file)
 #define MEI_DEV_ID_KBP        0xA2BA  /* Kaby Point */
 #define MEI_DEV_ID_KBP_2      0xA2BB  /* Kaby Point 2 */
 
+#define MEI_DEV_ID_CNP_LP     0x9DE0  /* Cannon Point LP */
+#define MEI_DEV_ID_CNP_LP_4   0x9DE4  /* Cannon Point LP 4 (iTouch) */
+#define MEI_DEV_ID_CNP_H      0xA360  /* Cannon Point H */
+#define MEI_DEV_ID_CNP_H_4    0xA364  /* Cannon Point H 4 (iTouch) */
+
 /*
  * MEI HW Section
  */
index 4a0ccda..ea4e152 100644 (file)
@@ -98,6 +98,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
        {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)},
 
+       {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
+
        /* required last entry */
        {0, }
 };
index d9aa407..2dd2db9 100644 (file)
@@ -277,7 +277,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
        struct ocxl_context *ctx = file->private_data;
        struct ocxl_kernel_event_header header;
        ssize_t rc;
-       size_t used = 0;
+       ssize_t used = 0;
        DEFINE_WAIT(event_wait);
 
        memset(&header, 0, sizeof(header));
index 229dc18..768972a 100644 (file)
@@ -1265,7 +1265,8 @@ static int bcm2835_add_host(struct bcm2835_host *host)
        char pio_limit_string[20];
        int ret;
 
-       mmc->f_max = host->max_clk;
+       if (!mmc->f_max || mmc->f_max > host->max_clk)
+               mmc->f_max = host->max_clk;
        mmc->f_min = host->max_clk / SDCDIV_MAX_CDIV;
 
        mmc->max_busy_timeout = ~0 / (mmc->f_max / 1000);
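
Previously the driver overwrote mmc->f_max unconditionally, discarding any cap the MMC core had already taken from a devicetree max-frequency property. The guarded form keeps a smaller DT-provided limit while still clamping to what the controller clock can deliver; arguably it is the open-coded version of the kernel helper (illustrative equivalent, treating 0 as "no limit set"):

        mmc->f_max = min_not_zero(mmc->f_max, host->max_clk);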
index 22438eb..4f972b8 100644 (file)
@@ -717,22 +717,6 @@ static int meson_mmc_clk_phase_tuning(struct mmc_host *mmc, u32 opcode,
 static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
        struct meson_host *host = mmc_priv(mmc);
-       int ret;
-
-       /*
-        * If this is the initial tuning, try to get a sane Rx starting
-        * phase before doing the actual tuning.
-        */
-       if (!mmc->doing_retune) {
-               ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
-
-               if (ret)
-                       return ret;
-       }
-
-       ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->tx_clk);
-       if (ret)
-               return ret;
 
        return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
 }
@@ -763,9 +747,8 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                if (!IS_ERR(mmc->supply.vmmc))
                        mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
 
-               /* Reset phases */
+               /* Reset rx phase */
                clk_set_phase(host->rx_clk, 0);
-               clk_set_phase(host->tx_clk, 270);
 
                break;
 
index e6b8c59..736ac88 100644 (file)
@@ -328,7 +328,7 @@ config MTD_NAND_MARVELL
        tristate "NAND controller support on Marvell boards"
        depends on PXA3xx || ARCH_MMP || PLAT_ORION || ARCH_MVEBU || \
                   COMPILE_TEST
-       depends on HAS_IOMEM
+       depends on HAS_IOMEM && HAS_DMA
        help
          This enables the NAND flash controller driver for Marvell boards,
          including:
index 80d31a5..f367144 100644 (file)
@@ -752,10 +752,8 @@ static int vf610_nfc_probe(struct platform_device *pdev)
                if (mtd->oobsize > 64)
                        mtd->oobsize = 64;
 
-               /*
-                * mtd->ecclayout is not specified here because we're using the
-                * default large page ECC layout defined in NAND core.
-                */
+               /* Use default large page ECC layout defined in NAND core */
+               mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
                if (chip->ecc.strength == 32) {
                        nfc->ecc_mode = ECC_60_BYTE;
                        chip->ecc.bytes = 60;
index 944ec3c..08b8521 100644 (file)
@@ -149,9 +149,9 @@ config MACVTAP
 config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
-    depends on IPV6
+    depends on IPV6 || !IPV6
     depends on NETFILTER
-    depends on NET_L3_MASTER_DEV
+    select NET_L3_MASTER_DEV
     ---help---
       This allows one to create virtual devices off of a main interface
       and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
index 11fe712..3afda65 100644 (file)
@@ -113,12 +113,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
 #endif
 #ifdef CONFIG_MVME147_NET      /* MVME147 internal Ethernet */
        {mvme147lance_probe, 0},
-#endif
-#ifdef CONFIG_MAC8390           /* NuBus NS8390-based cards */
-       {mac8390_probe, 0},
-#endif
-#ifdef CONFIG_MAC89x0
-       {mac89x0_probe, 0},
 #endif
        {NULL, 0},
 };
index c669554..4c19d23 100644 (file)
@@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = {
        .exit = bond_net_exit,
        .id   = &bond_net_id,
        .size = sizeof(struct bond_net),
+       .async = true,
 };
 
 static int __init bonding_init(void)
index db830a1..cd16067 100644 (file)
@@ -852,7 +852,7 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
 }
 EXPORT_SYMBOL(b53_get_ethtool_stats);
 
-int b53_get_sset_count(struct dsa_switch *ds)
+int b53_get_sset_count(struct dsa_switch *ds, int port)
 {
        struct b53_device *dev = ds->priv;
 
index d954cf3..1187ebd 100644 (file)
@@ -288,7 +288,7 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
 int b53_configure_vlan(struct dsa_switch *ds);
 void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
 void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds);
+int b53_get_sset_count(struct dsa_switch *ds, int port);
 int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
index 7aa84ee..f77be9f 100644 (file)
@@ -86,7 +86,7 @@ static int dsa_loop_setup(struct dsa_switch *ds)
        return 0;
 }
 
-static int dsa_loop_get_sset_count(struct dsa_switch *ds)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
 {
        return __DSA_LOOP_CNT_MAX;
 }
index 6171c08..fefa454 100644 (file)
@@ -1007,7 +1007,7 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
        }
 }
 
-static int lan9303_get_sset_count(struct dsa_switch *ds)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
 {
        return ARRAY_SIZE(lan9303_mib);
 }
index 663b0d5..bcb3e6c 100644 (file)
@@ -439,7 +439,7 @@ static void ksz_disable_port(struct dsa_switch *ds, int port,
        ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
 }
 
-static int ksz_sset_count(struct dsa_switch *ds)
+static int ksz_sset_count(struct dsa_switch *ds, int port)
 {
        return TOTAL_SWITCH_COUNTER_NUM;
 }
index 8a0bb00..511ca13 100644 (file)
@@ -604,7 +604,7 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-mt7530_get_sset_count(struct dsa_switch *ds)
+mt7530_get_sset_count(struct dsa_switch *ds, int port)
 {
        return ARRAY_SIZE(mt7530_mib);
 }
index 1aaa7a9..ae9e7f7 100644 (file)
@@ -18,3 +18,13 @@ config NET_DSA_MV88E6XXX_GLOBAL2
 
          It is required on most chips. If the chip you compile the support for
          doesn't have such registers set, say N here. In doubt, say Y.
+
+config NET_DSA_MV88E6XXX_PTP
+       bool "PTP support for Marvell 88E6xxx"
+       default n
+       depends on NET_DSA_MV88E6XXX_GLOBAL2
+       imply NETWORK_PHY_TIMESTAMPING
+       imply PTP_1588_CLOCK
+       help
+         Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
+         chips that support it.
index 58a4a00..50de304 100644 (file)
@@ -5,6 +5,10 @@ mv88e6xxx-objs += global1.o
 mv88e6xxx-objs += global1_atu.o
 mv88e6xxx-objs += global1_vtu.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o
 mv88e6xxx-objs += phy.o
 mv88e6xxx-objs += port.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
 mv88e6xxx-objs += serdes.o
index eb328ba..cfd5363 100644 (file)
 #include "chip.h"
 #include "global1.h"
 #include "global2.h"
+#include "hwtstamp.h"
 #include "phy.h"
 #include "port.h"
+#include "ptp.h"
 #include "serdes.h"
 
 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
@@ -251,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
        chip->g1_irq.masked &= ~(1 << n);
 }
 
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
 {
-       struct mv88e6xxx_chip *chip = dev_id;
        unsigned int nhandled = 0;
        unsigned int sub_irq;
        unsigned int n;
@@ -278,6 +279,13 @@ out:
        return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
 }
 
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+       struct mv88e6xxx_chip *chip = dev_id;
+
+       return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
 static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
 {
        struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -333,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
        .xlate  = irq_domain_xlate_twocell,
 };
 
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
 {
        int irq, virq;
        u16 mask;
@@ -342,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
        mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
        mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
 
-       free_irq(chip->irq, chip);
-
        for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
                virq = irq_find_mapping(chip->g1_irq.domain, irq);
                irq_dispose_mapping(virq);
@@ -352,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
        irq_domain_remove(chip->g1_irq.domain);
 }
 
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+       mv88e6xxx_g1_irq_free_common(chip);
+
+       free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
 {
        int err, irq, virq;
        u16 reg, mask;
@@ -385,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
        if (err)
                goto out_disable;
 
-       err = request_threaded_irq(chip->irq, NULL,
-                                  mv88e6xxx_g1_irq_thread_fn,
-                                  IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
-                                  dev_name(chip->dev), chip);
-       if (err)
-               goto out_disable;
-
        return 0;
 
 out_disable:
@@ -409,6 +415,62 @@ out_mapping:
        return err;
 }
 
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       err = mv88e6xxx_g1_irq_setup_common(chip);
+       if (err)
+               return err;
+
+       err = request_threaded_irq(chip->irq, NULL,
+                                  mv88e6xxx_g1_irq_thread_fn,
+                                  IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+                                  dev_name(chip->dev), chip);
+       if (err)
+               mv88e6xxx_g1_irq_free_common(chip);
+
+       return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+       struct mv88e6xxx_chip *chip = container_of(work,
+                                                  struct mv88e6xxx_chip,
+                                                  irq_poll_work.work);
+       mv88e6xxx_g1_irq_thread_work(chip);
+
+       kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+                                  msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       err = mv88e6xxx_g1_irq_setup_common(chip);
+       if (err)
+               return err;
+
+       kthread_init_delayed_work(&chip->irq_poll_work,
+                                 mv88e6xxx_irq_poll);
+
+       chip->kworker = kthread_create_worker(0, dev_name(chip->dev));
+       if (IS_ERR(chip->kworker))
+               return PTR_ERR(chip->kworker);
+
+       kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+                                  msecs_to_jiffies(100));
+
+       return 0;
+}
+
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+       kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+       kthread_destroy_worker(chip->kworker);
+}
+
 int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
 {
        int i;
@@ -604,7 +666,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
                        return UINT64_MAX;
 
                low = reg;
-               if (s->sizeof_stat == 4) {
+               if (s->size == 4) {
                        err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
                        if (err)
                                return UINT64_MAX;
@@ -617,7 +679,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
        case STATS_TYPE_BANK0:
                reg |= s->reg | histogram;
                mv88e6xxx_g1_stats_read(chip, reg, &low);
-               if (s->sizeof_stat == 8)
+               if (s->size == 8)
                        mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
                break;
        default:
@@ -627,8 +689,8 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
        return value;
 }
 
-static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
-                                       uint8_t *data, int types)
+static int mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                      uint8_t *data, int types)
 {
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
@@ -641,29 +703,41 @@ static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
                        j++;
                }
        }
+
+       return j;
 }
 
-static void mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
-                                       uint8_t *data)
+static int mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                      uint8_t *data)
 {
-       mv88e6xxx_stats_get_strings(chip, data,
-                                   STATS_TYPE_BANK0 | STATS_TYPE_PORT);
+       return mv88e6xxx_stats_get_strings(chip, data,
+                                          STATS_TYPE_BANK0 | STATS_TYPE_PORT);
 }
 
-static void mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
-                                       uint8_t *data)
+static int mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                      uint8_t *data)
 {
-       mv88e6xxx_stats_get_strings(chip, data,
-                                   STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
+       return mv88e6xxx_stats_get_strings(chip, data,
+                                          STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
 }
 
 static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
                                  uint8_t *data)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
+       int count = 0;
+
+       mutex_lock(&chip->reg_lock);
 
        if (chip->info->ops->stats_get_strings)
-               chip->info->ops->stats_get_strings(chip, data);
+               count = chip->info->ops->stats_get_strings(chip, data);
+
+       if (chip->info->ops->serdes_get_strings) {
+               data += count * ETH_GSTRING_LEN;
+               chip->info->ops->serdes_get_strings(chip, port, data);
+       }
+
+       mutex_unlock(&chip->reg_lock);
 }
 
 static int mv88e6xxx_stats_get_sset_count(struct mv88e6xxx_chip *chip,
@@ -692,19 +766,34 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
                                              STATS_TYPE_BANK1);
 }
 
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
+       int serdes_count = 0;
+       int count = 0;
 
+       mutex_lock(&chip->reg_lock);
        if (chip->info->ops->stats_get_sset_count)
-               return chip->info->ops->stats_get_sset_count(chip);
+               count = chip->info->ops->stats_get_sset_count(chip);
+       if (count < 0)
+               goto out;
 
-       return 0;
+       if (chip->info->ops->serdes_get_sset_count)
+               serdes_count = chip->info->ops->serdes_get_sset_count(chip,
+                                                                     port);
+       if (serdes_count < 0)
+               count = serdes_count;
+       else
+               count += serdes_count;
+out:
+       mutex_unlock(&chip->reg_lock);
+
+       return count;
 }
 
-static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data, int types,
-                                     u16 bank1_select, u16 histogram)
+static int mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data, int types,
+                                    u16 bank1_select, u16 histogram)
 {
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
@@ -712,24 +801,28 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
        for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
                stat = &mv88e6xxx_hw_stats[i];
                if (stat->type & types) {
+                       mutex_lock(&chip->reg_lock);
                        data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
                                                              bank1_select,
                                                              histogram);
+                       mutex_unlock(&chip->reg_lock);
+
                        j++;
                }
        }
+       return j;
 }
 
-static void mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data)
+static int mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data)
 {
        return mv88e6xxx_stats_get_stats(chip, port, data,
                                         STATS_TYPE_BANK0 | STATS_TYPE_PORT,
                                         0, MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
 }
 
-static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data)
+static int mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data)
 {
        return mv88e6xxx_stats_get_stats(chip, port, data,
                                         STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -737,8 +830,8 @@ static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
                                         MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
 }
 
-static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data)
+static int mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data)
 {
        return mv88e6xxx_stats_get_stats(chip, port, data,
                                         STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -749,8 +842,15 @@ static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
 static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
                                uint64_t *data)
 {
+       int count = 0;
+
        if (chip->info->ops->stats_get_stats)
-               chip->info->ops->stats_get_stats(chip, port, data);
+               count = chip->info->ops->stats_get_stats(chip, port, data);
+
+       if (chip->info->ops->serdes_get_stats) {
+               data += count;
+               chip->info->ops->serdes_get_stats(chip, port, data);
+       }
 }
 
 static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
@@ -762,14 +862,13 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
        mutex_lock(&chip->reg_lock);
 
        ret = mv88e6xxx_stats_snapshot(chip, port);
-       if (ret < 0) {
-               mutex_unlock(&chip->reg_lock);
+       mutex_unlock(&chip->reg_lock);
+
+       if (ret < 0)
                return;
-       }
 
        mv88e6xxx_get_stats(chip, port, data);
 
-       mutex_unlock(&chip->reg_lock);
 }
 
 static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)
@@ -1433,7 +1532,9 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
        eth_broadcast_addr(addr.mac);
 
        do {
+               mutex_lock(&chip->reg_lock);
                err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr);
+               mutex_unlock(&chip->reg_lock);
                if (err)
                        return err;
 
@@ -1466,7 +1567,10 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
        int err;
 
        /* Dump port's default Filtering Information Database (VLAN ID 0) */
+       mutex_lock(&chip->reg_lock);
        err = mv88e6xxx_port_get_fid(chip, port, &fid);
+       mutex_unlock(&chip->reg_lock);
+
        if (err)
                return err;
 
@@ -1476,7 +1580,9 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 
        /* Dump VLANs' Filtering Information Databases */
        do {
+               mutex_lock(&chip->reg_lock);
                err = mv88e6xxx_vtu_getnext(chip, &vlan);
+               mutex_unlock(&chip->reg_lock);
                if (err)
                        return err;
 
@@ -1496,13 +1602,8 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
                                   dsa_fdb_dump_cb_t *cb, void *data)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       int err;
 
-       mutex_lock(&chip->reg_lock);
-       err = mv88e6xxx_port_db_dump(chip, port, cb, data);
-       mutex_unlock(&chip->reg_lock);
-
-       return err;
+       return mv88e6xxx_port_db_dump(chip, port, cb, data);
 }
 
 static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,
@@ -2092,6 +2193,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
        if (err)
                goto unlock;
 
+       /* Setup PTP Hardware Clock and timestamping */
+       if (chip->info->ptp_support) {
+               err = mv88e6xxx_ptp_setup(chip);
+               if (err)
+                       goto unlock;
+
+               err = mv88e6xxx_hwtstamp_setup(chip);
+               if (err)
+                       goto unlock;
+       }
+
 unlock:
        mutex_unlock(&chip->reg_lock);
 
@@ -2148,6 +2260,15 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
        struct mii_bus *bus;
        int err;
 
+       if (external) {
+               mutex_lock(&chip->reg_lock);
+               err = mv88e6xxx_g2_scratch_gpio_set_smi(chip, true);
+               mutex_unlock(&chip->reg_lock);
+
+               if (err)
+                       return err;
+       }
+
        bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
        if (!bus)
                return -ENOMEM;
@@ -2472,6 +2593,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+       .gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -2602,6 +2724,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -2673,6 +2796,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -2736,6 +2860,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -2771,6 +2896,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -2843,6 +2969,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -2879,6 +3007,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -2913,6 +3043,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6185_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -2945,6 +3077,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6185_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -2981,6 +3115,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -3049,6 +3185,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+       .avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -3086,6 +3223,11 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6352_avb_ops,
+       .serdes_get_sset_count = mv88e6352_serdes_get_sset_count,
+       .serdes_get_strings = mv88e6352_serdes_get_strings,
+       .serdes_get_stats = mv88e6352_serdes_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -3124,6 +3266,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -3162,6 +3306,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
+       .gpio_ops = &mv88e6352_gpio_ops,
+       .avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -3267,6 +3413,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6341",
                .num_databases = 4096,
                .num_ports = 6,
+               .num_gpio = 11,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3346,6 +3493,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6172",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3386,6 +3534,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6176",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3424,6 +3573,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6190",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
@@ -3444,6 +3594,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6190X",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
@@ -3475,6 +3626,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
+               .ptp_support = true,
                .ops = &mv88e6191_ops,
        },
 
@@ -3484,6 +3636,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6240",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3495,6 +3648,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
+               .ptp_support = true,
                .ops = &mv88e6240_ops,
        },
 
@@ -3504,6 +3658,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6290",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
@@ -3515,6 +3670,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
+               .ptp_support = true,
                .ops = &mv88e6290_ops,
        },
 
@@ -3524,6 +3680,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6320",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3534,6 +3691,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
+               .ptp_support = true,
                .ops = &mv88e6320_ops,
        },
 
@@ -3543,6 +3701,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6321",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3552,6 +3711,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .atu_move_port_mask = 0xf,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
+               .ptp_support = true,
                .ops = &mv88e6321_ops,
        },
 
@@ -3561,6 +3721,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6341",
                .num_databases = 4096,
                .num_ports = 6,
+               .num_gpio = 11,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3571,6 +3732,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
+               .ptp_support = true,
                .ops = &mv88e6341_ops,
        },
 
@@ -3620,6 +3782,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6352",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3631,6 +3794,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
+               .ptp_support = true,
                .ops = &mv88e6352_ops,
        },
        [MV88E6390] = {
@@ -3639,6 +3803,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6390",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
@@ -3650,6 +3815,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
+               .ptp_support = true,
                .ops = &mv88e6390_ops,
        },
        [MV88E6390X] = {
@@ -3658,6 +3824,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6390X",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
@@ -3669,6 +3836,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
+               .ptp_support = true,
                .ops = &mv88e6390x_ops,
        },
 };
@@ -3880,6 +4048,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_mdb_del           = mv88e6xxx_port_mdb_del,
        .crosschip_bridge_join  = mv88e6xxx_crosschip_bridge_join,
        .crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
+       .port_hwtstamp_set      = mv88e6xxx_port_hwtstamp_set,
+       .port_hwtstamp_get      = mv88e6xxx_port_hwtstamp_get,
+       .port_txtstamp          = mv88e6xxx_port_txtstamp,
+       .port_rxtstamp          = mv88e6xxx_port_rxtstamp,
+       .get_ts_info            = mv88e6xxx_get_ts_info,
 };
 
 static struct dsa_switch_driver mv88e6xxx_switch_drv = {
@@ -3959,33 +4132,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
                goto out;
        }
 
-       if (chip->irq > 0) {
-               /* Has to be performed before the MDIO bus is created,
-                * because the PHYs will link there interrupts to these
-                * interrupt controllers
-                */
-               mutex_lock(&chip->reg_lock);
+       /* Has to be performed before the MDIO bus is created, because
+        * the PHYs will link their interrupts to these interrupt
+        * controllers
+        */
+       mutex_lock(&chip->reg_lock);
+       if (chip->irq > 0)
                err = mv88e6xxx_g1_irq_setup(chip);
-               mutex_unlock(&chip->reg_lock);
-
-               if (err)
-                       goto out;
-
-               if (chip->info->g2_irqs > 0) {
-                       err = mv88e6xxx_g2_irq_setup(chip);
-                       if (err)
-                               goto out_g1_irq;
-               }
+       else
+               err = mv88e6xxx_irq_poll_setup(chip);
+       mutex_unlock(&chip->reg_lock);
 
-               err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
-               if (err)
-                       goto out_g2_irq;
+       if (err)
+               goto out;
 
-               err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+       if (chip->info->g2_irqs > 0) {
+               err = mv88e6xxx_g2_irq_setup(chip);
                if (err)
-                       goto out_g1_atu_prob_irq;
+                       goto out_g1_irq;
        }
 
+       err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+       if (err)
+               goto out_g2_irq;
+
+       err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+       if (err)
+               goto out_g1_atu_prob_irq;
+
        err = mv88e6xxx_mdios_register(chip, np);
        if (err)
                goto out_g1_vtu_prob_irq;
@@ -3999,20 +4173,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 out_mdio:
        mv88e6xxx_mdios_unregister(chip);
 out_g1_vtu_prob_irq:
-       if (chip->irq > 0)
-               mv88e6xxx_g1_vtu_prob_irq_free(chip);
+       mv88e6xxx_g1_vtu_prob_irq_free(chip);
 out_g1_atu_prob_irq:
-       if (chip->irq > 0)
-               mv88e6xxx_g1_atu_prob_irq_free(chip);
+       mv88e6xxx_g1_atu_prob_irq_free(chip);
 out_g2_irq:
-       if (chip->info->g2_irqs > 0 && chip->irq > 0)
+       if (chip->info->g2_irqs > 0)
                mv88e6xxx_g2_irq_free(chip);
 out_g1_irq:
-       if (chip->irq > 0) {
-               mutex_lock(&chip->reg_lock);
+       mutex_lock(&chip->reg_lock);
+       if (chip->irq > 0)
                mv88e6xxx_g1_irq_free(chip);
-               mutex_unlock(&chip->reg_lock);
-       }
+       else
+               mv88e6xxx_irq_poll_free(chip);
+       mutex_unlock(&chip->reg_lock);
 out:
        return err;
 }
@@ -4022,6 +4195,11 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
        struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
        struct mv88e6xxx_chip *chip = ds->priv;
 
+       if (chip->info->ptp_support) {
+               mv88e6xxx_hwtstamp_free(chip);
+               mv88e6xxx_ptp_free(chip);
+       }
+
        mv88e6xxx_phy_destroy(chip);
        mv88e6xxx_unregister_switch(chip);
        mv88e6xxx_mdios_unregister(chip);
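
One invariant worth making explicit from the ethtool hunks in this file: get_sset_count(), get_strings() and get_ethtool_stats() must agree on layout, chip MIB entries first and the port's SERDES entries appended directly behind them. Note the differing offset arithmetic: one ETH_GSTRING_LEN-sized slot per string versus one u64 per value. An illustrative helper (not from the patch) capturing both appends:

static void example_append_serdes(struct mv88e6xxx_chip *chip, int port,
                                  int count, uint8_t *strings,
                                  uint64_t *data)
{
        /* string table: flat byte array with fixed-width slots */
        chip->info->ops->serdes_get_strings(chip, port,
                                            strings + count * ETH_GSTRING_LEN);
        /* value array: plain u64 elements */
        chip->info->ops->serdes_get_stats(chip, port, data + count);
}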
index 3dba6e9..26b9a61 100644 (file)
 #include <linux/if_vlan.h>
 #include <linux/irq.h>
 #include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
 #include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
 #include <net/dsa.h>
 
 #ifndef UINT64_MAX
@@ -39,6 +42,8 @@
 #define MV88E6XXX_MAX_PVT_SWITCHES     32
 #define MV88E6XXX_MAX_PVT_PORTS                16
 
+#define MV88E6XXX_MAX_GPIO     16
+
 enum mv88e6xxx_egress_mode {
        MV88E6XXX_EGRESS_MODE_UNMODIFIED,
        MV88E6XXX_EGRESS_MODE_UNTAGGED,
@@ -105,6 +110,7 @@ struct mv88e6xxx_info {
        const char *name;
        unsigned int num_databases;
        unsigned int num_ports;
+       unsigned int num_gpio;
        unsigned int max_vid;
        unsigned int port_base_addr;
        unsigned int global1_addr;
@@ -126,6 +132,9 @@ struct mv88e6xxx_info {
         */
        u8 atu_move_port_mask;
        const struct mv88e6xxx_ops *ops;
+
+       /* Supports PTP */
+       bool ptp_support;
 };
 
 struct mv88e6xxx_atu_entry {
@@ -146,6 +155,8 @@ struct mv88e6xxx_vtu_entry {
 
 struct mv88e6xxx_bus_ops;
 struct mv88e6xxx_irq_ops;
+struct mv88e6xxx_gpio_ops;
+struct mv88e6xxx_avb_ops;
 
 struct mv88e6xxx_irq {
        u16 masked;
@@ -154,6 +165,36 @@ struct mv88e6xxx_irq {
        unsigned int nirqs;
 };
 
+/* state flags for mv88e6xxx_port_hwtstamp::state */
+enum {
+       MV88E6XXX_HWTSTAMP_ENABLED,
+       MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+};
+
+struct mv88e6xxx_port_hwtstamp {
+       /* Port index */
+       int port_id;
+
+       /* Timestamping state */
+       unsigned long state;
+
+       /* Resources for receive timestamping */
+       struct sk_buff_head rx_queue;
+       struct sk_buff_head rx_queue2;
+
+       /* Resources for transmit timestamping */
+       unsigned long tx_tstamp_start;
+       struct sk_buff *tx_skb;
+       u16 tx_seq_id;
+
+       /* Current timestamp configuration */
+       struct hwtstamp_config tstamp_config;
+};
+
+struct mv88e6xxx_port {
+       u64 serdes_stats[2];
+};
+
 struct mv88e6xxx_chip {
        const struct mv88e6xxx_info *info;
 
@@ -207,8 +248,34 @@ struct mv88e6xxx_chip {
        int irq;
        int device_irq;
        int watchdog_irq;
+
        int atu_prob_irq;
        int vtu_prob_irq;
+       struct kthread_worker *kworker;
+       struct kthread_delayed_work irq_poll_work;
+
+       /* GPIO resources */
+       u8 gpio_data[2];
+
+       /* This cyclecounter abstracts the switch PTP time.
+        * reg_lock must be held for any operation that read()s.
+        */
+       struct cyclecounter     tstamp_cc;
+       struct timecounter      tstamp_tc;
+       struct delayed_work     overflow_work;
+
+       struct ptp_clock        *ptp_clock;
+       struct ptp_clock_info   ptp_clock_info;
+       struct delayed_work     tai_event_work;
+       struct ptp_pin_desc     pin_config[MV88E6XXX_MAX_GPIO];
+       u16 trig_config;
+       u16 evcap_config;
+
+       /* Per-port timestamping resources. */
+       struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
+
+       /* Array of port structures. */
+       struct mv88e6xxx_port ports[DSA_MAX_PORTS];
 };
 
 struct mv88e6xxx_bus_ops {
@@ -327,9 +394,9 @@ struct mv88e6xxx_ops {
 
        /* Return the number of strings describing statistics */
        int (*stats_get_sset_count)(struct mv88e6xxx_chip *chip);
-       void (*stats_get_strings)(struct mv88e6xxx_chip *chip,  uint8_t *data);
-       void (*stats_get_stats)(struct mv88e6xxx_chip *chip,  int port,
-                               uint64_t *data);
+       int (*stats_get_strings)(struct mv88e6xxx_chip *chip, uint8_t *data);
+       int (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port,
+                              uint64_t *data);
        int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port);
        int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
        const struct mv88e6xxx_irq_ops *watchdog_ops;
@@ -339,11 +406,24 @@ struct mv88e6xxx_ops {
        /* Power on/off a SERDES interface */
        int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, bool on);
 
+       /* Statistics from the SERDES interface */
+       int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
+       void (*serdes_get_strings)(struct mv88e6xxx_chip *chip, int port,
+                                  uint8_t *data);
+       void (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port,
+                                uint64_t *data);
+
        /* VLAN Translation Unit operations */
        int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
                           struct mv88e6xxx_vtu_entry *entry);
        int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip,
                             struct mv88e6xxx_vtu_entry *entry);
+
+       /* GPIO operations */
+       const struct mv88e6xxx_gpio_ops *gpio_ops;
+
+       /* Interface to the AVB/PTP registers */
+       const struct mv88e6xxx_avb_ops *avb_ops;
 };
 
 struct mv88e6xxx_irq_ops {
@@ -355,13 +435,49 @@ struct mv88e6xxx_irq_ops {
        void (*irq_free)(struct mv88e6xxx_chip *chip);
 };
 
+struct mv88e6xxx_gpio_ops {
+       /* Get/set data on GPIO pin */
+       int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin);
+       int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin,
+                       int value);
+
+       /* get/set GPIO direction */
+       int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin);
+       int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin,
+                      bool input);
+
+       /* get/set GPIO pin control */
+       int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+                       int *func);
+       int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+                       int func);
+};
+
+struct mv88e6xxx_avb_ops {
+       /* Access port-scoped Precision Time Protocol registers */
+       int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr,
+                            u16 *data, int len);
+       int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr,
+                             u16 data);
+
+       /* Access global Precision Time Protocol registers */
+       int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+                       int len);
+       int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+
+       /* Access global Time Application Interface registers */
+       int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+                       int len);
+       int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+};
+
 #define STATS_TYPE_PORT                BIT(0)
 #define STATS_TYPE_BANK0       BIT(1)
 #define STATS_TYPE_BANK1       BIT(2)
 
 struct mv88e6xxx_hw_stat {
        char string[ETH_GSTRING_LEN];
-       int sizeof_stat;
+       size_t size;
        int reg;
        int type;
 };
@@ -386,6 +502,11 @@ static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip)
        return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
 }
 
+static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip)
+{
+       return chip->info->num_gpio;
+}
+
 int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
 int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
 int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
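
The tstamp_cc/tstamp_tc pair added to struct mv88e6xxx_chip is the kernel's standard cyclecounter/timecounter abstraction for turning a free-running hardware counter into nanoseconds. As the in-struct comment says, reg_lock must be held whenever the cyclecounter's read() method can fire, since that read performs switch register accesses. A minimal sketch of a conforming reader (assumed shape, not code from this series):

static u64 example_ptp_gettime_ns(struct mv88e6xxx_chip *chip)
{
        u64 ns;

        mutex_lock(&chip->reg_lock);
        /* timecounter_read() invokes chip->tstamp_cc.read() internally */
        ns = timecounter_read(&chip->tstamp_tc);
        mutex_unlock(&chip->reg_lock);

        return ns;
}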
index af07278..5f370f1 100644 (file)
 #include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */
 #include "global2.h"
 
-static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
 {
        return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
 }
 
-static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
 {
        return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
 }
 
-static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
 {
        return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
 }
 
-static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 {
        return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
 }
@@ -798,6 +798,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus,
                                                   val);
 }
 
+/* Offset 0x1B: Watchdog Control */
 static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
 {
        u16 reg;
index 669f590..aa3f0a7 100644 (file)
 #define MV88E6390_G2_EEPROM_ADDR_MASK  0xffff
 
 /* Offset 0x16: AVB Command Register */
-#define MV88E6352_G2_AVB_CMD           0x16
+#define MV88E6352_G2_AVB_CMD                   0x16
+#define MV88E6352_G2_AVB_CMD_BUSY              0x8000
+#define MV88E6352_G2_AVB_CMD_OP_READ           0x4000
+#define MV88E6352_G2_AVB_CMD_OP_READ_INCR      0x6000
+#define MV88E6352_G2_AVB_CMD_OP_WRITE          0x3000
+#define MV88E6390_G2_AVB_CMD_OP_READ           0x0000
+#define MV88E6390_G2_AVB_CMD_OP_READ_INCR      0x4000
+#define MV88E6390_G2_AVB_CMD_OP_WRITE          0x6000
+#define MV88E6352_G2_AVB_CMD_PORT_MASK         0x0f00
+#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL    0xe
+#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL    0xf
+#define MV88E6390_G2_AVB_CMD_PORT_MASK         0x1f00
+#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL    0x1e
+#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL    0x1f
+#define MV88E6352_G2_AVB_CMD_BLOCK_PTP         0
+#define MV88E6352_G2_AVB_CMD_BLOCK_AVB         1
+#define MV88E6352_G2_AVB_CMD_BLOCK_QAV         2
+#define MV88E6352_G2_AVB_CMD_BLOCK_QVB         3
+#define MV88E6352_G2_AVB_CMD_BLOCK_MASK                0x00e0
+#define MV88E6352_G2_AVB_CMD_ADDR_MASK         0x001f
 
 /* Offset 0x17: AVB Data Register */
 #define MV88E6352_G2_AVB_DATA          0x17
 #define MV88E6352_G2_NOEGR_POLICY      0x2000
 #define MV88E6390_G2_LAG_ID_4          0x2000
 
+/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */
+/* Offset 0x02: Misc Configuration */
+#define MV88E6352_G2_SCRATCH_MISC_CFG          0x02
+#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI        0x80
+/* Offset 0x60-0x61: GPIO Configuration */
+#define MV88E6352_G2_SCRATCH_GPIO_CFG0         0x60
+#define MV88E6352_G2_SCRATCH_GPIO_CFG1         0x61
+/* Offset 0x62-0x63: GPIO Direction */
+#define MV88E6352_G2_SCRATCH_GPIO_DIR0         0x62
+#define MV88E6352_G2_SCRATCH_GPIO_DIR1         0x63
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT      0
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN       1
+/* Offset 0x64-0x65: GPIO Data */
+#define MV88E6352_G2_SCRATCH_GPIO_DATA0                0x64
+#define MV88E6352_G2_SCRATCH_GPIO_DATA1                0x65
+/* Offset 0x68-0x6F: GPIO Pin Control */
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL0                0x68
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL1                0x69
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL2                0x6A
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL3                0x6B
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL4                0x6C
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL5                0x6D
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL6                0x6E
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL7                0x6F
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA0      0x70
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1      0x71
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU       BIT(2)
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2      0x72
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK 0x3
+
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO    0
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG    1
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ   2
+
 #ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -230,6 +283,11 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
        return 0;
 }
 
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+
 int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
 
@@ -267,6 +325,14 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
 extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
 
+extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
+
+extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
+
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+                                     bool external);
+
 #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -279,6 +345,26 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
        return 0;
 }
 
+static inline int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip,
                                            int port)
 {
@@ -382,6 +468,17 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {};
 static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
 
+static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
+
+static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {};
+
+static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+                                                   bool external)
+{
+       return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 #endif /* _MV88E6XXX_GLOBAL2_H */
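
The empty-ops and -EOPNOTSUPP stubs in the #else branch above pair with the Makefile hunk earlier (the global2*.o objects are only built when CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 is set): with Global 2 support compiled out, callers link against the inline stubs and simply propagate the error. That is what lets chip.c invoke the SMI scratch helper unconditionally, as in this sketch of the caller's shape:

static int example_enable_external_smi(struct mv88e6xxx_chip *chip)
{
        /* resolves to the inline -EOPNOTSUPP stub when GLOBAL2 is off */
        return mv88e6xxx_g2_scratch_gpio_set_smi(chip, true);
}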
diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c
new file mode 100644 (file)
index 0000000..2e398cc
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * Copyright (c) 2017 National Instruments
+ *     Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "global2.h"
+
+/* Offset 0x16: AVB Command Register
+ * Offset 0x17: AVB Data Register
+ *
+ * There are two different versions of this register interface:
+ *    "6352": 3-bit "op" field, 4-bit "port" field.
+ *    "6390": 2-bit "op" field, 5-bit "port" field.
+ *
+ * The "op" codes are different between the two, as well as the special
+ * port fields for global PTP and TAI configuration.
+ */
+
+/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words.
+ * The hardware supports snapshotting up to four contiguous registers.
+ */
+static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop,
+                                u16 *data, int len)
+{
+       int err;
+       int i;
+
+       /* Hardware can only snapshot four words. */
+       if (len > 4)
+               return -E2BIG;
+
+       err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop);
+       if (err)
+               return err;
+
+       for (i = 0; i < len; ++i) {
+               err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA,
+                                       &data[i]);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */
+static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop,
+                                 u16 data)
+{
+       int err;
+
+       err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop);
+}
+
+static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+                                         int port, int addr, u16 *data,
+                                         int len)
+{
+       u16 readop = (len == 1 ? MV88E6352_G2_AVB_CMD_OP_READ :
+                                MV88E6352_G2_AVB_CMD_OP_READ_INCR) |
+                    (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+                    addr;
+
+       return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+                                          int port, int addr, u16 data)
+{
+       u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) |
+                     (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+       return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+                                    u16 *data, int len)
+{
+       return mv88e6352_g2_avb_port_ptp_read(chip,
+                                       MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+                                       addr, data, len);
+}
+
+static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+                                     u16 data)
+{
+       return mv88e6352_g2_avb_port_ptp_write(chip,
+                                       MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+                                       addr, data);
+}
+
+static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+                                    u16 *data, int len)
+{
+       return mv88e6352_g2_avb_port_ptp_read(chip,
+                                       MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+                                       addr, data, len);
+}
+
+static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+                                     u16 data)
+{
+       return mv88e6352_g2_avb_port_ptp_write(chip,
+                                       MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+                                       addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {
+       .port_ptp_read          = mv88e6352_g2_avb_port_ptp_read,
+       .port_ptp_write         = mv88e6352_g2_avb_port_ptp_write,
+       .ptp_read               = mv88e6352_g2_avb_ptp_read,
+       .ptp_write              = mv88e6352_g2_avb_ptp_write,
+       .tai_read               = mv88e6352_g2_avb_tai_read,
+       .tai_write              = mv88e6352_g2_avb_tai_write,
+};
+
+static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+                                         int port, int addr, u16 *data,
+                                         int len)
+{
+       u16 readop = (len == 1 ? MV88E6390_G2_AVB_CMD_OP_READ :
+                                MV88E6390_G2_AVB_CMD_OP_READ_INCR) |
+                    (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+                    addr;
+
+       return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+                                          int port, int addr, u16 data)
+{
+       u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) |
+                     (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+       return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+                                    u16 *data, int len)
+{
+       return mv88e6390_g2_avb_port_ptp_read(chip,
+                                       MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+                                       addr, data, len);
+}
+
+static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+                                     u16 data)
+{
+       return mv88e6390_g2_avb_port_ptp_write(chip,
+                                       MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+                                       addr, data);
+}
+
+static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+                                    u16 *data, int len)
+{
+       return mv88e6390_g2_avb_port_ptp_read(chip,
+                                       MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+                                       addr, data, len);
+}
+
+static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+                                     u16 data)
+{
+       return mv88e6390_g2_avb_port_ptp_write(chip,
+                                       MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+                                       addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {
+       .port_ptp_read          = mv88e6390_g2_avb_port_ptp_read,
+       .port_ptp_write         = mv88e6390_g2_avb_port_ptp_write,
+       .ptp_read               = mv88e6390_g2_avb_ptp_read,
+       .ptp_write              = mv88e6390_g2_avb_ptp_write,
+       .tai_read               = mv88e6390_g2_avb_tai_read,
+       .tai_write              = mv88e6390_g2_avb_tai_write,
+};
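
To make the command-word layout concrete, here is a worked 6352-family example derived from the defines in global2.h (the arithmetic is worked out here, not taken from the patch). A single read of TAI-global register 0 encodes as:

        /* op=READ (0x4000) | port=TAIGLOBAL (0xe)<<8 | block=PTP (0)<<5 | addr=0 */
        u16 readop = MV88E6352_G2_AVB_CMD_OP_READ |
                     (MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL << 8) |
                     (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
                     0x0;                       /* = 0x4e00 */

mv88e6xxx_g2_update() then ORs in the busy bit (0x8000) when it kicks the operation, and the result words are read back from MV88E6352_G2_AVB_DATA.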
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
new file mode 100644 (file)
index 0000000..3f92b88
--- /dev/null
@@ -0,0 +1,291 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+
+/* Offset 0x1A: Scratch and Misc. Register */
+static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg,
+                                    u8 *data)
+{
+       u16 value;
+       int err;
+
+       err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC,
+                                reg << 8);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value);
+       if (err)
+               return err;
+
+       *data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK);
+
+       return 0;
+}
+
+static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
+                                     u8 data)
+{
+       u16 value = (reg << 8) | data;
+
+       return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value);
+}
+
+/**
+ * mv88e6xxx_g2_scratch_get_bit - get a bit
+ * @chip: chip private data
+ * @base_reg: base scratch register of the bit field
+ * @offset: bit index relative to @base_reg
+ * @set: is bit set?
+ */
+static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
+                                       int base_reg, unsigned int offset,
+                                       int *set)
+{
+       int reg = base_reg + (offset / 8);
+       u8 mask = (1 << (offset & 0x7));
+       u8 val;
+       int err;
+
+       err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+       if (err)
+               return err;
+
+       *set = !!(mask & val);
+
+       return 0;
+}
+
+/**
+ * mv88e6xxx_g2_scratch_set_bit - set (or clear) a bit
+ * @chip: chip private data
+ * @base_reg: base scratch register of the bit field
+ * @offset: bit index relative to @base_reg
+ * @set: set if true, clear if false
+ *
+ * Helper function for dealing with the direction and data registers.
+ */
+static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip,
+                                       int base_reg, unsigned int offset,
+                                       int set)
+{
+       int reg = base_reg + (offset / 8);
+       u8 mask = (1 << (offset & 0x7));
+       u8 val;
+       int err;
+
+       err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+       if (err)
+               return err;
+
+       if (set)
+               val |= mask;
+       else
+               val &= ~mask;
+
+       return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for low, 1 for high, negative error
+ */
+static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip,
+                                             unsigned int pin)
+{
+       int val = 0;
+       int err;
+
+       err = mv88e6xxx_g2_scratch_get_bit(chip,
+                                          MV88E6352_G2_SCRATCH_GPIO_DATA0,
+                                          pin, &val);
+       if (err)
+               return err;
+
+       return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @value: value to set
+ */
+static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip,
+                                             unsigned int pin, int value)
+{
+       u8 mask = (1 << (pin & 0x7));
+       int offset = (pin / 8);
+       int reg;
+
+       reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset;
+
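+       /* A read of the data register is assumed to return the sensed pin
+        * levels rather than the last written outputs, hence the shadow
+        * copy of the output state kept in chip->gpio_data.
+        */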
+       if (value)
+               chip->gpio_data[offset] |= mask;
+       else
+               chip->gpio_data[offset] &= ~mask;
+
+       return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX).
+ */
+static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip,
+                                            unsigned int pin)
+{
+       int val = 0;
+       int err;
+
+       err = mv88e6xxx_g2_scratch_get_bit(chip,
+                                          MV88E6352_G2_SCRATCH_GPIO_DIR0,
+                                          pin, &val);
+       if (err)
+               return err;
+
+       return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @input: true to configure the pin as an input, false for output
+ */
+static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip,
+                                            unsigned int pin, bool input)
+{
+       int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN :
+                            MV88E6352_G2_SCRATCH_GPIO_DIR_OUT);
+
+       return mv88e6xxx_g2_scratch_set_bit(chip,
+                                           MV88E6352_G2_SCRATCH_GPIO_DIR0,
+                                           pin, value);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: where the function number is stored
+ *
+ * Note that the function numbers themselves may vary by chipset.
+ */
+static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip,
+                                             unsigned int pin, int *func)
+{
+       int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+       int offset = (pin & 0x1) ? 4 : 0;
+       u8 mask = (0x7 << offset);
+       int err;
+       u8 val;
+
+       err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+       if (err)
+               return err;
+
+       *func = (val & mask) >> offset;
+
+       return 0;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ */
+static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip,
+                                             unsigned int pin, int func)
+{
+       int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+       int offset = (pin & 0x1) ? 4 : 0;
+       u8 mask = (0x7 << offset);
+       int err;
+       u8 val;
+
+       err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+       if (err)
+               return err;
+
+       val = (val & ~mask) | ((func << offset) & mask);
+
+       return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
+       .get_data = mv88e6352_g2_scratch_gpio_get_data,
+       .set_data = mv88e6352_g2_scratch_gpio_set_data,
+       .get_dir = mv88e6352_g2_scratch_gpio_get_dir,
+       .set_dir = mv88e6352_g2_scratch_gpio_set_dir,
+       .get_pctl = mv88e6352_g2_scratch_gpio_get_pctl,
+       .set_pctl = mv88e6352_g2_scratch_gpio_set_pctl,
+};
+
+/**
+ * mv88e6xxx_g2_gpio_set_smi - set gpio muxing for external smi
+ * @chip: chip private data
+ * @external: set mux for external smi, or free for gpio usage
+ *
+ * Some mv88e6xxx models have GPIO pins that may be configured as
+ * an external SMI interface, or they may be made free for other
+ * GPIO uses.
+ */
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+                                     bool external)
+{
+       int misc_cfg = MV88E6352_G2_SCRATCH_MISC_CFG;
+       int config_data1 = MV88E6352_G2_SCRATCH_CONFIG_DATA1;
+       int config_data2 = MV88E6352_G2_SCRATCH_CONFIG_DATA2;
+       bool no_cpu;
+       u8 p0_mode;
+       int err;
+       u8 val;
+
+       err = mv88e6xxx_g2_scratch_read(chip, config_data2, &val);
+       if (err)
+               return err;
+
+       p0_mode = val & MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK;
+
+       if (p0_mode == 0x01 || p0_mode == 0x02)
+               return -EBUSY;
+
+       err = mv88e6xxx_g2_scratch_read(chip, config_data1, &val);
+       if (err)
+               return err;
+
+       no_cpu = !!(val & MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU);
+
+       err = mv88e6xxx_g2_scratch_read(chip, misc_cfg, &val);
+       if (err)
+               return err;
+
+       /* NO_CPU being 0 inverts the meaning of the bit */
+       if (!no_cpu)
+               external = !external;
+
+       if (external)
+               val |= MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+       else
+               val &= ~MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+
+       return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
+}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
new file mode 100644 (file)
index 0000000..ac7694c
--- /dev/null
@@ -0,0 +1,576 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "hwtstamp.h"
+#include "ptp.h"
+#include <linux/ptp_classify.h>
+
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
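+/* The ptp_classify result is stashed in skb->cb while the skb waits on
+ * this driver's own queues, between the rx/tx hooks and the deferred
+ * timestamp worker.
+ */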
+
+static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port,
+                                  int addr, u16 *data, int len)
+{
+       if (!chip->info->ops->avb_ops->port_ptp_read)
+               return -EOPNOTSUPP;
+
+       return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr,
+                                                      data, len);
+}
+
+static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port,
+                                   int addr, u16 data)
+{
+       if (!chip->info->ops->avb_ops->port_ptp_write)
+               return -EOPNOTSUPP;
+
+       return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr,
+                                                       data);
+}
+
+static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+                              u16 data)
+{
+       if (!chip->info->ops->avb_ops->ptp_write)
+               return -EOPNOTSUPP;
+
+       return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
+}
+
+/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
+ * timestamp. When working properly, hardware will produce a timestamp
+ * within 1ms. Software may encounter delays due to MDIO contention, so
+ * the timeout is set accordingly.
+ */
+#define TX_TSTAMP_TIMEOUT      msecs_to_jiffies(20)
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+                         struct ethtool_ts_info *info)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+
+       if (!chip->info->ptp_support)
+               return -EOPNOTSUPP;
+
+       info->so_timestamping =
+               SOF_TIMESTAMPING_TX_HARDWARE |
+               SOF_TIMESTAMPING_RX_HARDWARE |
+               SOF_TIMESTAMPING_RAW_HARDWARE;
+       info->phc_index = ptp_clock_index(chip->ptp_clock);
+       info->tx_types =
+               (1 << HWTSTAMP_TX_OFF) |
+               (1 << HWTSTAMP_TX_ON);
+       info->rx_filters =
+               (1 << HWTSTAMP_FILTER_NONE) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
+
+       return 0;
+}
+
+static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
+                                        struct hwtstamp_config *config)
+{
+       struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+       bool tstamp_enable = false;
+       u16 port_config0;
+       int err;
+
+       /* Prevent the TX/RX paths from trying to interact with the
+        * timestamp hardware while we reconfigure it.
+        */
+       clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+       /* reserved for future extensions */
+       if (config->flags)
+               return -EINVAL;
+
+       switch (config->tx_type) {
+       case HWTSTAMP_TX_OFF:
+               tstamp_enable = false;
+               break;
+       case HWTSTAMP_TX_ON:
+               tstamp_enable = true;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       /* The switch supports timestamping both L2 and L4; one cannot be
+        * disabled independently of the other.
+        */
+       switch (config->rx_filter) {
+       case HWTSTAMP_FILTER_NONE:
+               tstamp_enable = false;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+               config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+               break;
+       case HWTSTAMP_FILTER_ALL:
+       default:
+               config->rx_filter = HWTSTAMP_FILTER_NONE;
+               return -ERANGE;
+       }
+
+       if (tstamp_enable) {
+               /* Disable transportSpecific value matching, so that
+                * packets with either transportSpecific value, 1588 (0)
+                * or 802.1AS (1), are timestamped.
+                */
+               port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
+       } else {
+               /* Disable PTP. This disables both RX and TX timestamping. */
+               port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
+       }
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+                                      port_config0);
+       mutex_unlock(&chip->reg_lock);
+
+       if (err < 0)
+               return err;
+
+       /* Once hardware has been configured, enable timestamp checks
+        * in the RX/TX paths.
+        */
+       if (tstamp_enable)
+               set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+       return 0;
+}
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+                               struct ifreq *ifr)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+       struct hwtstamp_config config;
+       int err;
+
+       if (!chip->info->ptp_support)
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+               return -EFAULT;
+
+       err = mv88e6xxx_set_hwtstamp_config(chip, port, &config);
+       if (err)
+               return err;
+
+       /* Save the chosen configuration to be returned later. */
+       memcpy(&ps->tstamp_config, &config, sizeof(config));
+
+       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+               -EFAULT : 0;
+}
+
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+                               struct ifreq *ifr)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+       struct hwtstamp_config *config = &ps->tstamp_config;
+
+       if (!chip->info->ptp_support)
+               return -EOPNOTSUPP;
+
+       return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+               -EFAULT : 0;
+}
+
+/* Get the start of the PTP header in this skb */
+static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
+{
+       u8 *data = skb_mac_header(skb);
+       unsigned int offset = 0;
+
+       if (type & PTP_CLASS_VLAN)
+               offset += VLAN_HLEN;
+
+       switch (type & PTP_CLASS_PMASK) {
+       case PTP_CLASS_IPV4:
+               offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+               break;
+       case PTP_CLASS_IPV6:
+               offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+               break;
+       case PTP_CLASS_L2:
+               offset += ETH_HLEN;
+               break;
+       default:
+               return NULL;
+       }
+
+       /* Ensure that the entire PTP header (34 bytes) is present. */
+       if (skb->len + ETH_HLEN < offset + 34)
+               return NULL;
+
+       return data + offset;
+}
+
+/* Returns a pointer to the PTP header if the caller should time stamp,
+ * or NULL if the caller should not.
+ */
+static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
+                                  struct sk_buff *skb, unsigned int type)
+{
+       struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+       u8 *hdr;
+
+       if (!chip->info->ptp_support)
+               return NULL;
+
+       hdr = parse_ptp_header(skb, type);
+       if (!hdr)
+               return NULL;
+
+       if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state))
+               return NULL;
+
+       return hdr;
+}
+
+static int mv88e6xxx_ts_valid(u16 status)
+{
+       if (!(status & MV88E6XXX_PTP_TS_VALID))
+               return 0;
+       if (status & MV88E6XXX_PTP_TS_STATUS_MASK)
+               return 0;
+       return 1;
+}
+
+static int seq_match(struct sk_buff *skb, u16 ts_seqid)
+{
+       unsigned int type = SKB_PTP_TYPE(skb);
+       u8 *hdr = parse_ptp_header(skb, type);
+       __be16 *seqid;
+
+       seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+       return ts_seqid == ntohs(*seqid);
+}
+
+static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
+                              struct mv88e6xxx_port_hwtstamp *ps,
+                              struct sk_buff *skb, u16 reg,
+                              struct sk_buff_head *rxq)
+{
+       u16 buf[4] = { 0 }, status, seq_id;
+       u64 ns, timelo, timehi;
+       struct skb_shared_hwtstamps *shwt;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+                                     reg, buf, ARRAY_SIZE(buf));
+       mutex_unlock(&chip->reg_lock);
+       if (err)
+               pr_err("failed to get the receive time stamp\n");
+
+       status = buf[0];
+       timelo = buf[1];
+       timehi = buf[2];
+       seq_id = buf[3];
+
+       if (status & MV88E6XXX_PTP_TS_VALID) {
+               mutex_lock(&chip->reg_lock);
+               err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0);
+               mutex_unlock(&chip->reg_lock);
+               if (err)
+                       pr_err("failed to clear the receive status\n");
+       }
+       /* Since the device can only handle one time stamp at a time,
+        * we purge any extra frames from the queue.
+        */
+       for ( ; skb; skb = skb_dequeue(rxq)) {
+               if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
+                       ns = timehi << 16 | timelo;
+
+                       mutex_lock(&chip->reg_lock);
+                       ns = timecounter_cyc2time(&chip->tstamp_tc, ns);
+                       mutex_unlock(&chip->reg_lock);
+                       shwt = skb_hwtstamps(skb);
+                       memset(shwt, 0, sizeof(*shwt));
+                       shwt->hwtstamp = ns_to_ktime(ns);
+                       status &= ~MV88E6XXX_PTP_TS_VALID;
+               }
+               netif_rx_ni(skb);
+       }
+}
+
+static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
+                                   struct mv88e6xxx_port_hwtstamp *ps)
+{
+       struct sk_buff *skb;
+
+       skb = skb_dequeue(&ps->rx_queue);
+
+       if (skb)
+               mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
+                                  &ps->rx_queue);
+
+       skb = skb_dequeue(&ps->rx_queue2);
+       if (skb)
+               mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
+                                  &ps->rx_queue2);
+}
+
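+/* messageType (the low nibble of the first PTP header byte) 3 is
+ * Pdelay_Resp.
+ */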
+static int is_pdelay_resp(u8 *msgtype)
+{
+       return (*msgtype & 0xf) == 3;
+}
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+                            struct sk_buff *skb, unsigned int type)
+{
+       struct mv88e6xxx_port_hwtstamp *ps;
+       struct mv88e6xxx_chip *chip;
+       u8 *hdr;
+
+       chip = ds->priv;
+       ps = &chip->port_hwtstamp[port];
+
+       if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
+               return false;
+
+       hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
+       if (!hdr)
+               return false;
+
+       SKB_PTP_TYPE(skb) = type;
+
+       if (is_pdelay_resp(hdr))
+               skb_queue_tail(&ps->rx_queue2, skb);
+       else
+               skb_queue_tail(&ps->rx_queue, skb);
+
+       ptp_schedule_worker(chip->ptp_clock, 0);
+
+       return true;
+}
+
+static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
+                                  struct mv88e6xxx_port_hwtstamp *ps)
+{
+       struct skb_shared_hwtstamps shhwtstamps;
+       u16 departure_block[4], status;
+       struct sk_buff *tmp_skb;
+       u32 time_raw;
+       int err;
+       u64 ns;
+
+       if (!ps->tx_skb)
+               return 0;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+                                     MV88E6XXX_PORT_PTP_DEP_STS,
+                                     departure_block,
+                                     ARRAY_SIZE(departure_block));
+       mutex_unlock(&chip->reg_lock);
+
+       if (err)
+               goto free_and_clear_skb;
+
+       if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) {
+               if (time_is_before_jiffies(ps->tx_tstamp_start +
+                                          TX_TSTAMP_TIMEOUT)) {
+                       dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n",
+                                ps->port_id);
+                       goto free_and_clear_skb;
+               }
+               /* The timestamp should be available quickly; retrieving
+                * it is high priority and time-bounded by
+                * TX_TSTAMP_TIMEOUT, so a poll is warranted. Restart the
+                * work.
+                */
+               return 1;
+       }
+
+       /* We have the timestamp; go ahead and clear valid now */
+       mutex_lock(&chip->reg_lock);
+       mv88e6xxx_port_ptp_write(chip, ps->port_id,
+                                MV88E6XXX_PORT_PTP_DEP_STS, 0);
+       mutex_unlock(&chip->reg_lock);
+
+       status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
+       if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) {
+               dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id);
+               goto free_and_clear_skb;
+       }
+
+       if (departure_block[3] != ps->tx_seq_id) {
+               dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id);
+               goto free_and_clear_skb;
+       }
+
+       memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+       time_raw = ((u32)departure_block[2] << 16) | departure_block[1];
+       mutex_lock(&chip->reg_lock);
+       ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw);
+       mutex_unlock(&chip->reg_lock);
+       shhwtstamps.hwtstamp = ns_to_ktime(ns);
+
+       dev_dbg(chip->dev,
+               "p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n",
+               ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp),
+               departure_block[0], ps->tx_seq_id, departure_block[3]);
+
+       /* skb_complete_tx_timestamp() will free up the client to make
+        * another timestamp-able transmit. We have to be ready for it
+        * -- by clearing the ps->tx_skb "flag" -- beforehand.
+        */
+
+       tmp_skb = ps->tx_skb;
+       ps->tx_skb = NULL;
+       clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+       skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
+
+       return 0;
+
+free_and_clear_skb:
+       dev_kfree_skb_any(ps->tx_skb);
+       ps->tx_skb = NULL;
+       clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+       return 0;
+}
+
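+/* do_aux_work callback: the value returned is the delay in jiffies
+ * before the PTP worker runs again, or a negative value to stop
+ * rescheduling.
+ */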
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+       struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+       struct dsa_switch *ds = chip->ds;
+       struct mv88e6xxx_port_hwtstamp *ps;
+       int i, restart = 0;
+
+       for (i = 0; i < ds->num_ports; i++) {
+               if (!dsa_is_user_port(ds, i))
+                       continue;
+
+               ps = &chip->port_hwtstamp[i];
+               if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
+                       restart |= mv88e6xxx_txtstamp_work(chip, ps);
+
+               mv88e6xxx_rxtstamp_work(chip, ps);
+       }
+
+       return restart ? 1 : -1;
+}
+
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+                            struct sk_buff *clone, unsigned int type)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+       __be16 *seq_ptr;
+       u8 *hdr;
+
+       if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
+               return false;
+
+       hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
+       if (!hdr)
+               return false;
+
+       seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+       if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+                                 &ps->state))
+               return false;
+
+       ps->tx_skb = clone;
+       ps->tx_tstamp_start = jiffies;
+       ps->tx_seq_id = be16_to_cpup(seq_ptr);
+
+       ptp_schedule_worker(chip->ptp_clock, 0);
+       return true;
+}
+
+static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
+{
+       struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+
+       ps->port_id = port;
+
+       skb_queue_head_init(&ps->rx_queue);
+       skb_queue_head_init(&ps->rx_queue2);
+
+       return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+                                       MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+}
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+       int i;
+
+       /* Disable timestamping on all ports. */
+       for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+               err = mv88e6xxx_hwtstamp_port_setup(chip, i);
+               if (err)
+                       return err;
+       }
+
+       /* MV88E6XXX_PTP_MSG_TYPE is a mask of PTP message types to
+        * timestamp. This affects all ports that have timestamping enabled,
+        * but the timestamp config is per-port; thus we configure all events
+        * here and only support the HWTSTAMP_FILTER_*_EVENT filter types.
+        */
+       err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE,
+                                 MV88E6XXX_PTP_MSGTYPE_ALL_EVENT);
+       if (err)
+               return err;
+
+       /* Use ARRIVAL1 for peer delay response messages. */
+       err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR,
+                                 MV88E6XXX_PTP_MSGTYPE_PDLAY_RES);
+       if (err)
+               return err;
+
+       /* 88E6341 devices default to timestamping at the PHY, but this has
+        * a hardware issue that results in unreliable timestamps. Force
+        * these devices to timestamp at the MAC.
+        */
+       if (chip->info->family == MV88E6XXX_FAMILY_6341) {
+               u16 val = MV88E6341_PTP_CFG_UPDATE |
+                         MV88E6341_PTP_CFG_MODE_IDX |
+                         MV88E6341_PTP_CFG_MODE_TS_AT_MAC;
+               err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
new file mode 100644 (file)
index 0000000..bc71c92
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_HWTSTAMP_H
+#define _MV88E6XXX_HWTSTAMP_H
+
+#include "chip.h"
+
+/* Global PTP registers */
+/* Offset 0x00: PTP EtherType */
+#define MV88E6XXX_PTP_ETHERTYPE        0x00
+
+/* Offset 0x01: Message Type Timestamp Enables */
+#define MV88E6XXX_PTP_MSGTYPE                  0x01
+#define MV88E6XXX_PTP_MSGTYPE_SYNC             0x0001
+#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ                0x0002
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ                0x0004
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES                0x0008
+#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT                0x000f
+
+/* Offset 0x02: Timestamp Arrival Capture Pointers */
+#define MV88E6XXX_PTP_TS_ARRIVAL_PTR   0x02
+
+/* Offset 0x07: PTP Global Configuration */
+#define MV88E6341_PTP_CFG                      0x07
+#define MV88E6341_PTP_CFG_UPDATE               0x8000
+#define MV88E6341_PTP_CFG_IDX_MASK             0x7f00
+#define MV88E6341_PTP_CFG_DATA_MASK            0x00ff
+#define MV88E6341_PTP_CFG_MODE_IDX             0x0
+#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY       0x00
+#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC       0x80
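+/* The PTP global config register is itself indirect: per the masks
+ * above, bit 15 is the update bit, bits 14:8 select a config index
+ * (MODE is index 0), and bits 7:0 carry the data byte.
+ */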
+
+/* Offset 0x08: PTP Interrupt Status */
+#define MV88E6XXX_PTP_IRQ_STATUS       0x08
+
+/* Per-Port PTP Registers */
+/* Offset 0x00: PTP Configuration 0 */
+#define MV88E6XXX_PORT_PTP_CFG0                                0x00
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT            12
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK             0xf000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588             0x0000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS           0x1000
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH    0x0800
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE      0x0002
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP            0x0001
+
+/* Offset 0x01: PTP Configuration 1 */
+#define MV88E6XXX_PORT_PTP_CFG1        0x01
+
+/* Offset 0x02: PTP Configuration 2 */
+#define MV88E6XXX_PORT_PTP_CFG2                                0x02
+#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL          0x1000
+#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN             0x0002
+#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN             0x0001
+
+/* Offset 0x03: PTP LED Configuration */
+#define MV88E6XXX_PORT_PTP_LED_CFG     0x03
+
+/* Offset 0x08: PTP Arrival 0 Status */
+#define MV88E6XXX_PORT_PTP_ARR0_STS    0x08
+
+/* Offset 0x09/0x0A: PTP Arrival 0 Time */
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO        0x09
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI        0x0a
+
+/* Offset 0x0B: PTP Arrival 0 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR0_SEQID  0x0b
+
+/* Offset 0x0C: PTP Arrival 1 Status */
+#define MV88E6XXX_PORT_PTP_ARR1_STS    0x0c
+
+/* Offset 0x0D/0x0E: PTP Arrival 1 Time */
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO        0x0d
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI        0x0e
+
+/* Offset 0x0F: PTP Arrival 1 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR1_SEQID  0x0f
+
+/* Offset 0x10: PTP Departure Status */
+#define MV88E6XXX_PORT_PTP_DEP_STS     0x10
+
+/* Offset 0x11/0x12: PTP Departure Time */
+#define MV88E6XXX_PORT_PTP_DEP_TIME_LO 0x11
+#define MV88E6XXX_PORT_PTP_DEP_TIME_HI 0x12
+
+/* Offset 0x13: PTP Departure Sequence ID */
+#define MV88E6XXX_PORT_PTP_DEP_SEQID   0x13
+
+/* Status fields for arrival and departure timestamp status registers */
+#define MV88E6XXX_PTP_TS_STATUS_MASK           0x0006
+#define MV88E6XXX_PTP_TS_STATUS_NORMAL         0x0000
+#define MV88E6XXX_PTP_TS_STATUS_OVERWRITTEN    0x0002
+#define MV88E6XXX_PTP_TS_STATUS_DISCARDED      0x0004
+#define MV88E6XXX_PTP_TS_VALID                 0x0001
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+                               struct ifreq *ifr);
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+                               struct ifreq *ifr);
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+                            struct sk_buff *clone, unsigned int type);
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+                            struct sk_buff *clone, unsigned int type);
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+                         struct ethtool_ts_info *info);
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds,
+                                             int port, struct ifreq *ifr)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds,
+                                             int port, struct ifreq *ifr)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+                                          struct sk_buff *clone,
+                                          unsigned int type)
+{
+       return false;
+}
+
+static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+                                          struct sk_buff *clone,
+                                          unsigned int type)
+{
+       return false;
+}
+
+static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+                                       struct ethtool_ts_info *info)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+       return 0;
+}
+
+static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_HWTSTAMP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
new file mode 100644 (file)
index 0000000..bd85e2c
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "ptp.h"
+
+/* Raw timestamps are in units of 8-ns clock periods. */
+#define CC_SHIFT       28
+#define CC_MULT                (8 << CC_SHIFT)
+#define CC_MULT_NUM    (1 << 9)
+#define CC_MULT_DEM    15625ULL
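+
+/* For adjfine, scaled_ppm expresses ppm with a 16-bit binary fraction,
+ * so the adjustment applied to the cycle counter mult is
+ *   diff = CC_MULT * scaled_ppm / (10^6 * 2^16)
+ *        = scaled_ppm * 2^9 / 15625
+ * which is what CC_MULT_NUM / CC_MULT_DEM encode.
+ */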
+
+#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
+
+#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc)
+#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+                                            overflow_work)
+#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+                                             tai_event_work)
+
+static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
+                             u16 *data, int len)
+{
+       if (!chip->info->ops->avb_ops->tai_read)
+               return -EOPNOTSUPP;
+
+       return chip->info->ops->avb_ops->tai_read(chip, addr, data, len);
+}
+
+static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data)
+{
+       if (!chip->info->ops->avb_ops->tai_write)
+               return -EOPNOTSUPP;
+
+       return chip->info->ops->avb_ops->tai_write(chip, addr, data);
+}
+
+/* TODO: places where this are called should be using pinctrl */
+static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
+                                  int func, int input)
+{
+       int err;
+
+       if (!chip->info->ops->gpio_ops)
+               return -EOPNOTSUPP;
+
+       err = chip->info->ops->gpio_ops->set_dir(chip, pin, input);
+       if (err)
+               return err;
+
+       return chip->info->ops->gpio_ops->set_pctl(chip, pin, func);
+}
+
+static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
+{
+       struct mv88e6xxx_chip *chip = cc_to_chip(cc);
+       u16 phc_time[2];
+       int err;
+
+       err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time,
+                                ARRAY_SIZE(phc_time));
+       if (err)
+               return 0;
+
+       return ((u32)phc_time[1] << 16) | phc_time[0];
+}
+
+/* mv88e6xxx_config_eventcap - configure TAI event capture
+ * @chip: chip private data
+ * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external)
+ * @rising: zero for falling-edge trigger, else rising-edge trigger
+ *
+ * This will also reset the capture sequence counter.
+ */
+static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
+                                    int rising)
+{
+       u16 global_config;
+       u16 cap_config;
+       int err;
+
+       chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE |
+                            MV88E6XXX_TAI_CFG_CAP_CTR_START;
+       if (!rising)
+               chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING;
+
+       global_config = (chip->evcap_config | chip->trig_config);
+       err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config);
+       if (err)
+               return err;
+
+       if (event == PTP_CLOCK_PPS) {
+               cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG;
+       } else if (event == PTP_CLOCK_EXTTS) {
+               /* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */
+               cap_config = 0;
+       } else {
+               return -EINVAL;
+       }
+
+       /* Write the capture config; this also clears the capture counter */
+       err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS,
+                                 cap_config);
+
+       return err;
+}
+
+static void mv88e6xxx_tai_event_work(struct work_struct *ugly)
+{
+       struct delayed_work *dw = to_delayed_work(ugly);
+       struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw);
+       struct ptp_clock_event ev;
+       u16 status[4];
+       u32 raw_ts;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS,
+                                status, ARRAY_SIZE(status));
+       mutex_unlock(&chip->reg_lock);
+
+       if (err) {
+               dev_err(chip->dev, "failed to read TAI status register\n");
+               return;
+       }
+       if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) {
+               dev_warn(chip->dev, "missed event capture\n");
+               return;
+       }
+       if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID))
+               goto out;
+
+       raw_ts = ((u32)status[2] << 16) | status[1];
+
+       /* Clear the valid bit so the next timestamp can come in */
+       status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID;
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]);
+       mutex_unlock(&chip->reg_lock);
+
+       /* This is an external timestamp */
+       ev.type = PTP_CLOCK_EXTTS;
+
+       /* We only have one timestamping channel. */
+       ev.index = 0;
+       mutex_lock(&chip->reg_lock);
+       ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts);
+       mutex_unlock(&chip->reg_lock);
+
+       ptp_clock_event(chip->ptp_clock, &ev);
+out:
+       schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL);
+}
+
+static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+       struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+       int neg_adj = 0;
+       u32 diff, mult;
+       u64 adj;
+
+       if (scaled_ppm < 0) {
+               neg_adj = 1;
+               scaled_ppm = -scaled_ppm;
+       }
+       mult = CC_MULT;
+       adj = CC_MULT_NUM;
+       adj *= scaled_ppm;
+       diff = div_u64(adj, CC_MULT_DEM);
+
+       mutex_lock(&chip->reg_lock);
+
+       timecounter_read(&chip->tstamp_tc);
+       chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff;
+
+       mutex_unlock(&chip->reg_lock);
+
+       return 0;
+}
+
+static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+       struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+       mutex_lock(&chip->reg_lock);
+       timecounter_adjtime(&chip->tstamp_tc, delta);
+       mutex_unlock(&chip->reg_lock);
+
+       return 0;
+}
+
+static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
+                                struct timespec64 *ts)
+{
+       struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+       u64 ns;
+
+       mutex_lock(&chip->reg_lock);
+       ns = timecounter_read(&chip->tstamp_tc);
+       mutex_unlock(&chip->reg_lock);
+
+       *ts = ns_to_timespec64(ns);
+
+       return 0;
+}
+
+static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
+                                const struct timespec64 *ts)
+{
+       struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+       u64 ns;
+
+       ns = timespec64_to_ns(ts);
+
+       mutex_lock(&chip->reg_lock);
+       timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns);
+       mutex_unlock(&chip->reg_lock);
+
+       return 0;
+}
+
+static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
+                                     struct ptp_clock_request *rq, int on)
+{
+       int rising = (rq->extts.flags & PTP_RISING_EDGE);
+       int func;
+       int pin;
+       int err;
+
+       pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
+
+       if (pin < 0)
+               return -EBUSY;
+
+       mutex_lock(&chip->reg_lock);
+
+       if (on) {
+               func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ;
+
+               err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+               if (err)
+                       goto out;
+
+               schedule_delayed_work(&chip->tai_event_work,
+                                     TAI_EVENT_WORK_INTERVAL);
+
+               err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
+       } else {
+               func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO;
+
+               err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+
+               cancel_delayed_work_sync(&chip->tai_event_work);
+       }
+
+out:
+       mutex_unlock(&chip->reg_lock);
+
+       return err;
+}
+
+static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
+                               struct ptp_clock_request *rq, int on)
+{
+       struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+       switch (rq->type) {
+       case PTP_CLK_REQ_EXTTS:
+               return mv88e6xxx_ptp_enable_extts(chip, rq, on);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
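+/* Only the EXTTS pin function is supported; PEROUT and PHYSYNC
+ * assignments from the PTP pin ioctls are rejected.
+ */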
+static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+                               enum ptp_pin_function func, unsigned int chan)
+{
+       switch (func) {
+       case PTP_PF_NONE:
+       case PTP_PF_EXTTS:
+               break;
+       case PTP_PF_PEROUT:
+       case PTP_PF_PHYSYNC:
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+/* With a 125MHz input clock, the 32-bit timestamp counter overflows in
+ * 2^32 * 8 ns = ~34.36 seconds; this task forces a read at least twice
+ * per wrap period so that the timecounter misses no overflows.
+ */
+#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16)
+static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
+{
+       struct delayed_work *dw = to_delayed_work(work);
+       struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
+       struct timespec64 ts;
+
+       mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
+
+       schedule_delayed_work(&chip->overflow_work,
+                             MV88E6XXX_TAI_OVERFLOW_PERIOD);
+}
+
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+       int i;
+
+       /* Set up the cycle counter */
+       memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
+       chip->tstamp_cc.read    = mv88e6xxx_ptp_clock_read;
+       chip->tstamp_cc.mask    = CYCLECOUNTER_MASK(32);
+       chip->tstamp_cc.mult    = CC_MULT;
+       chip->tstamp_cc.shift   = CC_SHIFT;
+
+       timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
+                        ktime_to_ns(ktime_get_real()));
+
+       INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
+       INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
+
+       chip->ptp_clock_info.owner = THIS_MODULE;
+       snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
+                "%s", dev_name(chip->dev));
+       chip->ptp_clock_info.max_adj    = 1000000;
+
+       chip->ptp_clock_info.n_ext_ts   = 1;
+       chip->ptp_clock_info.n_per_out  = 0;
+       chip->ptp_clock_info.n_pins     = mv88e6xxx_num_gpio(chip);
+       chip->ptp_clock_info.pps        = 0;
+
+       for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) {
+               struct ptp_pin_desc *ppd = &chip->pin_config[i];
+
+               snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i);
+               ppd->index = i;
+               ppd->func = PTP_PF_NONE;
+       }
+       chip->ptp_clock_info.pin_config = chip->pin_config;
+
+       chip->ptp_clock_info.adjfine    = mv88e6xxx_ptp_adjfine;
+       chip->ptp_clock_info.adjtime    = mv88e6xxx_ptp_adjtime;
+       chip->ptp_clock_info.gettime64  = mv88e6xxx_ptp_gettime;
+       chip->ptp_clock_info.settime64  = mv88e6xxx_ptp_settime;
+       chip->ptp_clock_info.enable     = mv88e6xxx_ptp_enable;
+       chip->ptp_clock_info.verify     = mv88e6xxx_ptp_verify;
+       chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
+
+       chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
+       if (IS_ERR(chip->ptp_clock))
+               return PTR_ERR(chip->ptp_clock);
+
+       schedule_delayed_work(&chip->overflow_work,
+                             MV88E6XXX_TAI_OVERFLOW_PERIOD);
+
+       return 0;
+}
+
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+       if (chip->ptp_clock) {
+               cancel_delayed_work_sync(&chip->overflow_work);
+               cancel_delayed_work_sync(&chip->tai_event_work);
+
+               ptp_clock_unregister(chip->ptp_clock);
+               chip->ptp_clock = NULL;
+       }
+}
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
new file mode 100644 (file)
index 0000000..10f271a
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_PTP_H
+#define _MV88E6XXX_PTP_H
+
+#include "chip.h"
+
+/* Offset 0x00: TAI Global Config */
+#define MV88E6XXX_TAI_CFG                      0x00
+#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE                0x8000
+#define MV88E6XXX_TAI_CFG_CAP_CTR_START                0x4000
+#define MV88E6XXX_TAI_CFG_EVREQ_FALLING                0x2000
+#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO       0x1000
+#define MV88E6XXX_TAI_CFG_IRL_ENABLE           0x0400
+#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN          0x0200
+#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN         0x0100
+#define MV88E6XXX_TAI_CFG_TRIG_LOCK            0x0080
+#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE         0x0008
+#define MV88E6XXX_TAI_CFG_MULTI_PTP            0x0004
+#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT    0x0002
+#define MV88E6XXX_TAI_CFG_TRIG_ENABLE          0x0001
+
+/* Offset 0x01: Timestamp Clock Period (ps) */
+#define MV88E6XXX_TAI_CLOCK_PERIOD             0x01
+
+/* Offset 0x02/0x03: Trigger Generation Amount */
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO       0x02
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI       0x03
+
+/* Offset 0x04: Clock Compensation */
+#define MV88E6XXX_TAI_TRIG_CLOCK_COMP          0x04
+
+/* Offset 0x05: Trigger Configuration */
+#define MV88E6XXX_TAI_TRIG_CFG                 0x05
+
+/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */
+#define MV88E6XXX_TAI_IRL_AMOUNT               0x06
+
+/* Offset 0x07: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP                 0x07
+
+/* Offset 0x08: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP_PS              0x08
+
+/* Offset 0x09: Event Status */
+#define MV88E6XXX_TAI_EVENT_STATUS             0x09
+#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG    0x4000
+#define MV88E6XXX_TAI_EVENT_STATUS_ERROR       0x0200
+#define MV88E6XXX_TAI_EVENT_STATUS_VALID       0x0100
+#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK    0x00ff
+
+/* Offset 0x0A/0x0B: Event Time */
+#define MV88E6XXX_TAI_EVENT_TIME_LO            0x0a
+#define MV88E6XXX_TAI_EVENT_TIME_HI            0x0b
+
+/* Offset 0x0E/0x0F: PTP Global Time */
+#define MV88E6XXX_TAI_TIME_LO                  0x0e
+#define MV88E6XXX_TAI_TIME_HI                  0x0f
+
+/* Offset 0x10/0x11: Trig Generation Time */
+#define MV88E6XXX_TAI_TRIG_TIME_LO             0x10
+#define MV88E6XXX_TAI_TRIG_TIME_HI             0x11
+
+/* Offset 0x12: Lock Status */
+#define MV88E6XXX_TAI_LOCK_STATUS              0x12
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
+
+#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip,      \
+                                     ptp_clock_info)
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+       return -1;
+}
+
+static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+       return 0;
+}
+
+static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_PTP_H */
index f3c0111..4756969 100644 (file)
@@ -55,18 +55,30 @@ static int mv88e6352_serdes_power_set(struct mv88e6xxx_chip *chip, bool on)
        return err;
 }
 
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
 {
-       int err;
        u8 cmode;
+       int err;
 
        err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
-       if (err)
-               return err;
+       if (err) {
+               dev_err(chip->dev, "failed to read cmode\n");
+               return false;
+       }
 
        if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
            (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
-           (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII)) {
+           (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
+               return true;
+
+       return false;
+}
+
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+{
+       int err;
+
+       if (mv88e6352_port_has_serdes(chip, port)) {
                err = mv88e6352_serdes_power_set(chip, on);
                if (err < 0)
                        return err;
@@ -75,6 +87,90 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
        return 0;
 }
 
+struct mv88e6352_serdes_hw_stat {
+       char string[ETH_GSTRING_LEN];
+       int sizeof_stat;
+       int reg;
+};
+
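+/* For each counter, @reg names the SERDES PHY register holding it; a
+ * 32-bit counter (sizeof_stat == 32) spans @reg (high word) and
+ * @reg + 1 (low word), as read back in mv88e6352_serdes_get_stat().
+ */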
+static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
+       { "serdes_fibre_rx_error", 16, 21 },
+       { "serdes_PRBS_error", 32, 24 },
+};
+
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
+{
+       if (mv88e6352_port_has_serdes(chip, port))
+               return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+
+       return 0;
+}
+
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+                                 int port, uint8_t *data)
+{
+       struct mv88e6352_serdes_hw_stat *stat;
+       int i;
+
+       if (!mv88e6352_port_has_serdes(chip, port))
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+               stat = &mv88e6352_serdes_hw_stats[i];
+               memcpy(data + i * ETH_GSTRING_LEN, stat->string,
+                      ETH_GSTRING_LEN);
+       }
+}
+
+static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
+                                         struct mv88e6352_serdes_hw_stat *stat)
+{
+       u64 val = 0;
+       u16 reg;
+       int err;
+
+       err = mv88e6352_serdes_read(chip, stat->reg, &reg);
+       if (err) {
+               dev_err(chip->dev, "failed to read statistic\n");
+               return 0;
+       }
+
+       val = reg;
+
+       if (stat->sizeof_stat == 32) {
+               err = mv88e6352_serdes_read(chip, stat->reg + 1, &reg);
+               if (err) {
+                       dev_err(chip->dev, "failed to read statistic\n");
+                       return 0;
+               }
+               val = val << 16 | reg;
+       }
+
+       return val;
+}
+
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+                               uint64_t *data)
+{
+       struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
+       struct mv88e6352_serdes_hw_stat *stat;
+       u64 value;
+       int i;
+
+       if (!mv88e6352_port_has_serdes(chip, port))
+               return;
+
+       BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
+                    ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
+
+       for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+               stat = &mv88e6352_serdes_hw_stats[i];
+               value = mv88e6352_serdes_get_stat(chip, stat);
+               mv88e6xxx_port->serdes_stats[i] += value;
+               data[i] = mv88e6xxx_port->serdes_stats[i];
+       }
+}
+
 /* Set the power on/off for 10GBASE-R and 10GBASE-X4/X2 */
 static int mv88e6390_serdes_10g(struct mv88e6xxx_chip *chip, int addr, bool on)
 {
index 5c1cd6d..641baa7 100644 (file)
@@ -44,5 +44,9 @@
 
 int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+                                 int port, uint8_t *data);
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+                               uint64_t *data);
 #endif
index 9df22eb..600d5ad 100644 (file)
@@ -631,7 +631,7 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-qca8k_get_sset_count(struct dsa_switch *ds)
+qca8k_get_sset_count(struct dsa_switch *ds, int port)
 {
        return ARRAY_SIZE(ar8327_mib);
 }
index f975c2f..1d650e6 100644 (file)
@@ -7,8 +7,8 @@ obj-$(CONFIG_MAC8390) += mac8390.o
 obj-$(CONFIG_APNE) += apne.o 8390.o
 obj-$(CONFIG_ARM_ETHERH) += etherh.o
 obj-$(CONFIG_AX88796) += ax88796.o
-obj-$(CONFIG_HYDRA) += hydra.o 8390.o
-obj-$(CONFIG_MCF8390) += mcf8390.o 8390.o
+obj-$(CONFIG_HYDRA) += hydra.o
+obj-$(CONFIG_MCF8390) += mcf8390.o
 obj-$(CONFIG_NE2000) += ne.o 8390p.o
 obj-$(CONFIG_NE2K_PCI) += ne2k-pci.o 8390.o
 obj-$(CONFIG_PCMCIA_AXNET) += axnet_cs.o 8390.o
@@ -16,4 +16,4 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o
 obj-$(CONFIG_STNIC) += stnic.o 8390.o
 obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
 obj-$(CONFIG_WD80x3) += wd.o 8390.o
-obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o
+obj-$(CONFIG_ZORRO8390) += zorro8390.o
index 2455547..da61cf3 100644 (file)
@@ -77,8 +77,6 @@ static unsigned char version[] = "ax88796.c: Copyright 2005,2007 Simtec Electron
 
 #define AX_GPOC_PPDSET BIT(6)
 
-static u32 ax_msg_enable;
-
 /* device private data */
 
 struct ax_device {
@@ -747,7 +745,6 @@ static int ax_init_dev(struct net_device *dev)
        ei_local->block_output = &ax_block_output;
        ei_local->get_8390_hdr = &ax_get_8390_hdr;
        ei_local->priv = 0;
-       ei_local->msg_enable = ax_msg_enable;
 
        dev->netdev_ops = &ax_netdev_ops;
        dev->ethtool_ops = &ax_ethtool_ops;
index 7bddb8e..d422a12 100644 (file)
@@ -104,7 +104,6 @@ static void AX88190_init(struct net_device *dev, int startp);
 static int ax_open(struct net_device *dev);
 static int ax_close(struct net_device *dev);
 static irqreturn_t ax_interrupt(int irq, void *dev_id);
-static u32 axnet_msg_enable;
 
 /*====================================================================*/
 
@@ -151,7 +150,6 @@ static int axnet_probe(struct pcmcia_device *link)
        return -ENOMEM;
 
     ei_local = netdev_priv(dev);
-    ei_local->msg_enable = axnet_msg_enable;
     spin_lock_init(&ei_local->page_lock);
 
     info = PRIV(dev);
index 11cbf22..32e9627 100644 (file)
@@ -64,8 +64,6 @@ static char version[] =
 
 #include "lib8390.c"
 
-static u32 etherh_msg_enable;
-
 struct etherh_priv {
        void __iomem    *ioc_fast;
        void __iomem    *memc;
@@ -501,18 +499,6 @@ etherh_close(struct net_device *dev)
        return 0;
 }
 
-/*
- * Initialisation
- */
-
-static void __init etherh_banner(void)
-{
-       static int version_printed;
-
-       if ((etherh_msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0))
-               pr_info("%s", version);
-}
-
 /*
  * Read the ethernet address string from the on board rom.
  * This is an ascii string...
@@ -671,8 +657,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
        struct etherh_priv *eh;
        int ret;
 
-       etherh_banner();
-
        ret = ecard_request_resources(ec);
        if (ret)
                goto out;
@@ -757,7 +741,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
        ei_local->block_output  = etherh_block_output;
        ei_local->get_8390_hdr  = etherh_get_header;
        ei_local->interface_num = 0;
-       ei_local->msg_enable = etherh_msg_enable;
 
        etherh_reset(dev);
        __NS8390_init(dev, 0);
index 8ae2491..941754e 100644 (file)
@@ -66,7 +66,6 @@ static void hydra_block_input(struct net_device *dev, int count,
 static void hydra_block_output(struct net_device *dev, int count,
                               const unsigned char *buf, int start_page);
 static void hydra_remove_one(struct zorro_dev *z);
-static u32 hydra_msg_enable;
 
 static struct zorro_device_id hydra_zorro_tbl[] = {
     { ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET },
@@ -119,7 +118,6 @@ static int hydra_init(struct zorro_dev *z)
     int start_page, stop_page;
     int j;
     int err;
-    struct ei_device *ei_local;
 
     static u32 hydra_offsets[16] = {
        0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
@@ -138,8 +136,6 @@ static int hydra_init(struct zorro_dev *z)
     start_page = NESM_START_PG;
     stop_page = NESM_STOP_PG;
 
-    ei_local = netdev_priv(dev);
-    ei_local->msg_enable = hydra_msg_enable;
     dev->base_addr = ioaddr;
     dev->irq = IRQ_AMIGA_PORTS;
 
index 60f8e2c..5d9bbde 100644 (file)
@@ -975,6 +975,8 @@ static void ethdev_setup(struct net_device *dev)
        ether_setup(dev);
 
        spin_lock_init(&ei_local->page_lock);
+
+       ei_local->msg_enable = msg_enable;
 }
 
 /**
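
The hunk above moves msg_enable initialization into the shared 8390 core, which is why the per-driver *_msg_enable statics disappear throughout the surrounding patches. A minimal sketch of the usual module-parameter pattern feeding that value (the 'debug' knob and the default bitmask here are illustrative, not the exact lib8390 source):

	static int debug = -1;			/* -1 selects the default bits */
	module_param(debug, int, 0);
	MODULE_PARM_DESC(debug, "debug message level");

	static u32 msg_enable;			/* copied into each ei_device */

	static int __init lib8390_example_init(void)
	{
		msg_enable = netif_msg_init(debug, NETIF_MSG_DRV | NETIF_MSG_PROBE);
		return 0;
	}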
index 2f91ce8..b6d735b 100644 (file)
@@ -123,8 +123,7 @@ enum mac8390_access {
 };
 
 extern int mac8390_memtest(struct net_device *dev);
-static int mac8390_initdev(struct net_device *dev,
-                          struct nubus_rsrc *ndev,
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
                           enum mac8390_type type);
 
 static int mac8390_open(struct net_device *dev);
@@ -168,9 +167,8 @@ static void slow_sane_block_output(struct net_device *dev, int count,
                                   const unsigned char *buf, int start_page);
 static void word_memcpy_tocard(unsigned long tp, const void *fp, int count);
 static void word_memcpy_fromcard(void *tp, unsigned long fp, int count);
-static u32 mac8390_msg_enable;
 
-static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
+static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres)
 {
        switch (fres->dr_sw) {
        case NUBUS_DRSW_3COM:
@@ -236,7 +234,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
        return MAC8390_NONE;
 }
 
-static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
+static enum mac8390_access mac8390_testio(unsigned long membase)
 {
        unsigned long outdata = 0xA5A0B5B0;
        unsigned long indata =  0x00000000;
@@ -254,7 +252,7 @@ static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
        return ACCESS_UNKNOWN;
 }
 
-static int __init mac8390_memsize(unsigned long membase)
+static int mac8390_memsize(unsigned long membase)
 {
        unsigned long flags;
        int i, j;
@@ -290,36 +288,34 @@ static int __init mac8390_memsize(unsigned long membase)
        return i * 0x1000;
 }
 
-static bool __init mac8390_init(struct net_device *dev,
-                               struct nubus_rsrc *ndev,
-                               enum mac8390_type cardtype)
+static bool mac8390_rsrc_init(struct net_device *dev,
+                             struct nubus_rsrc *fres,
+                             enum mac8390_type cardtype)
 {
+       struct nubus_board *board = fres->board;
        struct nubus_dir dir;
        struct nubus_dirent ent;
        int offset;
        volatile unsigned short *i;
 
-       printk_once(KERN_INFO pr_fmt("%s"), version);
-
-       dev->irq = SLOT2IRQ(ndev->board->slot);
+       dev->irq = SLOT2IRQ(board->slot);
        /* This is getting to be a habit */
-       dev->base_addr = (ndev->board->slot_addr |
-                         ((ndev->board->slot & 0xf) << 20));
+       dev->base_addr = board->slot_addr | ((board->slot & 0xf) << 20);
 
        /*
         * Get some Nubus info - we will trust the card's idea
         * of where its memory and registers are.
         */
 
-       if (nubus_get_func_dir(ndev, &dir) == -1) {
-               pr_err("%s: Unable to get Nubus functional directory for slot %X!\n",
-                      dev->name, ndev->board->slot);
+       if (nubus_get_func_dir(fres, &dir) == -1) {
+               dev_err(&board->dev,
+                       "Unable to get Nubus functional directory\n");
                return false;
        }
 
        /* Get the MAC address */
        if (nubus_find_rsrc(&dir, NUBUS_RESID_MAC_ADDRESS, &ent) == -1) {
-               pr_info("%s: Couldn't get MAC address!\n", dev->name);
+               dev_info(&board->dev, "MAC address resource not found\n");
                return false;
        }
 
@@ -329,8 +325,8 @@ static bool __init mac8390_init(struct net_device *dev,
                nubus_rewinddir(&dir);
                if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS,
                                    &ent) == -1) {
-                       pr_err("%s: Memory offset resource for slot %X not found!\n",
-                              dev->name, ndev->board->slot);
+                       dev_err(&board->dev,
+                               "Memory offset resource not found\n");
                        return false;
                }
                nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -340,8 +336,8 @@ static bool __init mac8390_init(struct net_device *dev,
                nubus_rewinddir(&dir);
                if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH,
                                    &ent) == -1) {
-                       pr_info("%s: Memory length resource for slot %X not found, probing\n",
-                               dev->name, ndev->board->slot);
+                       dev_info(&board->dev,
+                                "Memory length resource not found, probing\n");
                        offset = mac8390_memsize(dev->mem_start);
                } else {
                        nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -351,25 +347,25 @@ static bool __init mac8390_init(struct net_device *dev,
                switch (cardtype) {
                case MAC8390_KINETICS:
                case MAC8390_DAYNA: /* it's the same */
-                       dev->base_addr = (int)(ndev->board->slot_addr +
+                       dev->base_addr = (int)(board->slot_addr +
                                               DAYNA_8390_BASE);
-                       dev->mem_start = (int)(ndev->board->slot_addr +
+                       dev->mem_start = (int)(board->slot_addr +
                                               DAYNA_8390_MEM);
                        dev->mem_end = dev->mem_start +
                                       mac8390_memsize(dev->mem_start);
                        break;
                case MAC8390_INTERLAN:
-                       dev->base_addr = (int)(ndev->board->slot_addr +
+                       dev->base_addr = (int)(board->slot_addr +
                                               INTERLAN_8390_BASE);
-                       dev->mem_start = (int)(ndev->board->slot_addr +
+                       dev->mem_start = (int)(board->slot_addr +
                                               INTERLAN_8390_MEM);
                        dev->mem_end = dev->mem_start +
                                       mac8390_memsize(dev->mem_start);
                        break;
                case MAC8390_CABLETRON:
-                       dev->base_addr = (int)(ndev->board->slot_addr +
+                       dev->base_addr = (int)(board->slot_addr +
                                               CABLETRON_8390_BASE);
-                       dev->mem_start = (int)(ndev->board->slot_addr +
+                       dev->mem_start = (int)(board->slot_addr +
                                               CABLETRON_8390_MEM);
                        /* The base address is unreadable if 0x00
                         * has been written to the command register
@@ -384,8 +380,8 @@ static bool __init mac8390_init(struct net_device *dev,
                        break;
 
                default:
-                       pr_err("Card type %s is unsupported, sorry\n",
-                              ndev->board->name);
+                       dev_err(&board->dev,
+                               "No known base address for card type\n");
                        return false;
                }
        }
@@ -393,91 +389,83 @@ static bool __init mac8390_init(struct net_device *dev,
        return true;
 }
 
-struct net_device * __init mac8390_probe(int unit)
+static int mac8390_device_probe(struct nubus_board *board)
 {
        struct net_device *dev;
-       struct nubus_rsrc *ndev = NULL;
        int err = -ENODEV;
-       struct ei_device *ei_local;
-
-       static unsigned int slots;
-
-       enum mac8390_type cardtype;
-
-       /* probably should check for Nubus instead */
-
-       if (!MACH_IS_MAC)
-               return ERR_PTR(-ENODEV);
+       struct nubus_rsrc *fres;
+       enum mac8390_type cardtype = MAC8390_NONE;
 
        dev = ____alloc_ei_netdev(0);
        if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0)
-               sprintf(dev->name, "eth%d", unit);
+               return -ENOMEM;
 
-       for_each_func_rsrc(ndev) {
-               if (ndev->category != NUBUS_CAT_NETWORK ||
-                   ndev->type != NUBUS_TYPE_ETHERNET)
-                       continue;
+       SET_NETDEV_DEV(dev, &board->dev);
 
-               /* Have we seen it already? */
-               if (slots & (1 << ndev->board->slot))
+       for_each_board_func_rsrc(board, fres) {
+               if (fres->category != NUBUS_CAT_NETWORK ||
+                   fres->type != NUBUS_TYPE_ETHERNET)
                        continue;
-               slots |= 1 << ndev->board->slot;
 
-               cardtype = mac8390_ident(ndev);
+               cardtype = mac8390_ident(fres);
                if (cardtype == MAC8390_NONE)
                        continue;
 
-               if (!mac8390_init(dev, ndev, cardtype))
-                       continue;
-
-               /* Do the nasty 8390 stuff */
-               if (!mac8390_initdev(dev, ndev, cardtype))
+               if (mac8390_rsrc_init(dev, fres, cardtype))
                        break;
        }
-
-       if (!ndev)
+       if (!fres)
                goto out;
 
-        ei_local = netdev_priv(dev);
-        ei_local->msg_enable = mac8390_msg_enable;
+       err = mac8390_initdev(dev, board, cardtype);
+       if (err)
+               goto out;
 
        err = register_netdev(dev);
        if (err)
                goto out;
-       return dev;
+
+       nubus_set_drvdata(board, dev);
+       return 0;
 
 out:
        free_netdev(dev);
-       return ERR_PTR(err);
+       return err;
+}
+
+static int mac8390_device_remove(struct nubus_board *board)
+{
+       struct net_device *dev = nubus_get_drvdata(board);
+
+       unregister_netdev(dev);
+       free_netdev(dev);
+       return 0;
 }
 
-#ifdef MODULE
+static struct nubus_driver mac8390_driver = {
+       .probe = mac8390_device_probe,
+       .remove = mac8390_device_remove,
+       .driver = {
+               .name = KBUILD_MODNAME,
+               .owner = THIS_MODULE,
+       }
+};
+
 MODULE_AUTHOR("David Huggins-Daines <dhd@debian.org> and others");
 MODULE_DESCRIPTION("Macintosh NS8390-based Nubus Ethernet driver");
 MODULE_LICENSE("GPL");
 
-static struct net_device *dev_mac8390;
-
-int __init init_module(void)
+static int __init mac8390_init(void)
 {
-       dev_mac8390 = mac8390_probe(-1);
-       if (IS_ERR(dev_mac8390)) {
-               pr_warn("mac8390: No card found\n");
-               return PTR_ERR(dev_mac8390);
-       }
-       return 0;
+       return nubus_driver_register(&mac8390_driver);
 }
+module_init(mac8390_init);
 
-void __exit cleanup_module(void)
+static void __exit mac8390_exit(void)
 {
-       unregister_netdev(dev_mac8390);
-       free_netdev(dev_mac8390);
+       nubus_driver_unregister(&mac8390_driver);
 }
-
-#endif /* MODULE */
+module_exit(mac8390_exit);
 
 static const struct net_device_ops mac8390_netdev_ops = {
        .ndo_open               = mac8390_open,
@@ -493,9 +481,8 @@ static const struct net_device_ops mac8390_netdev_ops = {
 #endif
 };
 
-static int __init mac8390_initdev(struct net_device *dev,
-                                 struct nubus_rsrc *ndev,
-                                 enum mac8390_type type)
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
+                          enum mac8390_type type)
 {
        static u32 fwrd4_offsets[16] = {
                0,      4,      8,      12,
@@ -546,7 +533,8 @@ static int __init mac8390_initdev(struct net_device *dev,
        case MAC8390_APPLE:
                switch (mac8390_testio(dev->mem_start)) {
                case ACCESS_UNKNOWN:
-                       pr_err("Don't know how to access card memory!\n");
+                       dev_err(&board->dev,
+                               "Don't know how to access card memory\n");
                        return -ENODEV;
 
                case ACCESS_16:
@@ -612,21 +600,18 @@ static int __init mac8390_initdev(struct net_device *dev,
                break;
 
        default:
-               pr_err("Card type %s is unsupported, sorry\n",
-                      ndev->board->name);
+               dev_err(&board->dev, "Unsupported card type\n");
                return -ENODEV;
        }
 
        __NS8390_init(dev, 0);
 
        /* Good, done, now spit out some messages */
-       pr_info("%s: %s in slot %X (type %s)\n",
-               dev->name, ndev->board->name, ndev->board->slot,
-               cardname[type]);
-       pr_info("MAC %pM IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
-               dev->dev_addr, dev->irq,
-               (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
-               dev->mem_start, access_bitmode ? 32 : 16);
+       dev_info(&board->dev, "%s (type %s)\n", board->name, cardname[type]);
+       dev_info(&board->dev, "MAC %pM, IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
+                dev->dev_addr, dev->irq,
+                (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
+                dev->mem_start, access_bitmode ? 32 : 16);
        return 0;
 }
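
Alongside the bus-model conversion, the mac8390 patch above replaces pr_*() calls that hand-format dev->name and the slot number with dev_*() helpers keyed to the board device, which let the driver core supply the device identity prefix. A before/after micro-example drawn from the hunks above:

	/* Before: identity formatted by hand at every call site. */
	pr_err("%s: Unable to get Nubus functional directory for slot %X!\n",
	       dev->name, ndev->board->slot);

	/* After: the driver core prefixes the driver and device name. */
	dev_err(&board->dev, "Unable to get Nubus functional directory\n");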
 
index 4bb967b..4ad8031 100644 (file)
@@ -38,7 +38,6 @@ static const char version[] =
 
 #define NESM_START_PG  0x40    /* First page of TX buffer */
 #define NESM_STOP_PG   0x80    /* Last page +1 of RX ring */
-static u32 mcf8390_msg_enable;
 
 #ifdef NE2000_ODDOFFSET
 /*
@@ -407,7 +406,6 @@ static int mcf8390_init(struct net_device *dev)
 static int mcf8390_probe(struct platform_device *pdev)
 {
        struct net_device *dev;
-       struct ei_device *ei_local;
        struct resource *mem, *irq;
        resource_size_t msize;
        int ret;
@@ -435,8 +433,6 @@ static int mcf8390_probe(struct platform_device *pdev)
 
        SET_NETDEV_DEV(dev, &pdev->dev);
        platform_set_drvdata(pdev, dev);
-       ei_local = netdev_priv(dev);
-       ei_local->msg_enable = mcf8390_msg_enable;
 
        dev->irq = irq->start;
        dev->base_addr = mem->start;
index 66f4798..4cdff6e 100644 (file)
@@ -485,7 +485,7 @@ static int __init ne_probe1(struct net_device *dev, unsigned long ioaddr)
                mdelay(10);             /* wait 10ms for interrupt to propagate */
                outb_p(0x00, ioaddr + EN0_IMR);                 /* Mask it again. */
                dev->irq = probe_irq_off(cookie);
-               if (netif_msg_probe(ei_local))
+               if (ne_msg_enable & NETIF_MSG_PROBE)
                        pr_cont(" autoirq is %d", dev->irq);
        } else if (dev->irq == 2)
                /* Fixup for users that don't know that IRQ 2 is really IRQ 9,
index bcad4a7..61e4380 100644 (file)
@@ -66,7 +66,6 @@
 #define PCNET_RDC_TIMEOUT (2*HZ/100)   /* Max wait in jiffies for Tx RDC */
 
 static const char *if_names[] = { "auto", "10baseT", "10base2"};
-static u32 pcnet_msg_enable;
 
 /*====================================================================*/
 
@@ -556,7 +555,6 @@ static int pcnet_config(struct pcmcia_device *link)
     int start_pg, stop_pg, cm_offset;
     int has_shmem = 0;
     struct hw_info *local_hw_info;
-    struct ei_device *ei_local;
 
     dev_dbg(&link->dev, "pcnet_config\n");
 
@@ -606,8 +604,6 @@ static int pcnet_config(struct pcmcia_device *link)
        mii_phy_probe(dev);
 
     SET_NETDEV_DEV(dev, &link->dev);
-    ei_local = netdev_priv(dev);
-    ei_local->msg_enable = pcnet_msg_enable;
 
     if (register_netdev(dev) != 0) {
        pr_notice("register_netdev() failed\n");
index 6efa272..fb17c2c 100644 (file)
@@ -299,7 +299,7 @@ static int __init wd_probe1(struct net_device *dev, int ioaddr)
 
                        outb_p(0x00, nic_addr+EN0_IMR); /* Mask all intrs. again. */
 
-                       if (netif_msg_drv(ei_local))
+                       if (wd_msg_enable & NETIF_MSG_PROBE)
                                pr_cont(" autoirq is %d", dev->irq);
                        if (dev->irq < 2)
                                dev->irq = word16 ? 10 : 5;
index 6d93956..35a500a 100644 (file)
@@ -44,8 +44,6 @@
 static const char version[] =
        "8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
 
-static u32 zorro8390_msg_enable;
-
 #include "lib8390.c"
 
 #define DRV_NAME       "zorro8390"
@@ -296,7 +294,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
        int err;
        unsigned char SA_prom[32];
        int start_page, stop_page;
-       struct ei_device *ei_local = netdev_priv(dev);
        static u32 zorro8390_offsets[16] = {
                0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
                0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
@@ -388,8 +385,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
        dev->netdev_ops = &zorro8390_netdev_ops;
        __NS8390_init(dev, 0);
 
-       ei_local->msg_enable = zorro8390_msg_enable;
-
        err = register_netdev(dev);
        if (err) {
                free_irq(IRQ_AMIGA_PORTS, dev);
index 358f7ab..c99e3e8 100644 (file)
@@ -649,7 +649,7 @@ static void amd8111e_free_ring(struct amd8111e_priv *lp)
 static int amd8111e_tx(struct net_device *dev)
 {
        struct amd8111e_priv *lp = netdev_priv(dev);
-       int tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK;
+       int tx_index;
        int status;
        /* Complete all the transmit packet */
        while (lp->tx_complete_idx != lp->tx_idx){
index 3e5833c..eb23f9b 100644 (file)
@@ -426,6 +426,8 @@ static int xgbe_pci_resume(struct pci_dev *pdev)
        struct net_device *netdev = pdata->netdev;
        int ret = 0;
 
+       XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff);
+
        pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
        XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
 
index f17a160..137cbb4 100644 (file)
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
        dev->netdev_ops         = &mace_netdev_ops;
        dev->watchdog_timeo     = TX_TIMEOUT;
 
-       printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
-              dev->name, dev->dev_addr);
+       pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+               dev->dev_addr, mp->chipid);
 
        err = register_netdev(dev);
        if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
                        else if (fs & (UFLO|LCOL|RTRY)) {
                                ++dev->stats.tx_aborted_errors;
                                if (mb->xmtfs & UFLO) {
-                                       printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
                                        dev->stats.tx_fifo_errors++;
                                        mace_txdma_reset(dev);
                                }
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
 
        if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
                dev->stats.rx_errors++;
-               if (frame_status & RS_OFLO) {
-                       printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+               if (frame_status & RS_OFLO)
                        dev->stats.rx_fifo_errors++;
-               }
                if (frame_status & RS_CLSN)
                        dev->stats.collisions++;
                if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
        },
 };
 
-static int __init mac_mace_init_module(void)
-{
-       if (!MACH_IS_MAC)
-               return -ENODEV;
-
-       return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
-       platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
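
module_platform_driver() replaces the hand-rolled init/exit pair deleted above; the macro expands to essentially what was removed (a simplified sketch of the generic expansion):

	static int __init mac_mace_driver_init(void)
	{
		return platform_driver_register(&mac_mace_driver);
	}
	module_init(mac_mace_driver_init);

	static void __exit mac_mace_driver_exit(void)
	{
		platform_driver_unregister(&mac_mace_driver);
	}
	module_exit(mac_mace_driver_exit);

Dropping the MACH_IS_MAC guard is presumably safe because the matching platform device is only created on Mac hardware, so probe never runs elsewhere.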
index 22889fc..87c4308 100644 (file)
@@ -226,6 +226,10 @@ static int aq_pci_probe(struct pci_dev *pdev,
                goto err_ioremap;
 
        self->aq_hw = kzalloc(sizeof(*self->aq_hw), GFP_KERNEL);
+       if (!self->aq_hw) {
+               err = -ENOMEM;
+               goto err_ioremap;
+       }
        self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self);
 
        for (bar = 0; bar < 4; ++bar) {
@@ -235,19 +239,19 @@ static int aq_pci_probe(struct pci_dev *pdev,
                        mmio_pa = pci_resource_start(pdev, bar);
                        if (mmio_pa == 0U) {
                                err = -EIO;
-                               goto err_ioremap;
+                               goto err_free_aq_hw;
                        }
 
                        reg_sz = pci_resource_len(pdev, bar);
                        if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) {
                                err = -EIO;
-                               goto err_ioremap;
+                               goto err_free_aq_hw;
                        }
 
                        self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz);
                        if (!self->aq_hw->mmio) {
                                err = -EIO;
-                               goto err_ioremap;
+                               goto err_free_aq_hw;
                        }
                        break;
                }
@@ -255,7 +259,7 @@ static int aq_pci_probe(struct pci_dev *pdev,
 
        if (bar == 4) {
                err = -EIO;
-               goto err_ioremap;
+               goto err_free_aq_hw;
        }
 
        numvecs = min((u8)AQ_CFG_VECS_DEF,
@@ -290,6 +294,8 @@ err_register:
        aq_pci_free_irq_vectors(self);
 err_hwinit:
        iounmap(self->aq_hw->mmio);
+err_free_aq_hw:
+       kfree(self->aq_hw);
 err_ioremap:
        free_netdev(ndev);
 err_pci_func:
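
The aq_pci_probe() fix follows the standard kernel unwind idiom: each acquisition gets a label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A generic sketch with hypothetical names:

	static int foo_probe(void)
	{
		int err;

		foo = kzalloc(sizeof(*foo), GFP_KERNEL);
		if (!foo)
			return -ENOMEM;

		foo->mmio = ioremap_nocache(pa, sz);	/* hypothetical resource */
		if (!foo->mmio) {
			err = -EIO;
			goto err_free_foo;	/* undo only what succeeded */
		}

		err = register_foo(foo);
		if (err)
			goto err_unmap;

		return 0;

	err_unmap:
		iounmap(foo->mmio);
	err_free_foo:
		kfree(foo);
		return err;
	}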
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
new file mode 100644 (file)
index 0000000..805fa28
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
index a77ee2f..c1841db 100644 (file)
@@ -820,7 +820,7 @@ static int tg3_ape_event_lock(struct tg3 *tp, u32 timeout_us)
 
                tg3_ape_unlock(tp, TG3_APE_LOCK_MEM);
 
-               udelay(10);
+               usleep_range(10, 20);
                timeout_us -= (timeout_us > 10) ? 10 : timeout_us;
        }
 
@@ -922,8 +922,8 @@ static int tg3_ape_send_event(struct tg3 *tp, u32 event)
        if (!(apedata & APE_FW_STATUS_READY))
                return -EAGAIN;
 
-       /* Wait for up to 1 millisecond for APE to service previous event. */
-       err = tg3_ape_event_lock(tp, 1000);
+       /* Wait for up to 20 milliseconds for APE to service previous event. */
+       err = tg3_ape_event_lock(tp, 20000);
        if (err)
                return err;
 
@@ -946,6 +946,7 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
 
        switch (kind) {
        case RESET_KIND_INIT:
+               tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
                tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG,
                                APE_HOST_SEG_SIG_MAGIC);
                tg3_ape_write32(tp, TG3_APE_HOST_SEG_LEN,
@@ -962,13 +963,6 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
                event = APE_EVENT_STATUS_STATE_START;
                break;
        case RESET_KIND_SHUTDOWN:
-               /* With the interface we are currently using,
-                * APE does not track driver state.  Wiping
-                * out the HOST SEGMENT SIGNATURE forces
-                * the APE to assume OS absent status.
-                */
-               tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG, 0x0);
-
                if (device_may_wakeup(&tp->pdev->dev) &&
                    tg3_flag(tp, WOL_ENABLE)) {
                        tg3_ape_write32(tp, TG3_APE_HOST_WOL_SPEED,
@@ -990,6 +984,18 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
        tg3_ape_send_event(tp, event);
 }
 
+static void tg3_send_ape_heartbeat(struct tg3 *tp,
+                                  unsigned long interval)
+{
+       /* Check if the heartbeat interval has elapsed */
+       if (!tg3_flag(tp, ENABLE_APE) ||
+           time_before(jiffies, tp->ape_hb_jiffies + interval))
+               return;
+
+       tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
+       tp->ape_hb_jiffies = jiffies;
+}
+
 static void tg3_disable_ints(struct tg3 *tp)
 {
        int i;
@@ -7262,6 +7268,7 @@ static int tg3_poll_msix(struct napi_struct *napi, int budget)
                }
        }
 
+       tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
        return work_done;
 
 tx_recovery:
@@ -7344,6 +7351,7 @@ static int tg3_poll(struct napi_struct *napi, int budget)
                }
        }
 
+       tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
        return work_done;
 
 tx_recovery:
@@ -10732,7 +10740,7 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
        if (tg3_flag(tp, ENABLE_APE))
                /* Write our heartbeat update interval to APE. */
                tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_INT_MS,
-                               APE_HOST_HEARTBEAT_INT_DISABLE);
+                               APE_HOST_HEARTBEAT_INT_5SEC);
 
        tg3_write_sig_post_reset(tp, RESET_KIND_INIT);
 
@@ -11077,6 +11085,9 @@ static void tg3_timer(struct timer_list *t)
                tp->asf_counter = tp->asf_multiplier;
        }
 
+       /* Update the APE heartbeat every 5 seconds. */
+       tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL);
+
        spin_unlock(&tp->lock);
 
 restart_timer:
@@ -16653,6 +16664,8 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent)
                                       pci_state_reg);
 
                tg3_ape_lock_init(tp);
+               tp->ape_hb_interval =
+                       msecs_to_jiffies(APE_HOST_HEARTBEAT_INT_5SEC);
        }
 
        /* Set up tp->grc_local_ctrl before calling
index 47f51cc..1d61aa3 100644 (file)
 #define TG3_APE_LOCK_PHY3              5
 #define TG3_APE_LOCK_GPIO              7
 
+#define TG3_APE_HB_INTERVAL             (tp->ape_hb_interval)
 #define TG3_EEPROM_SB_F1R2_MBA_OFF     0x10
 
 
@@ -3423,6 +3424,10 @@ struct tg3 {
        struct device                   *hwmon_dev;
        bool                            link_up;
        bool                            pcierr_recovery;
+
+       u32                             ape_hb;
+       unsigned long                   ape_hb_interval;
+       unsigned long                   ape_hb_jiffies;
 };
 
 /* Accessor macros for chip and asic attributes
index c87c9c6..d59497a 100644 (file)
@@ -75,6 +75,8 @@ EXPORT_SYMBOL(cavium_ptp_get);
 
 void cavium_ptp_put(struct cavium_ptp *ptp)
 {
+       if (!ptp)
+               return;
        pci_dev_put(ptp->pdev);
 }
 EXPORT_SYMBOL(cavium_ptp_put);
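
Making cavium_ptp_put() tolerate NULL matches the kfree()/pci_dev_put() convention: release helpers accept NULL so unwind paths can call them unconditionally. A sketch of the shape this enables (foo_* names hypothetical):

	void foo_put(struct foo *f)
	{
		if (!f)			/* no-op on NULL, like kfree() */
			return;
		pci_dev_put(f->pdev);	/* drop the reference foo_get() took */
	}

	/* A caller's error path no longer needs an 'if (f)' guard. */
	foo_put(f);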
index b68cde9..7d9c5ff 100644 (file)
@@ -67,11 +67,6 @@ module_param(cpi_alg, int, S_IRUGO);
 MODULE_PARM_DESC(cpi_alg,
                 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
 
-struct nicvf_xdp_tx {
-       u64 dma_addr;
-       u8  qidx;
-};
-
 static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
 {
        if (nic->sqs_mode)
@@ -507,29 +502,14 @@ static int nicvf_init_resources(struct nicvf *nic)
        return 0;
 }
 
-static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
-{
-       /* Check if it's a recycled page, if not unmap the DMA mapping.
-        * Recycled page holds an extra reference.
-        */
-       if (page_ref_count(page) == 1) {
-               dma_addr &= PAGE_MASK;
-               dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
-                                    RCV_FRAG_LEN + XDP_HEADROOM,
-                                    DMA_FROM_DEVICE,
-                                    DMA_ATTR_SKIP_CPU_SYNC);
-       }
-}
-
 static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
                                struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
                                struct rcv_queue *rq, struct sk_buff **skb)
 {
        struct xdp_buff xdp;
        struct page *page;
-       struct nicvf_xdp_tx *xdp_tx = NULL;
        u32 action;
-       u16 len, err, offset = 0;
+       u16 len, offset = 0;
        u64 dma_addr, cpu_addr;
        void *orig_data;
 
@@ -543,7 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
        cpu_addr = (u64)phys_to_virt(cpu_addr);
        page = virt_to_page((void *)cpu_addr);
 
-       xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM;
+       xdp.data_hard_start = page_address(page);
        xdp.data = (void *)cpu_addr;
        xdp_set_data_meta_invalid(&xdp);
        xdp.data_end = xdp.data + len;
@@ -563,7 +543,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 
        switch (action) {
        case XDP_PASS:
-               nicvf_unmap_page(nic, page, dma_addr);
+               /* Check if it's a recycled page, if not
+                * unmap the DMA mapping.
+                *
+                * Recycled page holds an extra reference.
+                */
+               if (page_ref_count(page) == 1) {
+                       dma_addr &= PAGE_MASK;
+                       dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+                                            RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+                                            DMA_FROM_DEVICE,
+                                            DMA_ATTR_SKIP_CPU_SYNC);
+               }
 
                /* Build SKB and pass on packet to network stack */
                *skb = build_skb(xdp.data,
@@ -576,20 +567,6 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
        case XDP_TX:
                nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
                return true;
-       case XDP_REDIRECT:
-               /* Save DMA address for use while transmitting */
-               xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
-               xdp_tx->dma_addr = dma_addr;
-               xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
-
-               err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
-               if (!err)
-                       return true;
-
-               /* Free the page on error */
-               nicvf_unmap_page(nic, page, dma_addr);
-               put_page(page);
-               break;
        default:
                bpf_warn_invalid_xdp_action(action);
                /* fall through */
@@ -597,7 +574,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
                trace_xdp_exception(nic->netdev, prog, action);
                /* fall through */
        case XDP_DROP:
-               nicvf_unmap_page(nic, page, dma_addr);
+               /* Check if it's a recycled page, if not
+                * unmap the DMA mapping.
+                *
+                * Recycled page holds an extra reference.
+                */
+               if (page_ref_count(page) == 1) {
+                       dma_addr &= PAGE_MASK;
+                       dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+                                            RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+                                            DMA_FROM_DEVICE,
+                                            DMA_ATTR_SKIP_CPU_SYNC);
+               }
                put_page(page);
                return true;
        }
@@ -1864,50 +1852,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
        }
 }
 
-static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp)
-{
-       struct nicvf *nic = netdev_priv(netdev);
-       struct nicvf *snic = nic;
-       struct nicvf_xdp_tx *xdp_tx;
-       struct snd_queue *sq;
-       struct page *page;
-       int err, qidx;
-
-       if (!netif_running(netdev) || !nic->xdp_prog)
-               return -EINVAL;
-
-       page = virt_to_page(xdp->data);
-       xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
-       qidx = xdp_tx->qidx;
-
-       if (xdp_tx->qidx >= nic->xdp_tx_queues)
-               return -EINVAL;
-
-       /* Get secondary Qset's info */
-       if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) {
-               qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS;
-               snic = (struct nicvf *)nic->snicvf[qidx - 1];
-               if (!snic)
-                       return -EINVAL;
-               qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS;
-       }
-
-       sq = &snic->qs->sq[qidx];
-       err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data,
-                                     xdp_tx->dma_addr,
-                                     xdp->data_end - xdp->data);
-       if (err)
-               return -ENOMEM;
-
-       nicvf_xdp_sq_doorbell(snic, sq, qidx);
-       return 0;
-}
-
-static void nicvf_xdp_flush(struct net_device *dev)
-{
-       return;
-}
-
 static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
 {
        struct hwtstamp_config config;
@@ -1986,8 +1930,6 @@ static const struct net_device_ops nicvf_netdev_ops = {
        .ndo_fix_features       = nicvf_fix_features,
        .ndo_set_features       = nicvf_set_features,
        .ndo_bpf                = nicvf_xdp,
-       .ndo_xdp_xmit           = nicvf_xdp_xmit,
-       .ndo_xdp_flush          = nicvf_xdp_flush,
        .ndo_do_ioctl           = nicvf_ioctl,
 };
 
index 3eae9ff..d42704d 100644 (file)
@@ -204,7 +204,7 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
 
        /* Reserve space for header modifications by BPF program */
        if (rbdr->is_xdp)
-               buf_len += XDP_HEADROOM;
+               buf_len += XDP_PACKET_HEADROOM;
 
        /* Check if it's recycled */
        if (pgcache)
@@ -224,9 +224,8 @@ ret:
                        nic->rb_page = NULL;
                        return -ENOMEM;
                }
-
                if (pgcache)
-                       pgcache->dma_addr = *rbuf + XDP_HEADROOM;
+                       pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
                nic->rb_page_offset += buf_len;
        }
 
@@ -1244,7 +1243,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
        int qentry;
 
        if (subdesc_cnt > sq->xdp_free_cnt)
-               return -1;
+               return 0;
 
        qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
 
@@ -1255,7 +1254,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
 
        sq->xdp_desc_cnt += subdesc_cnt;
 
-       return 0;
+       return 1;
 }
 
 /* Calculate no of SQ subdescriptors needed to transmit all
@@ -1656,7 +1655,7 @@ static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
                if (page_ref_count(page) != 1)
                        return;
 
-               len += XDP_HEADROOM;
+               len += XDP_PACKET_HEADROOM;
                /* Receive buffers in XDP mode are mapped from page start */
                dma_addr &= PAGE_MASK;
        }
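
With the driver-private redirect bookkeeping removed, receive buffers go back to reserving the generic XDP_PACKET_HEADROOM, and the recycle test alone decides whether to unmap. Condensing the repeated hunks above into one sketch (assuming, as nicvf does, that a recycled page holds one extra reference):

	buf_len = RCV_FRAG_LEN;
	if (rbdr->is_xdp)
		buf_len += XDP_PACKET_HEADROOM;	/* room for BPF header growth */

	/* Only the last reference owner unmaps; a recycled page keeps an
	 * extra reference, so its DMA mapping stays live for reuse.
	 */
	if (page_ref_count(page) == 1) {
		dma_addr &= PAGE_MASK;	/* XDP buffers map from page start */
		dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
				     RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	}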
index ce1eed7..5e9a03c 100644 (file)
@@ -11,7 +11,6 @@
 
 #include <linux/netdevice.h>
 #include <linux/iommu.h>
-#include <linux/bpf.h>
 #include <net/xdp.h>
 #include "q_struct.h"
 
@@ -94,9 +93,6 @@
 #define RCV_FRAG_LEN    (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
-#define RCV_BUF_HEADROOM       128 /* To store dma address for XDP redirect */
-#define XDP_HEADROOM           (XDP_PACKET_HEADROOM + RCV_BUF_HEADROOM)
-
 #define MAX_CQES_FOR_TX                ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
                                 MAX_CQE_PER_PKT_XMIT)
 
index 557fd8b..9da6f57 100644 (file)
@@ -472,7 +472,7 @@ int cudbg_collect_cim_la(struct cudbg_init *pdbg_init,
 
        if (is_t6(padap->params.chip)) {
                size = padap->params.cim_la_size / 10 + 1;
-               size *= 11 * sizeof(u32);
+               size *= 10 * sizeof(u32);
        } else {
                size = padap->params.cim_la_size / 8;
                size *= 8 * sizeof(u32);
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
                                      &payload->start, &payload->end);
 }
 
+static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
+                            int mtype, u32 addr, u32 len, void *hbuf)
+{
+       u32 win_pf, memoffset, mem_aperture, mem_base;
+       struct adapter *adap = pdbg_init->adap;
+       u32 pos, offset, resid;
+       u32 *res_buf;
+       u64 *buf;
+       int ret;
+
+       /* Argument sanity checks ...
+        */
+       if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+               return -EINVAL;
+
+       buf = (u64 *)hbuf;
+
+       /* Try to do 64-bit reads.  Residual will be handled later. */
+       resid = len & 0x7;
+       len -= resid;
+
+       ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+                               &mem_aperture);
+       if (ret)
+               return ret;
+
+       addr = addr + memoffset;
+       win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
+
+       pos = addr & ~(mem_aperture - 1);
+       offset = addr - pos;
+
+       /* Set up initial PCI-E Memory Window to cover the start of our
+        * transfer.
+        */
+       t4_memory_update_win(adap, win, pos | win_pf);
+
+       /* Transfer data from the adapter */
+       while (len > 0) {
+               *buf++ = le64_to_cpu((__force __le64)
+                                    t4_read_reg64(adap, mem_base + offset));
+               offset += sizeof(u64);
+               len -= sizeof(u64);
+
+               /* If we've reached the end of our current window aperture,
+                * move the PCI-E Memory Window on to the next.
+                */
+               if (offset == mem_aperture) {
+                       pos += mem_aperture;
+                       offset = 0;
+                       t4_memory_update_win(adap, win, pos | win_pf);
+               }
+       }
+
+       res_buf = (u32 *)buf;
+       /* Read residual in 32-bit multiples */
+       while (resid > sizeof(u32)) {
+               *res_buf++ = le32_to_cpu((__force __le32)
+                                        t4_read_reg(adap, mem_base + offset));
+               offset += sizeof(u32);
+               resid -= sizeof(u32);
+
+               /* If we've reached the end of our current window aperture,
+                * move the PCI-E Memory Window on to the next.
+                */
+               if (offset == mem_aperture) {
+                       pos += mem_aperture;
+                       offset = 0;
+                       t4_memory_update_win(adap, win, pos | win_pf);
+               }
+       }
+
+       /* Transfer residual < 32-bits */
+       if (resid)
+               t4_memory_rw_residual(adap, resid, mem_base + offset,
+                                     (u8 *)res_buf, T4_MEMORY_READ);
+
+       return 0;
+}
+
 #define CUDBG_YIELD_ITERATION 256
 
 static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
                                goto skip_read;
 
                spin_lock(&padap->win0_lock);
-               rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
-                                 bytes_read, bytes,
-                                 (__be32 *)temp_buff.data,
-                                 1);
+               rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
+                                      bytes_read, bytes, temp_buff.data);
                spin_unlock(&padap->win0_lock);
                if (rc) {
                        cudbg_err->sys_err = rc;
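
cudbg_memory_read() splits a transfer three ways: 64-bit bulk reads, a 32-bit tail, and a final sub-word residual. Worked through for a hypothetical 27-byte read:

	u32 len = 27, resid;

	resid = len & 0x7;	/* 3 bytes cannot move as 64-bit reads */
	len -= resid;		/* 24 bytes move as three 64-bit reads */
	/* resid (3) is not > sizeof(u32), so the 32-bit tail loop is
	 * skipped and t4_memory_rw_residual() copies the last 3 bytes.
	 */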
index 9040e13..d3fa53d 100644 (file)
@@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
 u32 t4_get_util_window(struct adapter *adap);
 void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
 
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+                     u32 *mem_base, u32 *mem_aperture);
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+                          int dir);
 #define T4_MEMORY_WRITE        0
 #define T4_MEMORY_READ 1
 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
index 30485f9..143686c 100644 (file)
@@ -102,7 +102,7 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
        case CUDBG_CIM_LA:
                if (is_t6(adap->params.chip)) {
                        len = adap->params.cim_la_size / 10 + 1;
-                       len *= 11 * sizeof(u32);
+                       len *= 10 * sizeof(u32);
                } else {
                        len = adap->params.cim_la_size / 8;
                        len *= 8 * sizeof(u32);
index 3177b0c..db92f18 100644 (file)
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
                return ret;
        }
 
-       /* Clear out any old resources being used by the filter before
-        * we start constructing the new filter.
-        */
-       if (f->valid)
-               clear_filter(adapter, f);
-
        if (is_t6(adapter->params.chip) && fs->type &&
            ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
            IPV6_ADDR_ANY) {
index 56bc626..7b452e8 100644 (file)
@@ -4982,9 +4982,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 
        pcie_fw = readl(adap->regs + PCIE_FW_A);
        /* Check if cxgb4 is the MASTER and fw is initialized */
-       if (!(pcie_fw & PCIE_FW_INIT_F) ||
+       if (num_vfs &&
+           (!(pcie_fw & PCIE_FW_INIT_F) ||
            !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
-           PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF) {
+           PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF)) {
                dev_warn(&pdev->dev,
                         "cxgb4 driver needs to be MASTER to support SRIOV\n");
                return -EOPNOTSUPP;
@@ -5599,24 +5600,24 @@ static void remove_one(struct pci_dev *pdev)
 #if IS_ENABLED(CONFIG_IPV6)
                t4_cleanup_clip_tbl(adapter);
 #endif
-               iounmap(adapter->regs);
                if (!is_t4(adapter->params.chip))
                        iounmap(adapter->bar2);
-               pci_disable_pcie_error_reporting(pdev);
-               if ((adapter->flags & DEV_ENABLED)) {
-                       pci_disable_device(pdev);
-                       adapter->flags &= ~DEV_ENABLED;
-               }
-               pci_release_regions(pdev);
-               kfree(adapter->mbox_log);
-               synchronize_rcu();
-               kfree(adapter);
        }
 #ifdef CONFIG_PCI_IOV
        else {
                cxgb4_iov_configure(adapter->pdev, 0);
        }
 #endif
+       iounmap(adapter->regs);
+       pci_disable_pcie_error_reporting(pdev);
+       if ((adapter->flags & DEV_ENABLED)) {
+               pci_disable_device(pdev);
+               adapter->flags &= ~DEV_ENABLED;
+       }
+       pci_release_regions(pdev);
+       kfree(adapter->mbox_log);
+       synchronize_rcu();
+       kfree(adapter);
 }
 
 /* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
index 047609e..2c889ef 100644 (file)
@@ -483,6 +483,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
        return 0;
 }
 
+/**
+ * t4_memory_rw_init - Get memory window relative offset, base, and size.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ * @mem_off: memory relative offset with respect to @mtype.
+ * @mem_base: configured memory base address.
+ * @mem_aperture: configured memory window aperture.
+ *
+ * Get the configured memory window's relative offset, base, and size.
+ */
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+                     u32 *mem_base, u32 *mem_aperture)
+{
+       u32 edc_size, mc_size, mem_reg;
+
+       /* Offset into the region of memory which is being accessed
+        * MEM_EDC0 = 0
+        * MEM_EDC1 = 1
+        * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
+        * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
+        * MEM_HMA  = 4
+        */
+       edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
+       if (mtype == MEM_HMA) {
+               *mem_off = 2 * (edc_size * 1024 * 1024);
+       } else if (mtype != MEM_MC1) {
+               *mem_off = (mtype * (edc_size * 1024 * 1024));
+       } else {
+               mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
+                                                     MA_EXT_MEMORY0_BAR_A));
+               *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
+       }
+
+       /* Each PCI-E Memory Window is programmed with a window size -- or
+        * "aperture" -- which controls the granularity of its mapping onto
+        * adapter memory.  We need to grab that aperture in order to know
+        * how to use the specified window.  The window is also programmed
+        * with the base address of the Memory Window in BAR0's address
+        * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
+        * the address is relative to BAR0.
+        */
+       mem_reg = t4_read_reg(adap,
+                             PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
+                                                 win));
+       /* a dead adapter will return 0xffffffff for PIO reads */
+       if (mem_reg == 0xffffffff)
+               return -ENXIO;
+
+       *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
+       *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
+       if (is_t4(adap->params.chip))
+               *mem_base -= adap->t4_bar0;
+
+       return 0;
+}
+
+/**
+ * t4_memory_update_win - Move memory window to specified address.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @addr: location to move.
+ *
+ * Move memory window to specified address.
+ */
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
+{
+       t4_write_reg(adap,
+                    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
+                    addr);
+       /* Read it back to ensure that changes propagate before we
+        * attempt to use the new value.
+        */
+       t4_read_reg(adap,
+                   PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+}
+
+/**
+ * t4_memory_rw_residual - Read/Write residual data.
+ * @adap: the adapter
+ * @off: relative offset within residual to start read/write.
+ * @addr: address within indicated memory type.
+ * @buf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Read/Write residual data less than 32-bits.
+ */
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+                          int dir)
+{
+       union {
+               u32 word;
+               char byte[4];
+       } last;
+       unsigned char *bp;
+       int i;
+
+       if (dir == T4_MEMORY_READ) {
+               last.word = le32_to_cpu((__force __le32)
+                                       t4_read_reg(adap, addr));
+               for (bp = (unsigned char *)buf, i = off; i < 4; i++)
+                       bp[i] = last.byte[i];
+       } else {
+               last.word = *buf;
+               for (i = off; i < 4; i++)
+                       last.byte[i] = 0;
+               t4_write_reg(adap, addr,
+                            (__force u32)cpu_to_le32(last.word));
+       }
+}
+
 /**
  *     t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
  *     @adap: the adapter
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
                 u32 len, void *hbuf, int dir)
 {
        u32 pos, offset, resid, memoffset;
-       u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
+       u32 win_pf, mem_aperture, mem_base;
        u32 *buf;
+       int ret;
 
        /* Argument sanity checks ...
         */
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
        resid = len & 0x3;
        len -= resid;
 
-       /* Offset into the region of memory which is being accessed
-        * MEM_EDC0 = 0
-        * MEM_EDC1 = 1
-        * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
-        * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
-        * MEM_HMA  = 4
-        */
-       edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
-       if (mtype == MEM_HMA) {
-               memoffset = 2 * (edc_size * 1024 * 1024);
-       } else if (mtype != MEM_MC1) {
-               memoffset = (mtype * (edc_size * 1024 * 1024));
-       } else {
-               mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
-                                                     MA_EXT_MEMORY0_BAR_A));
-               memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
-       }
+       ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+                               &mem_aperture);
+       if (ret)
+               return ret;
 
        /* Determine the PCIE_MEM_ACCESS_OFFSET */
        addr = addr + memoffset;
 
-       /* Each PCI-E Memory Window is programmed with a window size -- or
-        * "aperture" -- which controls the granularity of its mapping onto
-        * adapter memory.  We need to grab that aperture in order to know
-        * how to use the specified window.  The window is also programmed
-        * with the base address of the Memory Window in BAR0's address
-        * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
-        * the address is relative to BAR0.
-        */
-       mem_reg = t4_read_reg(adap,
-                             PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
-                                                 win));
-       mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
-       mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
-       if (is_t4(adap->params.chip))
-               mem_base -= adap->t4_bar0;
        win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
 
        /* Calculate our initial PCI-E Memory Window Position and Offset into
         * that Window.
         */
-       pos = addr & ~(mem_aperture-1);
+       pos = addr & ~(mem_aperture - 1);
        offset = addr - pos;
 
        /* Set up initial PCI-E Memory Window to cover the start of our
-        * transfer.  (Read it back to ensure that changes propagate before we
-        * attempt to use the new value.)
+        * transfer.
         */
-       t4_write_reg(adap,
-                    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
-                    pos | win_pf);
-       t4_read_reg(adap,
-                   PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+       t4_memory_update_win(adap, win, pos | win_pf);
 
        /* Transfer data to/from the adapter as long as there's an integral
         * number of 32-bit transfers to complete.
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
                if (offset == mem_aperture) {
                        pos += mem_aperture;
                        offset = 0;
-                       t4_write_reg(adap,
-                               PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
-                                                   win), pos | win_pf);
-                       t4_read_reg(adap,
-                               PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
-                                                   win));
+                       t4_memory_update_win(adap, win, pos | win_pf);
                }
        }
 
@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
         * residual amount.  The PCI-E Memory Window has already been moved
         * above (if necessary) to cover this final transfer.
         */
-       if (resid) {
-               union {
-                       u32 word;
-                       char byte[4];
-               } last;
-               unsigned char *bp;
-               int i;
-
-               if (dir == T4_MEMORY_READ) {
-                       last.word = le32_to_cpu(
-                                       (__force __le32)t4_read_reg(adap,
-                                               mem_base + offset));
-                       for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
-                               bp[i] = last.byte[i];
-               } else {
-                       last.word = *buf;
-                       for (i = resid; i < 4; i++)
-                               last.byte[i] = 0;
-                       t4_write_reg(adap, mem_base + offset,
-                                    (__force u32)cpu_to_le32(last.word));
-               }
-       }
+       if (resid)
+               t4_memory_rw_residual(adap, resid, mem_base + offset,
+                                     (u8 *)buf, dir);
 
        return 0;
 }
@@ -2637,7 +2692,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 }
 
 #define EEPROM_STAT_ADDR   0x7bfc
-#define VPD_SIZE           0x800
 #define VPD_BASE           0x400
 #define VPD_BASE_OLD       0
 #define VPD_LEN            1024
@@ -2704,15 +2758,6 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
        if (!vpd)
                return -ENOMEM;
 
-       /* We have two VPD data structures stored in the adapter VPD area.
-        * By default, Linux calculates the size of the VPD area by traversing
-        * the first VPD area at offset 0x0, so we need to tell the OS what
-        * our real VPD size is.
-        */
-       ret = pci_set_vpd_size(adapter->pdev, VPD_SIZE);
-       if (ret < 0)
-               goto out;
-
        /* Card information normally starts at VPD_BASE but early cards had
         * it at 0.
         */
@@ -6046,6 +6091,7 @@ unsigned int t4_get_tp_ch_map(struct adapter *adap, int pidx)
 
        case CHELSIO_T6:
                switch (nports) {
+               case 1:
                case 2: return 1 << pidx;
                }
                break;
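
All of the factored-out window helpers rest on the same aperture arithmetic: the window is positioned on an aperture-aligned boundary and the access offset is the remainder. A pure-arithmetic sketch:

	/* mem_aperture is a power of two derived from the window register. */
	pos    = addr & ~(mem_aperture - 1);	/* aligned window position  */
	offset = addr - pos;			/* offset inside the window */

	/* e.g. aperture 0x1000, addr 0x12345 -> pos 0x12000, offset 0x345;
	 * when offset reaches mem_aperture the window is advanced by one
	 * aperture via t4_memory_update_win().
	 */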
index b7e79e6..361de86 100644 (file)
@@ -155,8 +155,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
                const char *fc;
                const struct port_info *pi = netdev_priv(dev);
 
-               netif_carrier_on(dev);
-
                switch (pi->link_cfg.speed) {
                case 100:
                        s = "100Mbps";
@@ -202,7 +200,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 
                netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
        } else {
-               netif_carrier_off(dev);
                netdev_info(dev, "link down\n");
        }
 }
@@ -278,6 +275,17 @@ static int link_start(struct net_device *dev)
         */
        if (ret == 0)
                ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
+
+       /* The Virtual Interfaces are connected to an internal switch on the
+        * chip which allows VIs attached to the same port to talk to each
+        * other even when the port link is down.  As a result, we generally
+        * want to always report a VI's link as being "up", provided there are
+        * no errors in enabling the VI.
+        */
+
+       if (ret == 0)
+               netif_carrier_on(dev);
+
        return ret;
 }
 
index 977d4c2..3f8fe8f 100644 (file)
   local_irq_{dis,en}able()
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 static const char version[] =
 "cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
 
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
-   or override something. */
 #include <linux/module.h>
 
 /*
@@ -93,6 +83,7 @@ static const char version[] =
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
@@ -105,24 +96,22 @@ static const char version[] =
 
 #include "cs89x0.h"
 
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
 
 /* Information that need to be kept for each board. */
 struct net_local {
+       int msg_enable;
        int chip_type;          /* one of: CS8900, CS8920, CS8920M */
        char chip_revision;     /* revision letter of the chip ('A'...) */
        int send_cmd;           /* the proper command used to send a packet. */
        int rx_mode;
        int curr_rx_cfg;
         int send_underrun;      /* keep track of how many underruns in a row we get */
-       struct sk_buff *skb;
 };
 
 /* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
 static int net_open(struct net_device *dev);
 static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t net_interrupt(int irq, void *dev_id);
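
For context on the conversion above (a paraphrase of existing core-kernel helpers, not something this patch adds): netif_msg_init() turns the new "debug" module parameter into a bitmask of enabled message classes stored in lp->msg_enable, and each netif_dbg(lp, <class>, dev, ...) call below only prints when the corresponding bit is set. Roughly:

/* Approximate semantics of the core helper this driver now relies on;
 * debug == -1 (the new default) selects the driver's default bits, so
 * netif_msg_init(-1, 0) == 0 and all debug output stays off by default.
 */
static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
{
        if (debug_value < 0 || debug_value >= (int)(sizeof(u32) * 8))
                return default_msg_enable_bits; /* use driver default */
        if (debug_value == 0)
                return 0;                       /* no output */
        return (1U << debug_value) - 1;         /* enable low N classes */
}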
@@ -132,10 +121,6 @@ static int net_close(struct net_device *dev);
 static struct net_device_stats *net_get_stats(struct net_device *dev);
 static int set_mac_address(struct net_device *dev, void *addr);
 
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
 /* For reading/writing registers ISA-style */
 static inline int
 readreg_io(struct net_device *dev, int portno)
@@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = {
 
 /* Probe for the CS8900 card in slot E.  We won't bother looking
    anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
 {
        struct net_device *dev;
-       static int once_is_enough;
        struct net_local *lp;
-       static unsigned version_printed;
        int i, slot;
        unsigned rev_type = 0;
        unsigned long ioaddr;
@@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit)
        int err = -ENODEV;
        struct nubus_rsrc *fres;
 
-       if (!MACH_IS_MAC)
-               return ERR_PTR(-ENODEV);
-
        dev = alloc_etherdev(sizeof(struct net_local));
        if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
-       if (once_is_enough)
-               goto out;
-       once_is_enough = 1;
+               return -ENOMEM;
 
        /* We might have to parameterize this later */
        slot = 0xE;
@@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit)
        if (sig != swab16(CHIP_EISA_ID_SIG))
                goto out;
 
+       SET_NETDEV_DEV(dev, &pdev->dev);
+
        /* Initialize the net_device structure. */
        lp = netdev_priv(dev);
 
+       lp->msg_enable = netif_msg_init(debug, 0);
+
        /* Fill in the 'dev' fields. */
        dev->base_addr = ioaddr;
        dev->mem_start = (unsigned long)
@@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit)
        if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
                lp->send_cmd = TX_NOW;
 
-       if (net_debug && version_printed++ == 0)
-               printk(version);
+       netif_dbg(lp, drv, dev, "%s", version);
 
-       printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
-              dev->name,
-              lp->chip_type==CS8900?'0':'2',
-              lp->chip_type==CS8920M?"M":"",
-              lp->chip_revision,
-              dev->base_addr);
+       pr_info("cs89%c0%s rev %c found at %#8lx\n",
+               lp->chip_type == CS8900 ? '0' : '2',
+               lp->chip_type == CS8920M ? "M" : "",
+               lp->chip_revision, dev->base_addr);
 
        /* Try to read the MAC address */
        if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
-               printk("\nmac89x0: No EEPROM, giving up now.\n");
+               pr_info("No EEPROM, giving up now.\n");
                goto out1;
         } else {
                 for (i = 0; i < ETH_ALEN; i += 2) {
@@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit)
 
        /* print the IRQ and ethernet address. */
 
-       printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+       pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
 
        dev->netdev_ops         = &mac89x0_netdev_ops;
 
        err = register_netdev(dev);
        if (err)
                goto out1;
-       return NULL;
+
+       platform_set_drvdata(pdev, dev);
+       return 0;
 out1:
        nubus_writew(0, dev->base_addr + ADD_PORT);
 out:
        free_netdev(dev);
-       return ERR_PTR(err);
+       return err;
 }
 
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
-       int reset_start_time;
-
-       writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
-       /* wait 30 ms */
-       msleep_interruptible(30);
-
-       /* Wait until the chip is reset */
-       reset_start_time = jiffies;
-       while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
-               ;
-}
-#endif
-
 /* Open/initialize the board.  This is called (in the current kernel)
    sometime after booting when the 'ifconfig' program is run.
 
@@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
        struct net_local *lp = netdev_priv(dev);
        unsigned long flags;
 
-       if (net_debug > 3)
-               printk("%s: sent %d byte packet of type %x\n",
-                      dev->name, skb->len,
-                      (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                      | skb->data[ETH_ALEN+ETH_ALEN+1]);
+       netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+                 skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+                 skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
        /* keep the upload from being interrupted, since we
           ask the chip to start transmitting before the
@@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
        struct net_local *lp;
        int ioaddr, status;
 
-       if (dev == NULL) {
-               printk ("net_interrupt(): irq %d for unknown device.\n", irq);
-               return IRQ_NONE;
-       }
-
        ioaddr = dev->base_addr;
        lp = netdev_priv(dev);
 
@@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
            faster than you can read them off, you're screwed.  Hasta la
            vista, baby!  */
        while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
-               if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+               netif_dbg(lp, intr, dev, "status=%04x\n", status);
                switch(status & ISQ_EVENT_MASK) {
                case ISQ_RECEIVER_EVENT:
                        /* Got a packet(s). */
@@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
                                netif_wake_queue(dev);
                        }
                        if (status & TX_UNDERRUN) {
-                               if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+                               netif_dbg(lp, tx_err, dev, "transmit underrun\n");
                                 lp->send_underrun++;
                                 if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
                                 else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
 static void
 net_rx(struct net_device *dev)
 {
+       struct net_local *lp = netdev_priv(dev);
        struct sk_buff *skb;
        int status, length;
 
@@ -506,7 +456,6 @@ net_rx(struct net_device *dev)
        /* Malloc up new buffer. */
        skb = alloc_skb(length, GFP_ATOMIC);
        if (skb == NULL) {
-               printk("%s: Memory squeeze, dropping packet.\n", dev->name);
                dev->stats.rx_dropped++;
                return;
        }
@@ -515,10 +464,9 @@ net_rx(struct net_device *dev)
        skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
                                length);
 
-       if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
-                                 dev->name, length,
-                                 (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                                | skb->data[ETH_ALEN+ETH_ALEN+1]);
+       netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+                 length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+                 skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
         skb->protocol=eth_type_trans(skb,dev);
        netif_rx(skb);
@@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
                return -EADDRNOTAVAIL;
 
        memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
-       printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+       netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
 
        /* set the Ethernet address */
        for (i=0; i < ETH_ALEN/2; i++)
@@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
        return 0;
 }
 
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
 MODULE_LICENSE("GPL");
 
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
 {
-       net_debug = debug;
-        dev_cs89x0 = mac89x0_probe(-1);
-       if (IS_ERR(dev_cs89x0)) {
-                printk(KERN_WARNING "mac89x0.c: No card found\n");
-               return PTR_ERR(dev_cs89x0);
-       }
+       struct net_device *dev = platform_get_drvdata(pdev);
+
+       unregister_netdev(dev);
+       nubus_writew(0, dev->base_addr + ADD_PORT);
+       free_netdev(dev);
        return 0;
 }
 
-void
-cleanup_module(void)
-{
-       unregister_netdev(dev_cs89x0);
-       nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
-       free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+       .probe = mac89x0_device_probe,
+       .remove = mac89x0_device_remove,
+       .driver = {
+               .name = "mac89x0",
+       },
+};
+
+module_platform_driver(mac89x0_platform_driver);
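
Note that module_platform_driver() only registers the driver; the probe above runs once something registers a matching platform device named "mac89x0". That registration site is outside this hunk; a minimal, purely illustrative sketch of what it looks like:

/* Illustrative sketch only: board-side code that would bind to the
 * "mac89x0" platform driver declared above.
 */
static int __init mac89x0_board_init(void)
{
        struct platform_device *pdev;

        pdev = platform_device_register_simple("mac89x0", -1, NULL, 0);
        return PTR_ERR_OR_ZERO(pdev);
}
arch_initcall(mac89x0_board_init);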
index 9b218f0..0dd64ac 100644 (file)
@@ -33,7 +33,7 @@
 
 #define DRV_NAME               "enic"
 #define DRV_DESCRIPTION                "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION            "2.3.0.45"
+#define DRV_VERSION            "2.3.0.53"
 #define DRV_COPYRIGHT          "Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX          6
@@ -140,6 +140,7 @@ struct enic_rfs_flw_tbl {
 struct vxlan_offload {
        u16 vxlan_udp_port_number;
        u8 patch_level;
+       u8 flags;
 };
 
 /* Per-instance private data structure */
index efb9333..869006c 100644 (file)
@@ -474,6 +474,39 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
        return 0;
 }
 
+static int enic_get_rx_flow_hash(struct enic *enic, struct ethtool_rxnfc *cmd)
+{
+       cmd->data = 0;
+
+       switch (cmd->flow_type) {
+       case TCP_V6_FLOW:
+       case TCP_V4_FLOW:
+               cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               /* Fall through */
+       case UDP_V6_FLOW:
+       case UDP_V4_FLOW:
+               if (vnic_dev_capable_udp_rss(enic->vdev))
+                       cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               /* Fall through */
+       case SCTP_V4_FLOW:
+       case AH_ESP_V4_FLOW:
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case SCTP_V6_FLOW:
+       case AH_ESP_V6_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case IPV4_FLOW:
+       case IPV6_FLOW:
+               cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
                          u32 *rule_locs)
 {
@@ -500,6 +533,9 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
                ret = enic_grxclsrule(enic, cmd);
                spin_unlock_bh(&enic->rfs_h.lock);
                break;
+       case ETHTOOL_GRXFH:
+               ret = enic_get_rx_flow_hash(enic, cmd);
+               break;
        default:
                ret = -EOPNOTSUPP;
                break;
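
The new ETHTOOL_GRXFH handler can be exercised from userspace with "ethtool -n <dev> rx-flow-hash udp4" or with a direct SIOCETHTOOL ioctl. A minimal userspace sketch (the interface name "eth0" is a placeholder):

/* Query which header fields feed the RX hash for UDP/IPv4 flows. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
        struct ethtool_rxnfc nfc = { .cmd = ETHTOOL_GRXFH,
                                     .flow_type = UDP_V4_FLOW };
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
                return 1;
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&nfc;

        if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
                printf("udp4 rx-flow-hash fields: 0x%llx\n",
                       (unsigned long long)nfc.data);
        close(fd);
        return 0;
}

With this patch, the reported bits for udp4/udp6 include RXH_L4_B_0_1 and RXH_L4_B_2_3 only when the adapter advertises UDP RSS capability.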
index f202ba7..a25fb95 100644 (file)
@@ -191,8 +191,16 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
                goto error;
        }
 
-       if (ti->sa_family != AF_INET) {
-               netdev_info(netdev, "vxlan: only IPv4 offload supported");
+       switch (ti->sa_family) {
+       case AF_INET6:
+               if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) {
+                       netdev_info(netdev, "vxlan: only IPv4 offload supported");
+                       goto error;
+               }
+               /* Fall through */
+       case AF_INET:
+               break;
+       default:
                goto error;
        }
 
@@ -204,6 +212,11 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
 
                goto error;
        }
+       if ((vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) != 1) &&
+           !(enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ)) {
+               netdev_info(netdev, "vxlan: vxlan offload with multi wq not supported on this adapter");
+               goto error;
+       }
 
        err = vnic_dev_overlay_offload_cfg(enic->vdev,
                                           OVERLAY_CFG_VXLAN_PORT_UPDATE,
@@ -271,22 +284,37 @@ static netdev_features_t enic_features_check(struct sk_buff *skb,
        struct enic *enic = netdev_priv(dev);
        struct udphdr *udph;
        u16 port = 0;
-       u16 proto;
+       u8 proto;
 
        if (!skb->encapsulation)
                return features;
 
        features = vxlan_features_check(skb, features);
 
-       /* hardware only supports IPv4 vxlan tunnel */
-       if (vlan_get_protocol(skb) != htons(ETH_P_IP))
+       switch (vlan_get_protocol(skb)) {
+       case htons(ETH_P_IPV6):
+               if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
+                       goto out;
+               proto = ipv6_hdr(skb)->nexthdr;
+               break;
+       case htons(ETH_P_IP):
+               proto = ip_hdr(skb)->protocol;
+               break;
+       default:
                goto out;
+       }
 
-       /* hardware does not support offload of ipv6 inner pkt */
-       if (eth->h_proto != ntohs(ETH_P_IP))
+       switch (eth->h_proto) {
+       case ntohs(ETH_P_IPV6):
+               if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
+                       goto out;
+               /* Fall through */
+       case ntohs(ETH_P_IP):
+               break;
+       default:
                goto out;
+       }
 
-       proto = ip_hdr(skb)->protocol;
 
        if (proto == IPPROTO_UDP) {
                udph = udp_hdr(skb);
@@ -635,12 +663,25 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
 
 static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
 {
-       if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+       const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+       switch (eth->h_proto) {
+       case ntohs(ETH_P_IP):
                inner_ip_hdr(skb)->check = 0;
                inner_tcp_hdr(skb)->check =
                        ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
                                           inner_ip_hdr(skb)->daddr, 0,
                                           IPPROTO_TCP, 0);
+               break;
+       case ntohs(ETH_P_IPV6):
+               inner_tcp_hdr(skb)->check =
+                       ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+                                        &inner_ipv6_hdr(skb)->daddr, 0,
+                                        IPPROTO_TCP, 0);
+               break;
+       default:
+               WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
+               break;
        }
 }
 
@@ -1898,6 +1939,8 @@ static int enic_open(struct net_device *netdev)
        }
 
        for (i = 0; i < enic->rq_count; i++) {
+               /* enable rq before updating rq desc */
+               vnic_rq_enable(&enic->rq[i]);
                vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
                /* Need at least one buffer on ring to get going */
                if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
@@ -1909,8 +1952,6 @@ static int enic_open(struct net_device *netdev)
 
        for (i = 0; i < enic->wq_count; i++)
                vnic_wq_enable(&enic->wq[i]);
-       for (i = 0; i < enic->rq_count; i++)
-               vnic_rq_enable(&enic->rq[i]);
 
        if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
                enic_dev_add_station_addr(enic);
@@ -1936,8 +1977,12 @@ static int enic_open(struct net_device *netdev)
        return 0;
 
 err_out_free_rq:
-       for (i = 0; i < enic->rq_count; i++)
+       for (i = 0; i < enic->rq_count; i++) {
+               err = vnic_rq_disable(&enic->rq[i]);
+               if (err)
+                       return err;
                vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+       }
        enic_dev_notify_unset(enic);
 err_out_free_intr:
        enic_unset_affinity_hint(enic);
@@ -2151,9 +2196,10 @@ static int enic_dev_wait(struct vnic_dev *vdev,
 static int enic_dev_open(struct enic *enic)
 {
        int err;
+       u32 flags = CMD_OPENF_IG_DESCCACHE;
 
        err = enic_dev_wait(enic->vdev, vnic_dev_open,
-               vnic_dev_open_done, 0);
+               vnic_dev_open_done, flags);
        if (err)
                dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
                        err);
@@ -2275,7 +2321,7 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
 {
        struct device *dev = enic_get_dev(enic);
        const u8 rss_default_cpu = 0;
-       const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
+       u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
                NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
                NIC_CFG_RSS_HASH_TYPE_IPV6 |
                NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
@@ -2283,6 +2329,8 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
        const u8 rss_base_cpu = 0;
        u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
 
+       if (vnic_dev_capable_udp_rss(enic->vdev))
+               rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP;
        if (rss_enable) {
                if (!enic_set_rsskey(enic)) {
                        if (enic_set_rsscpu(enic, rss_hash_bits)) {
@@ -2901,9 +2949,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                netdev->hw_features |= NETIF_F_RXCSUM;
        if (ENIC_SETTING(enic, VXLAN)) {
                u64 patch_level;
+               u64 a1 = 0;
 
                netdev->hw_enc_features |= NETIF_F_RXCSUM               |
                                           NETIF_F_TSO                  |
+                                          NETIF_F_TSO6                 |
                                           NETIF_F_TSO_ECN              |
                                           NETIF_F_GSO_UDP_TUNNEL       |
                                           NETIF_F_HW_CSUM              |
@@ -2922,9 +2972,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                 */
                err = vnic_dev_get_supported_feature_ver(enic->vdev,
                                                         VIC_FEATURE_VXLAN,
-                                                        &patch_level);
+                                                        &patch_level, &a1);
                if (err)
                        patch_level = 0;
+               enic->vxlan.flags = (u8)a1;
                /* mask bits that are supported by driver
                 */
                patch_level &= BIT_ULL(0) | BIT_ULL(2);
index 39bad67..a2b3760 100644 (file)
@@ -1269,16 +1269,32 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
 }
 
 int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
-                                      u64 *supported_versions)
+                                      u64 *supported_versions, u64 *a1)
 {
        u64 a0 = feature;
        int wait = 1000;
-       u64 a1 = 0;
        int ret;
 
-       ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
+       ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, a1, wait);
        if (!ret)
                *supported_versions = a0;
 
        return ret;
 }
+
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev)
+{
+       u64 a0 = CMD_NIC_CFG, a1 = 0;
+       u64 rss_hash_type;
+       int wait = 1000;
+       int err;
+
+       err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
+       if (err || !a0)
+               return false;
+
+       rss_hash_type = (a1 >> NIC_CFG_RSS_HASH_TYPE_SHIFT) &
+                       NIC_CFG_RSS_HASH_TYPE_MASK_FIELD;
+
+       return (rss_hash_type & NIC_CFG_RSS_HASH_TYPE_UDP);
+}
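
This capability probe ties the earlier hunks together: enic_set_rss_nic_cfg() uses it to OR NIC_CFG_RSS_HASH_TYPE_UDP into the configured hash types, and enic_get_rx_flow_hash() uses it to decide whether to report the L4 port bytes for UDP flows. Since a1 starts at 0 and the function bails out when the devcmd fails or a0 comes back 0, firmware without this capability simply yields false.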
index 9d43d6b..59d4cc8 100644 (file)
@@ -183,6 +183,7 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config);
 int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
                                 u16 vxlan_udp_port_number);
 int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
-                                      u64 *supported_versions);
+                                      u64 *supported_versions, u64 *a1);
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev);
 
 #endif /* _VNIC_DEV_H_ */
index d83880b..41de4ba 100644 (file)
@@ -439,6 +439,7 @@ enum vnic_devcmd_cmd {
 
 /* flags for CMD_OPEN */
 #define CMD_OPENF_OPROM                0x1     /* open coming from option rom */
+#define CMD_OPENF_IG_DESCCACHE 0x2     /* Do not flush IG DESC cache */
 
 /* flags for CMD_INIT */
 #define CMD_INITF_DEFAULT_MAC  0x1     /* init with default mac addr */
@@ -697,6 +698,10 @@ enum overlay_ofld_cmd {
 
 #define OVERLAY_CFG_VXLAN_PORT_UPDATE  0
 
+#define ENIC_VXLAN_INNER_IPV6          BIT(0)
+#define ENIC_VXLAN_OUTER_IPV6          BIT(1)
+#define ENIC_VXLAN_MULTI_WQ            BIT(2)
+
 /* Use this enum to get the supported versions for each of these features
  * If you need to use the devcmd_get_supported_feature_version(), add
  * the new feature into this enum and install function handler in devcmd.c
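
These three bits are the decoded form of the a1 output that enic_probe() caches in enic->vxlan.flags (see the earlier enic hunks). A small illustrative sketch of the decoding (the function name is hypothetical, not part of this patch):

/* Illustrative only: report the VXLAN offload capability bits that the
 * firmware returned and enic_probe() stored in enic->vxlan.flags.
 */
static void enic_log_vxlan_caps(struct enic *enic)
{
        u8 flags = enic->vxlan.flags;

        if (flags & ENIC_VXLAN_INNER_IPV6)
                pr_info("enic: inner IPv6 vxlan offload supported\n");
        if (flags & ENIC_VXLAN_OUTER_IPV6)
                pr_info("enic: outer IPv6 vxlan offload supported\n");
        if (flags & ENIC_VXLAN_MULTI_WQ)
                pr_info("enic: vxlan offload usable with multiple WQs\n");
}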
index 995a50d..5a93db0 100644 (file)
@@ -47,6 +47,7 @@
 #define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6         (1 << 4)
 #define NIC_CFG_RSS_HASH_TYPE_IPV6_EX          (1 << 5)
 #define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX      (1 << 6)
+#define NIC_CFG_RSS_HASH_TYPE_UDP              (1 << 7)
 
 static inline void vnic_set_nic_cfg(u32 *nic_cfg,
        u8 rss_default_cpu, u8 rss_hash_type,
index 1a49297..ff92ab1 100644 (file)
@@ -19,7 +19,7 @@
 #include "be.h"
 #include "be_cmds.h"
 
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
        "Physical Link is functional",
        "Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
        "Optics of two types installed â€“ Remove one optic or install matching pair of optics.",
index 09da2d8..e8b43cf 100644 (file)
@@ -201,7 +201,7 @@ enum {
                         phy_state == BE_PHY_UNQUALIFIED ||     \
                         phy_state == BE_PHY_UNCERTIFIED)
 
-extern  char *be_misconfig_evt_port_state[];
+extern const  char * const be_misconfig_evt_port_state[];
 
 /* async event indicating misconfigured port */
 struct be_async_event_misconfig_port {
index 7caa8da..159dc2d 100644 (file)
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
                                  err);
        }
 
+       if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+               priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+               err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+                                                 priv->mac_dev->allmulti);
+               if (err < 0)
+                       netif_err(priv, drv, net_dev,
+                                 "mac_dev->set_allmulti() = %d\n",
+                                 err);
+       }
+
        err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
        if (err < 0)
                netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
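
For reference, the new branch follows the same edge-triggered pattern as the driver's existing promiscuous-mode handling: the MAC is only reprogrammed when the cached state and IFF_ALLMULTI disagree. Userspace toggles the flag with, e.g., "ip link set dev <iface> allmulticast on", which reaches dpaa_set_rx_mode() through the standard ndo_set_rx_mode path.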
@@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
                goto csum_failed;
        }
 
+       /* SGT[0] is used by the linear part */
        sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
-       qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+       frag_len = skb_headlen(skb);
+       qm_sg_entry_set_len(&sgt[0], frag_len);
        sgt[0].bpid = FSL_DPAA_BPID_INV;
        sgt[0].offset = 0;
        addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
        qm_sg_entry_set64(&sgt[0], addr);
 
        /* populate the rest of SGT entries */
-       frag = &skb_shinfo(skb)->frags[0];
-       frag_len = frag->size;
-       for (i = 1; i <= nr_frags; i++, frag++) {
+       for (i = 0; i < nr_frags; i++) {
+               frag = &skb_shinfo(skb)->frags[i];
+               frag_len = frag->size;
                WARN_ON(!skb_frag_page(frag));
                addr = skb_frag_dma_map(dev, frag, 0,
                                        frag_len, dma_dir);
@@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
                        goto sg_map_failed;
                }
 
-               qm_sg_entry_set_len(&sgt[i], frag_len);
-               sgt[i].bpid = FSL_DPAA_BPID_INV;
-               sgt[i].offset = 0;
+               qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+               sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+               sgt[i + 1].offset = 0;
 
                /* keep the offset in the address */
-               qm_sg_entry_set64(&sgt[i], addr);
-               frag_len = frag->size;
+               qm_sg_entry_set64(&sgt[i + 1], addr);
        }
-       qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+       /* Set the final bit in the last used entry of the SGT */
+       qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
 
        qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
 
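
A worked example of the reworked indexing (not driver code): for a non-linear skb with nr_frags == 2, the loop above now builds

        sgt[0] -> linear part, len = skb_headlen(skb)
        sgt[1] -> skb_shinfo(skb)->frags[0]
        sgt[2] -> skb_shinfo(skb)->frags[1]

and qm_sg_entry_set_f(&sgt[nr_frags], frag_len) marks sgt[2], the last entry actually used, as final while frag_len still holds the length of frags[1].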
@@ -2052,19 +2065,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
        /* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
         * make sure we don't feed FMan with more fragments than it supports.
         */
-       if (nonlinear &&
-           likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
-               /* Just create a S/G fd based on the skb */
-               err = skb_to_sg_fd(priv, skb, &fd);
-               percpu_priv->tx_frag_skbuffs++;
-       } else {
+       if (unlikely(nonlinear &&
+                    (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
                /* If the egress skb contains more fragments than we support
                 * we have no choice but to linearize it ourselves.
                 */
-               if (unlikely(nonlinear) && __skb_linearize(skb))
+               if (__skb_linearize(skb))
                        goto enomem;
 
-               /* Finally, create a contig FD from this skb */
+               nonlinear = skb_is_nonlinear(skb);
+       }
+
+       if (nonlinear) {
+               /* Just create a S/G fd based on the skb */
+               err = skb_to_sg_fd(priv, skb, &fd);
+               percpu_priv->tx_frag_skbuffs++;
+       } else {
+               /* Create a contig FD from this skb */
                err = skb_to_contig_fd(priv, skb, &fd, &offset);
        }
        if (unlikely(err < 0))
@@ -2201,14 +2218,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
        if (dpaa_eth_napi_schedule(percpu_priv, portal))
                return qman_cb_dqrr_stop;
 
-       if (dpaa_eth_refill_bpools(priv))
-               /* Unable to refill the buffer pool due to insufficient
-                * system memory. Just release the frame back into the pool,
-                * otherwise we'll soon end up with an empty buffer pool.
-                */
-               dpaa_fd_release(net_dev, &dq->fd);
-       else
-               dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+       dpaa_eth_refill_bpools(priv);
+       dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
 
        return qman_cb_dqrr_consume;
 }
@@ -2766,7 +2777,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 
        priv->channel = (u16)channel;
 
-       /* Start a thread that will walk the CPUs with affine portals
+       /* Walk the CPUs with affine portals
         * and add this pool channel to each's dequeue mask.
         */
        dpaa_eth_add_channel(priv->channel);
index faea674..85306d1 100644 (file)
@@ -211,7 +211,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev,
        if (epause->rx_pause)
                newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
        if (epause->tx_pause)
-               newadv |= ADVERTISED_Asym_Pause;
+               newadv ^= ADVERTISED_Asym_Pause;
 
        oldadv = phydev->advertising &
                        (ADVERTISED_Pause | ADVERTISED_Asym_Pause);
index ea43b49..9a581fa 100644 (file)
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
        return 0;
 }
 
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+       u32 tmp;
+       struct dtsec_regs __iomem *regs = dtsec->regs;
+
+       if (!is_init_done(dtsec->dtsec_drv_param))
+               return -EINVAL;
+
+       tmp = ioread32be(&regs->rctrl);
+       if (enable)
+               tmp |= RCTRL_MPROM;
+       else
+               tmp &= ~RCTRL_MPROM;
+
+       iowrite32be(tmp, &regs->rctrl);
+
+       return 0;
+}
+
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
 {
        struct dtsec_regs __iomem *regs = dtsec->regs;
index c4467c0..1a689ad 100644 (file)
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
 int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
 
 #endif /* __DTSEC_H */
index c029688..446a97b 100644 (file)
@@ -350,6 +350,7 @@ struct fman_mac {
        struct fman_rev_info fm_rev_info;
        bool basex_if;
        struct phy_device *pcsphy;
+       bool allmulti_enabled;
 };
 
 static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
        return 0;
 }
 
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+       u32 entry;
+       struct memac_regs __iomem *regs = memac->regs;
+
+       if (!is_init_done(memac->memac_drv_param))
+               return -EINVAL;
+
+       if (enable) {
+               for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry | HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       } else {
+               for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
+
+       memac->allmulti_enabled = enable;
+
+       return 0;
+}
+
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
 {
        struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
                        break;
                }
        }
-       if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
-               iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+       if (!memac->allmulti_enabled) {
+               if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+                       iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
 
        return 0;
 }
index c4a6646..b5a5033 100644 (file)
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
                        enum fman_mac_exceptions exception, bool enable);
 int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
 
 #endif /* __MEMAC_H */
index 4b0f3a5..284735d 100644 (file)
@@ -217,6 +217,7 @@ struct fman_mac {
        struct tgec_cfg *cfg;
        void *fm;
        struct fman_rev_info fm_rev_info;
+       bool allmulti_enabled;
 };
 
 static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
        return 0;
 }
 
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+       u32 entry;
+       struct tgec_regs __iomem *regs = tgec->regs;
+
+       if (!is_init_done(tgec->cfg))
+               return -EINVAL;
+
+       if (enable) {
+               for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry | TGEC_HASH_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       } else {
+               for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
+
+       tgec->allmulti_enabled = enable;
+
+       return 0;
+}
+
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
 {
        struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
                        break;
                }
        }
-       if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
-               iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
-                           &regs->hashtable_ctrl);
+
+       if (!tgec->allmulti_enabled) {
+               if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+                       iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+                                   &regs->hashtable_ctrl);
+       }
 
        return 0;
 }
index 514bba9..cbbd3b4 100644 (file)
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
 int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
 
 #endif /* __TGEC_H */
index 88c0a06..4829dcd 100644 (file)
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = dtsec_set_tx_pause_frames;
        mac_dev->set_rx_pause           = dtsec_accept_rx_pause_frames;
        mac_dev->set_exception          = dtsec_set_exception;
+       mac_dev->set_allmulti           = dtsec_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = tgec_set_tx_pause_frames;
        mac_dev->set_rx_pause           = tgec_accept_rx_pause_frames;
        mac_dev->set_exception          = tgec_set_exception;
+       mac_dev->set_allmulti           = tgec_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = memac_set_tx_pause_frames;
        mac_dev->set_rx_pause           = memac_accept_rx_pause_frames;
        mac_dev->set_exception          = memac_set_exception;
+       mac_dev->set_allmulti           = memac_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
index eefb335..b520cec 100644 (file)
@@ -59,6 +59,7 @@ struct mac_device {
        bool rx_pause_active;
        bool tx_pause_active;
        bool promisc;
+       bool allmulti;
 
        int (*init)(struct mac_device *mac_dev);
        int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
        void (*adjust_link)(struct mac_device *mac_dev);
        int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
        int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+       int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
        int (*set_multi)(struct net_device *net_dev,
                         struct mac_device *mac_dev);
        int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
index 3bdeb29..f5c87bd 100644 (file)
@@ -2934,29 +2934,17 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
 {
        int size = lstatus & BD_LENGTH_MASK;
        struct page *page = rxb->page;
-       bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
-
-       /* Remove the FCS from the packet length */
-       if (last)
-               size -= ETH_FCS_LEN;
 
        if (likely(first)) {
                skb_put(skb, size);
        } else {
                /* the last fragment's length contains the full frame length */
-               if (last)
+               if (lstatus & BD_LFLAG(RXBD_LAST))
                        size -= skb->len;
 
-               /* Add the last fragment if it contains something other than
-                * the FCS, otherwise drop it and trim off any part of the FCS
-                * that was already received.
-                */
-               if (size > 0)
-                       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                                       rxb->page_offset + RXBUF_ALIGNMENT,
-                                       size, GFAR_RXB_TRUESIZE);
-               else if (size < 0)
-                       pskb_trim(skb, skb->len + size);
+               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+                               rxb->page_offset + RXBUF_ALIGNMENT,
+                               size, GFAR_RXB_TRUESIZE);
        }
 
        /* try reuse page */
@@ -3069,6 +3057,9 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
        if (priv->padding)
                skb_pull(skb, priv->padding);
 
+       /* Trim off the FCS */
+       pskb_trim(skb, skb->len - ETH_FCS_LEN);
+
        if (ndev->features & NETIF_F_RXCSUM)
                gfar_rx_checksum(skb, fcb);
 
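
In effect the 4-byte FCS now survives buffer assembly and is stripped exactly once: a maximum-size 1518-byte frame (1514 bytes of Ethernet frame plus 4 bytes of FCS) reaches gfar_process_frame() with the FCS still attached, and the single pskb_trim(skb, skb->len - ETH_FCS_LEN) leaves the expected 1514-byte packet no matter how many buffer descriptors the frame spanned.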
index 2744726..7654071 100644 (file)
@@ -90,7 +90,7 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
 
 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
 static int ibmvnic_remove(struct vio_dev *);
-static void release_sub_crqs(struct ibmvnic_adapter *);
+static void release_sub_crqs(struct ibmvnic_adapter *, bool);
 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_map_query(struct ibmvnic_adapter *adapter);
 static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
 static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
 static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -361,14 +361,14 @@ static void release_stats_buffers(struct ibmvnic_adapter *adapter)
 static int init_stats_buffers(struct ibmvnic_adapter *adapter)
 {
        adapter->tx_stats_buffers =
-                               kcalloc(adapter->req_tx_queues,
+                               kcalloc(IBMVNIC_MAX_QUEUES,
                                        sizeof(struct ibmvnic_tx_queue_stats),
                                        GFP_KERNEL);
        if (!adapter->tx_stats_buffers)
                return -ENOMEM;
 
        adapter->rx_stats_buffers =
-                               kcalloc(adapter->req_rx_queues,
+                               kcalloc(IBMVNIC_MAX_QUEUES,
                                        sizeof(struct ibmvnic_rx_queue_stats),
                                        GFP_KERNEL);
        if (!adapter->rx_stats_buffers)
@@ -509,7 +509,7 @@ static int init_rx_pools(struct net_device *netdev)
                return -1;
        }
 
-       adapter->num_active_rx_pools = 0;
+       adapter->num_active_rx_pools = rxadd_subcrqs;
 
        for (i = 0; i < rxadd_subcrqs; i++) {
                rx_pool = &adapter->rx_pool[i];
@@ -554,8 +554,6 @@ static int init_rx_pools(struct net_device *netdev)
                rx_pool->next_free = 0;
        }
 
-       adapter->num_active_rx_pools = rxadd_subcrqs;
-
        return 0;
 }
 
@@ -641,7 +639,7 @@ static int init_tx_pools(struct net_device *netdev)
        if (!adapter->tx_pool)
                return -1;
 
-       adapter->num_active_tx_pools = 0;
+       adapter->num_active_tx_pools = tx_subcrqs;
 
        for (i = 0; i < tx_subcrqs; i++) {
                tx_pool = &adapter->tx_pool[i];
@@ -690,8 +688,6 @@ static int init_tx_pools(struct net_device *netdev)
                tx_pool->producer_index = 0;
        }
 
-       adapter->num_active_tx_pools = tx_subcrqs;
-
        return 0;
 }
 
@@ -740,6 +736,45 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
        adapter->napi_enabled = false;
 }
 
+static int init_napi(struct ibmvnic_adapter *adapter)
+{
+       int i;
+
+       adapter->napi = kcalloc(adapter->req_rx_queues,
+                               sizeof(struct napi_struct), GFP_KERNEL);
+       if (!adapter->napi)
+               return -ENOMEM;
+
+       for (i = 0; i < adapter->req_rx_queues; i++) {
+               netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
+               netif_napi_add(adapter->netdev, &adapter->napi[i],
+                              ibmvnic_poll, NAPI_POLL_WEIGHT);
+       }
+
+       adapter->num_active_rx_napi = adapter->req_rx_queues;
+       return 0;
+}
+
+static void release_napi(struct ibmvnic_adapter *adapter)
+{
+       int i;
+
+       if (!adapter->napi)
+               return;
+
+       for (i = 0; i < adapter->num_active_rx_napi; i++) {
+               if (&adapter->napi[i]) {
+                       netdev_dbg(adapter->netdev,
+                                  "Releasing napi[%d]\n", i);
+                       netif_napi_del(&adapter->napi[i]);
+               }
+       }
+
+       kfree(adapter->napi);
+       adapter->napi = NULL;
+       adapter->num_active_rx_napi = 0;
+}
+
 static int ibmvnic_login(struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -750,7 +785,7 @@ static int ibmvnic_login(struct net_device *netdev)
        do {
                if (adapter->renegotiate) {
                        adapter->renegotiate = false;
-                       release_sub_crqs(adapter);
+                       release_sub_crqs(adapter, 1);
 
                        reinit_completion(&adapter->init_done);
                        send_cap_queries(adapter);
@@ -774,8 +809,11 @@ static int ibmvnic_login(struct net_device *netdev)
                }
 
                reinit_completion(&adapter->init_done);
-               send_login(adapter);
-               if (!wait_for_completion_timeout(&adapter->init_done,
+               rc = send_login(adapter);
+               if (rc) {
+                       dev_err(dev, "Unable to attempt device login\n");
+                       return rc;
+               } else if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
                        dev_err(dev, "Login timeout\n");
                        return -1;
@@ -791,28 +829,28 @@ static int ibmvnic_login(struct net_device *netdev)
        return 0;
 }
 
-static void release_resources(struct ibmvnic_adapter *adapter)
+static void release_login_buffer(struct ibmvnic_adapter *adapter)
 {
-       int i;
+       kfree(adapter->login_buf);
+       adapter->login_buf = NULL;
+}
 
+static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+{
+       kfree(adapter->login_rsp_buf);
+       adapter->login_rsp_buf = NULL;
+}
+
+static void release_resources(struct ibmvnic_adapter *adapter)
+{
        release_vpd_data(adapter);
 
        release_tx_pools(adapter);
        release_rx_pools(adapter);
 
-       release_stats_token(adapter);
-       release_stats_buffers(adapter);
        release_error_buffers(adapter);
-
-       if (adapter->napi) {
-               for (i = 0; i < adapter->req_rx_queues; i++) {
-                       if (&adapter->napi[i]) {
-                               netdev_dbg(adapter->netdev,
-                                          "Releasing napi[%d]\n", i);
-                               netif_napi_del(&adapter->napi[i]);
-                       }
-               }
-       }
+       release_napi(adapter);
+       release_login_rsp_buffer(adapter);
 }
 
 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
@@ -931,20 +969,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
 static int init_resources(struct ibmvnic_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
-       int i, rc;
+       int rc;
 
        rc = set_real_num_queues(netdev);
        if (rc)
                return rc;
 
-       rc = init_stats_buffers(adapter);
-       if (rc)
-               return rc;
-
-       rc = init_stats_token(adapter);
-       if (rc)
-               return rc;
-
        adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
        if (!adapter->vpd)
                return -ENOMEM;
@@ -957,16 +987,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
        }
 
        adapter->map_id = 1;
-       adapter->napi = kcalloc(adapter->req_rx_queues,
-                               sizeof(struct napi_struct), GFP_KERNEL);
-       if (!adapter->napi)
-               return -ENOMEM;
 
-       for (i = 0; i < adapter->req_rx_queues; i++) {
-               netdev_dbg(netdev, "Adding napi[%d]\n", i);
-               netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
-                              NAPI_POLL_WEIGHT);
-       }
+       rc = init_napi(adapter);
+       if (rc)
+               return rc;
 
        send_map_query(adapter);
 
@@ -1057,9 +1081,41 @@ static int ibmvnic_open(struct net_device *netdev)
        return rc;
 }
 
+static void clean_rx_pools(struct ibmvnic_adapter *adapter)
+{
+       struct ibmvnic_rx_pool *rx_pool;
+       struct ibmvnic_rx_buff *rx_buff;
+       u64 rx_entries;
+       int rx_scrqs;
+       int i, j;
+
+       if (!adapter->rx_pool)
+               return;
+
+       rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
+       rx_entries = adapter->req_rx_add_entries_per_subcrq;
+
+       /* Free any remaining skbs in the rx buffer pools */
+       for (i = 0; i < rx_scrqs; i++) {
+               rx_pool = &adapter->rx_pool[i];
+               if (!rx_pool || !rx_pool->rx_buff)
+                       continue;
+
+               netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
+               for (j = 0; j < rx_entries; j++) {
+                       rx_buff = &rx_pool->rx_buff[j];
+                       if (rx_buff && rx_buff->skb) {
+                               dev_kfree_skb_any(rx_buff->skb);
+                               rx_buff->skb = NULL;
+                       }
+               }
+       }
+}
+
 static void clean_tx_pools(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_tx_pool *tx_pool;
+       struct ibmvnic_tx_buff *tx_buff;
        u64 tx_entries;
        int tx_scrqs;
        int i, j;
@@ -1073,14 +1129,15 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
        /* Free any remaining skbs in the tx buffer pools */
        for (i = 0; i < tx_scrqs; i++) {
                tx_pool = &adapter->tx_pool[i];
-               if (!tx_pool)
+               if (!tx_pool || !tx_pool->tx_buff)
                        continue;
 
                netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
                for (j = 0; j < tx_entries; j++) {
-                       if (tx_pool->tx_buff[j].skb) {
-                               dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
-                               tx_pool->tx_buff[j].skb = NULL;
+                       tx_buff = &tx_pool->tx_buff[j];
+                       if (tx_buff && tx_buff->skb) {
+                               dev_kfree_skb_any(tx_buff->skb);
+                               tx_buff->skb = NULL;
                        }
                }
        }
@@ -1134,7 +1191,7 @@ static int __ibmvnic_close(struct net_device *netdev)
                        }
                }
        }
-
+       clean_rx_pools(adapter);
        clean_tx_pools(adapter);
        adapter->state = VNIC_CLOSED;
        return rc;
@@ -1422,6 +1479,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        if ((*hdrs >> 7) & 1) {
                build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
                tx_crq.v1.n_crq_elem = num_entries;
+               tx_buff->num_entries = num_entries;
                tx_buff->indir_arr[0] = tx_crq;
                tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
                                                    sizeof(tx_buff->indir_arr),
@@ -1440,6 +1498,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                                               (u64)tx_buff->indir_dma,
                                               (u64)num_entries);
        } else {
+               tx_buff->num_entries = num_entries;
                lpar_rc = send_subcrq(adapter, handle_array[queue_num],
                                      &tx_crq);
        }
@@ -1470,9 +1529,9 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                goto out;
        }
 
-       if (atomic_inc_return(&tx_scrq->used)
+       if (atomic_add_return(num_entries, &tx_scrq->used)
                                        >= adapter->req_tx_entries_per_subcrq) {
-               netdev_info(netdev, "Stopping queue %d\n", queue_num);
+               netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
                netif_stop_subqueue(netdev, queue_num);
        }
 
@@ -1608,7 +1667,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
            adapter->wait_for_reset) {
                release_resources(adapter);
-               release_sub_crqs(adapter);
+               release_sub_crqs(adapter, 1);
                release_crq_queue(adapter);
        }
 
@@ -1646,6 +1705,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                        release_tx_pools(adapter);
                        init_rx_pools(netdev);
                        init_tx_pools(netdev);
+
+                       release_napi(adapter);
+                       init_napi(adapter);
                } else {
                        rc = reset_tx_pools(adapter);
                        if (rc)
@@ -1670,8 +1732,6 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                return 0;
        }
 
-       netif_carrier_on(netdev);
-
        /* kick napi */
        for (i = 0; i < adapter->req_rx_queues; i++)
                napi_schedule(&adapter->napi[i]);
@@ -1679,6 +1739,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        if (adapter->reset_reason != VNIC_RESET_FAILOVER)
                netdev_notify_peers(netdev);
 
+       netif_carrier_on(netdev);
+
        return 0;
 }
 
@@ -1853,6 +1915,12 @@ restart_poll:
                                   be16_to_cpu(next->rx_comp.rc));
                        /* free the entry */
                        next->rx_comp.first = 0;
+                       dev_kfree_skb_any(rx_buff->skb);
+                       remove_buff_from_pool(adapter, rx_buff);
+                       continue;
+               } else if (!rx_buff->skb) {
+                       /* free the entry */
+                       next->rx_comp.first = 0;
                        remove_buff_from_pool(adapter, rx_buff);
                        continue;
                }
@@ -2244,24 +2312,27 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
 }
 
 static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
-                                 struct ibmvnic_sub_crq_queue *scrq)
+                                 struct ibmvnic_sub_crq_queue *scrq,
+                                 bool do_h_free)
 {
        struct device *dev = &adapter->vdev->dev;
        long rc;
 
        netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
 
-       /* Close the sub-crqs */
-       do {
-               rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
-                                       adapter->vdev->unit_address,
-                                       scrq->crq_num);
-       } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+       if (do_h_free) {
+               /* Close the sub-crqs */
+               do {
+                       rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+                                               adapter->vdev->unit_address,
+                                               scrq->crq_num);
+               } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
-       if (rc) {
-               netdev_err(adapter->netdev,
-                          "Failed to release sub-CRQ %16lx, rc = %ld\n",
-                          scrq->crq_num, rc);
+               if (rc) {
+                       netdev_err(adapter->netdev,
+                                  "Failed to release sub-CRQ %16lx, rc = %ld\n",
+                                  scrq->crq_num, rc);
+               }
        }
 
        dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
@@ -2329,12 +2400,12 @@ zero_page_failed:
        return NULL;
 }
 
-static void release_sub_crqs(struct ibmvnic_adapter *adapter)
+static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 {
        int i;
 
        if (adapter->tx_scrq) {
-               for (i = 0; i < adapter->req_tx_queues; i++) {
+               for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
                        if (!adapter->tx_scrq[i])
                                continue;
 
@@ -2347,15 +2418,17 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
                                adapter->tx_scrq[i]->irq = 0;
                        }
 
-                       release_sub_crq_queue(adapter, adapter->tx_scrq[i]);
+                       release_sub_crq_queue(adapter, adapter->tx_scrq[i],
+                                             do_h_free);
                }
 
                kfree(adapter->tx_scrq);
                adapter->tx_scrq = NULL;
+               adapter->num_active_tx_scrqs = 0;
        }
 
        if (adapter->rx_scrq) {
-               for (i = 0; i < adapter->req_rx_queues; i++) {
+               for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
                        if (!adapter->rx_scrq[i])
                                continue;
 
@@ -2368,11 +2441,13 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
                                adapter->rx_scrq[i]->irq = 0;
                        }
 
-                       release_sub_crq_queue(adapter, adapter->rx_scrq[i]);
+                       release_sub_crq_queue(adapter, adapter->rx_scrq[i],
+                                             do_h_free);
                }
 
                kfree(adapter->rx_scrq);
                adapter->rx_scrq = NULL;
+               adapter->num_active_rx_scrqs = 0;
        }
 }
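
The new do_h_free flag gates only the hypervisor-side free: callers can skip the H_FREE_SUB_CRQ hypercall when the handle apparently no longer needs (or can no longer accept) an explicit free, while local resources are released either way. A minimal userspace sketch of the pattern; fake_h_free() is a hypothetical stand-in for the hcall:

    /* Userspace sketch: the flag gates only the remote free. */
    #include <stdbool.h>
    #include <stdlib.h>

    #define H_BUSY 1

    static int fake_h_free(unsigned long handle)
    {
            (void)handle;
            return 0;                           /* pretend the hcall succeeded */
    }

    static void release_queue(unsigned long handle, void *local_mem, bool do_h_free)
    {
            int rc;

            if (do_h_free) {
                    do {                        /* retry while the hcall is busy */
                            rc = fake_h_free(handle);
                    } while (rc == H_BUSY);
            }
            free(local_mem);                    /* local resources go either way */
    }

    int main(void)
    {
            release_queue(0x42, malloc(16), true);   /* full release */
            release_queue(0x43, malloc(16), false);  /* skip the hcall */
            return 0;
    }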
 
@@ -2422,6 +2497,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 restart_loop:
        while (pending_scrq(adapter, scrq)) {
                unsigned int pool = scrq->pool_index;
+               int num_entries = 0;
 
                next = ibmvnic_next_scrq(adapter, scrq);
                for (i = 0; i < next->tx_comp.num_comps; i++) {
@@ -2452,6 +2528,8 @@ restart_loop:
                                txbuff->skb = NULL;
                        }
 
+                       num_entries += txbuff->num_entries;
+
                        adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
                                                     producer_index] = index;
                        adapter->tx_pool[pool].producer_index =
@@ -2461,13 +2539,13 @@ restart_loop:
                /* remove tx_comp scrq*/
                next->tx_comp.first = 0;
 
-               if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+               if (atomic_sub_return(num_entries, &scrq->used) <=
                    (adapter->req_tx_entries_per_subcrq / 2) &&
                    __netif_subqueue_stopped(adapter->netdev,
                                             scrq->pool_index)) {
                        netif_wake_subqueue(adapter->netdev, scrq->pool_index);
-                       netdev_info(adapter->netdev, "Started queue %d\n",
-                                   scrq->pool_index);
+                       netdev_dbg(adapter->netdev, "Started queue %d\n",
+                                  scrq->pool_index);
                }
        }
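
One tx buffer can occupy several descriptor entries, so the queue-occupancy counter has to shrink by the summed num_entries rather than by the completion count before the half-empty wake check. A compilable sketch of that accounting, with illustrative names and the threshold semantics assumed from the hunk above:

    /* Sketch of the descriptor-entry accounting change. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct txbuf { int num_entries; };          /* entries one buffer consumed */

    static bool complete_and_check_wake(atomic_int *used, const struct txbuf *bufs,
                                        int ncomps, int entries_per_queue)
    {
            int num_entries = 0;

            for (int i = 0; i < ncomps; i++)
                    num_entries += bufs[i].num_entries;  /* may exceed ncomps */

            /* wake once occupancy drops to half the queue or below */
            return atomic_fetch_sub(used, num_entries) - num_entries
                   <= entries_per_queue / 2;
    }

    int main(void)
    {
            atomic_int used = 40;
            struct txbuf bufs[2] = { { 3 }, { 2 } };

            /* 40 - 5 = 35, threshold 32: no wake yet, prints wake=0 */
            printf("wake=%d\n", complete_and_check_wake(&used, bufs, 2, 64));
            return 0;
    }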
 
@@ -2539,7 +2617,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
                        dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
                                scrq->irq, rc);
                        irq_dispose_mapping(scrq->irq);
-                       goto req_rx_irq_failed;
+                       goto req_tx_irq_failed;
                }
        }
 
@@ -2575,7 +2653,7 @@ req_tx_irq_failed:
                free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
                irq_dispose_mapping(adapter->rx_scrq[j]->irq);
        }
-       release_sub_crqs(adapter);
+       release_sub_crqs(adapter, 1);
        return rc;
 }
 
@@ -2637,6 +2715,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
        for (i = 0; i < adapter->req_tx_queues; i++) {
                adapter->tx_scrq[i] = allqueues[i];
                adapter->tx_scrq[i]->pool_index = i;
+               adapter->num_active_tx_scrqs++;
        }
 
        adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
@@ -2647,6 +2726,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
        for (i = 0; i < adapter->req_rx_queues; i++) {
                adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
                adapter->rx_scrq[i]->scrq_num = i;
+               adapter->num_active_rx_scrqs++;
        }
 
        kfree(allqueues);
@@ -2657,7 +2737,7 @@ rx_failed:
        adapter->tx_scrq = NULL;
 tx_failed:
        for (i = 0; i < registered_queues; i++)
-               release_sub_crq_queue(adapter, allqueues[i]);
+               release_sub_crq_queue(adapter, allqueues[i], 1);
        kfree(allqueues);
        return -1;
 }
@@ -2997,7 +3077,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
        strncpy(&vlcd->name, adapter->netdev->name, len);
 }
 
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
        struct ibmvnic_login_buffer *login_buffer;
@@ -3013,6 +3093,13 @@ static void send_login(struct ibmvnic_adapter *adapter)
        struct vnic_login_client_data *vlcd;
        int i;
 
+       if (!adapter->tx_scrq || !adapter->rx_scrq) {
+               netdev_err(adapter->netdev,
+                          "RX or TX queues are not allocated, device login failed\n");
+               return -1;
+       }
+
+       release_login_rsp_buffer(adapter);
        client_data_len = vnic_client_data_len(adapter);
 
        buffer_size =
@@ -3109,7 +3196,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
        crq.login.len = cpu_to_be32(buffer_size);
        ibmvnic_send_crq(adapter, &crq);
 
-       return;
+       return 0;
 
 buf_rsp_map_failed:
        kfree(login_rsp_buffer);
@@ -3118,7 +3205,7 @@ buf_rsp_alloc_failed:
 buf_map_failed:
        kfree(login_buffer);
 buf_alloc_failed:
-       return;
+       return -1;
 }
 
 static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -3738,6 +3825,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
                ibmvnic_remove(adapter->vdev);
                return -EIO;
        }
+       release_login_buffer(adapter);
        complete(&adapter->init_done);
 
        return 0;
@@ -4282,6 +4370,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 {
        struct device *dev = &adapter->vdev->dev;
        unsigned long timeout = msecs_to_jiffies(30000);
+       u64 old_num_rx_queues, old_num_tx_queues;
        int rc;
 
        if (adapter->resetting && !adapter->wait_for_reset) {
@@ -4299,6 +4388,9 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 
        adapter->from_passive_init = false;
 
+       old_num_rx_queues = adapter->req_rx_queues;
+       old_num_tx_queues = adapter->req_tx_queues;
+
        init_completion(&adapter->init_done);
        adapter->init_done_rc = 0;
        ibmvnic_send_crq_init(adapter);
@@ -4318,10 +4410,18 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
                return -1;
        }
 
-       if (adapter->resetting && !adapter->wait_for_reset)
-               rc = reset_sub_crq_queues(adapter);
-       else
+       if (adapter->resetting && !adapter->wait_for_reset) {
+               if (adapter->req_rx_queues != old_num_rx_queues ||
+                   adapter->req_tx_queues != old_num_tx_queues) {
+                       release_sub_crqs(adapter, 0);
+                       rc = init_sub_crqs(adapter);
+               } else {
+                       rc = reset_sub_crq_queues(adapter);
+               }
+       } else {
                rc = init_sub_crqs(adapter);
+       }
+
        if (rc) {
                dev_err(dev, "Initialization of sub crqs failed\n");
                release_crq_queue(adapter);
@@ -4334,6 +4434,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
                release_crq_queue(adapter);
        }
 
+       rc = init_stats_buffers(adapter);
+       if (rc)
+               return rc;
+
+       rc = init_stats_token(adapter);
+       if (rc)
+               return rc;
+
        return rc;
 }
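
The reset path above reuses the sub-CRQs in place only when the renegotiated queue counts are unchanged; otherwise it tears them down (skipping the hypervisor free) and rebuilds them. A standalone sketch of that decision, with stub functions that just report which path ran:

    /* Standalone sketch of the reset-path decision. */
    #include <stdio.h>

    struct adapter { unsigned int req_rx, req_tx; };

    static int rebuild(struct adapter *a)
    {
            (void)a;
            puts("release + init");             /* counts changed: full rebuild */
            return 0;
    }

    static int reset_in_place(struct adapter *a)
    {
            (void)a;
            puts("reset in place");             /* counts unchanged */
            return 0;
    }

    static int reinit_queues(struct adapter *a, unsigned int old_rx,
                             unsigned int old_tx)
    {
            if (a->req_rx != old_rx || a->req_tx != old_tx)
                    return rebuild(a);
            return reset_in_place(a);
    }

    int main(void)
    {
            struct adapter a = { .req_rx = 8, .req_tx = 4 };

            reinit_queues(&a, 8, 4);            /* prints "reset in place" */
            reinit_queues(&a, 4, 4);            /* prints "release + init" */
            return 0;
    }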
 
@@ -4421,7 +4529,7 @@ ibmvnic_register_fail:
        device_remove_file(&dev->dev, &dev_attr_failover);
 
 ibmvnic_init_fail:
-       release_sub_crqs(adapter);
+       release_sub_crqs(adapter, 1);
        release_crq_queue(adapter);
        free_netdev(netdev);
 
@@ -4438,9 +4546,12 @@ static int ibmvnic_remove(struct vio_dev *dev)
        mutex_lock(&adapter->reset_lock);
 
        release_resources(adapter);
-       release_sub_crqs(adapter);
+       release_sub_crqs(adapter, 1);
        release_crq_queue(adapter);
 
+       release_stats_token(adapter);
+       release_stats_buffers(adapter);
+
        adapter->state = VNIC_REMOVED;
 
        mutex_unlock(&adapter->reset_lock);
index fe21a6e..099c89d 100644 (file)
@@ -909,6 +909,7 @@ struct ibmvnic_tx_buff {
        union sub_crq indir_arr[6];
        u8 hdr_data[140];
        dma_addr_t indir_dma;
+       int num_entries;
 };
 
 struct ibmvnic_tx_pool {
@@ -1091,8 +1092,11 @@ struct ibmvnic_adapter {
        u64 opt_rxba_entries_per_subcrq;
        __be64 tx_rx_desc_req;
        u8 map_id;
-       u64 num_active_rx_pools;
-       u64 num_active_tx_pools;
+       u32 num_active_rx_scrqs;
+       u32 num_active_rx_pools;
+       u32 num_active_rx_napi;
+       u32 num_active_tx_scrqs;
+       u32 num_active_tx_pools;
 
        struct tasklet_struct tasklet;
        enum vnic_state state;
index 736a9f0..c58a537 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
  *  fm10k_read_hw_stats_32b - Reads value of 32-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing a 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of the register and returns the delta
  *  between the base and the current value.
@@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
  *  fm10k_read_hw_stats_48b - Reads value of 48-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing the lower 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of 2 registers, combined to represent a 48-bit
  *  statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
 
 /**
  *  fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- *  @hw: pointer to the hardware structure
  *  @q: pointer to the ring of hardware statistics queue
  *  @idx: index pointing to the start of the ring iteration
  *  @count: number of queues to iterate over
index 8e12aae..2c93d71 100644 (file)
 
 #include "fm10k.h"
 
-#define DRV_VERSION    "0.22.1-k"
+#define DRV_VERSION    "0.23.4-k"
 #define DRV_SUMMARY    "Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
 static const char fm10k_driver_string[] = DRV_SUMMARY;
 static const char fm10k_copyright[] =
-       "Copyright(c) 2013 - 2017 Intel Corporation.";
+       "Copyright(c) 2013 - 2018 Intel Corporation.";
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION(DRV_SUMMARY);
index a38ae5c..75c99ae 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
 
 /**
  * fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
  * @ti: Tunnel endpoint information
  *
  * This function is called when a new UDP tunnel port has been added.
@@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
 
 /**
  * fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
  *
  * This function is called when a UDP tunnel port is deleted. The freed

  * port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
  * @glort: the target glort for this update
  * @addr: the address to update
  * @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
  *
  * This function queues up a MAC request for sending to the switch manager.
  * A separate thread monitors the queue and sends updates to the switch
index a434fec..50f53e4 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
        [fm10k_device_vf] = &fm10k_vf_info,
 };
 
-/**
+/*
  * fm10k_pci_tbl - PCI Device ID Table
  *
  * Wildcard entries (PCI_ANY_ID) should come last
@@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
 
 /**
  * fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
  **/
 static void fm10k_service_timer(struct timer_list *t)
 {
@@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 {
@@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 {
@@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 
 /**
  * fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  *
  * This function serves two purposes.  First, it strobes the interrupt lines
  * in order to ensure that interrupts are occurring.  Second, it sets the
@@ -1995,6 +1995,7 @@ skip_tx_dma_drain:
 /**
  * fm10k_sw_init - Initialize general software structures
  * @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
  *
  * fm10k_sw_init initializes the interface private data structure.
  * Fields are initialized based on PCI device information and
index d6406fc..bee192f 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
 
 /**
  * fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
  * @vid: VLAN ID to correct
  *
  * Will report an error if the VLAN ID is out of range. For VID = 0, it will
index f8e87bf..9d0d31d 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
  *  @msg: Pointer to message block
  *  @attr_id: Attribute ID
  *  @mac_addr: MAC address to be stored
+ *  @vlan: VLAN to be stored
  *
  *  This function will reorder a MAC address to be CPU endian and store it
  *  in the attribute buffer.  It will return success if provided with a
@@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
 /**
  *  fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
  *  @attr: Pointer to attribute
- *  @attr_id: Attribute ID
  *  @mac_addr: location of buffer to store MAC address
+ *  @vlan: location of buffer to store VLAN
  *
  *  This function pulls the MAC address back out of the attribute and will
  *  place it in the array pointed to by mac_addr.  It will return success
@@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
  *  @hw: Pointer to hardware structure
  *  @msg: Pointer to message
  *  @mbx: Pointer to mailbox information structure
- *  @func: Function array containing list of message handling functions
+ *  @data: Pointer to message handler data structure
  *
  *  This function should be the first function called upon receiving a
  *  message.  The handler will identify the message type and call the correct
index 46e9f4e..36d9401 100644 (file)
@@ -507,6 +507,7 @@ struct i40e_pf {
 #define I40E_HW_STOP_FW_LLDP                   BIT(16)
 #define I40E_HW_PORT_ID_VALID                  BIT(17)
 #define I40E_HW_RESTART_AUTONEG                        BIT(18)
+#define I40E_HW_STOPPABLE_FW_LLDP              BIT(19)
 
        u64 flags;
 #define I40E_FLAG_RX_CSUM_ENABLED              BIT_ULL(0)
@@ -824,6 +825,7 @@ struct i40e_q_vector {
        struct i40e_ring_container rx;
        struct i40e_ring_container tx;
 
+       u8 itr_countdown;       /* when 0 should adjust adaptive ITR */
        u8 num_ringpairs;       /* total number of ring pairs in vector */
 
        cpumask_t affinity_mask;
@@ -832,8 +834,6 @@ struct i40e_q_vector {
        struct rcu_head rcu;    /* to avoid race with update stats on free */
        char name[I40E_INT_NAME_STR_LEN];
        bool arm_wb_state;
-#define ITR_COUNTDOWN_START 100
-       u8 itr_countdown;       /* when 0 should adjust ITR */
 } ____cacheline_internodealigned_in_smp;
 
 /* lan device */
@@ -1109,4 +1109,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
 
 int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
 int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+                             struct i40e_cloud_filter *filter,
+                             bool add);
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+                                     struct i40e_cloud_filter *filter,
+                                     bool add);
 #endif /* _I40E_H_ */
index a852775..0dfc527 100644 (file)
@@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type {
        I40E_PHY_TYPE_DEFAULT                   = 0xFF,
 };
 
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_SFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+                               BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+                               BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
 #define I40E_LINK_SPEED_100MB_SHIFT    0x1
 #define I40E_LINK_SPEED_1000MB_SHIFT   0x2
 #define I40E_LINK_SPEED_10GB_SHIFT     0x3
index 4c3b424..b829fd3 100644 (file)
@@ -155,8 +155,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                dev_info(&pf->pdev->dev, "        vlan_features = 0x%08lx\n",
                         (unsigned long int)nd->vlan_features);
        }
-       dev_info(&pf->pdev->dev,
-                "    vlgrp: & = %p\n", vsi->active_vlans);
+       dev_info(&pf->pdev->dev, "    active_vlans is %s\n",
+                vsi->active_vlans ? "<valid>" : "<null>");
        dev_info(&pf->pdev->dev,
                 "    flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
                 vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
@@ -269,14 +269,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                if (!rx_ring)
                        continue;
 
-               dev_info(&pf->pdev->dev,
-                        "    rx_rings[%i]: desc = %p\n",
-                        i, rx_ring->desc);
-               dev_info(&pf->pdev->dev,
-                        "    rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
-                        i, rx_ring->dev,
-                        rx_ring->netdev,
-                        rx_ring->rx_bi);
                dev_info(&pf->pdev->dev,
                         "    rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
                         i, *rx_ring->state,
@@ -307,17 +299,12 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                         rx_ring->rx_stats.realloc_count,
                         rx_ring->rx_stats.page_reuse_count);
                dev_info(&pf->pdev->dev,
-                        "    rx_rings[%i]: size = %i, dma = 0x%08lx\n",
-                        i, rx_ring->size,
-                        (unsigned long int)rx_ring->dma);
-               dev_info(&pf->pdev->dev,
-                        "    rx_rings[%i]: vsi = %p, q_vector = %p\n",
-                        i, rx_ring->vsi,
-                        rx_ring->q_vector);
+                        "    rx_rings[%i]: size = %i\n",
+                        i, rx_ring->size);
                dev_info(&pf->pdev->dev,
-                        "    rx_rings[%i]: rx_itr_setting = %d (%s)\n",
-                        i, rx_ring->rx_itr_setting,
-                        ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
+                        "    rx_rings[%i]: itr_setting = %d (%s)\n",
+                        i, rx_ring->itr_setting,
+                        ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
        }
        for (i = 0; i < vsi->num_queue_pairs; i++) {
                struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
@@ -325,14 +312,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                if (!tx_ring)
                        continue;
 
-               dev_info(&pf->pdev->dev,
-                        "    tx_rings[%i]: desc = %p\n",
-                        i, tx_ring->desc);
-               dev_info(&pf->pdev->dev,
-                        "    tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
-                        i, tx_ring->dev,
-                        tx_ring->netdev,
-                        tx_ring->tx_bi);
                dev_info(&pf->pdev->dev,
                         "    tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
                         i, *tx_ring->state,
@@ -355,20 +334,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                         tx_ring->tx_stats.tx_busy,
                         tx_ring->tx_stats.tx_done_old);
                dev_info(&pf->pdev->dev,
-                        "    tx_rings[%i]: size = %i, dma = 0x%08lx\n",
-                        i, tx_ring->size,
-                        (unsigned long int)tx_ring->dma);
-               dev_info(&pf->pdev->dev,
-                        "    tx_rings[%i]: vsi = %p, q_vector = %p\n",
-                        i, tx_ring->vsi,
-                        tx_ring->q_vector);
+                        "    tx_rings[%i]: size = %i\n",
+                        i, tx_ring->size);
                dev_info(&pf->pdev->dev,
                         "    tx_rings[%i]: DCB tc = %d\n",
                         i, tx_ring->dcb_tc);
                dev_info(&pf->pdev->dev,
-                        "    tx_rings[%i]: tx_itr_setting = %d (%s)\n",
-                        i, tx_ring->tx_itr_setting,
-                        ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed");
+                        "    tx_rings[%i]: itr_setting = %d (%s)\n",
+                        i, tx_ring->itr_setting,
+                        ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
        }
        rcu_read_unlock();
        dev_info(&pf->pdev->dev,
@@ -466,8 +440,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                 vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
                 vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
                 vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
-       if (vsi->back)
-               dev_info(&pf->pdev->dev, "    PF = %p\n", vsi->back);
        dev_info(&pf->pdev->dev, "    idx = %d\n", vsi->idx);
        dev_info(&pf->pdev->dev,
                 "    tc_config: numtc = %d, enabled_tc = 0x%x\n",
index 2f5bee7..89807e3 100644 (file)
@@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
        I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
        I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
        I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+       I40E_PRIV_FLAG("link-down-on-close",
+                      I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
        I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
        I40E_PRIV_FLAG("disable-source-pruning",
                       I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
@@ -2244,14 +2246,14 @@ static int __i40e_get_coalesce(struct net_device *netdev,
        rx_ring = vsi->rx_rings[queue];
        tx_ring = vsi->tx_rings[queue];
 
-       if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+       if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
                ec->use_adaptive_rx_coalesce = 1;
 
-       if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+       if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
                ec->use_adaptive_tx_coalesce = 1;
 
-       ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-       ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
 
        /* we use the _usecs_high to store/set the interrupt rate limit
         * that the hardware supports, that almost but not quite
@@ -2311,34 +2313,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
        struct i40e_q_vector *q_vector;
-       u16 vector, intrl;
+       u16 intrl;
 
        intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
 
-       rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
-       tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+       rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+       tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
 
        if (ec->use_adaptive_rx_coalesce)
-               rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+               rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
        else
-               rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+               rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
 
        if (ec->use_adaptive_tx_coalesce)
-               tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+               tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
        else
-               tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+               tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
 
        q_vector = rx_ring->q_vector;
-       q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
-       vector = vsi->base_vector + q_vector->v_idx;
-       wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+       q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 
        q_vector = tx_ring->q_vector;
-       q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
-       vector = vsi->base_vector + q_vector->v_idx;
-       wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+       q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
 
-       wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
+       /* The interrupt handler itself will take care of programming
+        * the Tx and Rx ITR values based on the values we have entered
+        * into the q_vector, no need to write the values now.
+        */
+
+       wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
        i40e_flush(hw);
 }
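
With the target_itr/current_itr split, the ethtool path only records the desired ITR value, and the interrupt handler programs the register when the two diverge. A runnable sketch of that deferred-write pattern; write_itr_reg() is a hypothetical stand-in for the wr32() register write:

    /* Runnable sketch of deferred ITR programming. */
    #include <stdio.h>

    struct ring_container { unsigned int target_itr, current_itr; };

    static void write_itr_reg(unsigned int v)   /* hypothetical register write */
    {
            printf("program ITR = %u\n", v);
    }

    static void set_coalesce(struct ring_container *rc, unsigned int usecs)
    {
            rc->target_itr = usecs;             /* config path: record only */
    }

    static void irq_update(struct ring_container *rc)
    {
            if (rc->target_itr != rc->current_itr) {
                    write_itr_reg(rc->target_itr);  /* hot path reconciles lazily */
                    rc->current_itr = rc->target_itr;
            }
    }

    int main(void)
    {
            struct ring_container rc = { 50, 50 };

            set_coalesce(&rc, 84);              /* only records the target */
            irq_update(&rc);                    /* prints "program ITR = 84" */
            irq_update(&rc);                    /* no-op: already in sync */
            return 0;
    }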
 
@@ -2364,11 +2367,11 @@ static int __i40e_set_coalesce(struct net_device *netdev,
                vsi->work_limit = ec->tx_max_coalesced_frames_irq;
 
        if (queue < 0) {
-               cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting;
-               cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting;
+               cur_rx_itr = vsi->rx_rings[0]->itr_setting;
+               cur_tx_itr = vsi->tx_rings[0]->itr_setting;
        } else if (queue < vsi->num_queue_pairs) {
-               cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting;
-               cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting;
+               cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
+               cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
        } else {
                netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
                           vsi->num_queue_pairs - 1);
@@ -2396,7 +2399,7 @@ static int __i40e_set_coalesce(struct net_device *netdev,
                return -EINVAL;
        }
 
-       if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+       if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
                netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
                return -EINVAL;
        }
@@ -2407,16 +2410,16 @@ static int __i40e_set_coalesce(struct net_device *netdev,
                return -EINVAL;
        }
 
-       if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+       if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
                netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
                return -EINVAL;
        }
 
        if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
-               ec->rx_coalesce_usecs = I40E_MIN_ITR << 1;
+               ec->rx_coalesce_usecs = I40E_MIN_ITR;
 
        if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
-               ec->tx_coalesce_usecs = I40E_MIN_ITR << 1;
+               ec->tx_coalesce_usecs = I40E_MIN_ITR;
 
        intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
        vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
@@ -4406,6 +4409,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
        }
 
 flags_complete:
+       changed_flags = orig_flags ^ new_flags;
+
        /* Before we finalize any flag changes, we need to perform some
         * checks to ensure that the changes are supported and safe.
         */
@@ -4415,21 +4420,17 @@ flags_complete:
            !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
                return -EOPNOTSUPP;
 
-       /* Disable FW LLDP not supported if NPAR active or if FW
-        * API version < 1.7
+       /* If the driver detected FW LLDP was disabled on init, this flag could
+        * be set; however, we do not support _changing_ the flag if NPAR is
+        * enabled or the FW API version is < 1.7.  There are situations where
+        * older FW versions or NPAR-enabled PFs could disable LLDP, but we
+        * _must_ not allow the user to enable/disable LLDP with this flag on
+        * unsupported FW versions.
         */
-       if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
-               if (pf->hw.func_caps.npar_enable) {
-                       dev_warn(&pf->pdev->dev,
-                                "Unable to stop FW LLDP if NPAR active\n");
-                       return -EOPNOTSUPP;
-               }
-
-               if (pf->hw.aq.api_maj_ver < 1 ||
-                   (pf->hw.aq.api_maj_ver == 1 &&
-                    pf->hw.aq.api_min_ver < 7)) {
+       if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+               if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) {
                        dev_warn(&pf->pdev->dev,
-                                "FW ver does not support stopping FW LLDP\n");
+                                "Device does not support changing FW LLDP\n");
                        return -EOPNOTSUPP;
                }
        }
@@ -4439,6 +4440,10 @@ flags_complete:
         * something else has modified the flags variable since we copied it
         * originally. We'll just punt with an error and log something in the
         * message buffer.
+        *
+        * This is the point of no return for this function.  We need to have
+        * checked any discrepancies or misconfigurations and returned
+        * EOPNOTSUPP before updating pf->flags here.
         */
        if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
                dev_warn(&pf->pdev->dev,
@@ -4446,8 +4451,6 @@ flags_complete:
                return -EAGAIN;
        }
 
-       changed_flags = orig_flags ^ new_flags;
-
        /* Process any additional changes needed as a result of flag changes.
         * The changed_flags value reflects the list of bits that were
         * changed in the code above.
@@ -4479,6 +4482,12 @@ flags_complete:
                }
        }
 
+       if ((changed_flags & pf->flags &
+            I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+           (pf->flags & I40E_FLAG_MFP_ENABLED))
+               dev_warn(&pf->pdev->dev,
+                        "Turning on link-down-on-close flag may affect other partitions\n");
+
        if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
                if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
                        struct i40e_dcbx_config *dcbcfg;
index e31adbc..be9a146 100644 (file)
@@ -69,12 +69,6 @@ static int i40e_reset(struct i40e_pf *pf);
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
-                                    struct i40e_cloud_filter *filter,
-                                    bool add);
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
-                                            struct i40e_cloud_filter *filter,
-                                            bool add);
 static int i40e_get_capabilities(struct i40e_pf *pf,
                                 enum i40e_admin_queue_opc list_type);
 
@@ -215,8 +209,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
 
        if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
                dev_info(&pf->pdev->dev,
-                        "param err: pile=%p needed=%d id=0x%04x\n",
-                        pile, needed, id);
+                        "param err: pile=%s needed=%d id=0x%04x\n",
+                        pile ? "<valid>" : "<null>", needed, id);
                return -EINVAL;
        }
 
@@ -1380,14 +1374,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 
                ether_addr_copy(f->macaddr, macaddr);
                f->vlan = vlan;
-               /* If we're in overflow promisc mode, set the state directly
-                * to failed, so we don't bother to try sending the filter
-                * to the hardware.
-                */
-               if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))
-                       f->state = I40E_FILTER_FAILED;
-               else
-                       f->state = I40E_FILTER_NEW;
+               f->state = I40E_FILTER_NEW;
                INIT_HLIST_NODE(&f->hlist);
 
                key = i40e_addr_to_hkey(macaddr);
@@ -2116,17 +2103,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
  * @list: the list of filters to send to firmware
  * @add_head: Position in the add hlist
  * @num_add: the number of filters to add
- * @promisc_change: set to true on exit if promiscuous mode was forced on
  *
  * Send a request to firmware via AdminQ to add a chunk of filters. Will set
- * promisc_changed to true if the firmware has run out of space for more
- * filters.
+ * the __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run
+ * out of space for more filters.
  */
 static
 void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
                          struct i40e_aqc_add_macvlan_element_data *list,
                          struct i40e_new_mac_filter *add_head,
-                         int num_add, bool *promisc_changed)
+                         int num_add)
 {
        struct i40e_hw *hw = &vsi->back->hw;
        int aq_err, fcnt;
@@ -2136,7 +2122,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
        fcnt = i40e_update_filter_state(num_add, list, add_head);
 
        if (fcnt != num_add) {
-               *promisc_changed = true;
                set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
                dev_warn(&vsi->back->pdev->dev,
                         "Error %s adding RX filters on %s, promiscuous mode forced on\n",
@@ -2177,11 +2162,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
                                                            NULL);
        }
 
-       if (aq_ret)
+       if (aq_ret) {
+               set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
                dev_warn(&vsi->back->pdev->dev,
-                        "Error %s setting broadcast promiscuous mode on %s\n",
+                        "Error %s, forcing overflow promiscuous on %s\n",
                         i40e_aq_str(hw, hw->aq.asq_last_status),
                         vsi_name);
+       }
 
        return aq_ret;
 }
@@ -2267,9 +2254,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
        struct i40e_mac_filter *f;
        struct i40e_new_mac_filter *new, *add_head = NULL;
        struct i40e_hw *hw = &vsi->back->hw;
+       bool old_overflow, new_overflow;
        unsigned int failed_filters = 0;
        unsigned int vlan_filters = 0;
-       bool promisc_changed = false;
        char vsi_name[16] = "PF";
        int filter_list_len = 0;
        i40e_status aq_ret = 0;
@@ -2291,6 +2278,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                usleep_range(1000, 2000);
        pf = vsi->back;
 
+       old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
        if (vsi->netdev) {
                changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
                vsi->current_netdev_flags = vsi->netdev->flags;
@@ -2423,12 +2412,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
                num_add = 0;
                hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
-                       if (test_bit(__I40E_VSI_OVERFLOW_PROMISC,
-                                    vsi->state)) {
-                               new->state = I40E_FILTER_FAILED;
-                               continue;
-                       }
-
                        /* handle broadcast filters by updating the broadcast
                         * promiscuous flag instead of adding a MAC filter.
                         */
@@ -2464,15 +2447,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                        /* flush a full buffer */
                        if (num_add == filter_list_len) {
                                i40e_aqc_add_filters(vsi, vsi_name, add_list,
-                                                    add_head, num_add,
-                                                    &promisc_changed);
+                                                    add_head, num_add);
                                memset(add_list, 0, list_size);
                                num_add = 0;
                        }
                }
                if (num_add) {
                        i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
-                                            num_add, &promisc_changed);
+                                            num_add);
                }
                /* Now move all of the filters from the temp add list back to
                 * the VSI's list.
@@ -2501,24 +2483,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
        }
        spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
-       /* If promiscuous mode has changed, we need to calculate a new
-        * threshold for when we are safe to exit
-        */
-       if (promisc_changed)
-               vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
-
        /* Check if we are able to exit overflow promiscuous mode. We can
         * safely exit if we didn't just enter, we no longer have any failed
         * filters, and we have reduced filters below the threshold value.
         */
-       if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) &&
-           !promisc_changed && !failed_filters &&
-           (vsi->active_filters < vsi->promisc_threshold)) {
+       if (old_overflow && !failed_filters &&
+           vsi->active_filters < vsi->promisc_threshold) {
                dev_info(&pf->pdev->dev,
                         "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
                         vsi_name);
                clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
-               promisc_changed = true;
                vsi->promisc_threshold = 0;
        }
 
@@ -2528,6 +2502,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                goto out;
        }
 
+       new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+       /* If we are entering overflow promiscuous, we need to calculate a new
+        * threshold for when we are safe to exit
+        */
+       if (!old_overflow && new_overflow)
+               vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+
        /* check for changes in promiscuous modes */
        if (changed_flags & IFF_ALLMULTI) {
                bool cur_multipromisc;
@@ -2548,12 +2530,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                }
        }
 
-       if ((changed_flags & IFF_PROMISC) || promisc_changed) {
+       if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
                bool cur_promisc;
 
                cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
-                              test_bit(__I40E_VSI_OVERFLOW_PROMISC,
-                                       vsi->state));
+                              new_overflow);
                aq_ret = i40e_set_promiscuous(pf, cur_promisc);
                if (aq_ret) {
                        retval = i40e_aq_rc_to_posix(aq_ret,
@@ -3449,15 +3430,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
        for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
                struct i40e_q_vector *q_vector = vsi->q_vectors[i];
 
-               q_vector->itr_countdown = ITR_COUNTDOWN_START;
-               q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
-               q_vector->rx.latency_range = I40E_LOW_LATENCY;
+               q_vector->rx.next_update = jiffies + 1;
+               q_vector->rx.target_itr =
+                       ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
                wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
-                    q_vector->rx.itr);
-               q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
-               q_vector->tx.latency_range = I40E_LOW_LATENCY;
+                    q_vector->rx.target_itr);
+               q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+               q_vector->tx.next_update = jiffies + 1;
+               q_vector->tx.target_itr =
+                       ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
                wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
-                    q_vector->tx.itr);
+                    q_vector->tx.target_itr);
+               q_vector->tx.current_itr = q_vector->tx.target_itr;
+
                wr32(hw, I40E_PFINT_RATEN(vector - 1),
                     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
 
@@ -3558,13 +3544,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
        u32 val;
 
        /* set the ITR configuration */
-       q_vector->itr_countdown = ITR_COUNTDOWN_START;
-       q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
-       q_vector->rx.latency_range = I40E_LOW_LATENCY;
-       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
-       q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
-       q_vector->tx.latency_range = I40E_LOW_LATENCY;
-       wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
+       q_vector->rx.next_update = jiffies + 1;
+       q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
+       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+       q_vector->rx.current_itr = q_vector->rx.target_itr;
+       q_vector->tx.next_update = jiffies + 1;
+       q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
+       wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+       q_vector->tx.current_itr = q_vector->tx.target_itr;
 
        i40e_enable_misc_int_causes(pf);
 
@@ -5375,7 +5362,7 @@ out:
  * @vsi: VSI to be configured
  *
  **/
-int i40e_get_link_speed(struct i40e_vsi *vsi)
+static int i40e_get_link_speed(struct i40e_vsi *vsi)
 {
        struct i40e_pf *pf = vsi->back;
 
@@ -6559,6 +6546,75 @@ int i40e_up(struct i40e_vsi *vsi)
        return err;
 }
 
+/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+       struct i40e_aq_get_phy_abilities_resp abilities;
+       struct i40e_aq_set_phy_config config = {0};
+       struct i40e_hw *hw = &pf->hw;
+       i40e_status err;
+       u64 mask;
+
+       /* Get the current phy config */
+       err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+                                          NULL);
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "failed to get phy cap., ret =  %s last_status =  %s\n",
+                       i40e_stat_str(hw, err),
+                       i40e_aq_str(hw, hw->aq.asq_last_status));
+               return err;
+       }
+
+       /* If link needs to go up, but was not forced to go down,
+        * no need for a flap
+        */
+       if (is_up && abilities.phy_type != 0)
+               return I40E_SUCCESS;
+
+       /* To force link we need to set bits for all supported PHY types,
+        * but there are now more than 32, so we need to split the bitmap
+        * across two fields.
+        */
+       mask = I40E_PHY_TYPES_BITMASK;
+       config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+       config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+       /* Copy the old settings, except of phy_type */
+       config.abilities = abilities.abilities;
+       config.link_speed = abilities.link_speed;
+       config.eee_capability = abilities.eee_capability;
+       config.eeer = abilities.eeer_val;
+       config.low_power_ctrl = abilities.d3_lpan;
+       err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "set phy config ret =  %s last_status =  %s\n",
+                       i40e_stat_str(&pf->hw, err),
+                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               return err;
+       }
+
+       /* Update the link info */
+       err = i40e_update_link_info(hw);
+       if (err) {
+               /* Wait a little bit (on 40G cards it sometimes takes a really
+                * long time for link to come back from the atomic reset)
+                * and try once more
+                */
+               msleep(1000);
+               i40e_update_link_info(hw);
+       }
+
+       i40e_aq_set_link_restart_an(hw, true, NULL);
+
+       return I40E_SUCCESS;
+}
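
Because there are now more than 32 supported PHY types, the bitmap has to be split across the 32-bit phy_type field and the 8-bit phy_type_ext field, exactly as the masking above does. A standalone check of that split, using an arbitrary example mask rather than the real I40E_PHY_TYPES_BITMASK value:

    /* Standalone demonstration of the 64-bit mask split. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t mask = 0x3ffffffffffULL;   /* arbitrary example bitmap */
            uint32_t phy_type = (uint32_t)(mask & 0xffffffff);
            uint8_t  phy_type_ext = (uint8_t)((mask >> 32) & 0xff);

            printf("phy_type=0x%08x phy_type_ext=0x%02x\n", phy_type, phy_type_ext);
            return 0;
    }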
+
 /**
  * i40e_down - Shutdown the connection processing
  * @vsi: the VSI being stopped
@@ -6576,6 +6632,9 @@ void i40e_down(struct i40e_vsi *vsi)
        }
        i40e_vsi_disable_irq(vsi);
        i40e_vsi_stop_rings(vsi);
+       if (vsi->type == I40E_VSI_MAIN &&
+           vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+               i40e_force_link_state(vsi->back, false);
        i40e_napi_disable_all(vsi);
 
        for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -6848,8 +6907,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter,
  * Add or delete a cloud filter for a specific flow spec.
  * Returns 0 if the filter was successfully added.
  **/
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
-                                    struct i40e_cloud_filter *filter, bool add)
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+                             struct i40e_cloud_filter *filter, bool add)
 {
        struct i40e_aqc_cloud_filters_element_data cld_filter;
        struct i40e_pf *pf = vsi->back;
@@ -6915,9 +6974,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
  * Add or delete a cloud filter for a specific flow spec using big buffer.
  * Returns 0 if the filter was successfully added.
  **/
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
-                                            struct i40e_cloud_filter *filter,
-                                            bool add)
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+                                     struct i40e_cloud_filter *filter,
+                                     bool add)
 {
        struct i40e_aqc_cloud_filters_element_bb cld_filter;
        struct i40e_pf *pf = vsi->back;
@@ -7537,6 +7596,9 @@ int i40e_open(struct net_device *netdev)
 
        netif_carrier_off(netdev);
 
+       if (i40e_force_link_state(pf, true))
+               return -EAGAIN;
+
        err = i40e_vsi_open(vsi);
        if (err)
                return err;
@@ -9215,6 +9277,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
        }
        i40e_get_oem_version(&pf->hw);
 
+       if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+           ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
+            hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
+               /* The following delay is necessary for 4.33 firmware and older
+                * to recover after an EMP reset. 200 ms should suffice, but we
+                * wait 300 ms to be sure that the FW is ready to operate
+                * after reset.
+                */
+               mdelay(300);
+       }
+
        /* re-verify the eeprom if we just had an EMP reset */
        if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
                i40e_verify_eeprom(pf);
@@ -9937,18 +10010,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
 
        mutex_lock(&pf->switch_mutex);
        if (!pf->vsi[vsi->idx]) {
-               dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n",
-                       vsi->idx, vsi->idx, vsi, vsi->type);
+               dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
+                       vsi->idx, vsi->idx, vsi->type);
                goto unlock_vsi;
        }
 
        if (pf->vsi[vsi->idx] != vsi) {
                dev_err(&pf->pdev->dev,
-                       "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n",
+                       "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
                        pf->vsi[vsi->idx]->idx,
-                       pf->vsi[vsi->idx],
                        pf->vsi[vsi->idx]->type,
-                       vsi->idx, vsi, vsi->type);
+                       vsi->idx, vsi->type);
                goto unlock_vsi;
        }
 
@@ -10018,7 +10090,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
                ring->dcb_tc = 0;
                if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
                        ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
-               ring->tx_itr_setting = pf->tx_itr_default;
+               ring->itr_setting = pf->tx_itr_default;
                vsi->tx_rings[i] = ring++;
 
                if (!i40e_enabled_xdp_vsi(vsi))
@@ -10036,7 +10108,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
                if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
                        ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
                set_ring_xdp(ring);
-               ring->tx_itr_setting = pf->tx_itr_default;
+               ring->itr_setting = pf->tx_itr_default;
                vsi->xdp_rings[i] = ring++;
 
 setup_rx:
@@ -10049,7 +10121,7 @@ setup_rx:
                ring->count = vsi->num_desc;
                ring->size = 0;
                ring->dcb_tc = 0;
-               ring->rx_itr_setting = pf->rx_itr_default;
+               ring->itr_setting = pf->rx_itr_default;
                vsi->rx_rings[i] = ring;
        }
 
@@ -10328,9 +10400,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
                netif_napi_add(vsi->netdev, &q_vector->napi,
                               i40e_napi_poll, NAPI_POLL_WEIGHT);
 
-       q_vector->rx.latency_range = I40E_LOW_LATENCY;
-       q_vector->tx.latency_range = I40E_LOW_LATENCY;
-
        /* tie q_vector and vsi together */
        vsi->q_vectors[v_idx] = q_vector;
 
@@ -11089,6 +11158,16 @@ static int i40e_sw_init(struct i40e_pf *pf)
                /* IWARP needs one extra vector for CQP just like MISC.*/
                pf->num_iwarp_msix = (int)num_online_cpus() + 1;
        }
+       /* Stopping the FW LLDP engine is only supported on the
+        * XL710 with a FW ver >= 1.7.  Also, stopping the FW LLDP
+        * engine is not supported if NPAR is functioning on this
+        * part.
+        */
+       if (pf->hw.mac.type == I40E_MAC_XL710 &&
+           !pf->hw.func_caps.npar_enable &&
+           (pf->hw.aq.api_maj_ver > 1 ||
+            (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6)))
+               pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP;
 
 #ifdef CONFIG_PCI_IOV
        if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
index e554aa6..97cfe94 100644 (file)
@@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 /**
  * i40e_get_tx_pending - how many tx descriptors not processed
  * @tx_ring: the ring of descriptors
+ * @in_sw: use the SW next_to_clean/next_to_use values instead
  *
  * Since there is no access to the ring head register
  * in XL710, we need to use our local copies
  **/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 {
        u32 head, tail;
 
-       head = i40e_get_head(ring);
-       tail = readl(ring->tail);
+       if (!in_sw) {
+               head = i40e_get_head(ring);
+               tail = readl(ring->tail);
+       } else {
+               head = ring->next_to_clean;
+               tail = ring->next_to_use;
+       }
 
        if (head != tail)
                return (head < tail) ?
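The ternary above is cut off by the hunk boundary; as a standalone sketch (not driver code), the wraparound arithmetic it performs looks like this:

#include <stdio.h>

/* Pending descriptors between head (next to clean) and tail (next to use),
 * accounting for the ring wrapping around after "count" entries.
 */
static unsigned int ring_pending(unsigned int head, unsigned int tail,
                                 unsigned int count)
{
        if (head == tail)
                return 0;
        return (head < tail) ? tail - head : tail + count - head;
}

int main(void)
{
        /* 512-entry ring: 10 pending without wrap, 12 pending across wrap */
        printf("%u %u\n", ring_pending(100, 110, 512),
               ring_pending(510, 10, 512));
        return 0;
}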
@@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
                         */
                        smp_rmb();
                        tx_ring->tx_stats.prev_pkt_ctr =
-                           i40e_get_tx_pending(tx_ring) ? packets : -1;
+                           i40e_get_tx_pending(tx_ring, true) ? packets : -1;
                }
        }
 }
@@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                 * them to be written back in case we stay in NAPI.
                 * In this mode on X722 we do not enable Interrupt.
                 */
-               unsigned int j = i40e_get_tx_pending(tx_ring);
+               unsigned int j = i40e_get_tx_pending(tx_ring, false);
 
                if (budget &&
                    ((j / WB_STRIDE) == 0) && (j > 0) &&
@@ -995,99 +1001,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
        }
 }
 
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+                                       struct i40e_ring_container *rc)
+{
+       return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+       unsigned int divisor;
+
+       switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+               break;
+       case I40E_LINK_SPEED_25GB:
+       case I40E_LINK_SPEED_20GB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+               break;
+       default:
+       case I40E_LINK_SPEED_10GB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+               break;
+       case I40E_LINK_SPEED_1GB:
+       case I40E_LINK_SPEED_100MB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+               break;
+       }
+
+       return divisor;
+}
+
 /**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
  * @rc: structure containing ring performance data
  *
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt.  The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern.  Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
  * while increasing bulk throughput.
  **/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+                           struct i40e_ring_container *rc)
 {
-       enum i40e_latency_range new_latency_range = rc->latency_range;
-       u32 new_itr = rc->itr;
-       int bytes_per_usec;
-       unsigned int usecs, estimated_usecs;
+       unsigned int avg_wire_size, packets, bytes, itr;
+       unsigned long next_update = jiffies;
 
-       if (rc->total_packets == 0 || !rc->itr)
-               return false;
+       /* If we don't have any rings just leave ourselves set for maximum
+        * possible latency so we take ourselves out of the equation.
+        */
+       if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+               return;
+
+       /* For Rx we want to push the delay up and default to low latency.
+        * For Tx we want to pull the delay down and default to high latency.
+        */
+       itr = i40e_container_is_rx(q_vector, rc) ?
+             I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+             I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+       /* If we haven't updated within the last 1 - 2 jiffies we can assume
+        * that either packets are coming in so slow there hasn't been
+        * any work, or that there is so much work that NAPI is dealing
+        * with interrupt moderation and we don't need to do anything.
+        */
+       if (time_after(next_update, rc->next_update))
+               goto clear_counts;
+
+       /* If itr_countdown is set it means we programmed an ITR within
+        * the last 4 interrupt cycles. This has a side effect of us
+        * potentially firing an early interrupt. In order to work around
+        * this we need to throw out any data received for a few
+        * interrupts following the update.
+        */
+       if (q_vector->itr_countdown) {
+               itr = rc->target_itr;
+               goto clear_counts;
+       }
+
+       packets = rc->total_packets;
+       bytes = rc->total_bytes;
 
-       usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
-       bytes_per_usec = rc->total_bytes / usecs;
+       if (i40e_container_is_rx(q_vector, rc)) {
+               /* If Rx and there are 1 to 4 packets and bytes are less than
+                * 9000 assume insufficient data to use bulk rate limiting
+                * approach unless Tx is already in bulk rate limiting. We
+                * are likely latency driven.
+                */
+               if (packets && packets < 4 && bytes < 9000 &&
+                   (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+                       itr = I40E_ITR_ADAPTIVE_LATENCY;
+                       goto adjust_by_size;
+               }
+       } else if (packets < 4) {
+               /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+                * bulk mode and we are receiving 4 or fewer packets just
+                * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+                * that the Rx can relax.
+                */
+               if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+                   (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+                    I40E_ITR_ADAPTIVE_MAX_USECS)
+                       goto clear_counts;
+       } else if (packets > 32) {
+               /* If we have processed over 32 packets in a single interrupt
+                * for Tx assume we need to switch over to "bulk" mode.
+                */
+               rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+       }
 
-       /* The calculations in this algorithm depend on interrupts actually
-        * firing at the ITR rate. This may not happen if the packet rate is
-        * really low, or if we've been napi polling. Check to make sure
-        * that's not the case before we continue.
+       /* If we have no packets to actually measure against, then either
+        * one of the other queues on this vector is active or we are a Tx
+        * queue doing TSO with too high an interrupt rate.
+        *
+        * Between 4 and 56 we can assume that our current interrupt delay
+        * is only slightly too low. As such we should increase it by a small
+        * fixed amount.
         */
-       estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
-       if (estimated_usecs > usecs) {
-               new_latency_range = I40E_LOW_LATENCY;
-               goto reset_latency;
+       if (packets < 56) {
+               itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+               if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+                       itr &= I40E_ITR_ADAPTIVE_LATENCY;
+                       itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+               }
+               goto clear_counts;
+       }
+
+       if (packets <= 256) {
+               itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+               itr &= I40E_ITR_MASK;
+
+               /* Between 56 and 112 is our "goldilocks" zone where we are
+                * working out "just right". Just report that our current
+                * ITR is good for us.
+                */
+               if (packets <= 112)
+                       goto clear_counts;
+
+               /* If packet count is 128 or greater we are likely looking
+                * at a slight overrun of the delay we want. Try halving
+                * our delay to see if that will cut the number of packets
+                * in half per interrupt.
+                */
+               itr /= 2;
+               itr &= I40E_ITR_MASK;
+               if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+                       itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+               goto clear_counts;
        }
 
-       /* simple throttlerate management
-        *   0-10MB/s   lowest (50000 ints/s)
-        *  10-20MB/s   low    (20000 ints/s)
-        *  20-1249MB/s bulk   (18000 ints/s)
+       /* The paths below assume we are dealing with a bulk ITR since
+        * number of packets is greater than 256. We are just going to have
+        * to compute a value and try to bring the count under control,
+        * though for smaller packet sizes there isn't much we can do as
+        * NAPI polling will likely be kicking in sooner rather than later.
+        */
+       itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+       /* If packet counts are 256 or greater we can assume we have a gross
+        * overestimation of what the rate should be. Instead of trying to fine
+        * tune it just use the formula below to try and dial in an exact value
+        * given the current packet size of the frame.
+        */
+       avg_wire_size = bytes / packets;
+
+       /* The following is a crude approximation of:
+        *  wmem_default / (size + overhead) = desired_pkts_per_int
+        *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+        *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+        *
+        * Assuming wmem_default is 212992 and overhead is 640 bytes per
+        * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+        * formula down to
         *
-        * The math works out because the divisor is in 10^(-6) which
-        * turns the bytes/us input value into MB/s values, but
-        * make sure to use usecs, as the register values written
-        * are in 2 usec increments in the ITR registers, and make sure
-        * to use the smoothed values that the countdown timer gives us.
+        *  (170 * (size + 24)) / (size + 640) = ITR
+        *
+        * We first do some math on the packet size and then finally bitshift
+        * by 8 after rounding up. We also have to account for PCIe link speed
+        * difference as ITR scales based on this.
         */
-       switch (new_latency_range) {
-       case I40E_LOWEST_LATENCY:
-               if (bytes_per_usec > 10)
-                       new_latency_range = I40E_LOW_LATENCY;
-               break;
-       case I40E_LOW_LATENCY:
-               if (bytes_per_usec > 20)
-                       new_latency_range = I40E_BULK_LATENCY;
-               else if (bytes_per_usec <= 10)
-                       new_latency_range = I40E_LOWEST_LATENCY;
-               break;
-       case I40E_BULK_LATENCY:
-       default:
-               if (bytes_per_usec <= 20)
-                       new_latency_range = I40E_LOW_LATENCY;
-               break;
+       if (avg_wire_size <= 60) {
+               /* Start at 250k ints/sec */
+               avg_wire_size = 4096;
+       } else if (avg_wire_size <= 380) {
+               /* 250K ints/sec to 60K ints/sec */
+               avg_wire_size *= 40;
+               avg_wire_size += 1696;
+       } else if (avg_wire_size <= 1084) {
+               /* 60K ints/sec to 36K ints/sec */
+               avg_wire_size *= 15;
+               avg_wire_size += 11452;
+       } else if (avg_wire_size <= 1980) {
+               /* 36K ints/sec to 30K ints/sec */
+               avg_wire_size *= 5;
+               avg_wire_size += 22420;
+       } else {
+               /* plateau at a limit of 30K ints/sec */
+               avg_wire_size = 32256;
        }
 
-reset_latency:
-       rc->latency_range = new_latency_range;
+       /* If we are in low latency mode halve our delay which doubles the
+        * rate to somewhere between 100K and 16K ints/sec.
+        */
+       if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+               avg_wire_size /= 2;
 
-       switch (new_latency_range) {
-       case I40E_LOWEST_LATENCY:
-               new_itr = I40E_ITR_50K;
-               break;
-       case I40E_LOW_LATENCY:
-               new_itr = I40E_ITR_20K;
-               break;
-       case I40E_BULK_LATENCY:
-               new_itr = I40E_ITR_18K;
-               break;
-       default:
-               break;
+       /* Resultant value is 256 times larger than it needs to be. This
+        * gives us room to adjust the value as needed to either increase
+        * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+        *
+        * Use addition as we have already recorded the new latency flag
+        * for the ITR value.
+        */
+       itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+              I40E_ITR_ADAPTIVE_MIN_INC;
+
+       if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+               itr &= I40E_ITR_ADAPTIVE_LATENCY;
+               itr += I40E_ITR_ADAPTIVE_MAX_USECS;
        }
 
+clear_counts:
+       /* write back value */
+       rc->target_itr = itr;
+
+       /* next update should occur within next jiffy */
+       rc->next_update = next_update + 1;
+
        rc->total_bytes = 0;
        rc->total_packets = 0;
-       rc->last_itr_update = jiffies;
-
-       if (new_itr != rc->itr) {
-               rc->itr = new_itr;
-               return true;
-       }
-       return false;
 }
 
 /**
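As a sanity check on the adjust_by_size arithmetic, here is a standalone sketch (not driver code) of the one case the comments call out explicitly: frames of 60 bytes or less on a 40G link, which lands on the advertised 250K ints/sec:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)        (((n) + (d) - 1) / (d))
#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002

int main(void)
{
        unsigned int avg_wire_size = 4096; /* the <= 60 byte frame bucket */
        /* divisor for I40E_LINK_SPEED_40GB, per i40e_itr_divisor() above */
        unsigned int divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
        unsigned int itr;

        itr = DIV_ROUND_UP(avg_wire_size, divisor) * I40E_ITR_ADAPTIVE_MIN_INC;
        printf("ITR = %u usec (~%u ints/sec)\n", itr, 1000000 / itr);
        /* prints: ITR = 4 usec (~250000 ints/sec) */
        return 0;
}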
@@ -1991,7 +2139,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
  * @rx_buffer: rx buffer to pull data from
  *
  * This function will clean up the contents of the rx_buffer.  It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
  */
 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
                               struct i40e_rx_buffer *rx_buffer)
@@ -2274,29 +2422,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        return failure ? budget : (int)total_rx_packets;
 }
 
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 {
        u32 val;
 
+       /* We don't bother with setting the CLEARPBA bit as the data sheet
+        * points out doing so is "meaningless since it was already
+        * auto-cleared". The auto-clearing happens when the interrupt is
+        * asserted.
+        *
+        * Hardware errata 28 also indicates that writing to an
+        * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+        * an event in the PBA anyway so we need to rely on the automask
+        * to hold pending events for us until the interrupt is re-enabled.
+        *
+        * The itr value is reported in microseconds, and the register
+        * value is recorded in 2 microsecond units. For this reason we
+        * only need to shift by the interval shift - 1 instead of the
+        * full value.
+        */
+       itr &= I40E_ITR_MASK;
+
        val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-             I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
              (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-             (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+             (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
 
        return val;
 }
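The "shift by INTERVAL_SHIFT - 1" trick above is easy to misread: because the register counts the interval in 2 usec units, shifting the even microsecond value by one bit less is the same as halving it first. A quick standalone check (the shift value of 5 is assumed here purely for illustration):

#include <assert.h>

#define INTERVAL_SHIFT 5        /* assumed value, for illustration only */

int main(void)
{
        unsigned int itr_usecs = 50;    /* must be even */

        /* writing usecs shifted by one less bit == writing usecs/2
         * shifted by the full field offset
         */
        assert((itr_usecs << (INTERVAL_SHIFT - 1)) ==
               ((itr_usecs / 2) << INTERVAL_SHIFT));
        return 0;
}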
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_PFINT_DYN_CTLN
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
-       return vsi->rx_rings[idx]->rx_itr_setting;
-}
 
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
-       return vsi->tx_rings[idx]->tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -2308,10 +2472,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
                                          struct i40e_q_vector *q_vector)
 {
        struct i40e_hw *hw = &vsi->back->hw;
-       bool rx = false, tx = false;
-       u32 rxval, txval;
-       int idx = q_vector->v_idx;
-       int rx_itr_setting, tx_itr_setting;
+       u32 intval;
 
        /* If we don't have MSIX, then we only need to re-enable icr0 */
        if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2319,65 +2480,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
                return;
        }
 
-       /* avoid dynamic calculation if in countdown mode OR if
-        * all dynamic is disabled
-        */
-       rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
-       rx_itr_setting = get_rx_itr(vsi, idx);
-       tx_itr_setting = get_tx_itr(vsi, idx);
-
-       if (q_vector->itr_countdown > 0 ||
-           (!ITR_IS_DYNAMIC(rx_itr_setting) &&
-            !ITR_IS_DYNAMIC(tx_itr_setting))) {
-               goto enable_int;
-       }
-
-       if (ITR_IS_DYNAMIC(rx_itr_setting)) {
-               rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-               rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
-       }
-
-       if (ITR_IS_DYNAMIC(tx_itr_setting)) {
-               tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-               txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
-       }
+       /* These will do nothing if dynamic updates are not enabled */
+       i40e_update_itr(q_vector, &q_vector->tx);
+       i40e_update_itr(q_vector, &q_vector->rx);
 
-       if (rx || tx) {
-               /* get the higher of the two ITR adjustments and
-                * use the same value for both ITR registers
-                * when in adaptive mode (Rx and/or Tx)
-                */
-               u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
-               q_vector->tx.itr = q_vector->rx.itr = itr;
-               txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
-               tx = true;
-               rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
-               rx = true;
-       }
-
-       /* only need to enable the interrupt once, but need
-        * to possibly update both ITR values
+       /* This block of logic allows us to get away with only updating
+        * one ITR value with each interrupt. The idea is to perform a
+        * pseudo-lazy update with the following criteria.
+        *
+        * 1. Rx is given higher priority than Tx if both are in the same state
+        * 2. If we must reduce an ITR, that reduction is given highest priority.
+        * 3. We then give priority to increasing ITR based on amount.
         */
-       if (rx) {
-               /* set the INTENA_MSK_MASK so that this first write
-                * won't actually enable the interrupt, instead just
-                * updating the ITR (it's bit 31 PF and VF)
+       if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+               /* Rx ITR needs to be reduced, this is highest priority */
+               intval = i40e_buildreg_itr(I40E_RX_ITR,
+                                          q_vector->rx.target_itr);
+               q_vector->rx.current_itr = q_vector->rx.target_itr;
+               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+                  ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+                   (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+               /* Tx ITR needs to be reduced, this is second priority
+                * Tx ITR needs to be increased more than Rx, fourth priority
                 */
-               rxval |= BIT(31);
-               /* don't check _DOWN because interrupt isn't being enabled */
-               wr32(hw, INTREG(q_vector->reg_idx), rxval);
+               intval = i40e_buildreg_itr(I40E_TX_ITR,
+                                          q_vector->tx.target_itr);
+               q_vector->tx.current_itr = q_vector->tx.target_itr;
+               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+               /* Rx ITR needs to be increased, third priority */
+               intval = i40e_buildreg_itr(I40E_RX_ITR,
+                                          q_vector->rx.target_itr);
+               q_vector->rx.current_itr = q_vector->rx.target_itr;
+               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       } else {
+               /* No ITR update, lowest priority */
+               intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+               if (q_vector->itr_countdown)
+                       q_vector->itr_countdown--;
        }
 
-enable_int:
        if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-               wr32(hw, INTREG(q_vector->reg_idx), txval);
-
-       if (q_vector->itr_countdown)
-               q_vector->itr_countdown--;
-       else
-               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+               wr32(hw, INTREG(q_vector->reg_idx), intval);
 }
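Condensing the branch ladder above to just its selection order, as a hypothetical helper (not driver code):

enum itr_pick { PICK_RX, PICK_TX, PICK_NONE };

/* Mirror of the priority order above: Rx reduction first, then a Tx
 * reduction or the larger Tx increase, then an Rx increase, else nothing.
 */
static enum itr_pick pick_itr(unsigned short rx_cur, unsigned short rx_tgt,
                              unsigned short tx_cur, unsigned short tx_tgt)
{
        if (rx_tgt < rx_cur)
                return PICK_RX;
        if (tx_tgt < tx_cur || (rx_tgt - rx_cur) < (tx_tgt - tx_cur))
                return PICK_TX;
        if (rx_cur != rx_tgt)
                return PICK_RX;
        return PICK_NONE;
}

The unsigned subtractions mirror the original code; they are only reached on the increase paths because the || short-circuits on a Tx reduction.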
 
 /**
index 701b708..3c80ea7 100644 (file)
 #include <net/xdp.h>
 
 /* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR               0x0001  /* reg uses 2 usec resolution */
-#define I40E_ITR_100K              0x0005
-#define I40E_ITR_50K               0x000A
-#define I40E_ITR_20K               0x0019
-#define I40E_ITR_18K               0x001B
-#define I40E_ITR_8K                0x003E
-#define I40E_ITR_4K                0x007A
-#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-                                   I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-                                   I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
-#define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
 #define I40E_DEFAULT_IRQ_WORK      256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheets for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value, which is divided by 2, let's use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC       0x8000  /* use top bit as a flag */
+#define I40E_ITR_MASK          0x1FFE  /* mask for ITR register value */
+#define I40E_MIN_ITR                2  /* reg uses 2 usec resolution */
+#define I40E_ITR_100K              10  /* all values below must be even */
+#define I40E_ITR_50K               20
+#define I40E_ITR_20K               50
+#define I40E_ITR_18K               60
+#define I40E_ITR_8K               122
+#define I40E_MAX_ITR             8160  /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF                (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF                (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
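A quick standalone check of what the reworked macros encode, using the new Rx default (constants copied from the definitions above):

#include <stdio.h>

#define I40E_ITR_DYNAMIC   0x8000
#define I40E_ITR_20K           50
#define I40E_ITR_RX_DEF    (I40E_ITR_20K | I40E_ITR_DYNAMIC)
#define ITR_TO_REG(s)      ((s) & ~I40E_ITR_DYNAMIC)
#define ITR_IS_DYNAMIC(s)  (!!((s) & I40E_ITR_DYNAMIC))

int main(void)
{
        unsigned short setting = I40E_ITR_RX_DEF;

        /* the setting now stores real microseconds plus the dynamic flag */
        printf("dynamic=%d usecs=%d\n",
               ITR_IS_DYNAMIC(setting), ITR_TO_REG(setting));
        /* prints: dynamic=1 usecs=50, i.e. ~20K ints/sec */
        return 0;
}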
 /* 0x40 is the enable bit for interrupt rate limiting, and must be set if
  * the value of the rate limit is non-zero
  */
 #define INTRL_ENA                  BIT(6)
+#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
 #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+
 /**
  * i40e_intrl_usec_to_reg - convert interrupt rate limit to register
  * @intrl: interrupt rate limit to convert
@@ -382,8 +387,7 @@ struct i40e_ring {
         * these values always store the USER setting, and must be converted
         * before programming to a register.
         */
-       u16 rx_itr_setting;
-       u16 tx_itr_setting;
+       u16 itr_setting;
 
        u16 count;                      /* Number of descriptors */
        u16 reg_idx;                    /* HW register index of the ring */
@@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
        ring->flags |= I40E_TXR_FLAGS_XDP;
 }
 
-enum i40e_latency_range {
-       I40E_LOWEST_LATENCY = 0,
-       I40E_LOW_LATENCY = 1,
-       I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC      0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS    0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS    0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY      0x8000
+#define I40E_ITR_ADAPTIVE_BULK         0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
 
 struct i40e_ring_container {
-       /* array of pointers to rings */
-       struct i40e_ring *ring;
+       struct i40e_ring *ring;         /* pointer to linked list of ring(s) */
+       unsigned long next_update;      /* jiffies value of next update */
        unsigned int total_bytes;       /* total bytes processed this int */
        unsigned int total_packets;     /* total packets processed this int */
-       unsigned long last_itr_update;  /* jiffies of last ITR update */
        u16 count;
-       enum i40e_latency_range latency_range;
-       u16 itr;
+       u16 target_itr;                 /* target ITR setting for ring(s) */
+       u16 current_itr;                /* current ITR setting for ring(s) */
 };
 
 /* iterator for handling rings in ring container */
@@ -501,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
 void i40e_free_rx_resources(struct i40e_ring *rx_ring);
 int i40e_napi_poll(struct napi_struct *napi, int budget);
 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
index cd294e6..b0eed8c 100644 (file)
@@ -39,7 +39,7 @@
 #define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
 
 #define I40E_MAX_VSI_QP                        16
-#define I40E_MAX_VF_VSI                        3
+#define I40E_MAX_VF_VSI                        4
 #define I40E_MAX_CHAINED_RX_BUFFERS    5
 #define I40E_MAX_PF_UDP_OFFLOAD_PORTS  16
 
index e9309fb..e23975c 100644 (file)
@@ -257,6 +257,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id,
        return pf_queue_id;
 }
 
+/**
+ * i40e_get_real_pf_qid
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queue_id: queue number
+ *
+ * wrapper function to get pf_queue_id, handling the ADq case as well
+ **/
+static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id)
+{
+       int i;
+
+       if (vf->adq_enabled) {
+               /* Although the VF considers all the queues (1 to 16) as its
+                * own, they may actually belong to different VSIs (up to 4).
+                * We need to find out which queue belongs to which VSI.
+                */
+               for (i = 0; i < vf->num_tc; i++) {
+                       if (queue_id < vf->ch[i].num_qps) {
+                               vsi_id = vf->ch[i].vsi_id;
+                               break;
+                       }
+                       /* find the right queue id which is relative to
+                        * the given VSI.
+                        */
+                       queue_id -= vf->ch[i].num_qps;
+               }
+       }
+
+       return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id);
+}
+
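To make the translation concrete: with four hypothetical channels of 4 queue pairs each, VF queue 9 lands in the third channel as its queue 1. A standalone sketch of the same walk:

#include <stdio.h>

struct ch { unsigned short vsi_id, num_qps; };

static unsigned short real_qid(const struct ch *ch, int num_tc,
                               unsigned short *vsi_id, unsigned short qid)
{
        int i;

        for (i = 0; i < num_tc; i++) {
                if (qid < ch[i].num_qps) {
                        *vsi_id = ch[i].vsi_id;
                        break;
                }
                qid -= ch[i].num_qps;   /* make qid relative to the next VSI */
        }
        return qid;
}

int main(void)
{
        struct ch ch[4] = { {10, 4}, {11, 4}, {12, 4}, {13, 4} };
        unsigned short vsi_id = 0;
        unsigned short qid = real_qid(ch, 4, &vsi_id, 9);

        printf("vsi_id=%d qid=%d\n", vsi_id, qid); /* vsi_id=12 qid=1 */
        return 0;
}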
 /**
  * i40e_config_irq_link_list
  * @vf: pointer to the VF info
@@ -310,7 +342,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 
        vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
        qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
-       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+       pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id);
        reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
 
        wr32(hw, reg_idx, reg);
@@ -333,8 +365,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
                if (next_q < size) {
                        vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
                        qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
-                       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id,
-                                                             vsi_queue_id);
+                       pf_queue_id = i40e_get_real_pf_qid(vf,
+                                                          vsi_id,
+                                                          vsi_queue_id);
                } else {
                        pf_queue_id = I40E_QUEUE_END_OF_LIST;
                        qtype = 0;
@@ -669,18 +702,20 @@ error_param:
 /**
  * i40e_alloc_vsi_res
  * @vf: pointer to the VF info
- * @type: type of VSI to allocate
+ * @idx: VSI index, applies only for ADq mode, zero otherwise
  *
  * alloc VF vsi context & resources
  **/
-static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
+static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
 {
        struct i40e_mac_filter *f = NULL;
        struct i40e_pf *pf = vf->pf;
        struct i40e_vsi *vsi;
+       u64 max_tx_rate = 0;
        int ret = 0;
 
-       vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id);
+       vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
+                            vf->vf_id);
 
        if (!vsi) {
                dev_err(&pf->pdev->dev,
@@ -689,7 +724,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
                ret = -ENOENT;
                goto error_alloc_vsi_res;
        }
-       if (type == I40E_VSI_SRIOV) {
+
+       if (!idx) {
                u64 hena = i40e_pf_get_default_rss_hena(pf);
                u8 broadcast[ETH_ALEN];
 
@@ -721,17 +757,29 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
                spin_unlock_bh(&vsi->mac_filter_hash_lock);
                wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
                wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
+               /* program mac filter only for VF VSI */
+               ret = i40e_sync_vsi_filters(vsi);
+               if (ret)
+                       dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
        }
 
-       /* program mac filter */
-       ret = i40e_sync_vsi_filters(vsi);
-       if (ret)
-               dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+       /* for ADq, store the VSI index and id; the mac filter above is only
+        * programmed for the primary VF VSI
+        */
+       if (vf->adq_enabled) {
+               vf->ch[idx].vsi_idx = vsi->idx;
+               vf->ch[idx].vsi_id = vsi->id;
+       }
 
        /* Set VF bandwidth if specified */
        if (vf->tx_rate) {
+               max_tx_rate = vf->tx_rate;
+       } else if (vf->ch[idx].max_tx_rate) {
+               max_tx_rate = vf->ch[idx].max_tx_rate;
+       }
+
+       if (max_tx_rate) {
+               max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR);
                ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
-                                                 vf->tx_rate / 50, 0, NULL);
+                                                 max_tx_rate, 0, NULL);
                if (ret)
                        dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
                                vf->vf_id, ret);
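The replaced open-coded "/ 50" and I40E_BW_CREDIT_DIVISOR are the same conversion: the firmware takes the limit in 50 Mbps credits (assuming the rate is carried in Mbps, as with ndo_set_vf_rate). A minimal illustration:

#include <stdio.h>

#define I40E_BW_CREDIT_DIVISOR 50       /* matches the removed "/ 50" above */

int main(void)
{
        unsigned long long max_tx_rate = 300;   /* requested cap in Mbps */

        printf("%llu credits\n", max_tx_rate / I40E_BW_CREDIT_DIVISOR);
        /* prints: 6 credits, each worth 50 Mbps */
        return 0;
}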
@@ -741,6 +789,92 @@ error_alloc_vsi_res:
        return ret;
 }
 
+/**
+ * i40e_map_pf_queues_to_vsi
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the first part, VSILAN_QTABLE, mapping PF queues
+ * to the VSI.
+ **/
+static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       u32 reg, num_tc = 1; /* VF has at least one traffic class */
+       u16 vsi_id, qps;
+       int i, j;
+
+       if (vf->adq_enabled)
+               num_tc = vf->num_tc;
+
+       for (i = 0; i < num_tc; i++) {
+               if (vf->adq_enabled) {
+                       qps = vf->ch[i].num_qps;
+                       vsi_id = vf->ch[i].vsi_id;
+               } else {
+                       qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+                       vsi_id = vf->lan_vsi_id;
+               }
+
+               for (j = 0; j < 7; j++) {
+                       if (j * 2 >= qps) {
+                               /* end of list */
+                               reg = 0x07FF07FF;
+                       } else {
+                               u16 qid = i40e_vc_get_pf_queue_id(vf,
+                                                                 vsi_id,
+                                                                 j * 2);
+                               reg = qid;
+                               qid = i40e_vc_get_pf_queue_id(vf, vsi_id,
+                                                             (j * 2) + 1);
+                               reg |= qid << 16;
+                       }
+                       i40e_write_rx_ctl(hw,
+                                         I40E_VSILAN_QTABLE(j, vsi_id),
+                                         reg);
+               }
+       }
+}
+
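Each VSILAN_QTABLE word written above packs two PF queue ids, one per 16-bit halfword, with the all-ones queue id 0x7FF marking end-of-list (hence 0x07FF07FF). A minimal sketch of the packing:

#include <assert.h>

/* Pack an even/odd queue pair the way the loop above builds "reg". */
static unsigned int pack_qtable(unsigned short q_even, unsigned short q_odd)
{
        return (unsigned int)q_even | ((unsigned int)q_odd << 16);
}

int main(void)
{
        assert(pack_qtable(0x7FF, 0x7FF) == 0x07FF07FF); /* end of list */
        assert(pack_qtable(8, 9) == 0x00090008);         /* queues 8 and 9 */
        return 0;
}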
+/**
+ * i40e_map_pf_to_vf_queues
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the second part, VPLAN_QTABLE, and completes the
+ * VF mappings.
+ **/
+static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       u32 reg, total_qps = 0;
+       u32 qps, num_tc = 1; /* VF has at least one traffic class */
+       u16 vsi_id, qid;
+       int i, j;
+
+       if (vf->adq_enabled)
+               num_tc = vf->num_tc;
+
+       for (i = 0; i < num_tc; i++) {
+               if (vf->adq_enabled) {
+                       qps = vf->ch[i].num_qps;
+                       vsi_id = vf->ch[i].vsi_id;
+               } else {
+                       qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+                       vsi_id = vf->lan_vsi_id;
+               }
+
+               for (j = 0; j < qps; j++) {
+                       qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j);
+
+                       reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
+                       wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id),
+                            reg);
+                       total_qps++;
+               }
+       }
+}
+
 /**
  * i40e_enable_vf_mappings
  * @vf: pointer to the VF info
@@ -751,8 +885,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
 {
        struct i40e_pf *pf = vf->pf;
        struct i40e_hw *hw = &pf->hw;
-       u32 reg, total_queue_pairs = 0;
-       int j;
+       u32 reg;
 
        /* Tell the hardware we're using noncontiguous mapping. HW requires
         * that VF queues be mapped using this method, even when they are
@@ -765,30 +898,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
        reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
        wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
 
-       /* map PF queues to VF queues */
-       for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) {
-               u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j);
-
-               reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
-               wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg);
-               total_queue_pairs++;
-       }
-
-       /* map PF queues to VSI */
-       for (j = 0; j < 7; j++) {
-               if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) {
-                       reg = 0x07FF07FF;       /* unused */
-               } else {
-                       u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
-                                                         j * 2);
-                       reg = qid;
-                       qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
-                                                     (j * 2) + 1);
-                       reg |= qid << 16;
-               }
-               i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id),
-                                 reg);
-       }
+       i40e_map_pf_to_vf_queues(vf);
+       i40e_map_pf_queues_to_vsi(vf);
 
        i40e_flush(hw);
 }
@@ -824,7 +935,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
        struct i40e_pf *pf = vf->pf;
        struct i40e_hw *hw = &pf->hw;
        u32 reg_idx, reg;
-       int i, msix_vf;
+       int i, j, msix_vf;
 
        /* Start by disabling VF's configuration API to prevent the OS from
         * accessing the VF's VSI after it's freed / invalidated.
@@ -846,6 +957,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
                vf->lan_vsi_id = 0;
                vf->num_mac = 0;
        }
+
+       /* do the accounting and remove additional ADq VSIs */
+       if (vf->adq_enabled && vf->ch[0].vsi_idx) {
+               for (j = 0; j < vf->num_tc; j++) {
+                       /* At this point VSI0 is already released so don't
+                        * release it again and only clear their values in
+                        * structure variables
+                        */
+                       if (j)
+                               i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]);
+                       vf->ch[j].vsi_idx = 0;
+                       vf->ch[j].vsi_id = 0;
+               }
+       }
        msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
 
        /* disable interrupts so the VF starts in a known state */
@@ -891,7 +1016,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
 {
        struct i40e_pf *pf = vf->pf;
        int total_queue_pairs = 0;
-       int ret;
+       int ret, idx;
 
        if (vf->num_req_queues &&
            vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
@@ -900,11 +1025,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
                pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
 
        /* allocate hw vsi context & associated resources */
-       ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
+       ret = i40e_alloc_vsi_res(vf, 0);
        if (ret)
                goto error_alloc;
        total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
 
+       /* allocate additional VSIs based on tc information for ADq */
+       if (vf->adq_enabled) {
+               if (pf->queues_left >=
+                   (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) {
+                       /* TC 0 always belongs to VF VSI */
+                       for (idx = 1; idx < vf->num_tc; idx++) {
+                               ret = i40e_alloc_vsi_res(vf, idx);
+                               if (ret)
+                                       goto error_alloc;
+                       }
+                       /* send correct number of queues */
+                       total_queue_pairs = I40E_MAX_VF_QUEUES;
+               } else {
+                       dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n",
+                                vf->vf_id);
+                       vf->adq_enabled = false;
+               }
+       }
+
        /* We account for each VF to get a default number of queue pairs.  If
         * the VF has now requested more, we need to account for that to make
         * certain we never request more queues than we actually have left in
@@ -1536,6 +1680,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
                                      sizeof(struct virtchnl_version_info));
 }
 
+/**
+ * i40e_del_qch - delete all the additional VSIs created as a part of ADq
+ * @vf: pointer to VF structure
+ **/
+static void i40e_del_qch(struct i40e_vf *vf)
+{
+       struct i40e_pf *pf = vf->pf;
+       int i;
+
+       /* The first element in the array belongs to the primary VF VSI and we
+        * shouldn't delete it. We should, however, delete the rest of the
+        * VSIs created.
+        */
+       for (i = 1; i < vf->num_tc; i++) {
+               if (vf->ch[i].vsi_idx) {
+                       i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]);
+                       vf->ch[i].vsi_idx = 0;
+                       vf->ch[i].vsi_id = 0;
+               }
+       }
+}
+
 /**
  * i40e_vc_get_vf_resources_msg
  * @vf: pointer to the VF info
@@ -1631,6 +1796,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
        if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
                vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
 
+       if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)
+               vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ;
+
        vfres->num_vsis = num_vsis;
        vfres->num_queue_pairs = vf->num_queue_pairs;
        vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@ -1855,27 +2023,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
            (struct virtchnl_vsi_queue_config_info *)msg;
        struct virtchnl_queue_pair_info *qpi;
        struct i40e_pf *pf = vf->pf;
-       u16 vsi_id, vsi_queue_id;
+       u16 vsi_id, vsi_queue_id = 0;
        i40e_status aq_ret = 0;
-       int i;
+       int i, j = 0, idx = 0;
+
+       vsi_id = qci->vsi_id;
 
        if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
                aq_ret = I40E_ERR_PARAM;
                goto error_param;
        }
 
-       vsi_id = qci->vsi_id;
        if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
                aq_ret = I40E_ERR_PARAM;
                goto error_param;
        }
+
        for (i = 0; i < qci->num_queue_pairs; i++) {
                qpi = &qci->qpair[i];
-               vsi_queue_id = qpi->txq.queue_id;
-               if ((qpi->txq.vsi_id != vsi_id) ||
-                   (qpi->rxq.vsi_id != vsi_id) ||
-                   (qpi->rxq.queue_id != vsi_queue_id) ||
-                   !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
+
+               if (!vf->adq_enabled) {
+                       vsi_queue_id = qpi->txq.queue_id;
+
+                       if (qpi->txq.vsi_id != qci->vsi_id ||
+                           qpi->rxq.vsi_id != qci->vsi_id ||
+                           qpi->rxq.queue_id != vsi_queue_id) {
+                               aq_ret = I40E_ERR_PARAM;
+                               goto error_param;
+                       }
+               }
+
+               if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
                        aq_ret = I40E_ERR_PARAM;
                        goto error_param;
                }
@@ -1887,9 +2065,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                        aq_ret = I40E_ERR_PARAM;
                        goto error_param;
                }
+
+               /* For ADq there can be up to 4 VSIs with max 4 queues each.
+                * The VF does not know about these additional VSIs and all
+                * it cares about is its own queues. The PF configures these
+                * queues on the appropriate VSIs based on the TC mapping.
+                */
+               if (vf->adq_enabled) {
+                       if (j == (vf->ch[idx].num_qps - 1)) {
+                               idx++;
+                               j = 0; /* resetting the queue count */
+                               vsi_queue_id = 0;
+                       } else {
+                               j++;
+                               vsi_queue_id++;
+                       }
+                       vsi_id = vf->ch[idx].vsi_id;
+               }
        }
        /* set vsi num_queue_pairs in use to num configured by VF */
-       pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs;
+       if (!vf->adq_enabled) {
+               pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
+                       qci->num_queue_pairs;
+       } else {
+               for (i = 0; i < vf->num_tc; i++)
+                       pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
+                              vf->ch[i].num_qps;
+       }
 
 error_param:
        /* send the response to the VF */
@@ -1897,6 +2099,33 @@ error_param:
                                       aq_ret);
 }
 
+/**
+ * i40e_validate_queue_map
+ * @vsi_id: vsi id
+ * @queuemap: Tx or Rx queue map
+ *
+ * check if Tx or Rx queue map is valid
+ **/
+static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
+                                  unsigned long queuemap)
+{
+       u16 vsi_queue_id, queue_id;
+
+       for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
+               if (vf->adq_enabled) {
+                       vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
+                       queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
+               } else {
+                       queue_id = vsi_queue_id;
+               }
+
+               if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
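A standalone illustration of the ADq branch above, assuming I40E_MAX_VF_VSI == 4 (per the i40e_type.h change earlier) and I40E_DEFAULT_QUEUES_PER_VF == 4 (an assumption for illustration): VF queue 5 resolves to channel 1, queue 1:

#include <stdio.h>

#define MAX_VF_VSI            4 /* from i40e_type.h above */
#define DEFAULT_QUEUES_PER_VF 4 /* assumed for illustration */

int main(void)
{
        unsigned long queuemap = 0x21;  /* VF queues 0 and 5 are set */
        unsigned int q;

        for (q = 0; q < 16; q++) {
                if (!(queuemap & (1UL << q)))
                        continue;
                printf("VF queue %u -> channel %u, queue %u\n",
                       q, q / MAX_VF_VSI, q % DEFAULT_QUEUES_PER_VF);
        }
        /* prints: queue 0 -> channel 0 q 0, queue 5 -> channel 1 q 1 */
        return 0;
}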
 /**
  * i40e_vc_config_irq_map_msg
  * @vf: pointer to the VF info
@@ -1911,9 +2140,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
        struct virtchnl_irq_map_info *irqmap_info =
            (struct virtchnl_irq_map_info *)msg;
        struct virtchnl_vector_map *map;
-       u16 vsi_id, vsi_queue_id, vector_id;
+       u16 vsi_id, vector_id;
        i40e_status aq_ret = 0;
-       unsigned long tempmap;
        int i;
 
        if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
@@ -1923,7 +2151,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
        for (i = 0; i < irqmap_info->num_vectors; i++) {
                map = &irqmap_info->vecmap[i];
-
                vector_id = map->vector_id;
                vsi_id = map->vsi_id;
                /* validate msg params */
@@ -1933,23 +2160,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                        goto error_param;
                }
 
-               /* lookout for the invalid queue index */
-               tempmap = map->rxq_map;
-               for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
-                       if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
-                                                     vsi_queue_id)) {
-                               aq_ret = I40E_ERR_PARAM;
-                               goto error_param;
-                       }
+               if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
                }
 
-               tempmap = map->txq_map;
-               for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
-                       if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
-                                                     vsi_queue_id)) {
-                               aq_ret = I40E_ERR_PARAM;
-                               goto error_param;
-                       }
+               if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
                }
 
                i40e_config_irq_link_list(vf, vsi_id, map);
@@ -1975,6 +2193,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
        struct i40e_pf *pf = vf->pf;
        u16 vsi_id = vqs->vsi_id;
        i40e_status aq_ret = 0;
+       int i;
 
        if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
                aq_ret = I40E_ERR_PARAM;
@@ -1993,6 +2212,16 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
        if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
                aq_ret = I40E_ERR_TIMEOUT;
+
+       /* need to start the rings for additional ADq VSIs as well */
+       if (vf->adq_enabled) {
+               /* zero belongs to LAN VSI */
+               for (i = 1; i < vf->num_tc; i++) {
+                       if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx]))
+                               aq_ret = I40E_ERR_TIMEOUT;
+               }
+       }
+
 error_param:
        /* send the response to the VF */
        return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
@@ -2687,6 +2916,618 @@ err:
                                       aq_ret);
 }
 
+/**
+ * i40e_validate_cloud_filter
+ * @mask: mask for TC filter
+ * @data: data for TC filter
+ *
+ * This function validates cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_validate_cloud_filter(struct i40e_vf *vf,
+                                     struct virtchnl_filter *tc_filter)
+{
+       struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec;
+       struct virtchnl_l4_spec data = tc_filter->data.tcp_spec;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = NULL;
+       struct i40e_mac_filter *f;
+       struct hlist_node *h;
+       bool found = false;
+       int bkt;
+
+       if (!tc_filter->action) {
+               dev_info(&pf->pdev->dev,
+                        "VF %d: Currently ADq doesn't support Drop Action\n",
+                        vf->vf_id);
+               goto err;
+       }
+
+       /* action_meta is TC number here to which the filter is applied */
+       if (!tc_filter->action_meta ||
+           tc_filter->action_meta > I40E_MAX_VF_VSI) {
+               dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
+                        vf->vf_id, tc_filter->action_meta);
+               goto err;
+       }
+
+       /* Check filter if it's programmed for advanced mode or basic mode.
+        * There are two ADq modes (for VF only),
+        * 1. Basic mode: intended to allow as many filter options as possible
+        *                to be added to a VF in Non-trusted mode. Main goal is
+        *                to add filters to its own MAC and VLAN id.
+        * 2. Advanced mode: allows filters to be applied to traffic other
+        *                than the VF's own MAC or VLAN. This mode requires
+        *                the VF to be Trusted.
+        */
+       if (mask.dst_mac[0] && !mask.dst_ip[0]) {
+               vsi = pf->vsi[vf->lan_vsi_idx];
+               f = i40e_find_mac(vsi, data.dst_mac);
+
+               if (!f) {
+                       dev_info(&pf->pdev->dev,
+                                "Destination MAC %pM doesn't belong to VF %d\n",
+                                data.dst_mac, vf->vf_id);
+                       goto err;
+               }
+
+               if (mask.vlan_id) {
+                       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
+                                          hlist) {
+                               if (f->vlan == ntohs(data.vlan_id)) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+                       if (!found) {
+                               dev_info(&pf->pdev->dev,
+                                        "VF %d doesn't have any VLAN id %u\n",
+                                        vf->vf_id, ntohs(data.vlan_id));
+                               goto err;
+                       }
+               }
+       } else {
+               /* Check if VF is trusted */
+               if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+                       dev_err(&pf->pdev->dev,
+                               "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n",
+                               vf->vf_id);
+                       return I40E_ERR_CONFIG;
+               }
+       }
+
+       if (mask.dst_mac[0] & data.dst_mac[0]) {
+               if (is_broadcast_ether_addr(data.dst_mac) ||
+                   is_zero_ether_addr(data.dst_mac)) {
+                       dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n",
+                                vf->vf_id, data.dst_mac);
+                       goto err;
+               }
+       }
+
+       if (mask.src_mac[0] & data.src_mac[0]) {
+               if (is_broadcast_ether_addr(data.src_mac) ||
+                   is_zero_ether_addr(data.src_mac)) {
+                       dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n",
+                                vf->vf_id, data.src_mac);
+                       goto err;
+               }
+       }
+
+       if (mask.dst_port & data.dst_port) {
+               if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) {
+                       dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
+                                vf->vf_id);
+                       goto err;
+               }
+       }
+
+       if (mask.src_port & data.src_port) {
+               if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) {
+                       dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
+                                vf->vf_id);
+                       goto err;
+               }
+       }
+
+       if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW &&
+           tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) {
+               dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n",
+                        vf->vf_id);
+               goto err;
+       }
+
+       if (mask.vlan_id & data.vlan_id) {
+               if (ntohs(data.vlan_id) > I40E_MAX_VLANID) {
+                       dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n",
+                                vf->vf_id);
+                       goto err;
+               }
+       }
+
+       return I40E_SUCCESS;
+err:
+       return I40E_ERR_CONFIG;
+}
+
+/**
+ * i40e_find_vsi_from_seid - searches for the VSI with the given seid
+ * @vf: pointer to the VF info
+ * @seid: seid of the VSI to search for
+ **/
+static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = NULL;
+       int i;
+
+       for (i = 0; i < vf->num_tc; i++) {
+               vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
+               if (vsi && vsi->seid == seid)
+                       return vsi;
+       }
+       return NULL;
+}
+
+/**
+ * i40e_del_all_cloud_filters - delete all cloud filters on the VF
+ * @vf: pointer to the VF info
+ *
+ * This function deletes all cloud filters
+ **/
+static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
+{
+       struct i40e_cloud_filter *cfilter = NULL;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = NULL;
+       struct hlist_node *node;
+       int ret;
+
+       hlist_for_each_entry_safe(cfilter, node,
+                                 &vf->cloud_filter_list, cloud_node) {
+               vsi = i40e_find_vsi_from_seid(vf, cfilter->seid);
+
+               if (!vsi) {
+                       dev_err(&pf->pdev->dev, "VF %d: no VSI found matching seid %u, can't delete cloud filter\n",
+                               vf->vf_id, cfilter->seid);
+                       continue;
+               }
+
+               if (cfilter->dst_port)
+                       ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+                                                               false);
+               else
+                       ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
+               if (ret)
+                       dev_err(&pf->pdev->dev,
+                               "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+                               vf->vf_id, i40e_stat_str(&pf->hw, ret),
+                               i40e_aq_str(&pf->hw,
+                                           pf->hw.aq.asq_last_status));
+
+               hlist_del(&cfilter->cloud_node);
+               kfree(cfilter);
+               vf->num_cloud_filters--;
+       }
+}
+
+/**
+ * i40e_vc_del_cloud_filter - delete a cloud filter from the VF
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function deletes a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+       struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+       struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+       struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+       struct i40e_cloud_filter cfilter, *cf = NULL;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = NULL;
+       struct hlist_node *node;
+       i40e_status aq_ret = 0;
+       int i, ret;
+
+       if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       if (!vf->adq_enabled) {
+               dev_info(&pf->pdev->dev,
+                        "VF %d: ADq not enabled, can't delete cloud filter\n",
+                        vf->vf_id);
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       if (i40e_validate_cloud_filter(vf, vcf)) {
+               dev_info(&pf->pdev->dev,
+                        "VF %d: Invalid input, can't delete cloud filter\n",
+                        vf->vf_id);
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       memset(&cfilter, 0, sizeof(cfilter));
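+       /* rebuild the filter key exactly as it was constructed at add time
+        * so that the hardware delete and the list lookup below both match
+        */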
+       /* parse destination mac address */
+       for (i = 0; i < ETH_ALEN; i++)
+               cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+       /* parse source mac address */
+       for (i = 0; i < ETH_ALEN; i++)
+               cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+       cfilter.vlan_id = mask.vlan_id & tcf.vlan_id;
+       cfilter.dst_port = mask.dst_port & tcf.dst_port;
+       cfilter.src_port = mask.src_port & tcf.src_port;
+
+       switch (vcf->flow_type) {
+       case VIRTCHNL_TCP_V4_FLOW:
+               cfilter.n_proto = ETH_P_IP;
+               if (mask.dst_ip[0] & tcf.dst_ip[0])
+                       memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip,
+                              sizeof(cfilter.ip.v4.dst_ip));
+               else if (mask.src_ip[0] & tcf.src_ip[0])
+                       memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip,
+                              sizeof(cfilter.ip.v4.src_ip));
+               break;
+       case VIRTCHNL_TCP_V6_FLOW:
+               cfilter.n_proto = ETH_P_IPV6;
+               if (mask.dst_ip[3] & tcf.dst_ip[3])
+                       memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip,
+                              sizeof(cfilter.ip.v6.dst_ip6));
+               if (mask.src_ip[3] & tcf.src_ip[3])
+                       memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip,
+                              sizeof(cfilter.ip.v6.src_ip6));
+               break;
+       default:
+               /* TC filter can be configured based on different combinations
+                * and in this case IP is not a part of filter config
+                */
+               dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+                        vf->vf_id);
+       }
+
+       /* get the VSI to which the TC belongs */
+       vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+       cfilter.seid = vsi->seid;
+       cfilter.flags = vcf->field_flags;
+
+       /* Deleting TC filter */
+       if (tcf.dst_port)
+               ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false);
+       else
+               ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+                       vf->vf_id, i40e_stat_str(&pf->hw, ret),
+                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               goto err;
+       }
+
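+       /* walk the VF's filter list and drop the entry whose masked fields
+        * all match the filter just removed from the hardware
+        */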
+       hlist_for_each_entry_safe(cf, node,
+                                 &vf->cloud_filter_list, cloud_node) {
+               if (cf->seid != cfilter.seid)
+                       continue;
+               if (mask.dst_port)
+                       if (cfilter.dst_port != cf->dst_port)
+                               continue;
+               if (mask.dst_mac[0])
+                       if (!ether_addr_equal(cf->dst_mac, cfilter.dst_mac))
+                               continue;
+               /* for ipv4, only the first word of the mask/data is used */
+               if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0])
+                       if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip,
+                                  sizeof(cfilter.ip.v4.dst_ip)))
+                               continue;
+               /* for ipv6, mask is set for all sixteen bytes (4 words) */
+               if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3])
+                       if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6,
+                                  sizeof(cfilter.ip.v6.dst_ip6)))
+                               continue;
+               if (mask.vlan_id)
+                       if (cfilter.vlan_id != cf->vlan_id)
+                               continue;
+
+               hlist_del(&cf->cloud_node);
+               kfree(cf);
+               vf->num_cloud_filters--;
+       }
+
+err:
+       return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_add_cloud_filter - add a cloud filter for the VF
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function adds a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+       struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+       struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+       struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+       struct i40e_cloud_filter *cfilter = NULL;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = NULL;
+       i40e_status aq_ret = 0;
+       int i, ret;
+
+       if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       if (!vf->adq_enabled) {
+               dev_info(&pf->pdev->dev,
+                        "VF %d: ADq is not enabled, can't apply cloud filter\n",
+                        vf->vf_id);
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       if (i40e_validate_cloud_filter(vf, vcf)) {
+               dev_info(&pf->pdev->dev,
+                        "VF %d: Invalid input(s), can't apply cloud filter\n",
+                        vf->vf_id);
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
+       if (!cfilter)
+               return -ENOMEM;
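+       /* note: unlike the err path below, an allocation failure returns
+        * without sending a VIRTCHNL_OP_ADD_CLOUD_FILTER response to the VF
+        */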
+
+       /* parse destination mac address */
+       for (i = 0; i < ETH_ALEN; i++)
+               cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+       /* parse source mac address */
+       for (i = 0; i < ETH_ALEN; i++)
+               cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+       cfilter->vlan_id = mask.vlan_id & tcf.vlan_id;
+       cfilter->dst_port = mask.dst_port & tcf.dst_port;
+       cfilter->src_port = mask.src_port & tcf.src_port;
+
+       switch (vcf->flow_type) {
+       case VIRTCHNL_TCP_V4_FLOW:
+               cfilter->n_proto = ETH_P_IP;
+               if (mask.dst_ip[0] & tcf.dst_ip[0])
+                       memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip,
+                              sizeof(cfilter->ip.v4.dst_ip));
+               else if (mask.src_ip[0] & tcf.src_ip[0])
+                       memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip,
+                              sizeof(cfilter->ip.v4.src_ip));
+               break;
+       case VIRTCHNL_TCP_V6_FLOW:
+               cfilter->n_proto = ETH_P_IPV6;
+               if (mask.dst_ip[3] & tcf.dst_ip[3])
+                       memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip,
+                              sizeof(cfilter->ip.v6.dst_ip6));
+               if (mask.src_ip[3] & tcf.src_ip[3])
+                       memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip,
+                              sizeof(cfilter->ip.v6.src_ip6));
+               break;
+       default:
+               /* TC filter can be configured based on different combinations
+                * and in this case IP is not a part of filter config
+                */
+               dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+                        vf->vf_id);
+       }
+
+       /* get the VSI to which the TC belongs */
+       vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+       cfilter->seid = vsi->seid;
+       cfilter->flags = vcf->field_flags;
+
+       /* Adding cloud filter programmed as TC filter */
+       if (tcf.dst_port)
+               ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+       else
+               ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "VF %d: Failed to add cloud filter, err %s aq_err %s\n",
+                       vf->vf_id, i40e_stat_str(&pf->hw, ret),
+                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               goto err;
+       }
+
+       INIT_HLIST_NODE(&cfilter->cloud_node);
+       hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
+       vf->num_cloud_filters++;
+err:
+       return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_add_qch_msg - add queue channels and enable ADq
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+       struct virtchnl_tc_info *tci =
+               (struct virtchnl_tc_info *)msg;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_link_status *ls = &pf->hw.phy.link_info;
+       int i, adq_request_qps = 0, speed = 0;
+       i40e_status aq_ret = 0;
+
+       if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       /* ADq cannot be applied if spoof check is ON */
+       if (vf->spoofchk) {
+               dev_err(&pf->pdev->dev,
+                       "Spoof check is ON, turn it OFF to enable ADq\n");
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+               dev_err(&pf->pdev->dev,
+                       "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n",
+                       vf->vf_id);
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       /* max number of traffic classes for VF currently capped at 4 */
+       if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
+               dev_err(&pf->pdev->dev,
+                       "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n",
+                       vf->vf_id, tci->num_tc);
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       /* validate queues for each TC */
+       for (i = 0; i < tci->num_tc; i++)
+               if (!tci->list[i].count ||
+                   tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
+                       dev_err(&pf->pdev->dev,
+                               "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n",
+                               vf->vf_id, i, tci->list[i].count);
+                       aq_ret = I40E_ERR_PARAM;
+                       goto err;
+               }
+
+       /* need Max VF queues but already have default number of queues */
+       adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
+
+       if (pf->queues_left < adq_request_qps) {
+               dev_err(&pf->pdev->dev,
+                       "No queues left to allocate to VF %d\n",
+                       vf->vf_id);
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       } else {
+               /* we need to allocate max VF queues to enable ADq so as to
+                * make sure ADq enabled VF always gets back queues when it
+                * goes through a reset.
+                */
+               vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
+       }
+
+       /* get link speed in Mbps to validate rate limit */
+       switch (ls->link_speed) {
+       case VIRTCHNL_LINK_SPEED_100MB:
+               speed = SPEED_100;
+               break;
+       case VIRTCHNL_LINK_SPEED_1GB:
+               speed = SPEED_1000;
+               break;
+       case VIRTCHNL_LINK_SPEED_10GB:
+               speed = SPEED_10000;
+               break;
+       case VIRTCHNL_LINK_SPEED_20GB:
+               speed = SPEED_20000;
+               break;
+       case VIRTCHNL_LINK_SPEED_25GB:
+               speed = SPEED_25000;
+               break;
+       case VIRTCHNL_LINK_SPEED_40GB:
+               speed = SPEED_40000;
+               break;
+       default:
+               dev_err(&pf->pdev->dev,
+                       "Cannot detect link speed\n");
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
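+       /* the max_tx_rate values below are expressed in Mbps (cf. the VF
+        * driver's I40EVF_MBPS_DIVISOR), so they can be compared directly
+        * against the SPEED_* Mbps values chosen above
+        */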
+       /* parse data from the queue channel info */
+       vf->num_tc = tci->num_tc;
+       for (i = 0; i < vf->num_tc; i++) {
+               if (tci->list[i].max_tx_rate) {
+                       if (tci->list[i].max_tx_rate > speed) {
+                               dev_err(&pf->pdev->dev,
+                                       "Invalid max tx rate %llu specified for VF %d.\n",
+                                       tci->list[i].max_tx_rate,
+                                       vf->vf_id);
+                               aq_ret = I40E_ERR_PARAM;
+                               goto err;
+                       } else {
+                               vf->ch[i].max_tx_rate =
+                                       tci->list[i].max_tx_rate;
+                       }
+               }
+               vf->ch[i].num_qps = tci->list[i].count;
+       }
+
+       /* set this flag only after making sure all inputs are sane */
+       vf->adq_enabled = true;
+       /* num_req_queues is set when the user changes the number of queues
+        * via ethtool, and this causes issues for the default VSI (which
+        * depends on this variable) when ADq is enabled, hence reset it.
+        */
+       vf->num_req_queues = 0;
+
+       /* reset the VF in order to allocate resources */
+       i40e_vc_notify_vf_reset(vf);
+       i40e_reset_vf(vf, false);
+
+       return I40E_SUCCESS;
+
+       /* send the response to the VF */
+err:
+       return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_del_qch_msg - delete queue channels and disable ADq
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+       struct i40e_pf *pf = vf->pf;
+       i40e_status aq_ret = 0;
+
+       if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       if (vf->adq_enabled) {
+               i40e_del_all_cloud_filters(vf);
+               i40e_del_qch(vf);
+               vf->adq_enabled = false;
+               vf->num_tc = 0;
+               dev_info(&pf->pdev->dev,
+                        "Deleting Queue Channels and cloud filters for ADq on VF %d\n",
+                        vf->vf_id);
+       } else {
+               dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
+                        vf->vf_id);
+               aq_ret = I40E_ERR_PARAM;
+       }
+
+       /* reset the VF in order to allocate resources */
+       i40e_vc_notify_vf_reset(vf);
+       i40e_reset_vf(vf, false);
+
+       return I40E_SUCCESS;
+
+err:
+       return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS,
+                                      aq_ret);
+}
+
 /**
  * i40e_vc_process_vf_msg
  * @pf: pointer to the PF structure
@@ -2816,7 +3657,18 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
        case VIRTCHNL_OP_REQUEST_QUEUES:
                ret = i40e_vc_request_queues_msg(vf, msg, msglen);
                break;
-
+       case VIRTCHNL_OP_ENABLE_CHANNELS:
+               ret = i40e_vc_add_qch_msg(vf, msg);
+               break;
+       case VIRTCHNL_OP_DISABLE_CHANNELS:
+               ret = i40e_vc_del_qch_msg(vf, msg);
+               break;
+       case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+               ret = i40e_vc_add_cloud_filter(vf, msg);
+               break;
+       case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+               ret = i40e_vc_del_cloud_filter(vf, msg);
+               break;
        case VIRTCHNL_OP_UNKNOWN:
        default:
                dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
@@ -3382,6 +4234,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
        i40e_vc_disable_vf(vf);
        dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
                 vf_id, setting ? "" : "un");
+
+       if (vf->adq_enabled && !vf->trusted) {
+               dev_info(&pf->pdev->dev,
+                        "VF %u no longer Trusted, deleting all cloud filters\n",
+                        vf_id);
+               i40e_del_all_cloud_filters(vf);
+       }
+
 out:
        return ret;
 }
index 5efc4f9..6852599 100644 (file)
@@ -69,6 +69,19 @@ enum i40e_vf_capabilities {
        I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
+/* In ADq, max 4 VSIs can be allocated per VF including the primary VF VSI.
+ * These variables are used to store indices, IDs and the number of queues
+ * for each VSI including that of the primary VF VSI. Each traffic class is
+ * termed a channel, and each channel can in turn have up to 4 queues, for
+ * a maximum of 16 queues per VF.
+ */
+struct i40evf_channel {
+       u16 vsi_idx; /* index in PF struct for all channel VSIs */
+       u16 vsi_id; /* VSI ID used by firmware */
+       u16 num_qps; /* number of queue pairs requested by user */
+       u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
+};
+
 /* VF information structure */
 struct i40e_vf {
        struct i40e_pf *pf;
@@ -111,6 +124,13 @@ struct i40e_vf {
        u16 num_mac;
        u16 num_vlan;
 
+       /* ADq related variables */
+       bool adq_enabled; /* flag to enable adq */
+       u8 num_tc;
+       struct i40evf_channel ch[I40E_MAX_VF_VSI];
+       struct hlist_head cloud_filter_list;
+       u16 num_cloud_filters;
+
        /* RDMA Client */
        struct virtchnl_iwarp_qvlist_info *qvlist_info;
 };
index 357d605..e088d23 100644 (file)
@@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
                         */
                        smp_rmb();
                        tx_ring->tx_stats.prev_pkt_ctr =
-                         i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+                         i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
                }
        }
 }
@@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
             val);
 }
 
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+                                       struct i40e_ring_container *rc)
+{
+       return &q_vector->rx == rc;
+}
+
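+/* Scale the avg_wire_size based ITR correction by link speed: a faster
+ * link moves the same byte count in less time, so it gets a larger
+ * divisor and hence a smaller ITR increase per interval.
+ */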
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+       unsigned int divisor;
+
+       switch (q_vector->adapter->link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+               break;
+       case I40E_LINK_SPEED_25GB:
+       case I40E_LINK_SPEED_20GB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+               break;
+       default:
+       case I40E_LINK_SPEED_10GB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+               break;
+       case I40E_LINK_SPEED_1GB:
+       case I40E_LINK_SPEED_100MB:
+               divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+               break;
+       }
+
+       return divisor;
+}
+
 /**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
  * @rc: structure containing ring performance data
  *
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt.  The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern.  Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
  * while increasing bulk throughput.
  **/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+                           struct i40e_ring_container *rc)
 {
-       enum i40e_latency_range new_latency_range = rc->latency_range;
-       u32 new_itr = rc->itr;
-       int bytes_per_usec;
-       unsigned int usecs, estimated_usecs;
+       unsigned int avg_wire_size, packets, bytes, itr;
+       unsigned long next_update = jiffies;
 
-       if (rc->total_packets == 0 || !rc->itr)
-               return false;
+       /* If we don't have any rings just leave ourselves set for maximum
+        * possible latency so we take ourselves out of the equation.
+        */
+       if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+               return;
+
+       /* For Rx we want to push the delay up and default to low latency.
+        * for Tx we want to pull the delay down and default to high latency.
+        */
+       itr = i40e_container_is_rx(q_vector, rc) ?
+             I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+             I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+       /* If we have not updated within the last 1 - 2 jiffies we can assume
+        * that either packets are coming in so slowly there hasn't been
+        * any work, or that there is so much work that NAPI is dealing
+        * with interrupt moderation and we don't need to do anything.
+        */
+       if (time_after(next_update, rc->next_update))
+               goto clear_counts;
+
+       /* If itr_countdown is set it means we programmed an ITR within
+        * the last 4 interrupt cycles. This has a side effect of us
+        * potentially firing an early interrupt. In order to work around
+        * this we need to throw out any data received for a few
+        * interrupts following the update.
+        */
+       if (q_vector->itr_countdown) {
+               itr = rc->target_itr;
+               goto clear_counts;
+       }
+
+       packets = rc->total_packets;
+       bytes = rc->total_bytes;
 
-       usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
-       bytes_per_usec = rc->total_bytes / usecs;
+       if (i40e_container_is_rx(q_vector, rc)) {
+               /* If this is Rx and there are 1 to 3 packets totalling less
+                * than 9000 bytes, assume there is insufficient data to use
+                * the bulk rate limiting approach unless Tx is already in
+                * bulk rate limiting. We are likely latency driven.
+                */
+               if (packets && packets < 4 && bytes < 9000 &&
+                   (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+                       itr = I40E_ITR_ADAPTIVE_LATENCY;
+                       goto adjust_by_size;
+               }
+       } else if (packets < 4) {
+               /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+                * bulk mode and we are receiving fewer than 4 packets, just
+                * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+                * that the Rx can relax.
+                */
+               if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+                   (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+                    I40E_ITR_ADAPTIVE_MAX_USECS)
+                       goto clear_counts;
+       } else if (packets > 32) {
+               /* If we have processed over 32 packets in a single interrupt
+                * for Tx assume we need to switch over to "bulk" mode.
+                */
+               rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+       }
 
-       /* The calculations in this algorithm depend on interrupts actually
-        * firing at the ITR rate. This may not happen if the packet rate is
-        * really low, or if we've been napi polling. Check to make sure
-        * that's not the case before we continue.
+       /* We have no packets to actually measure against. This means
+        * either one of the other queues on this vector is active or
+        * we are a Tx queue doing TSO with too high of an interrupt rate.
+        *
+        * Between 4 and 56 we can assume that our current interrupt delay
+        * is only slightly too low. As such we should increase it by a small
+        * fixed amount.
         */
-       estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
-       if (estimated_usecs > usecs) {
-               new_latency_range = I40E_LOW_LATENCY;
-               goto reset_latency;
+       if (packets < 56) {
+               itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+               if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+                       itr &= I40E_ITR_ADAPTIVE_LATENCY;
+                       itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+               }
+               goto clear_counts;
+       }
+
+       if (packets <= 256) {
+               itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+               itr &= I40E_ITR_MASK;
+
+               /* Between 56 and 112 is our "goldilocks" zone where we are
+                * working out "just right". Just report that our current
+                * ITR is good for us.
+                */
+               if (packets <= 112)
+                       goto clear_counts;
+
+               /* If the packet count is above 112 we are likely looking
+                * at a slight overrun of the delay we want. Try halving
+                * our delay to see if that will cut the number of packets
+                * in half per interrupt.
+                */
+               itr /= 2;
+               itr &= I40E_ITR_MASK;
+               if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+                       itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+               goto clear_counts;
        }
 
-       /* simple throttlerate management
-        *   0-10MB/s   lowest (50000 ints/s)
-        *  10-20MB/s   low    (20000 ints/s)
-        *  20-1249MB/s bulk   (18000 ints/s)
+       /* The paths below assume we are dealing with a bulk ITR since
+        * number of packets is greater than 256. We are just going to have
+        * to compute a value and try to bring the count under control,
+        * though for smaller packet sizes there isn't much we can do as
+        * NAPI polling will likely be kicking in sooner rather than later.
+        */
+       itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+       /* If packet counts are 256 or greater we can assume we have a gross
+        * overestimation of what the rate should be. Instead of trying to fine
+        * tune it just use the formula below to try and dial in an exact value
+        * given the current packet size of the frame.
+        */
+       avg_wire_size = bytes / packets;
+
+       /* The following is a crude approximation of:
+        *  wmem_default / (size + overhead) = desired_pkts_per_int
+        *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+        *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
         *
-        * The math works out because the divisor is in 10^(-6) which
-        * turns the bytes/us input value into MB/s values, but
-        * make sure to use usecs, as the register values written
-        * are in 2 usec increments in the ITR registers, and make sure
-        * to use the smoothed values that the countdown timer gives us.
+        * Assuming wmem_default is 212992 and overhead is 640 bytes per
+        * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+        * formula down to
+        *
+        *  (170 * (size + 24)) / (size + 640) = ITR
+        *
+        * We first do some math on the packet size and then finally bitshift
+        * by 8 after rounding up. We also have to account for the link speed
+        * difference, as the ITR correction scales based on it.
         */
-       switch (new_latency_range) {
-       case I40E_LOWEST_LATENCY:
-               if (bytes_per_usec > 10)
-                       new_latency_range = I40E_LOW_LATENCY;
-               break;
-       case I40E_LOW_LATENCY:
-               if (bytes_per_usec > 20)
-                       new_latency_range = I40E_BULK_LATENCY;
-               else if (bytes_per_usec <= 10)
-                       new_latency_range = I40E_LOWEST_LATENCY;
-               break;
-       case I40E_BULK_LATENCY:
-       default:
-               if (bytes_per_usec <= 20)
-                       new_latency_range = I40E_LOW_LATENCY;
-               break;
+       if (avg_wire_size <= 60) {
+               /* Start at 250k ints/sec */
+               avg_wire_size = 4096;
+       } else if (avg_wire_size <= 380) {
+               /* 250K ints/sec to 60K ints/sec */
+               avg_wire_size *= 40;
+               avg_wire_size += 1696;
+       } else if (avg_wire_size <= 1084) {
+               /* 60K ints/sec to 36K ints/sec */
+               avg_wire_size *= 15;
+               avg_wire_size += 11452;
+       } else if (avg_wire_size <= 1980) {
+               /* 36K ints/sec to 30K ints/sec */
+               avg_wire_size *= 5;
+               avg_wire_size += 22420;
+       } else {
+               /* plateau at a limit of 30K ints/sec */
+               avg_wire_size = 32256;
        }
 
-reset_latency:
-       rc->latency_range = new_latency_range;
+       /* If we are in low latency mode halve our delay which doubles the
+        * rate to somewhere between 100K to 16K ints/sec
+        */
+       if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+               avg_wire_size /= 2;
 
-       switch (new_latency_range) {
-       case I40E_LOWEST_LATENCY:
-               new_itr = I40E_ITR_50K;
-               break;
-       case I40E_LOW_LATENCY:
-               new_itr = I40E_ITR_20K;
-               break;
-       case I40E_BULK_LATENCY:
-               new_itr = I40E_ITR_18K;
-               break;
-       default:
-               break;
+       /* Resultant value is 256 times larger than it needs to be. This
+        * gives us room to adjust the value as needed to either increase
+        * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+        *
+        * Use addition as we have already recorded the new latency flag
+        * for the ITR value.
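+        *
+        * Worked example, assuming a 40GbE link (divisor 2 * 1024 = 2048):
+        * a 60-byte average frame maps to avg_wire_size = 4096 above, so
+        * the addition below works out to DIV_ROUND_UP(4096, 2048) * 2 =
+        * 4 usecs, i.e. roughly the 250K ints/sec starting point noted
+        * earlier.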
+        */
+       itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+              I40E_ITR_ADAPTIVE_MIN_INC;
+
+       if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+               itr &= I40E_ITR_ADAPTIVE_LATENCY;
+               itr += I40E_ITR_ADAPTIVE_MAX_USECS;
        }
 
+clear_counts:
+       /* write back value */
+       rc->target_itr = itr;
+
+       /* next update should occur within next jiffy */
+       rc->next_update = next_update + 1;
+
        rc->total_bytes = 0;
        rc->total_packets = 0;
-       rc->last_itr_update = jiffies;
-
-       if (new_itr != rc->itr) {
-               rc->itr = new_itr;
-               return true;
-       }
-       return false;
 }
 
 /**
@@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
  * @rx_buffer: rx buffer to pull data from
  *
  * This function will clean up the contents of the rx_buffer.  It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
  */
 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
                               struct i40e_rx_buffer *rx_buffer)
@@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        return failure ? budget : (int)total_rx_packets;
 }
 
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 {
        u32 val;
 
+       /* We don't bother with setting the CLEARPBA bit as the data sheet
+        * points out doing so is "meaningless since it was already
+        * auto-cleared". The auto-clearing happens when the interrupt is
+        * asserted.
+        *
+        * Hardware errata 28 also indicates that writing to an
+        * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+        * an event in the PBA anyway, so we need to rely on the automask
+        * to hold pending events for us until the interrupt is re-enabled.
+        *
+        * The itr value is reported in microseconds, and the register
+        * value is recorded in 2 microsecond units. For this reason we
+        * only need to shift by the interval shift - 1 instead of the
+        * full value.
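+        *
+        * For example, an itr of 50 usecs is written into the INTERVAL
+        * field as 25, i.e. 25 two-usec units.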
+        */
+       itr &= I40E_ITR_MASK;
+
        val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
-             I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
              (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
-             (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
+             (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
 
        return val;
 }
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_VFINT_DYN_CTLN1
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
-       struct i40evf_adapter *adapter = vsi->back;
 
-       return adapter->rx_rings[idx].rx_itr_setting;
-}
-
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
-       struct i40evf_adapter *adapter = vsi->back;
-
-       return adapter->tx_rings[idx].tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
                                          struct i40e_q_vector *q_vector)
 {
        struct i40e_hw *hw = &vsi->back->hw;
-       bool rx = false, tx = false;
-       u32 rxval, txval;
-       int idx = q_vector->v_idx;
-       int rx_itr_setting, tx_itr_setting;
-
-       /* avoid dynamic calculation if in countdown mode OR if
-        * all dynamic is disabled
-        */
-       rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
-       rx_itr_setting = get_rx_itr(vsi, idx);
-       tx_itr_setting = get_tx_itr(vsi, idx);
+       u32 intval;
 
-       if (q_vector->itr_countdown > 0 ||
-           (!ITR_IS_DYNAMIC(rx_itr_setting) &&
-            !ITR_IS_DYNAMIC(tx_itr_setting))) {
-               goto enable_int;
-       }
-
-       if (ITR_IS_DYNAMIC(rx_itr_setting)) {
-               rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-               rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
-       }
+       /* These will do nothing if dynamic updates are not enabled */
+       i40e_update_itr(q_vector, &q_vector->tx);
+       i40e_update_itr(q_vector, &q_vector->rx);
 
-       if (ITR_IS_DYNAMIC(tx_itr_setting)) {
-               tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-               txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
-       }
-
-       if (rx || tx) {
-               /* get the higher of the two ITR adjustments and
-                * use the same value for both ITR registers
-                * when in adaptive mode (Rx and/or Tx)
-                */
-               u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
-               q_vector->tx.itr = q_vector->rx.itr = itr;
-               txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
-               tx = true;
-               rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
-               rx = true;
-       }
-
-       /* only need to enable the interrupt once, but need
-        * to possibly update both ITR values
+       /* This block of logic allows us to get away with only updating
+        * one ITR value with each interrupt. The idea is to perform a
+        * pseudo-lazy update with the following criteria.
+        *
+        * 1. Rx is given higher priority than Tx if both are in the same state
+        * 2. If we must reduce an ITR, that update is given highest priority
+        * 3. We then give priority to increasing the ITR based on amount
         */
-       if (rx) {
-               /* set the INTENA_MSK_MASK so that this first write
-                * won't actually enable the interrupt, instead just
-                * updating the ITR (it's bit 31 PF and VF)
+       if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+               /* Rx ITR needs to be reduced, this is highest priority */
+               intval = i40e_buildreg_itr(I40E_RX_ITR,
+                                          q_vector->rx.target_itr);
+               q_vector->rx.current_itr = q_vector->rx.target_itr;
+               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+                  ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+                   (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+               /* Tx ITR needs to be reduced, this is second priority
+                * Tx ITR needs to be increased more than Rx, fourth priority
                 */
-               rxval |= BIT(31);
-               /* don't check _DOWN because interrupt isn't being enabled */
-               wr32(hw, INTREG(q_vector->reg_idx), rxval);
+               intval = i40e_buildreg_itr(I40E_TX_ITR,
+                                          q_vector->tx.target_itr);
+               q_vector->tx.current_itr = q_vector->tx.target_itr;
+               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+               /* Rx ITR needs to be increased, third priority */
+               intval = i40e_buildreg_itr(I40E_RX_ITR,
+                                          q_vector->rx.target_itr);
+               q_vector->rx.current_itr = q_vector->rx.target_itr;
+               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       } else {
+               /* No ITR update, lowest priority */
+               intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+               if (q_vector->itr_countdown)
+                       q_vector->itr_countdown--;
        }
 
-enable_int:
        if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-               wr32(hw, INTREG(q_vector->reg_idx), txval);
-
-       if (q_vector->itr_countdown)
-               q_vector->itr_countdown--;
-       else
-               q_vector->itr_countdown = ITR_COUNTDOWN_START;
+               wr32(hw, INTREG(q_vector->reg_idx), intval);
 }
 
 /**
index 7798a66..9129447 100644 (file)
 #define _I40E_TXRX_H_
 
 /* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR               0x0001  /* reg uses 2 usec resolution */
-#define I40E_ITR_100K              0x0005
-#define I40E_ITR_50K               0x000A
-#define I40E_ITR_20K               0x0019
-#define I40E_ITR_18K               0x001B
-#define I40E_ITR_8K                0x003E
-#define I40E_ITR_4K                0x007A
-#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-                                   I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-                                   I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
-#define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
 #define I40E_DEFAULT_IRQ_WORK      256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheets for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160 usecs, which is called out as 0xFF0 with a 2 usec
+ * resolution; 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value, which is divided by 2, let's use the actual usec
+ * values and avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC       0x8000  /* use top bit as a flag */
+#define I40E_ITR_MASK          0x1FFE  /* mask for ITR register value */
+#define I40E_MIN_ITR                2  /* reg uses 2 usec resolution */
+#define I40E_ITR_100K              10  /* all values below must be even */
+#define I40E_ITR_50K               20
+#define I40E_ITR_20K               50
+#define I40E_ITR_18K               60
+#define I40E_ITR_8K               122
+#define I40E_MAX_ITR             8160  /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
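+/* ITR_REG_ALIGN rounds a usec setting up to the even 2 usec register
+ * resolution, e.g. a requested 5 usecs is aligned up to 6.
+ */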
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF                (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF                (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
 /* 0x40 is the enable bit for interrupt rate limiting, and must be set if
  * the value of the rate limit is non-zero
  */
 #define INTRL_ENA                  BIT(6)
+#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
 #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
 #define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
 #define I40E_INTRL_8K              125     /* 8000 ints/sec */
@@ -362,8 +366,7 @@ struct i40e_ring {
         * these values always store the USER setting, and must be converted
         * before programming to a register.
         */
-       u16 rx_itr_setting;
-       u16 tx_itr_setting;
+       u16 itr_setting;
 
        u16 count;                      /* Number of descriptors */
        u16 reg_idx;                    /* HW register index of the ring */
@@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
        ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
 }
 
-enum i40e_latency_range {
-       I40E_LOWEST_LATENCY = 0,
-       I40E_LOW_LATENCY = 1,
-       I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC      0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS    0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS    0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY      0x8000
+#define I40E_ITR_ADAPTIVE_BULK         0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
 
 struct i40e_ring_container {
-       /* array of pointers to rings */
-       struct i40e_ring *ring;
+       struct i40e_ring *ring;         /* pointer to linked list of ring(s) */
+       unsigned long next_update;      /* jiffies value of next update */
        unsigned int total_bytes;       /* total bytes processed this int */
        unsigned int total_packets;     /* total packets processed this int */
-       unsigned long last_itr_update;  /* jiffies of last ITR update */
        u16 count;
-       enum i40e_latency_range latency_range;
-       u16 itr;
+       u16 target_itr;                 /* target ITR setting for ring(s) */
+       u16 current_itr;                /* current ITR setting for ring(s) */
 };
 
 /* iterator for handling rings in ring container */
index 9690c1e..e46555a 100644 (file)
 #include <linux/socket.h>
 #include <linux/jiffies.h>
 #include <net/ip6_checksum.h>
+#include <net/pkt_cls.h>
 #include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
 
 #include "i40e_type.h"
 #include <linux/avf/virtchnl.h>
@@ -106,6 +109,7 @@ struct i40e_vsi {
 
 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
 #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40EVF_MBPS_DIVISOR    125000 /* divisor to convert bytes/sec to Mbps */
 
 /* MAX_MSIX_Q_VECTORS of these are allocated,
  * but we only use one per queue-specific vector.
@@ -117,9 +121,8 @@ struct i40e_q_vector {
        struct i40e_ring_container rx;
        struct i40e_ring_container tx;
        u32 ring_mask;
+       u8 itr_countdown;       /* when 0 should adjust adaptive ITR */
        u8 num_ringpairs;       /* total number of ring pairs in vector */
-#define ITR_COUNTDOWN_START 100
-       u8 itr_countdown;       /* when 0 or 1 update ITR */
        u16 v_idx;              /* index in the vsi->q_vector array. */
        u16 reg_idx;            /* register index of the interrupt */
        char name[IFNAMSIZ + 15];
@@ -169,6 +172,28 @@ struct i40evf_vlan_filter {
        bool add;               /* filter needs to be added */
 };
 
+#define I40EVF_MAX_TRAFFIC_CLASS       4
+/* State of traffic class creation */
+enum i40evf_tc_state_t {
+       __I40EVF_TC_INVALID, /* no traffic class, default state */
+       __I40EVF_TC_RUNNING, /* traffic classes have been created */
+};
+
+/* channel info */
+struct i40evf_channel_config {
+       struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS];
+       enum i40evf_tc_state_t state;
+       u8 total_qps;
+};
+
+/* State of cloud filter */
+enum i40evf_cloud_filter_state_t {
+       __I40EVF_CF_INVALID,     /* cloud filter not added */
+       __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
+       __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
+       __I40EVF_CF_ACTIVE,      /* cloud filter is active */
+};
+
 /* Driver state. The order of these is important! */
 enum i40evf_state_t {
        __I40EVF_STARTUP,               /* driver loaded, probe complete */
@@ -190,6 +215,36 @@ enum i40evf_critical_section_t {
        __I40EVF_IN_REMOVE_TASK,        /* device being removed */
 };
 
+#define I40EVF_CLOUD_FIELD_OMAC                0x01
+#define I40EVF_CLOUD_FIELD_IMAC                0x02
+#define I40EVF_CLOUD_FIELD_IVLAN       0x04
+#define I40EVF_CLOUD_FIELD_TEN_ID      0x08
+#define I40EVF_CLOUD_FIELD_IIP         0x10
+
+#define I40EVF_CF_FLAGS_OMAC   I40EVF_CLOUD_FIELD_OMAC
+#define I40EVF_CF_FLAGS_IMAC   I40EVF_CLOUD_FIELD_IMAC
+#define I40EVF_CF_FLAGS_IMAC_IVLAN     (I40EVF_CLOUD_FIELD_IMAC |\
+                                        I40EVF_CLOUD_FIELD_IVLAN)
+#define I40EVF_CF_FLAGS_IMAC_TEN_ID    (I40EVF_CLOUD_FIELD_IMAC |\
+                                        I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC       (I40EVF_CLOUD_FIELD_OMAC |\
+                                                I40EVF_CLOUD_FIELD_IMAC |\
+                                                I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID      (I40EVF_CLOUD_FIELD_IMAC |\
+                                                I40EVF_CLOUD_FIELD_IVLAN |\
+                                                I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IIP    I40EVF_CLOUD_FIELD_IIP
+
+/* bookkeeping of cloud filters */
+struct i40evf_cloud_filter {
+       enum i40evf_cloud_filter_state_t state;
+       struct list_head list;
+       struct virtchnl_filter f;
+       unsigned long cookie;
+       bool del;               /* filter needs to be deleted */
+       bool add;               /* filter needs to be added */
+};
+
 /* board specific private data structure */
 struct i40evf_adapter {
        struct timer_list watchdog_timer;
@@ -241,6 +296,7 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_ALLMULTI_ON                        BIT(14)
 #define I40EVF_FLAG_LEGACY_RX                  BIT(15)
 #define I40EVF_FLAG_REINIT_ITR_NEEDED          BIT(16)
+#define I40EVF_FLAG_QUEUES_DISABLED            BIT(17)
 /* duplicates for common code */
 #define I40E_FLAG_DCB_ENABLED                  0
 #define I40E_FLAG_RX_CSUM_ENABLED              I40EVF_FLAG_RX_CSUM_ENABLED
@@ -269,6 +325,10 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_AQ_RELEASE_ALLMULTI                BIT(18)
 #define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING   BIT(19)
 #define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING  BIT(20)
+#define I40EVF_FLAG_AQ_ENABLE_CHANNELS         BIT(21)
+#define I40EVF_FLAG_AQ_DISABLE_CHANNELS                BIT(22)
+#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER                BIT(23)
+#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER                BIT(24)
 
        /* OS defined structs */
        struct net_device *netdev;
@@ -314,6 +374,13 @@ struct i40evf_adapter {
        u16 rss_lut_size;
        u8 *rss_key;
        u8 *rss_lut;
+       /* ADQ related members */
+       struct i40evf_channel_config ch_config;
+       u8 num_tc;
+       struct list_head cloud_filter_list;
+       /* lock to protect access to the cloud filter list */
+       spinlock_t cloud_filter_list_lock;
+       u16 num_cloud_filters;
 };
 
 
@@ -380,4 +447,8 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len);
 void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
 void i40evf_notify_client_open(struct i40e_vsi *vsi);
 void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
+void i40evf_enable_channels(struct i40evf_adapter *adapter);
+void i40evf_disable_channels(struct i40evf_adapter *adapter);
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter);
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter);
 #endif /* _I40EVF_H_ */
index e2d8aa1..e679325 100644 (file)
@@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev,
        rx_ring = &adapter->rx_rings[queue];
        tx_ring = &adapter->tx_rings[queue];
 
-       if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+       if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
                ec->use_adaptive_rx_coalesce = 1;
 
-       if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+       if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
                ec->use_adaptive_tx_coalesce = 1;
 
-       ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-       ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
 
        return 0;
 }
@@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
 
 /**
  * i40evf_set_itr_per_queue - set ITR values for specific queue
- * @vsi: the VSI to set values for
+ * @adapter: the VF adapter struct to set values for
  * @ec: coalesce settings from ethtool
  * @queue: the queue to modify
  *
@@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 {
        struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
        struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
-       struct i40e_vsi *vsi = &adapter->vsi;
-       struct i40e_hw *hw = &adapter->hw;
        struct i40e_q_vector *q_vector;
-       u16 vector;
 
-       rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
-       tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+       rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+       tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
 
-       rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+       rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
        if (!ec->use_adaptive_rx_coalesce)
-               rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC;
+               rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
 
-       tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+       tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
        if (!ec->use_adaptive_tx_coalesce)
-               tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC;
+               tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
 
        q_vector = rx_ring->q_vector;
-       q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
-       vector = vsi->base_vector + q_vector->v_idx;
-       wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+       q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 
        q_vector = tx_ring->q_vector;
-       q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
-       vector = vsi->base_vector + q_vector->v_idx;
-       wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+       q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
 
-       i40e_flush(hw);
+       /* The interrupt handler itself will take care of programming
+        * the Tx and Rx ITR values based on the values we have entered
+        * into the q_vector, no need to write the values now.
+        */
 }
 
 /**
@@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
        if (ec->rx_coalesce_usecs == 0) {
                if (ec->use_adaptive_rx_coalesce)
                        netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
-       } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-                  (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+       } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
+                  (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
                netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
                return -EINVAL;
        }
@@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
        if (ec->tx_coalesce_usecs == 0) {
                if (ec->use_adaptive_tx_coalesce)
                        netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
-       } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-                  (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+       } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
+                  (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
                netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
                return -EINVAL;
        }
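
The dropped << 1 shifts follow from I40E_MIN_ITR and I40E_MAX_ITR now being expressed directly in microseconds instead of 2 usec register units; ITR_REG_ALIGN, used earlier in this patch, is what maps a usec request onto the register granularity. A sketch of that rounding, assuming a 2 usec step:

        #define ITR_GRAN_USECS  2U      /* assumed hardware ITR granularity */

        /* Round a requested interval to a programmable register value. */
        static inline u16 sketch_itr_reg_align(u16 usecs)
        {
                return ALIGN(usecs, ITR_GRAN_USECS);    /* rounds up */
        }
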
@@ -699,6 +695,12 @@ static int i40evf_set_channels(struct net_device *netdev,
                return -EINVAL;
        }
 
+       if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+           adapter->num_tc) {
+               dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+               return -EINVAL;
+       }
+
        /* All of these should have already been checked by ethtool before this
         * even gets to us, but just to be sure.
         */
index 16989ad..dae1218 100644 (file)
@@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
        rx_ring->vsi = &adapter->vsi;
        q_vector->rx.ring = rx_ring;
        q_vector->rx.count++;
-       q_vector->rx.latency_range = I40E_LOW_LATENCY;
-       q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+       q_vector->rx.next_update = jiffies + 1;
+       q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
        q_vector->ring_mask |= BIT(r_idx);
-       q_vector->itr_countdown = ITR_COUNTDOWN_START;
-       wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
+       wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
+            q_vector->rx.target_itr);
+       q_vector->rx.current_itr = q_vector->rx.target_itr;
 }
 
 /**
@@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
        tx_ring->vsi = &adapter->vsi;
        q_vector->tx.ring = tx_ring;
        q_vector->tx.count++;
-       q_vector->tx.latency_range = I40E_LOW_LATENCY;
-       q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
-       q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       q_vector->tx.next_update = jiffies + 1;
+       q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
        q_vector->num_ringpairs++;
-       wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
+       wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
+            q_vector->tx.target_itr);
+       q_vector->tx.current_itr = q_vector->tx.target_itr;
 }
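
Both mapping functions now index the ITR registers through q_vector->reg_idx rather than recomputing vsi->base_vector + v_idx - 1 at every write. Presumably reg_idx is cached once at q_vector allocation elsewhere in this series; with vector 0 reserved for the misc/AdminQ interrupt (NONQ_VECS), the two indexing schemes agree:

        /* Assumed initialization (illustrative, not part of this hunk):
         * queue vector v_idx owns hardware ITR register index v_idx,
         * because vector 0 has no ITRN register of its own.
         */
        q_vector->reg_idx = v_idx;      /* == (v_idx + NONQ_VECS) - 1 */
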
 
 /**
@@ -783,7 +785,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
  **/
 static struct
 i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
-                                     u8 *macaddr)
+                                     const u8 *macaddr)
 {
        struct i40evf_mac_filter *f;
 
@@ -806,20 +808,18 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
  **/
 static struct
 i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
-                                    u8 *macaddr)
+                                    const u8 *macaddr)
 {
        struct i40evf_mac_filter *f;
 
        if (!macaddr)
                return NULL;
 
-       spin_lock_bh(&adapter->mac_vlan_list_lock);
-
        f = i40evf_find_filter(adapter, macaddr);
        if (!f) {
                f = kzalloc(sizeof(*f), GFP_ATOMIC);
                if (!f)
-                       goto clearout;
+                       return f;
 
                ether_addr_copy(f->macaddr, macaddr);
 
@@ -830,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
                f->remove = false;
        }
 
-clearout:
-       spin_unlock_bh(&adapter->mac_vlan_list_lock);
        return f;
 }
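
Since i40evf_add_filter() no longer takes mac_vlan_list_lock itself, every caller now has to hold it, as i40evf_set_mac below is adjusted to do. A lockdep annotation, shown here purely as an illustration of the new contract (it is not part of this patch), would make violations self-reporting:

        /* Illustrative: document that the caller owns the MAC/VLAN list lock. */
        lockdep_assert_held(&adapter->mac_vlan_list_lock);
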
 
@@ -866,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
                adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
        }
 
+       f = i40evf_add_filter(adapter, addr->sa_data);
+
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-       f = i40evf_add_filter(adapter, addr->sa_data);
        if (f) {
                ether_addr_copy(hw->mac.addr, addr->sa_data);
                ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -878,50 +877,64 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
 }
 
 /**
- * i40evf_set_rx_mode - NDO callback to set the netdev filters
- * @netdev: network interface device structure
- **/
-static void i40evf_set_rx_mode(struct net_device *netdev)
+ * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode, which is guaranteed to hold the
+ * hash lock.
+ */
+static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr)
 {
        struct i40evf_adapter *adapter = netdev_priv(netdev);
-       struct i40evf_mac_filter *f, *ftmp;
-       struct netdev_hw_addr *uca;
-       struct netdev_hw_addr *mca;
-       struct netdev_hw_addr *ha;
-
-       /* add addr if not already in the filter list */
-       netdev_for_each_uc_addr(uca, netdev) {
-               i40evf_add_filter(adapter, uca->addr);
-       }
-       netdev_for_each_mc_addr(mca, netdev) {
-               i40evf_add_filter(adapter, mca->addr);
-       }
 
-       spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-       list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
-               netdev_for_each_mc_addr(mca, netdev)
-                       if (ether_addr_equal(mca->addr, f->macaddr))
-                               goto bottom_of_search_loop;
-
-               netdev_for_each_uc_addr(uca, netdev)
-                       if (ether_addr_equal(uca->addr, f->macaddr))
-                               goto bottom_of_search_loop;
+       if (i40evf_add_filter(adapter, addr))
+               return 0;
+       else
+               return -ENOMEM;
+}
 
-               for_each_dev_addr(netdev, ha)
-                       if (ether_addr_equal(ha->addr, f->macaddr))
-                               goto bottom_of_search_loop;
+/**
+ * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to remove
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode, which is guaranteed to hold the
+ * hash lock.
+ */
+static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+       struct i40evf_adapter *adapter = netdev_priv(netdev);
+       struct i40evf_mac_filter *f;
 
-               if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr))
-                       goto bottom_of_search_loop;
+       /* Under some circumstances, we might receive a request to delete
+        * our own device address from our uc list. Because we store the
+        * device address in the VSI's MAC/VLAN filter list, we need to ignore
+        * such requests and not delete our device address from this list.
+        */
+       if (ether_addr_equal(addr, netdev->dev_addr))
+               return 0;
 
-               /* f->macaddr wasn't found in uc, mc, or ha list so delete it */
+       f = i40evf_find_filter(adapter, addr);
+       if (f) {
                f->remove = true;
                adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-
-bottom_of_search_loop:
-               continue;
        }
+       return 0;
+}
+
+/**
+ * i40evf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void i40evf_set_rx_mode(struct net_device *netdev)
+{
+       struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+       spin_lock_bh(&adapter->mac_vlan_list_lock);
+       __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+       __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+       spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
        if (netdev->flags & IFF_PROMISC &&
            !(adapter->flags & I40EVF_FLAG_PROMISC_ON))
@@ -936,8 +949,6 @@ bottom_of_search_loop:
        else if (!(netdev->flags & IFF_ALLMULTI) &&
                 adapter->flags & I40EVF_FLAG_ALLMULTI_ON)
                adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI;
-
-       spin_unlock_bh(&adapter->mac_vlan_list_lock);
 }
 
 /**
@@ -1025,7 +1036,9 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter)
 void i40evf_down(struct i40evf_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
+       struct i40evf_vlan_filter *vlf;
        struct i40evf_mac_filter *f;
+       struct i40evf_cloud_filter *cf;
 
        if (adapter->state <= __I40EVF_DOWN_PENDING)
                return;
@@ -1038,17 +1051,29 @@ void i40evf_down(struct i40evf_adapter *adapter)
 
        spin_lock_bh(&adapter->mac_vlan_list_lock);
 
+       /* clear the sync flag on all filters */
+       __dev_uc_unsync(adapter->netdev, NULL);
+       __dev_mc_unsync(adapter->netdev, NULL);
+
        /* remove all MAC filters */
        list_for_each_entry(f, &adapter->mac_filter_list, list) {
                f->remove = true;
        }
+
        /* remove all VLAN filters */
-       list_for_each_entry(f, &adapter->vlan_filter_list, list) {
-               f->remove = true;
+       list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+               vlf->remove = true;
        }
 
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+       /* remove all cloud filters */
+       spin_lock_bh(&adapter->cloud_filter_list_lock);
+       list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+               cf->del = true;
+       }
+       spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
        if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
            adapter->state != __I40EVF_RESETTING) {
                /* cancel any current operation */
@@ -1059,6 +1084,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
                 */
                adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER;
                adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+               adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
                adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
        }
 
@@ -1144,6 +1170,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
         */
        if (adapter->num_req_queues)
                num_active_queues = adapter->num_req_queues;
+       else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+                adapter->num_tc)
+               num_active_queues = adapter->ch_config.total_qps;
        else
                num_active_queues = min_t(int,
                                          adapter->vsi_res->num_queue_pairs,
@@ -1169,7 +1198,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
                tx_ring->netdev = adapter->netdev;
                tx_ring->dev = &adapter->pdev->dev;
                tx_ring->count = adapter->tx_desc_count;
-               tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
+               tx_ring->itr_setting = I40E_ITR_TX_DEF;
                if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
                        tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
 
@@ -1178,7 +1207,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
                rx_ring->netdev = adapter->netdev;
                rx_ring->dev = &adapter->pdev->dev;
                rx_ring->count = adapter->rx_desc_count;
-               rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
+               rx_ring->itr_setting = I40E_ITR_RX_DEF;
        }
 
        adapter->num_active_queues = num_active_queues;
@@ -1471,6 +1500,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter)
                goto err_alloc_q_vectors;
        }
 
+       /* If we've gotten this far with the ADq flag set, we haven't bailed
+        * out anywhere along the way, and the ADq queue resources have
+        * actually been allocated in the reset path, so we can now truly
+        * claim that ADq is enabled.
+        */
+       if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+           adapter->num_tc)
+               dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
+                        adapter->num_tc);
+
        dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
                 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
                 adapter->num_active_queues);
@@ -1712,6 +1751,27 @@ static void i40evf_watchdog_task(struct work_struct *work)
                i40evf_set_promiscuous(adapter, 0);
                goto watchdog_done;
        }
+
+       if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) {
+               i40evf_enable_channels(adapter);
+               goto watchdog_done;
+       }
+
+       if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) {
+               i40evf_disable_channels(adapter);
+               goto watchdog_done;
+       }
+
+       if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+               i40evf_add_cloud_filter(adapter);
+               goto watchdog_done;
+       }
+
+       if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+               i40evf_del_cloud_filter(adapter);
+               goto watchdog_done;
+       }
+
        schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
 
        if (adapter->state == __I40EVF_RUNNING)
@@ -1735,6 +1795,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
 {
        struct i40evf_mac_filter *f, *ftmp;
        struct i40evf_vlan_filter *fv, *fvtmp;
+       struct i40evf_cloud_filter *cf, *cftmp;
 
        adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
 
@@ -1756,7 +1817,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
 
        spin_lock_bh(&adapter->mac_vlan_list_lock);
 
-       /* Delete all of the filters, both MAC and VLAN. */
+       /* Delete all of the filters */
        list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
                list_del(&f->list);
                kfree(f);
@@ -1769,6 +1830,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
 
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+       spin_lock_bh(&adapter->cloud_filter_list_lock);
+       list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+               list_del(&cf->list);
+               kfree(cf);
+               adapter->num_cloud_filters--;
+       }
+       spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
        i40evf_free_misc_irq(adapter);
        i40evf_reset_interrupt_capability(adapter);
        i40evf_free_queues(adapter);
@@ -1798,9 +1867,11 @@ static void i40evf_reset_task(struct work_struct *work)
        struct i40evf_adapter *adapter = container_of(work,
                                                      struct i40evf_adapter,
                                                      reset_task);
+       struct virtchnl_vf_resource *vfres = adapter->vf_res;
        struct net_device *netdev = adapter->netdev;
        struct i40e_hw *hw = &adapter->hw;
        struct i40evf_vlan_filter *vlf;
+       struct i40evf_cloud_filter *cf;
        struct i40evf_mac_filter *f;
        u32 reg_val;
        int i = 0, err;
@@ -1893,6 +1964,7 @@ continue_reset:
        i40evf_free_all_rx_resources(adapter);
        i40evf_free_all_tx_resources(adapter);
 
+       adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED;
        /* kill and reinit the admin queue */
        i40evf_shutdown_adminq(hw);
        adapter->current_op = VIRTCHNL_OP_UNKNOWN;
@@ -1924,8 +1996,19 @@ continue_reset:
 
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+       /* check if TCs are running and re-add all cloud filters */
+       spin_lock_bh(&adapter->cloud_filter_list_lock);
+       if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+           adapter->num_tc) {
+               list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+                       cf->add = true;
+               }
+       }
+       spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
        adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
        adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+       adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
        i40evf_misc_irq_enable(adapter);
 
        mod_timer(&adapter->watchdog_timer, jiffies + 2);
@@ -2190,6 +2273,713 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
                        i40evf_free_rx_resources(&adapter->rx_rings[i]);
 }
 
+/**
+ * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter,
+                                       u64 max_tx_rate)
+{
+       int speed = 0, ret = 0;
+
+       switch (adapter->link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               speed = 40000;
+               break;
+       case I40E_LINK_SPEED_25GB:
+               speed = 25000;
+               break;
+       case I40E_LINK_SPEED_20GB:
+               speed = 20000;
+               break;
+       case I40E_LINK_SPEED_10GB:
+               speed = 10000;
+               break;
+       case I40E_LINK_SPEED_1GB:
+               speed = 1000;
+               break;
+       case I40E_LINK_SPEED_100MB:
+               speed = 100;
+               break;
+       default:
+               break;
+       }
+
+       if (max_tx_rate > speed) {
+               dev_err(&adapter->pdev->dev,
+                       "Invalid tx rate specified\n");
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+
+/**
+ * i40evf_validate_channel_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates the queue channel configuration provided
+ * by the user. Returns 0 on a valid config.
+ **/
+static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
+                                    struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+       u64 total_max_rate = 0;
+       int i, num_qps = 0;
+       u64 tx_rate = 0;
+       int ret = 0;
+
+       if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
+           mqprio_qopt->qopt.num_tc < 1)
+               return -EINVAL;
+
+       for (i = 0; i < mqprio_qopt->qopt.num_tc; i++) {
+               if (!mqprio_qopt->qopt.count[i] ||
+                   mqprio_qopt->qopt.offset[i] != num_qps)
+                       return -EINVAL;
+               if (mqprio_qopt->min_rate[i]) {
+                       dev_err(&adapter->pdev->dev,
+                               "Invalid min tx rate (greater than 0) specified\n");
+                       return -EINVAL;
+               }
+               /* convert to Mbps */
+               tx_rate = div_u64(mqprio_qopt->max_rate[i],
+                                 I40EVF_MBPS_DIVISOR);
+               total_max_rate += tx_rate;
+               num_qps += mqprio_qopt->qopt.count[i];
+       }
+       if (num_qps > MAX_QUEUES)
+               return -EINVAL;
+
+       ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
+       return ret;
+}
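
tc hands per-TC rates to the driver in bytes per second, so the division above converts to megabits per second before the link-speed comparison; assuming I40EVF_MBPS_DIVISOR is the usual 125000 bytes/s-per-Mbps factor, a 1 Gbit/s request works out as:

        /* 1 Gbit/s arrives from tc as 125,000,000 bytes/s:
         * div_u64(125000000, 125000) == 1000 Mbps, which is then checked
         * against the link speed table in i40evf_validate_tx_bandwidth().
         */
        u64 tx_rate = div_u64(125000000ULL, 125000);    /* 1000 */
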
+
+/**
+ * i40evf_del_all_cloud_filters - delete all cloud filters on the
+ * traffic classes
+ * @adapter: board private structure
+ **/
+static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter)
+{
+       struct i40evf_cloud_filter *cf, *cftmp;
+
+       spin_lock_bh(&adapter->cloud_filter_list_lock);
+       list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+                                list) {
+               list_del(&cf->list);
+               kfree(cf);
+               adapter->num_cloud_filters--;
+       }
+       spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * __i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_data: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to set up traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
+{
+       struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+       struct i40evf_adapter *adapter = netdev_priv(netdev);
+       struct virtchnl_vf_resource *vfres = adapter->vf_res;
+       u8 num_tc = 0, total_qps = 0;
+       int ret = 0, netdev_tc = 0;
+       u64 max_tx_rate;
+       u16 mode;
+       int i;
+
+       num_tc = mqprio_qopt->qopt.num_tc;
+       mode = mqprio_qopt->mode;
+
+       /* delete queue_channel */
+       if (!mqprio_qopt->qopt.hw) {
+               if (adapter->ch_config.state == __I40EVF_TC_RUNNING) {
+                       /* reset the tc configuration */
+                       netdev_reset_tc(netdev);
+                       adapter->num_tc = 0;
+                       netif_tx_stop_all_queues(netdev);
+                       netif_tx_disable(netdev);
+                       i40evf_del_all_cloud_filters(adapter);
+                       adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+                       goto exit;
+               } else {
+                       return -EINVAL;
+               }
+       }
+
+       /* add queue channel */
+       if (mode == TC_MQPRIO_MODE_CHANNEL) {
+               if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+                       dev_err(&adapter->pdev->dev, "ADq not supported\n");
+                       return -EOPNOTSUPP;
+               }
+               if (adapter->ch_config.state != __I40EVF_TC_INVALID) {
+                       dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+                       return -EINVAL;
+               }
+
+               ret = i40evf_validate_ch_config(adapter, mqprio_qopt);
+               if (ret)
+                       return ret;
+               /* Return if same TC config is requested */
+               if (adapter->num_tc == num_tc)
+                       return 0;
+               adapter->num_tc = num_tc;
+
+               for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+                       if (i < num_tc) {
+                               adapter->ch_config.ch_info[i].count =
+                                       mqprio_qopt->qopt.count[i];
+                               adapter->ch_config.ch_info[i].offset =
+                                       mqprio_qopt->qopt.offset[i];
+                               total_qps += mqprio_qopt->qopt.count[i];
+                               max_tx_rate = mqprio_qopt->max_rate[i];
+                               /* convert to Mbps */
+                               max_tx_rate = div_u64(max_tx_rate,
+                                                     I40EVF_MBPS_DIVISOR);
+                               adapter->ch_config.ch_info[i].max_tx_rate =
+                                       max_tx_rate;
+                       } else {
+                               adapter->ch_config.ch_info[i].count = 1;
+                               adapter->ch_config.ch_info[i].offset = 0;
+                       }
+               }
+               adapter->ch_config.total_qps = total_qps;
+               netif_tx_stop_all_queues(netdev);
+               netif_tx_disable(netdev);
+               adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+               netdev_reset_tc(netdev);
+               /* Report the tc mapping up the stack */
+               netdev_set_num_tc(adapter->netdev, num_tc);
+               for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+                       u16 qcount = mqprio_qopt->qopt.count[i];
+                       u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+                       if (i < num_tc)
+                               netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+                                                   qoffset);
+               }
+       }
+exit:
+       return ret;
+}
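
For reference, this handler is reached through ndo_setup_tc when an mqprio qdisc is installed in channel mode. A request of roughly the following shape (iproute2 syntax assumed; two TCs of four queues each, per-TC max_rate only, since min_rate is rejected by i40evf_validate_ch_config() above) would exercise the add path:

        tc qdisc add dev <vf-netdev> root mqprio num_tc 2 \
                map 0 0 0 0 1 1 1 1 queues 4@0 4@4 hw 1 \
                mode channel shaper bw_rlimit max_rate 1Gbit 2Gbit
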
+
+/**
+ * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @f: pointer to struct tc_cls_flower_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
+                                  struct tc_cls_flower_offload *f,
+                                  struct i40evf_cloud_filter *filter)
+{
+       u16 n_proto_mask = 0;
+       u16 n_proto_key = 0;
+       u8 field_flags = 0;
+       u16 addr_type = 0;
+       u16 n_proto = 0;
+       int i = 0;
+       struct virtchnl_filter *vf = &filter->f;
+
+       if (f->dissector->used_keys &
+           ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+             BIT(FLOW_DISSECTOR_KEY_BASIC) |
+             BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_VLAN) |
+             BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_PORTS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+               dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
+                       f->dissector->used_keys);
+               return -EOPNOTSUPP;
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+               struct flow_dissector_key_keyid *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                                 f->mask);
+
+               if (mask->keyid != 0)
+                       field_flags |= I40EVF_CLOUD_FIELD_TEN_ID;
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               struct flow_dissector_key_basic *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->key);
+
+               struct flow_dissector_key_basic *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->mask);
+               n_proto_key = ntohs(key->n_proto);
+               n_proto_mask = ntohs(mask->n_proto);
+
+               if (n_proto_key == ETH_P_ALL) {
+                       n_proto_key = 0;
+                       n_proto_mask = 0;
+               }
+               n_proto = n_proto_key & n_proto_mask;
+               if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+                       return -EINVAL;
+               if (n_proto == ETH_P_IPV6) {
+                       /* specify flow type as TCP IPv6 */
+                       vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
+               }
+
+               if (key->ip_proto != IPPROTO_TCP) {
+                       dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+                       return -EINVAL;
+               }
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+               struct flow_dissector_key_eth_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                                 f->key);
+
+               struct flow_dissector_key_eth_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                                 f->mask);
+               /* use is_broadcast and is_zero to check for all 0xff or 0 */
+               if (!is_zero_ether_addr(mask->dst)) {
+                       if (is_broadcast_ether_addr(mask->dst)) {
+                               field_flags |= I40EVF_CLOUD_FIELD_OMAC;
+                       } else {
+                               dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+                                       mask->dst);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+
+               if (!is_zero_ether_addr(mask->src)) {
+                       if (is_broadcast_ether_addr(mask->src)) {
+                               field_flags |= I40EVF_CLOUD_FIELD_IMAC;
+                       } else {
+                               dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+                                       mask->src);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+
+               if (!is_zero_ether_addr(key->dst))
+                       if (is_valid_ether_addr(key->dst) ||
+                           is_multicast_ether_addr(key->dst)) {
+                               /* set the mask if a valid dst_mac address */
+                               for (i = 0; i < ETH_ALEN; i++)
+                                       vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+                               ether_addr_copy(vf->data.tcp_spec.dst_mac,
+                                               key->dst);
+                       }
+
+               if (!is_zero_ether_addr(key->src))
+                       if (is_valid_ether_addr(key->src) ||
+                           is_multicast_ether_addr(key->src)) {
+                               /* set the mask if a valid src_mac address */
+                               for (i = 0; i < ETH_ALEN; i++)
+                                       vf->mask.tcp_spec.src_mac[i] |= 0xff;
+                               ether_addr_copy(vf->data.tcp_spec.src_mac,
+                                               key->src);
+                       }
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+               struct flow_dissector_key_vlan *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->key);
+               struct flow_dissector_key_vlan *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->mask);
+
+               if (mask->vlan_id) {
+                       if (mask->vlan_id == VLAN_VID_MASK) {
+                               field_flags |= I40EVF_CLOUD_FIELD_IVLAN;
+                       } else {
+                               dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+                                       mask->vlan_id);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+               vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+               struct flow_dissector_key_control *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_CONTROL,
+                                                 f->key);
+
+               addr_type = key->addr_type;
+       }
+
+       if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+               struct flow_dissector_key_ipv4_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv4_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+                                                 f->mask);
+
+               if (mask->dst) {
+                       if (mask->dst == cpu_to_be32(0xffffffff)) {
+                               field_flags |= I40EVF_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+                                       be32_to_cpu(mask->dst));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+
+               if (mask->src) {
+                       if (mask->src == cpu_to_be32(0xffffffff)) {
+                               field_flags |= I40EVF_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+                                       be32_to_cpu(mask->src));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+
+               if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) {
+                       dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+                       return I40E_ERR_CONFIG;
+               }
+               if (key->dst) {
+                       vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+                       vf->data.tcp_spec.dst_ip[0] = key->dst;
+               }
+               if (key->src) {
+                       vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+                       vf->data.tcp_spec.src_ip[0] = key->src;
+               }
+       }
+
+       if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+               struct flow_dissector_key_ipv6_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv6_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+                                                 f->mask);
+
+               /* validate mask, make sure it is not IPV6_ADDR_ANY */
+               if (ipv6_addr_any(&mask->dst)) {
+                       dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+                               IPV6_ADDR_ANY);
+                       return I40E_ERR_CONFIG;
+               }
+
+               /* src and dest IPv6 address should not be LOOPBACK
+                * (0:0:0:0:0:0:0:1) which can be represented as ::1
+                */
+               if (ipv6_addr_loopback(&key->dst) ||
+                   ipv6_addr_loopback(&key->src)) {
+                       dev_err(&adapter->pdev->dev,
+                               "ipv6 addr should not be loopback\n");
+                       return I40E_ERR_CONFIG;
+               }
+               if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+                       field_flags |= I40EVF_CLOUD_FIELD_IIP;
+
+               for (i = 0; i < 4; i++)
+                       vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+               memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+                      sizeof(vf->data.tcp_spec.dst_ip));
+               for (i = 0; i < 4; i++)
+                       vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+               memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+                      sizeof(vf->data.tcp_spec.src_ip));
+       }
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+               struct flow_dissector_key_ports *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_PORTS,
+                                                 f->key);
+               struct flow_dissector_key_ports *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_PORTS,
+                                                 f->mask);
+
+               if (mask->src) {
+                       if (mask->src == cpu_to_be16(0xffff)) {
+                               field_flags |= I40EVF_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+                                       be16_to_cpu(mask->src));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+
+               if (mask->dst) {
+                       if (mask->dst == cpu_to_be16(0xffff)) {
+                               field_flags |= I40EVF_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+                                       be16_to_cpu(mask->dst));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               if (key->dst) {
+                       vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+                       vf->data.tcp_spec.dst_port = key->dst;
+               }
+
+               if (key->src) {
+                       vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+                       vf->data.tcp_spec.src_port = key->src;
+               }
+       }
+       vf->field_flags = field_flags;
+
+       return 0;
+}
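
The parser above accepts only TCP flows with all-or-nothing masks, and i40evf_handle_tclass() below additionally requires a destination port when redirecting to a TC other than 0. A filter of roughly this shape (iproute2 flower syntax assumed; the addresses are examples) satisfies both constraints:

        tc filter add dev <vf-netdev> protocol ip parent ffff: prio 1 flower \
                ip_proto tcp dst_ip 192.168.1.10/32 dst_port 80 \
                skip_sw hw_tc 1
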
+
+/**
+ * i40evf_handle_tclass - Forward to a traffic class on the device
+ * @adapter: board private structure
+ * @tc: traffic class index on the device
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc,
+                               struct i40evf_cloud_filter *filter)
+{
+       if (tc == 0)
+               return 0;
+       if (tc < adapter->num_tc) {
+               if (!filter->f.data.tcp_spec.dst_port) {
+                       dev_err(&adapter->pdev->dev,
+                               "Specify destination port to redirect to traffic class other than TC0\n");
+                       return -EINVAL;
+               }
+       }
+       /* redirect to a traffic class on the same device */
+       filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
+       filter->f.action_meta = tc;
+       return 0;
+}
+
+/**
+ * i40evf_configure_clsflower - Add tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
+                                     struct tc_cls_flower_offload *cls_flower)
+{
+       int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+       struct i40evf_cloud_filter *filter = NULL;
+       int err = 0, count = 50;
+
+       while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+                               &adapter->crit_section)) {
+               udelay(1);
+               if (--count == 0)
+                       return -EINVAL;
+       }
+
+       if (tc < 0) {
+               dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
+               err = -EINVAL;
+               goto clearout;
+       }
+
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (!filter) {
+               err = -ENOMEM;
+               goto clearout;
+       }
+       filter->cookie = cls_flower->cookie;
+
+       /* set the mask to all zeroes to begin with */
+       memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
+       /* start out with flow type and eth type IPv4 to begin with */
+       filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+       err = i40evf_parse_cls_flower(adapter, cls_flower, filter);
+       if (err < 0)
+               goto err;
+
+       err = i40evf_handle_tclass(adapter, tc, filter);
+       if (err < 0)
+               goto err;
+
+       /* add filter to the list */
+       spin_lock_bh(&adapter->cloud_filter_list_lock);
+       list_add_tail(&filter->list, &adapter->cloud_filter_list);
+       adapter->num_cloud_filters++;
+       filter->add = true;
+       adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+       spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+       if (err)
+               kfree(filter);
+clearout:
+       clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       return err;
+}
+
+/**
+ * i40evf_find_cf - Find the cloud filter in the list
+ * @adapter: Board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter,
+                                                 unsigned long *cookie)
+{
+       struct i40evf_cloud_filter *filter = NULL;
+
+       if (!cookie)
+               return NULL;
+
+       list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+               if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+                       return filter;
+       }
+       return NULL;
+}
+
+/**
+ * i40evf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_delete_clsflower(struct i40evf_adapter *adapter,
+                                  struct tc_cls_flower_offload *cls_flower)
+{
+       struct i40evf_cloud_filter *filter = NULL;
+       int err = 0;
+
+       spin_lock_bh(&adapter->cloud_filter_list_lock);
+       filter = i40evf_find_cf(adapter, &cls_flower->cookie);
+       if (filter) {
+               filter->del = true;
+               adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+       } else {
+               err = -EINVAL;
+       }
+       spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+       return err;
+}
+
+/**
+ * i40evf_setup_tc_cls_flower - flower classifier offloads
+ * @adapter: board private structure
+ * @cls_flower: pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
+                                     struct tc_cls_flower_offload *cls_flower)
+{
+       if (cls_flower->common.chain_index)
+               return -EOPNOTSUPP;
+
+       switch (cls_flower->command) {
+       case TC_CLSFLOWER_REPLACE:
+               return i40evf_configure_clsflower(adapter, cls_flower);
+       case TC_CLSFLOWER_DESTROY:
+               return i40evf_delete_clsflower(adapter, cls_flower);
+       case TC_CLSFLOWER_STATS:
+               return -EOPNOTSUPP;
+       default:
+               return -EINVAL;
+       }
+}
+
+/**
+ * i40evf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv: the adapter structure registered with the block callback
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                   void *cb_priv)
+{
+       switch (type) {
+       case TC_SETUP_CLSFLOWER:
+               return i40evf_setup_tc_cls_flower(cb_priv, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+/**
+ * i40evf_setup_tc_block - register callbacks for tc
+ * @dev: network interface device structure
+ * @f: tc offload data
+ *
+ * This function registers block callbacks for tc
+ * offloads
+ **/
+static int i40evf_setup_tc_block(struct net_device *dev,
+                                struct tc_block_offload *f)
+{
+       struct i40evf_adapter *adapter = netdev_priv(dev);
+
+       if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+
+       switch (f->command) {
+       case TC_BLOCK_BIND:
+               return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
+                                            adapter, adapter);
+       case TC_BLOCK_UNBIND:
+               tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
+                                       adapter);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+/**
+ * i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_data: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in the
+ * netdev_ops.
+ *
+ * Returns 0 on success
+ **/
+static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+                          void *type_data)
+{
+       switch (type) {
+       case TC_SETUP_QDISC_MQPRIO:
+               return __i40evf_setup_tc(netdev, type_data);
+       case TC_SETUP_BLOCK:
+               return i40evf_setup_tc_block(netdev, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
 /**
  * i40evf_open - Called when a network interface is made active
  * @netdev: network interface device structure
@@ -2236,7 +3026,12 @@ static int i40evf_open(struct net_device *netdev)
        if (err)
                goto err_req_irq;
 
+       spin_lock_bh(&adapter->mac_vlan_list_lock);
+
        i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+       spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
        i40evf_configure(adapter);
 
        i40evf_up_complete(adapter);
@@ -2457,6 +3252,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = i40evf_netpoll,
 #endif
+       .ndo_setup_tc           = i40evf_setup_tc,
 };
 
 /**
@@ -2571,6 +3367,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
        if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
                hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
                                NETIF_F_HW_VLAN_CTAG_RX);
+       /* Enable cloud filter if ADQ is supported */
+       if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+               hw_features |= NETIF_F_HW_TC;
 
        netdev->hw_features |= hw_features;
 
@@ -2938,9 +3737,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        mutex_init(&hw->aq.arq_mutex);
 
        spin_lock_init(&adapter->mac_vlan_list_lock);
+       spin_lock_init(&adapter->cloud_filter_list_lock);
 
        INIT_LIST_HEAD(&adapter->mac_filter_list);
        INIT_LIST_HEAD(&adapter->vlan_filter_list);
+       INIT_LIST_HEAD(&adapter->cloud_filter_list);
 
        INIT_WORK(&adapter->reset_task, i40evf_reset_task);
        INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
@@ -3065,7 +3866,9 @@ static void i40evf_remove(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct i40evf_adapter *adapter = netdev_priv(netdev);
+       struct i40evf_vlan_filter *vlf, *vlftmp;
        struct i40evf_mac_filter *f, *ftmp;
+       struct i40evf_cloud_filter *cf, *cftmp;
        struct i40e_hw *hw = &adapter->hw;
        int err;
        /* Indicate we are in remove and not to run reset_task */
@@ -3087,6 +3890,7 @@ static void i40evf_remove(struct pci_dev *pdev)
        /* Shut down all the garbage mashers on the detention level */
        adapter->state = __I40EVF_REMOVE;
        adapter->aq_required = 0;
+       adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
        i40evf_request_reset(adapter);
        msleep(50);
        /* If the FW isn't responding, kick it once, but only once. */
@@ -3127,13 +3931,21 @@ static void i40evf_remove(struct pci_dev *pdev)
                list_del(&f->list);
                kfree(f);
        }
-       list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
-               list_del(&f->list);
-               kfree(f);
+       list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+                                list) {
+               list_del(&vlf->list);
+               kfree(vlf);
        }
 
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+       spin_lock_bh(&adapter->cloud_filter_list_lock);
+       list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+               list_del(&cf->list);
+               kfree(cf);
+       }
+       spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
        free_netdev(netdev);
 
        pci_disable_pcie_error_reporting(pdev);
index 50ce0d6..3c76c81 100644 (file)
@@ -161,7 +161,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
               VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
               VIRTCHNL_VF_OFFLOAD_ENCAP |
               VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
-              VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+              VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+              VIRTCHNL_VF_OFFLOAD_ADQ;
 
        adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
        adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
@@ -344,6 +345,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter)
 void i40evf_map_queues(struct i40evf_adapter *adapter)
 {
        struct virtchnl_irq_map_info *vimi;
+       struct virtchnl_vector_map *vecmap;
        int v_idx, q_vectors, len;
        struct i40e_q_vector *q_vector;
 
@@ -367,17 +369,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
        vimi->num_vectors = adapter->num_msix_vectors;
        /* Queue vectors first */
        for (v_idx = 0; v_idx < q_vectors; v_idx++) {
-               q_vector = adapter->q_vectors + v_idx;
-               vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
-               vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
-               vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
-               vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask;
+               q_vector = &adapter->q_vectors[v_idx];
+               vecmap = &vimi->vecmap[v_idx];
+
+               vecmap->vsi_id = adapter->vsi_res->vsi_id;
+               vecmap->vector_id = v_idx + NONQ_VECS;
+               vecmap->txq_map = q_vector->ring_mask;
+               vecmap->rxq_map = q_vector->ring_mask;
+               vecmap->rxitr_idx = I40E_RX_ITR;
+               vecmap->txitr_idx = I40E_TX_ITR;
        }
        /* Misc vector last - this is only for AdminQ messages */
-       vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
-       vimi->vecmap[v_idx].vector_id = 0;
-       vimi->vecmap[v_idx].txq_map = 0;
-       vimi->vecmap[v_idx].rxq_map = 0;
+       vecmap = &vimi->vecmap[v_idx];
+       vecmap->vsi_id = adapter->vsi_res->vsi_id;
+       vecmap->vector_id = 0;
+       vecmap->txq_map = 0;
+       vecmap->rxq_map = 0;
 
        adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
        i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
@@ -459,7 +466,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
                more = true;
        }
 
-       veal = kzalloc(len, GFP_KERNEL);
+       veal = kzalloc(len, GFP_ATOMIC);
        if (!veal) {
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                return;
@@ -532,7 +539,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
                      (count * sizeof(struct virtchnl_ether_addr));
                more = true;
        }
-       veal = kzalloc(len, GFP_KERNEL);
+       veal = kzalloc(len, GFP_ATOMIC);
        if (!veal) {
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                return;
@@ -606,7 +613,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
                      (count * sizeof(u16));
                more = true;
        }
-       vvfl = kzalloc(len, GFP_KERNEL);
+       vvfl = kzalloc(len, GFP_ATOMIC);
        if (!vvfl) {
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                return;
@@ -678,7 +685,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
                      (count * sizeof(u16));
                more = true;
        }
-       vvfl = kzalloc(len, GFP_KERNEL);
+       vvfl = kzalloc(len, GFP_ATOMIC);
        if (!vvfl) {
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                return;
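
The GFP_KERNEL to GFP_ATOMIC conversions in this and the three preceding hunks follow from the locking rework: these allocations now run with mac_vlan_list_lock (a BH spinlock) held, where sleeping is forbidden. The pattern in miniature:

        /* Allocating under a spinlock must not sleep; GFP_ATOMIC fails more
         * readily than GFP_KERNEL, so the NULL path drops the lock and bails
         * out, exactly as the functions above do.
         */
        spin_lock_bh(&adapter->mac_vlan_list_lock);
        buf = kzalloc(len, GFP_ATOMIC);
        if (!buf) {
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                return;
        }
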
@@ -966,6 +973,205 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter)
        netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed);
 }
 
+/**
+ * i40evf_enable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable channels as specified by
+ * the user via the tc tool.
+ **/
+void i40evf_enable_channels(struct i40evf_adapter *adapter)
+{
+       struct virtchnl_tc_info *vti = NULL;
+       u16 len;
+       int i;
+
+       if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+               /* bail because we already have a command pending */
+               dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+                       adapter->current_op);
+               return;
+       }
+
+       len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
+              sizeof(struct virtchnl_tc_info);
+
+       vti = kzalloc(len, GFP_KERNEL);
+       if (!vti)
+               return;
+       vti->num_tc = adapter->num_tc;
+       for (i = 0; i < vti->num_tc; i++) {
+               vti->list[i].count = adapter->ch_config.ch_info[i].count;
+               vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+               vti->list[i].pad = 0;
+               vti->list[i].max_tx_rate =
+                               adapter->ch_config.ch_info[i].max_tx_rate;
+       }
+
+       adapter->ch_config.state = __I40EVF_TC_RUNNING;
+       adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+       adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
+       adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+       i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS,
+                          (u8 *)vti, len);
+       kfree(vti);
+}
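
The length computed above sizes a single variable-length virtchnl message: a fixed header followed by one channel entry per TC. Assuming the virtchnl definitions look roughly like this sketch (not the authoritative header), the layout is:

        struct virtchnl_channel_info {
                u16 count;      /* queues in this channel */
                u16 offset;     /* first queue of this channel */
                u32 pad;
                u64 max_tx_rate;
        };

        struct virtchnl_tc_info {
                u32 num_tc;
                u32 pad;
                struct virtchnl_channel_info list[1];   /* num_tc entries follow */
        };

With a one-element trailing array, the computed len includes one spare entry of slack, which is harmless for the allocation and the send.
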
+
+/**
+ * i40evf_disable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable channels that are configured.
+ **/
+void i40evf_disable_channels(struct i40evf_adapter *adapter)
+{
+       if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+               /* bail because we already have a command pending */
+               dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+                       adapter->current_op);
+               return;
+       }
+
+       adapter->ch_config.state = __I40EVF_TC_INVALID;
+       adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+       adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
+       adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+       i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS,
+                          NULL, 0);
+}
+
+/**
+ * i40evf_print_cloud_filter
+ * @adapter: adapter structure
+ * @f: cloud filter to print
+ *
+ * Print the cloud filter
+ **/
+static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
+                                     struct virtchnl_filter *f)
+{
+       switch (f->flow_type) {
+       case VIRTCHNL_TCP_V4_FLOW:
+               dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
+                        &f->data.tcp_spec.dst_mac,
+                        &f->data.tcp_spec.src_mac,
+                        ntohs(f->data.tcp_spec.vlan_id),
+                        &f->data.tcp_spec.dst_ip[0],
+                        &f->data.tcp_spec.src_ip[0],
+                        ntohs(f->data.tcp_spec.dst_port),
+                        ntohs(f->data.tcp_spec.src_port));
+               break;
+       case VIRTCHNL_TCP_V6_FLOW:
+               dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
+                        &f->data.tcp_spec.dst_mac,
+                        &f->data.tcp_spec.src_mac,
+                        ntohs(f->data.tcp_spec.vlan_id),
+                        &f->data.tcp_spec.dst_ip,
+                        &f->data.tcp_spec.src_ip,
+                        ntohs(f->data.tcp_spec.dst_port),
+                        ntohs(f->data.tcp_spec.src_port));
+               break;
+       }
+}
+
+/**
+ * i40evf_add_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF add cloud filters as specified
+ * by the user via the tc tool.
+ **/
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter)
+{
+       struct i40evf_cloud_filter *cf;
+       struct virtchnl_filter *f;
+       int len = 0, count = 0;
+
+       if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+               /* bail because we already have a command pending */
+               dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n",
+                       adapter->current_op);
+               return;
+       }
+       list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+               if (cf->add) {
+                       count++;
+                       break;
+               }
+       }
+       if (!count) {
+               adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+               return;
+       }
+       len = sizeof(struct virtchnl_filter);
+       f = kzalloc(len, GFP_KERNEL);
+       if (!f)
+               return;
+
+       /* only mark the op in flight once the message buffer exists */
+       adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
+
+       list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+               if (cf->add) {
+                       memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+                       cf->add = false;
+                       cf->state = __I40EVF_CF_ADD_PENDING;
+                       i40evf_send_pf_msg(adapter,
+                                          VIRTCHNL_OP_ADD_CLOUD_FILTER,
+                                          (u8 *)f, len);
+               }
+       }
+       kfree(f);
+}
+
+/**
+ * i40evf_del_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF delete cloud filters as specified
+ * by the user via the tc tool.
+ **/
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter)
+{
+       struct i40evf_cloud_filter *cf, *cftmp;
+       struct virtchnl_filter *f;
+       int len = 0, count = 0;
+
+       if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+               /* bail because we already have a command pending */
+               dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n",
+                       adapter->current_op);
+               return;
+       }
+       list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+               if (cf->del) {
+                       count++;
+                       break;
+               }
+       }
+       if (!count) {
+               adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+               return;
+       }
+       len = sizeof(struct virtchnl_filter);
+       f = kzalloc(len, GFP_KERNEL);
+       if (!f)
+               return;
+
+       /* only mark the op in flight once the message buffer exists */
+       adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
+
+       list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+               if (cf->del) {
+                       memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+                       cf->del = false;
+                       cf->state = __I40EVF_CF_DEL_PENDING;
+                       i40evf_send_pf_msg(adapter,
+                                          VIRTCHNL_OP_DEL_CLOUD_FILTER,
+                                          (u8 *)f, len);
+               }
+       }
+       kfree(f);
+}
+
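For reference, the ENABLE_CHANNELS message built in i40evf_enable_channels() above is sized as a header plus one trailing per-TC entry. A minimal userspace sketch of that sizing follows; the demo_* structures are illustrative stand-ins for the virtchnl definitions (the canonical layout lives in include/linux/avf/virtchnl.h), not a copy of them:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-ins for virtchnl_channel_info / virtchnl_tc_info. */
struct demo_channel_info {
	uint16_t count;		/* queues in this traffic class */
	uint16_t offset;	/* first queue index of this TC */
	uint32_t pad;
	uint64_t max_tx_rate;
};

struct demo_tc_info {
	uint32_t num_tc;
	uint32_t pad;
	struct demo_channel_info list[1];	/* trailing per-TC entries */
};

int main(void)
{
	unsigned int num_tc = 4;
	size_t len = num_tc * sizeof(struct demo_channel_info) +
		     sizeof(struct demo_tc_info);

	printf("ENABLE_CHANNELS message for %u TCs: %zu bytes\n", num_tc, len);
	return 0;
}

With a layout like this the computation over-allocates by one entry, since list[1] already reserves the first one; for a variable-length message that is harmless.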
 /**
  * i40evf_request_reset
  * @adapter: adapter structure
@@ -1011,14 +1217,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                        if (adapter->link_up == link_up)
                                break;
 
-                       /* If we get link up message and start queues before
-                        * our queues are configured it will trigger a TX hang.
-                        * In that case, just ignore the link status message,
-                        * we'll get another one after we enable queues and
-                        * actually prepared to send traffic.
-                        */
-                       if (link_up && adapter->state != __I40EVF_RUNNING)
-                               break;
+                       if (link_up) {
+                               /* If we get a link up message and start queues
+                                * before our queues are configured it will
+                                * trigger a TX hang. In that case, just ignore
+                                * the link status message; we'll get another
+                                * one after we enable queues and are actually
+                                * ready to send traffic.
+                                */
+                               if (adapter->state != __I40EVF_RUNNING)
+                                       break;
+
+                               /* For an ADq-enabled VF, we reconfigure VSIs
+                                * and re-allocate queues. Hence wait until all
+                                * queues are enabled.
+                                */
+                               if (adapter->flags &
+                                   I40EVF_FLAG_QUEUES_DISABLED)
+                                       break;
+                       }
 
                        adapter->link_up = link_up;
                        if (link_up) {
@@ -1031,7 +1248,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                        i40evf_print_link_message(adapter);
                        break;
                case VIRTCHNL_EVENT_RESET_IMPENDING:
-                       dev_info(&adapter->pdev->dev, "PF reset warning received\n");
+                       dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
                        if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
                                adapter->flags |= I40EVF_FLAG_RESET_PENDING;
                                dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
@@ -1063,6 +1280,57 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                        dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
                                i40evf_stat_str(&adapter->hw, v_retval));
                        break;
+               case VIRTCHNL_OP_ENABLE_CHANNELS:
+                       dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
+                               i40evf_stat_str(&adapter->hw, v_retval));
+                       adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+                       adapter->ch_config.state = __I40EVF_TC_INVALID;
+                       netdev_reset_tc(netdev);
+                       netif_tx_start_all_queues(netdev);
+                       break;
+               case VIRTCHNL_OP_DISABLE_CHANNELS:
+                       dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
+                               i40evf_stat_str(&adapter->hw, v_retval));
+                       adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+                       adapter->ch_config.state = __I40EVF_TC_RUNNING;
+                       netif_tx_start_all_queues(netdev);
+                       break;
+               case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+                       struct i40evf_cloud_filter *cf, *cftmp;
+
+                       list_for_each_entry_safe(cf, cftmp,
+                                                &adapter->cloud_filter_list,
+                                                list) {
+                               if (cf->state == __I40EVF_CF_ADD_PENDING) {
+                                       cf->state = __I40EVF_CF_INVALID;
+                                       dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
+                                                i40evf_stat_str(&adapter->hw,
+                                                                v_retval));
+                                       i40evf_print_cloud_filter(adapter,
+                                                                 &cf->f);
+                                       list_del(&cf->list);
+                                       kfree(cf);
+                                       adapter->num_cloud_filters--;
+                               }
+                       }
+                       }
+                       break;
+               case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+                       struct i40evf_cloud_filter *cf;
+
+                       list_for_each_entry(cf, &adapter->cloud_filter_list,
+                                           list) {
+                               if (cf->state == __I40EVF_CF_DEL_PENDING) {
+                                       cf->state = __I40EVF_CF_ACTIVE;
+                                       dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
+                                                i40evf_stat_str(&adapter->hw,
+                                                                v_retval));
+                                       i40evf_print_cloud_filter(adapter,
+                                                                 &cf->f);
+                               }
+                       }
+                       }
+                       break;
                default:
                        dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
                                v_retval,
@@ -1102,6 +1370,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
        case VIRTCHNL_OP_ENABLE_QUEUES:
                /* enable transmits */
                i40evf_irq_enable(adapter, true);
+               adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED;
                break;
        case VIRTCHNL_OP_DISABLE_QUEUES:
                i40evf_free_all_tx_resources(adapter);
@@ -1156,6 +1425,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                }
                }
                break;
+       case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+               struct i40evf_cloud_filter *cf;
+
+               list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+                       if (cf->state == __I40EVF_CF_ADD_PENDING)
+                               cf->state = __I40EVF_CF_ACTIVE;
+               }
+               }
+               break;
+       case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+               struct i40evf_cloud_filter *cf, *cftmp;
+
+               list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+                                        list) {
+                       if (cf->state == __I40EVF_CF_DEL_PENDING) {
+                               cf->state = __I40EVF_CF_INVALID;
+                               list_del(&cf->list);
+                               kfree(cf);
+                               adapter->num_cloud_filters--;
+                       }
+               }
+               }
+               break;
        default:
                if (adapter->current_op && (v_opcode != adapter->current_op))
                        dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
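The completion handling above drives a small per-filter state machine. A standalone sketch of the transitions (state names mirror the __I40EVF_CF_* values; everything else is illustrative):

#include <stdio.h>

enum demo_cf_state { CF_INVALID, CF_ADD_PENDING, CF_ACTIVE, CF_DEL_PENDING };

/* Outcome of a virtchnl completion for a filter in the given state. */
static const char *demo_next(enum demo_cf_state s, int ok)
{
	switch (s) {
	case CF_ADD_PENDING:	/* VIRTCHNL_OP_ADD_CLOUD_FILTER completed */
		return ok ? "ACTIVE" : "INVALID (filter freed)";
	case CF_DEL_PENDING:	/* VIRTCHNL_OP_DEL_CLOUD_FILTER completed */
		return ok ? "INVALID (filter freed)" : "ACTIVE (restored)";
	default:
		return "no transition";
	}
}

int main(void)
{
	printf("ADD_PENDING + success -> %s\n", demo_next(CF_ADD_PENDING, 1));
	printf("ADD_PENDING + failure -> %s\n", demo_next(CF_ADD_PENDING, 0));
	printf("DEL_PENDING + failure -> %s\n", demo_next(CF_DEL_PENDING, 0));
	return 0;
}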
index 221f158..89edb9f 100644
@@ -3059,6 +3059,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
 
                for (i = 0; i < reta_entries; i++)
                        adapter->rss_indir_tbl[i] = indir[i];
+
+               ixgbe_store_reta(adapter);
        }
 
        /* Fill out the rss hash key */
@@ -3067,8 +3069,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
                ixgbe_store_key(adapter);
        }
 
-       ixgbe_store_reta(adapter);
-
        return 0;
 }
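The hunk above moves the indirection-table write inside the if (indir) guard, so a key-only update no longer rewrites the RETA. A minimal sketch of the corrected control flow (demo_* names are hypothetical stand-ins, not the driver's helpers):

#include <stdio.h>

static void demo_store_reta(void) { printf("RETA written\n"); }
static void demo_store_key(void)  { printf("hash key written\n"); }

/* Mirrors the fixed ordering: each table is programmed only when the
 * caller actually supplied it.
 */
static void demo_set_rxfh(const unsigned int *indir, const unsigned char *key)
{
	if (indir)
		demo_store_reta();
	if (key)
		demo_store_key();
}

int main(void)
{
	unsigned char key[40] = { 0 };

	demo_set_rxfh(NULL, key);	/* key-only update: RETA untouched */
	return 0;
}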
 
index 4242f02..ed4cbe9 100644
@@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
                return false;
 
        /* start at VMDq register offset for SR-IOV enabled setups */
-       pool = 0;
        reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
        for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
                /* If we are greater than indices move to next pool */
index 0da5aa2..b032091 100644
@@ -7703,7 +7703,8 @@ static void ixgbe_service_task(struct work_struct *work)
 
        if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
                ixgbe_ptp_overflow_check(adapter);
-               ixgbe_ptp_rx_hang(adapter);
+               if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+                       ixgbe_ptp_rx_hang(adapter);
                ixgbe_ptp_tx_hang(adapter);
        }
 
index 4400e49..e7623fe 100644
@@ -94,6 +94,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
 
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX   BIT(0)
+       "legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
 static int ixgbevf_get_link_ksettings(struct net_device *netdev,
                                      struct ethtool_link_ksettings *cmd)
 {
@@ -241,6 +248,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
                sizeof(drvinfo->version));
        strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
                sizeof(drvinfo->bus_info));
+
+       drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
 }
 
 static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -392,6 +401,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
                return IXGBEVF_TEST_LEN;
        case ETH_SS_STATS:
                return IXGBEVF_STATS_LEN;
+       case ETH_SS_PRIV_FLAGS:
+               return IXGBEVF_PRIV_FLAGS_STR_LEN;
        default:
                return -EINVAL;
        }
@@ -496,6 +507,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
                        p += ETH_GSTRING_LEN;
                }
                break;
+       case ETH_SS_PRIV_FLAGS:
+               memcpy(data, ixgbevf_priv_flags_strings,
+                      IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+               break;
        }
 }
 
@@ -888,6 +903,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
        return err;
 }
 
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       u32 priv_flags = 0;
+
+       if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+               priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+       return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       unsigned int flags = adapter->flags;
+
+       flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+       if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+               flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+       if (flags != adapter->flags) {
+               adapter->flags = flags;
+
+               /* reset interface to repopulate queues */
+               if (netif_running(netdev))
+                       ixgbevf_reinit_locked(adapter);
+       }
+
+       return 0;
+}
+
 static const struct ethtool_ops ixgbevf_ethtool_ops = {
        .get_drvinfo            = ixgbevf_get_drvinfo,
        .get_regs_len           = ixgbevf_get_regs_len,
@@ -909,6 +955,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
        .get_rxfh_key_size      = ixgbevf_get_rxfh_key_size,
        .get_rxfh               = ixgbevf_get_rxfh,
        .get_link_ksettings     = ixgbevf_get_link_ksettings,
+       .get_priv_flags         = ixgbevf_get_priv_flags,
+       .set_priv_flags         = ixgbevf_set_priv_flags,
 };
 
 void ixgbevf_set_ethtool_ops(struct net_device *netdev)
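The private-flags plumbing added above relies on an implicit ethtool contract: bit i of the word returned by .get_priv_flags describes entry i of the ETH_SS_PRIV_FLAGS string table. A standalone sketch of that pairing (demo_* names are hypothetical):

#include <stdio.h>

#define DEMO_GSTRING_LEN 32	/* stands in for ETH_GSTRING_LEN */

static const char demo_priv_flags_strings[][DEMO_GSTRING_LEN] = {
#define DEMO_FLAG_LEGACY_RX (1u << 0)
	"legacy-rx",
};

int main(void)
{
	unsigned int flags = DEMO_FLAG_LEGACY_RX;
	size_t i;

	/* each string is reported against the matching flag bit */
	for (i = 0; i < sizeof(demo_priv_flags_strings) / DEMO_GSTRING_LEN; i++)
		printf("%s: %s\n", demo_priv_flags_strings[i],
		       (flags & (1u << i)) ? "on" : "off");
	return 0;
}

From userspace these are the flags driven by ethtool --show-priv-flags and ethtool --set-priv-flags <dev> legacy-rx on|off.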
index f695242..f65ca15 100644
@@ -89,19 +89,15 @@ struct ixgbevf_rx_queue_stats {
 };
 
 enum ixgbevf_ring_state_t {
+       __IXGBEVF_RX_3K_BUFFER,
+       __IXGBEVF_RX_BUILD_SKB_ENABLED,
        __IXGBEVF_TX_DETECT_HANG,
        __IXGBEVF_HANG_CHECK_ARMED,
 };
 
-#define check_for_tx_hang(ring) \
-       test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
-       set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
-       clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-
 struct ixgbevf_ring {
        struct ixgbevf_ring *next;
+       struct ixgbevf_q_vector *q_vector;      /* backpointer to q_vector */
        struct net_device *netdev;
        struct device *dev;
        void *desc;                     /* descriptor ring memory */
@@ -133,7 +129,7 @@ struct ixgbevf_ring {
         */
        u16 reg_idx;
        int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define IXGBEVF_RX_BUFFER_WRITE        16      /* Must be power of 2 */
@@ -156,12 +152,20 @@ struct ixgbevf_ring {
 /* Supported Rx Buffer Sizes */
 #define IXGBEVF_RXBUFFER_256   256    /* Used for packet split */
 #define IXGBEVF_RXBUFFER_2048  2048
+#define IXGBEVF_RXBUFFER_3072  3072
 
 #define IXGBEVF_RX_HDR_SIZE    IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ       IXGBEVF_RXBUFFER_2048
 
 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 
+#define IXGBEVF_SKB_PAD                (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+       (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB    IXGBEVF_RXBUFFER_2048
+#endif
+
 #define IXGBE_TX_FLAGS_CSUM            BIT(0)
 #define IXGBE_TX_FLAGS_VLAN            BIT(1)
 #define IXGBE_TX_FLAGS_TSO             BIT(2)
@@ -170,6 +174,50 @@ struct ixgbevf_ring {
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK  0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT      16
 
+#define ring_uses_large_buffer(ring) \
+       test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+       set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+       clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+       test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+       set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+       clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return IXGBEVF_RXBUFFER_3072;
+
+       if (ring_uses_build_skb(ring))
+               return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+       return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return 1;
+#endif
+       return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+       test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+       set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+       clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
 struct ixgbevf_ring_container {
        struct ixgbevf_ring *ring;      /* pointer to linked list of rings */
        unsigned int total_bytes;       /* total bytes processed this int */
@@ -194,7 +242,11 @@ struct ixgbevf_q_vector {
        u16 itr; /* Interrupt throttle rate written to EITR */
        struct napi_struct napi;
        struct ixgbevf_ring_container rx, tx;
+       struct rcu_head rcu;    /* to avoid race with update stats on free */
        char name[IFNAMSIZ + 9];
+
+       /* for dynamic allocation of rings associated with this q_vector */
+       struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
 #ifdef CONFIG_NET_RX_BUSY_POLL
        unsigned int state;
 #define IXGBEVF_QV_STATE_IDLE          0
@@ -331,6 +383,8 @@ struct ixgbevf_adapter {
 
        u32 *rss_key;
        u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+       u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX                BIT(1)
 };
 
 enum ixbgevf_state_t {
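On systems with PAGE_SIZE < 8192, the limits above come from packing a frame, its headroom and the shared info into half a page. A userspace sketch of the arithmetic; the 320-byte skb_shared_info and 64-byte NET_SKB_PAD are typical x86-64 values assumed for illustration, the kernel macros derive them from the real struct sizes:

#include <stdio.h>

#define DEMO_PAGE_SIZE	4096
#define DEMO_SKB_PAD	(64 + 2)	/* NET_SKB_PAD + NET_IP_ALIGN, typical */
#define DEMO_SHINFO	320		/* sizeof(struct skb_shared_info), typical */
#define DEMO_ALIGN(x)	(((x) + 63) & ~63)	/* SKB_DATA_ALIGN, 64B cachelines */

int main(void)
{
	int half_page = DEMO_PAGE_SIZE / 2;			/* two buffers per page */
	int with_overhead = half_page - DEMO_ALIGN(DEMO_SHINFO);/* SKB_WITH_OVERHEAD */
	int max_frame = with_overhead - DEMO_SKB_PAD;		/* MAX_FRAME_BUILD_SKB */

	printf("largest build_skb frame in half a page: %d bytes\n", max_frame);
	return 0;
}

Frames above that limit make ixgbevf_set_rx_buffer_len() fall back to 3K buffers in order-1 pages via the __IXGBEVF_RX_3K_BUFFER flag.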
index 9b3d43d..f373071 100644
@@ -130,6 +130,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
 static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
 static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
 static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+                                 struct ixgbevf_rx_buffer *old_buff);
 
 static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
 {
@@ -527,6 +530,49 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+                                               const unsigned int size)
+{
+       struct ixgbevf_rx_buffer *rx_buffer;
+
+       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       prefetchw(rx_buffer->page);
+
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     size,
+                                     DMA_FROM_DEVICE);
+
+       rx_buffer->pagecnt_bias--;
+
+       return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+                                 struct ixgbevf_rx_buffer *rx_buffer)
+{
+       if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+               /* hand second half of page back to the ring */
+               ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+       } else {
+               /* We are not reusing the buffer so unmap it and free
+                * any references we are holding to it
+                */
+               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+                                    ixgbevf_rx_pg_size(rx_ring),
+                                    DMA_FROM_DEVICE,
+                                    IXGBEVF_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
+       }
+
+       /* clear contents of rx_buffer */
+       rx_buffer->page = NULL;
+}
+
 /**
  * ixgbevf_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
@@ -554,32 +600,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
        return true;
 }
 
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+       return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
 static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
                                      struct ixgbevf_rx_buffer *bi)
 {
        struct page *page = bi->page;
-       dma_addr_t dma = bi->dma;
+       dma_addr_t dma;
 
        /* since we are recycling buffers we should seldom need to alloc */
        if (likely(page))
                return true;
 
        /* alloc new page for storage */
-       page = dev_alloc_page();
+       page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
        if (unlikely(!page)) {
                rx_ring->rx_stats.alloc_rx_page_failed++;
                return false;
        }
 
        /* map page for use */
-       dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+       dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+                                ixgbevf_rx_pg_size(rx_ring),
                                 DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
 
        /* if mapping failed free memory back to system since
         * there isn't much point in holding memory we can't use
         */
        if (dma_mapping_error(rx_ring->dev, dma)) {
-               __free_page(page);
+               __free_pages(page, ixgbevf_rx_pg_order(rx_ring));
 
                rx_ring->rx_stats.alloc_rx_page_failed++;
                return false;
@@ -587,7 +639,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
 
        bi->dma = dma;
        bi->page = page;
-       bi->page_offset = 0;
+       bi->page_offset = ixgbevf_rx_offset(rx_ring);
        bi->pagecnt_bias = 1;
        rx_ring->rx_stats.alloc_rx_page++;
 
@@ -621,7 +673,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
                /* sync the buffer for use by the device */
                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
                                                 bi->page_offset,
-                                                IXGBEVF_RX_BUFSZ,
+                                                ixgbevf_rx_bufsz(rx_ring),
                                                 DMA_FROM_DEVICE);
 
                /* Refresh the desc even if pkt_addr didn't change
@@ -734,11 +786,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
        return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
-                                     struct page *page,
-                                     const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
 {
-       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+       struct page *page = rx_buffer->page;
 
        /* avoid re-using remote pages */
        if (unlikely(ixgbevf_page_is_reserved(page)))
@@ -746,17 +797,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_ref_count(page) != pagecnt_bias))
+       if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
                return false;
-
-       /* flip page offset to other buffer */
-       rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
 #else
-       /* move offset up to the next cache line */
-       rx_buffer->page_offset += truesize;
+#define IXGBEVF_LAST_OFFSET \
+       (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
 
-       if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+       if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
                return false;
 
 #endif
@@ -765,7 +812,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
         * the pagecnt_bias and page count so that we fully restock the
         * number of references the driver holds.
         */
-       if (unlikely(pagecnt_bias == 1)) {
+       if (unlikely(!pagecnt_bias)) {
                page_ref_add(page, USHRT_MAX);
                rx_buffer->pagecnt_bias = USHRT_MAX;
        }
@@ -777,127 +824,81 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
  * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
  * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
  *
  * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
  **/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
                                struct ixgbevf_rx_buffer *rx_buffer,
-                               u16 size,
-                               union ixgbe_adv_rx_desc *rx_desc,
-                               struct sk_buff *skb)
+                               struct sk_buff *skb,
+                               unsigned int size)
 {
-       struct page *page = rx_buffer->page;
-       unsigned char *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
-       unsigned int truesize = IXGBEVF_RX_BUFSZ;
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
-       unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+       unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+                               SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+                               SKB_DATA_ALIGN(size);
+#endif
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+                       rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
 #endif
-       unsigned int pull_len;
-
-       if (unlikely(skb_is_nonlinear(skb)))
-               goto add_tail_frag;
-
-       if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
-               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
-               /* page is not reserved, we can reuse buffer as is */
-               if (likely(!ixgbevf_page_is_reserved(page)))
-                       return true;
-
-               /* this page cannot be reused so discard it */
-               return false;
-       }
-
-       /* we need the header to contain the greater of either ETH_HLEN or
-        * 60 bytes if the skb->len is less than 60 for skb_pad.
-        */
-       pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
-
-       /* align pull length to size of long to optimize memcpy performance */
-       memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
-
-       /* update all of the pointers */
-       va += pull_len;
-       size -= pull_len;
-
-add_tail_frag:
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                       (unsigned long)va & ~PAGE_MASK, size, truesize);
-
-       return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
 }
 
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
-                                              union ixgbe_adv_rx_desc *rx_desc,
-                                              struct sk_buff *skb)
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+                                     struct ixgbevf_rx_buffer *rx_buffer,
+                                     union ixgbe_adv_rx_desc *rx_desc,
+                                     unsigned int size)
 {
-       struct ixgbevf_rx_buffer *rx_buffer;
-       struct page *page;
-       u16 size = le16_to_cpu(rx_desc->wb.upper.length);
-
-       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-       page = rx_buffer->page;
-       prefetchw(page);
-
-       /* we are reusing so sync this buffer for CPU use */
-       dma_sync_single_range_for_cpu(rx_ring->dev,
-                                     rx_buffer->dma,
-                                     rx_buffer->page_offset,
-                                     size,
-                                     DMA_FROM_DEVICE);
-
-       if (likely(!skb)) {
-               void *page_addr = page_address(page) +
-                                 rx_buffer->page_offset;
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+       unsigned int headlen;
+       struct sk_buff *skb;
 
-               /* prefetch first cache line of first page */
-               prefetch(page_addr);
+       /* prefetch first cache line of first page */
+       prefetch(va);
 #if L1_CACHE_BYTES < 128
-               prefetch(page_addr + L1_CACHE_BYTES);
+       prefetch(va + L1_CACHE_BYTES);
 #endif
 
-               /* allocate a skb to store the frags */
-               skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-                                               IXGBEVF_RX_HDR_SIZE);
-               if (unlikely(!skb)) {
-                       rx_ring->rx_stats.alloc_rx_buff_failed++;
-                       return NULL;
-               }
+       /* allocate a skb to store the frags */
+       skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+       if (unlikely(!skb))
+               return NULL;
 
-               /* we will be copying header into skb->data in
-                * pskb_may_pull so it is in our interest to prefetch
-                * it now to avoid a possible cache miss
-                */
-               prefetchw(skb->data);
-       }
+       /* Determine available headroom for copy */
+       headlen = size;
+       if (headlen > IXGBEVF_RX_HDR_SIZE)
+               headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
 
-       /* pull page into skb */
-       if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
-               /* hand second half of page back to the ring */
-               ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+       /* align pull length to size of long to optimize memcpy performance */
+       memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+       /* update all of the pointers */
+       size -= headlen;
+       if (size) {
+               skb_add_rx_frag(skb, 0, rx_buffer->page,
+                               (va + headlen) - page_address(rx_buffer->page),
+                               size, truesize);
+#if (PAGE_SIZE < 8192)
+               rx_buffer->page_offset ^= truesize;
+#else
+               rx_buffer->page_offset += truesize;
+#endif
        } else {
-               /* We are not reusing the buffer so unmap it and free
-                * any references we are holding to it
-                */
-               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-                                    PAGE_SIZE, DMA_FROM_DEVICE,
-                                    IXGBEVF_RX_DMA_ATTR);
-               __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+               rx_buffer->pagecnt_bias++;
        }
 
-       /* clear contents of buffer_info */
-       rx_buffer->dma = 0;
-       rx_buffer->page = NULL;
-
        return skb;
 }
 
@@ -909,6 +910,45 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
        IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
 }
 
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+                                        struct ixgbevf_rx_buffer *rx_buffer,
+                                        union ixgbe_adv_rx_desc *rx_desc,
+                                        unsigned int size)
+{
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+                               SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+#endif
+       struct sk_buff *skb;
+
+       /* prefetch first cache line of first page */
+       prefetch(va);
+#if L1_CACHE_BYTES < 128
+       prefetch(va + L1_CACHE_BYTES);
+#endif
+
+       /* build an skb around the page buffer */
+       skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+       if (unlikely(!skb))
+               return NULL;
+
+       /* update pointers within the skb to store the data */
+       skb_reserve(skb, IXGBEVF_SKB_PAD);
+       __skb_put(skb, size);
+
+       /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
+#endif
+
+       return skb;
+}
+
 static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                                struct ixgbevf_ring *rx_ring,
                                int budget)
@@ -919,6 +958,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 
        while (likely(total_rx_packets < budget)) {
                union ixgbe_adv_rx_desc *rx_desc;
+               struct ixgbevf_rx_buffer *rx_buffer;
+               unsigned int size;
 
                /* return some buffers to hardware, one at a time is too slow */
                if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
@@ -927,8 +968,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                }
 
                rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
-               if (!rx_desc->wb.upper.length)
+               size = le16_to_cpu(rx_desc->wb.upper.length);
+               if (!size)
                        break;
 
                /* This memory barrier is needed to keep us from reading
@@ -937,15 +978,26 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                 */
                rmb();
 
+               rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
                /* retrieve a buffer from the ring */
-               skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+               if (skb)
+                       ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+               else if (ring_uses_build_skb(rx_ring))
+                       skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+                                               rx_desc, size);
+               else
+                       skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+                                                   rx_desc, size);
 
                /* exit if we failed to retrieve a buffer */
                if (!skb) {
                        rx_ring->rx_stats.alloc_rx_buff_failed++;
+                       rx_buffer->pagecnt_bias++;
                        break;
                }
 
+               ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
                cleaned_count++;
 
                /* fetch next buffer in frame if non-eop */
@@ -1260,85 +1312,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
-                                    int r_idx)
-{
-       struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-       a->rx_ring[r_idx]->next = q_vector->rx.ring;
-       q_vector->rx.ring = a->rx_ring[r_idx];
-       q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
-                                    int t_idx)
-{
-       struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-       a->tx_ring[t_idx]->next = q_vector->tx.ring;
-       q_vector->tx.ring = a->tx_ring[t_idx];
-       q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code.  Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible.  You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
-       int q_vectors;
-       int v_start = 0;
-       int rxr_idx = 0, txr_idx = 0;
-       int rxr_remaining = adapter->num_rx_queues;
-       int txr_remaining = adapter->num_tx_queues;
-       int i, j;
-       int rqpv, tqpv;
-
-       q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       /* The ideal configuration...
-        * We have enough vectors to map one per queue.
-        */
-       if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
-               for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
-                       map_vector_to_rxq(adapter, v_start, rxr_idx);
-
-               for (; txr_idx < txr_remaining; v_start++, txr_idx++)
-                       map_vector_to_txq(adapter, v_start, txr_idx);
-               return 0;
-       }
-
-       /* If we don't have enough vectors for a 1-to-1
-        * mapping, we'll have to group them so there are
-        * multiple queues per vector.
-        */
-       /* Re-adjusting *qpv takes care of the remainder. */
-       for (i = v_start; i < q_vectors; i++) {
-               rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
-               for (j = 0; j < rqpv; j++) {
-                       map_vector_to_rxq(adapter, i, rxr_idx);
-                       rxr_idx++;
-                       rxr_remaining--;
-               }
-       }
-       for (i = v_start; i < q_vectors; i++) {
-               tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
-               for (j = 0; j < tqpv; j++) {
-                       map_vector_to_txq(adapter, i, txr_idx);
-                       txr_idx++;
-                       txr_remaining--;
-               }
-       }
-
-       return 0;
-}
-
 /**
  * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
  * @adapter: board private structure
@@ -1411,20 +1384,6 @@ free_queue_irqs:
        return err;
 }
 
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
-       int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       for (i = 0; i < q_vectors; i++) {
-               struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
-               q_vector->rx.ring = NULL;
-               q_vector->tx.ring = NULL;
-               q_vector->rx.count = 0;
-               q_vector->tx.count = 0;
-       }
-}
-
 /**
  * ixgbevf_request_irq - initialize interrupts
  * @adapter: board private structure
@@ -1464,8 +1423,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
                free_irq(adapter->msix_entries[i].vector,
                         adapter->q_vector[i]);
        }
-
-       ixgbevf_reset_q_vectors(adapter);
 }
 
 /**
@@ -1587,7 +1544,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
 
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT        2
 
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+                                    struct ixgbevf_ring *ring, int index)
 {
        struct ixgbe_hw *hw = &adapter->hw;
        u32 srrctl;
@@ -1595,7 +1553,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
        srrctl = IXGBE_SRRCTL_DROP_EN;
 
        srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
-       srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       if (ring_uses_large_buffer(ring))
+               srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       else
+               srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
        srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
        IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
@@ -1767,10 +1728,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
        ring->next_to_use = 0;
        ring->next_to_alloc = 0;
 
-       ixgbevf_configure_srrctl(adapter, reg_idx);
+       ixgbevf_configure_srrctl(adapter, ring, reg_idx);
+
+       /* RXDCTL.RLPML does not work on 82599 */
+       if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+               rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+                           IXGBE_RXDCTL_RLPML_EN);
 
-       /* allow any size packet since we can handle overflow */
-       rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+#if (PAGE_SIZE < 8192)
+               /* Limit the maximum frame size so we don't overrun the skb */
+               if (ring_uses_build_skb(ring) &&
+                   !ring_uses_large_buffer(ring))
+                       rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+                                 IXGBE_RXDCTL_RLPML_EN;
+#endif
+       }
 
        rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
        IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
@@ -1779,6 +1751,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
        ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
 }
 
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+                                     struct ixgbevf_ring *rx_ring)
+{
+       struct net_device *netdev = adapter->netdev;
+       unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+       /* set build_skb and buffer size flags */
+       clear_ring_build_skb_enabled(rx_ring);
+       clear_ring_uses_large_buffer(rx_ring);
+
+       if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+               return;
+
+       set_ring_build_skb_enabled(rx_ring);
+
+#if (PAGE_SIZE < 8192)
+       if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+               return;
+
+       set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
 /**
  * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
  * @adapter: board private structure
@@ -1806,8 +1801,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring
         */
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+               ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+               ixgbevf_configure_rx_ring(adapter, rx_ring);
+       }
 }
 
 static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
@@ -2136,13 +2135,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
                dma_sync_single_range_for_cpu(rx_ring->dev,
                                              rx_buffer->dma,
                                              rx_buffer->page_offset,
-                                             IXGBEVF_RX_BUFSZ,
+                                             ixgbevf_rx_bufsz(rx_ring),
                                              DMA_FROM_DEVICE);
 
                /* free resources associated with mapping */
                dma_unmap_page_attrs(rx_ring->dev,
                                     rx_buffer->dma,
-                                    PAGE_SIZE,
+                                    ixgbevf_rx_pg_size(rx_ring),
                                     DMA_FROM_DEVICE,
                                     IXGBEVF_RX_DMA_ATTR);
 
@@ -2405,105 +2404,171 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 }
 
 /**
- * ixgbevf_alloc_queues - Allocate memory for all rings
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+{
+       int vector, v_budget;
+
+       /* It's easy to be greedy for MSI-X vectors, but it really
+        * doesn't do us much good if we have a lot more vectors
+        * than CPUs.  So let's be conservative and only ask for
+        * (roughly) the same number of vectors as there are CPUs.
+        * The default is to use pairs of vectors.
+        */
+       v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+       v_budget = min_t(int, v_budget, num_online_cpus());
+       v_budget += NON_Q_VECTORS;
+
+       adapter->msix_entries = kcalloc(v_budget,
+                                       sizeof(struct msix_entry), GFP_KERNEL);
+       if (!adapter->msix_entries)
+               return -ENOMEM;
+
+       for (vector = 0; vector < v_budget; vector++)
+               adapter->msix_entries[vector].entry = vector;
+
+       /* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+        * does not support any other modes, so we will simply fail here. Note
+        * that we clean up the msix_entries pointer elsewhere.
+        */
+       return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+                            struct ixgbevf_ring_container *head)
+{
+       ring->next = head->ring;
+       head->ring = ring;
+       head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
  * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
  *
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time.  The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
  **/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+                                 int txr_count, int txr_idx,
+                                 int rxr_count, int rxr_idx)
 {
+       struct ixgbevf_q_vector *q_vector;
        struct ixgbevf_ring *ring;
-       int rx = 0, tx = 0;
+       int ring_count, size;
+
+       ring_count = txr_count + rxr_count;
+       size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+       /* allocate q_vector and rings */
+       q_vector = kzalloc(size, GFP_KERNEL);
+       if (!q_vector)
+               return -ENOMEM;
+
+       /* initialize NAPI */
+       netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
+
+       /* tie q_vector and adapter together */
+       adapter->q_vector[v_idx] = q_vector;
+       q_vector->adapter = adapter;
+       q_vector->v_idx = v_idx;
 
-       for (; tx < adapter->num_tx_queues; tx++) {
-               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-               if (!ring)
-                       goto err_allocation;
+       /* initialize pointer to rings */
+       ring = q_vector->ring;
 
+       while (txr_count) {
+               /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
+
+               /* configure backlink on ring */
+               ring->q_vector = q_vector;
+
+               /* update q_vector Tx values */
+               ixgbevf_add_ring(ring, &q_vector->tx);
+
+               /* apply Tx specific ring traits */
                ring->count = adapter->tx_ring_count;
-               ring->queue_index = tx;
-               ring->reg_idx = tx;
+               ring->queue_index = txr_idx;
+               ring->reg_idx = txr_idx;
 
-               adapter->tx_ring[tx] = ring;
-       }
+               /* assign ring to adapter */
+               adapter->tx_ring[txr_idx] = ring;
+
+               /* update count and index */
+               txr_count--;
+               txr_idx++;
 
-       for (; rx < adapter->num_rx_queues; rx++) {
-               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-               if (!ring)
-                       goto err_allocation;
+               /* push pointer to next ring */
+               ring++;
+       }
 
+       while (rxr_count) {
+               /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
 
+               /* configure backlink on ring */
+               ring->q_vector = q_vector;
+
+               /* update q_vector Rx values */
+               ixgbevf_add_ring(ring, &q_vector->rx);
+
+               /* apply Rx specific ring traits */
                ring->count = adapter->rx_ring_count;
-               ring->queue_index = rx;
-               ring->reg_idx = rx;
+               ring->queue_index = rxr_idx;
+               ring->reg_idx = rxr_idx;
 
-               adapter->rx_ring[rx] = ring;
-       }
+               /* assign ring to adapter */
+               adapter->rx_ring[rxr_idx] = ring;
 
-       return 0;
+               /* update count and index */
+               rxr_count--;
+               rxr_idx++;
 
-err_allocation:
-       while (tx) {
-               kfree(adapter->tx_ring[--tx]);
-               adapter->tx_ring[tx] = NULL;
+               /* push pointer to next ring */
+               ring++;
        }
 
-       while (rx) {
-               kfree(adapter->rx_ring[--rx]);
-               adapter->rx_ring[rx] = NULL;
-       }
-       return -ENOMEM;
+       return 0;
 }
 
 /**
- * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
  * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
  *
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
  **/
-static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
 {
-       struct net_device *netdev = adapter->netdev;
-       int err;
-       int vector, v_budget;
-
-       /* It's easy to be greedy for MSI-X vectors, but it really
-        * doesn't do us much good if we have a lot more vectors
-        * than CPU's.  So let's be conservative and only ask for
-        * (roughly) the same number of vectors as there are CPU's.
-        * The default is to use pairs of vectors.
-        */
-       v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
-       v_budget = min_t(int, v_budget, num_online_cpus());
-       v_budget += NON_Q_VECTORS;
-
-       /* A failure in MSI-X entry allocation isn't fatal, but it does
-        * mean we disable MSI-X capabilities of the adapter.
-        */
-       adapter->msix_entries = kcalloc(v_budget,
-                                       sizeof(struct msix_entry), GFP_KERNEL);
-       if (!adapter->msix_entries)
-               return -ENOMEM;
+       struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+       struct ixgbevf_ring *ring;
 
-       for (vector = 0; vector < v_budget; vector++)
-               adapter->msix_entries[vector].entry = vector;
+       ixgbevf_for_each_ring(ring, q_vector->tx)
+               adapter->tx_ring[ring->queue_index] = NULL;
 
-       err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
-       if (err)
-               return err;
+       ixgbevf_for_each_ring(ring, q_vector->rx)
+               adapter->rx_ring[ring->queue_index] = NULL;
 
-       err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
-       if (err)
-               return err;
+       adapter->q_vector[v_idx] = NULL;
+       netif_napi_del(&q_vector->napi);
 
-       return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+       /* ixgbevf_get_stats() might access the rings on this vector,
+        * we must wait a grace period before freeing it.
+        */
+       kfree_rcu(q_vector, rcu);
 }
 
 /**
@@ -2515,35 +2580,53 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
  **/
 static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
 {
-       int q_idx, num_q_vectors;
-       struct ixgbevf_q_vector *q_vector;
+       int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+       int rxr_remaining = adapter->num_rx_queues;
+       int txr_remaining = adapter->num_tx_queues;
+       int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+       int err;
+
+       if (q_vectors >= (rxr_remaining + txr_remaining)) {
+               for (; rxr_remaining; v_idx++, q_vectors--) {
+                       int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+
+                       err = ixgbevf_alloc_q_vector(adapter, v_idx,
+                                                    0, 0, rqpv, rxr_idx);
+                       if (err)
+                               goto err_out;
 
-       num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+                       /* update counts and index */
+                       rxr_remaining -= rqpv;
+                       rxr_idx += rqpv;
+               }
+       }
+
+       for (; q_vectors; v_idx++, q_vectors--) {
+               int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+               int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
 
-       for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-               q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
-               if (!q_vector)
+               err = ixgbevf_alloc_q_vector(adapter, v_idx,
+                                            tqpv, txr_idx,
+                                            rqpv, rxr_idx);
+
+               if (err)
                        goto err_out;
-               q_vector->adapter = adapter;
-               q_vector->v_idx = q_idx;
-               netif_napi_add(adapter->netdev, &q_vector->napi,
-                              ixgbevf_poll, 64);
-               adapter->q_vector[q_idx] = q_vector;
+
+               /* update counts and index */
+               rxr_remaining -= rqpv;
+               rxr_idx += rqpv;
+               txr_remaining -= tqpv;
+               txr_idx += tqpv;
        }
 
        return 0;
 
 err_out:
-       while (q_idx) {
-               q_idx--;
-               q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               napi_hash_del(&q_vector->napi);
-#endif
-               netif_napi_del(&q_vector->napi);
-               kfree(q_vector);
-               adapter->q_vector[q_idx] = NULL;
+       while (v_idx) {
+               v_idx--;
+               ixgbevf_free_q_vector(adapter, v_idx);
        }
+
        return -ENOMEM;
 }
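The allocation loop above spreads rings as evenly as possible over the MSI-X vectors: each pass gives the current vector DIV_ROUND_UP(remaining, vectors_left) rings. A minimal standalone sketch of that arithmetic, with hypothetical queue counts (4 RX, 4 TX, 3 vectors) not taken from this patch:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int rxr_remaining = 4, txr_remaining = 4, q_vectors = 3;
	int v_idx;

	for (v_idx = 0; q_vectors; v_idx++, q_vectors--) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);

		printf("vector %d: %d rx, %d tx\n", v_idx, rqpv, tqpv);
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
	}
	return 0;	/* vectors get 2/2, 1/1, 1/1 */
}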
 
@@ -2557,17 +2640,11 @@ err_out:
  **/
 static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
 {
-       int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-               struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
+       int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 
-               adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               napi_hash_del(&q_vector->napi);
-#endif
-               netif_napi_del(&q_vector->napi);
-               kfree(q_vector);
+       while (q_vectors) {
+               q_vectors--;
+               ixgbevf_free_q_vector(adapter, q_vectors);
        }
 }
 
@@ -2611,12 +2688,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
                goto err_alloc_q_vectors;
        }
 
-       err = ixgbevf_alloc_queues(adapter);
-       if (err) {
-               pr_err("Unable to allocate memory for queues\n");
-               goto err_alloc_queues;
-       }
-
        hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
               (adapter->num_rx_queues > 1) ? "Enabled" :
               "Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
@@ -2624,8 +2695,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
        set_bit(__IXGBEVF_DOWN, &adapter->state);
 
        return 0;
-err_alloc_queues:
-       ixgbevf_free_q_vectors(adapter);
 err_alloc_q_vectors:
        ixgbevf_reset_interrupt_capability(adapter);
 err_set_interrupt:
@@ -2641,17 +2710,6 @@ err_set_interrupt:
  **/
 static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
 {
-       int i;
-
-       for (i = 0; i < adapter->num_tx_queues; i++) {
-               kfree(adapter->tx_ring[i]);
-               adapter->tx_ring[i] = NULL;
-       }
-       for (i = 0; i < adapter->num_rx_queues; i++) {
-               kfree(adapter->rx_ring[i]);
-               adapter->rx_ring[i] = NULL;
-       }
-
        adapter->num_tx_queues = 0;
        adapter->num_rx_queues = 0;
 
@@ -3088,9 +3146,14 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
                if (!err)
                        continue;
                hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
-               break;
+               goto err_setup_tx;
        }
 
+       return 0;
+err_setup_tx:
+       /* rewind the index freeing the rings as we go */
+       while (i--)
+               ixgbevf_free_tx_resources(adapter->tx_ring[i]);
        return err;
 }
 
@@ -3148,8 +3211,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
                if (!err)
                        continue;
                hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
-               break;
+               goto err_setup_rx;
        }
+
+       return 0;
+err_setup_rx:
+       /* rewind the index freeing the rings as we go */
+       while (i--)
+               ixgbevf_free_rx_resources(adapter->rx_ring[i]);
        return err;
 }
 
@@ -3244,28 +3313,31 @@ int ixgbevf_open(struct net_device *netdev)
 
        ixgbevf_configure(adapter);
 
-       /* Map the Tx/Rx rings to the vectors we were allotted.
-        * if request_irq will be called in this function map_rings
-        * must be called *before* up_complete
-        */
-       ixgbevf_map_rings_to_vectors(adapter);
-
        err = ixgbevf_request_irq(adapter);
        if (err)
                goto err_req_irq;
 
+       /* Notify the stack of the actual queue counts. */
+       err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+       if (err)
+               goto err_set_queues;
+
+       err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+       if (err)
+               goto err_set_queues;
+
        ixgbevf_up_complete(adapter);
 
        return 0;
 
+err_set_queues:
+       ixgbevf_free_irq(adapter);
 err_req_irq:
-       ixgbevf_down(adapter);
-err_setup_rx:
        ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
        ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
        ixgbevf_reset(adapter);
-
 err_setup_reset:
 
        return err;
@@ -3707,11 +3779,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
        return __ixgbevf_maybe_stop_tx(tx_ring, size);
 }
 
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+                                  struct ixgbevf_ring *tx_ring)
 {
-       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
        struct ixgbevf_tx_buffer *first;
-       struct ixgbevf_ring *tx_ring;
        int tso;
        u32 tx_flags = 0;
        u16 count = TXD_USE_COUNT(skb_headlen(skb));
@@ -3726,8 +3797,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
                return NETDEV_TX_OK;
        }
 
-       tx_ring = adapter->tx_ring[skb->queue_mapping];
-
        /* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
         *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
         *       + 2 desc gap to keep tail from touching head,
@@ -3780,6 +3849,29 @@ out_drop:
        return NETDEV_TX_OK;
 }
 
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       struct ixgbevf_ring *tx_ring;
+
+       if (skb->len <= 0) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* The minimum packet size for olinfo paylen is 17 so pad the skb
+        * in order to meet this minimum size requirement.
+        */
+       if (skb->len < 17) {
+               if (skb_padto(skb, 17))
+                       return NETDEV_TX_OK;
+               skb->len = 17;
+       }
+
+       tx_ring = adapter->tx_ring[skb->queue_mapping];
+       return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
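Note the explicit skb->len update above: skb_padto() zero-fills the tail but does not change skb->len. A hedged sketch of the equivalent using skb_put_padto(), which pads, updates the length and frees the skb on failure (not part of this patch):

	if (skb_put_padto(skb, 17))
		return NETDEV_TX_OK;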
+
 /**
  * ixgbevf_set_mac - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
@@ -3839,6 +3931,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
        /* must set new MTU before calling down or up */
        netdev->mtu = new_mtu;
 
+       if (netif_running(netdev))
+               ixgbevf_reinit_locked(adapter);
+
        return 0;
 }
 
@@ -3917,17 +4012,11 @@ static int ixgbevf_resume(struct pci_dev *pdev)
 
        rtnl_lock();
        err = ixgbevf_init_interrupt_scheme(adapter);
+       if (!err && netif_running(netdev))
+               err = ixgbevf_open(netdev);
        rtnl_unlock();
-       if (err) {
-               dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+       if (err)
                return err;
-       }
-
-       if (netif_running(netdev)) {
-               err = ixgbevf_open(netdev);
-               if (err)
-                       return err;
-       }
 
        netif_device_attach(netdev);
 
@@ -3953,6 +4042,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
 
        stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
 
+       rcu_read_lock();
        for (i = 0; i < adapter->num_rx_queues; i++) {
                ring = adapter->rx_ring[i];
                do {
@@ -3974,6 +4064,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
                stats->tx_bytes += bytes;
                stats->tx_packets += packets;
        }
+       rcu_read_unlock();
 }
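This rcu_read_lock() section is the reader side of the kfree_rcu() added in ixgbevf_free_q_vector(). A minimal sketch of the publish/retire pattern, not literal driver code:

	/* writer: unpublish the vector, then defer the free past a
	 * grace period so in-flight readers stay safe
	 */
	adapter->q_vector[v_idx] = NULL;
	kfree_rcu(q_vector, rcu);

	/* reader: the rings remain valid for the whole locked section */
	rcu_read_lock();
	ring = adapter->rx_ring[i];
	if (ring) {
		/* ... sample ring->stats ... */
	}
	rcu_read_unlock();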
 
 #define IXGBEVF_MAX_MAC_HDR_LEN                127
index a1d7b88..9418f6e 100644
 #define     MVPP2_RXQ_PACKET_OFFSET_MASK       0x70000000
 #define     MVPP2_RXQ_DISABLE_MASK             BIT(31)
 
+/* Top Registers */
+#define MVPP2_MH_REG(port)                     (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED                     BIT(5)
+
 /* Parser Registers */
 #define MVPP2_PRS_INIT_LOOKUP_REG              0x1000
 #define     MVPP2_PRS_PORT_LU_MAX              0xf
 #define MVPP2_ETH_TYPE_LEN             2
 #define MVPP2_PPPOE_HDR_SIZE           8
 #define MVPP2_VLAN_TAG_LEN             4
+#define MVPP2_VLAN_TAG_EDSA_LEN                8
 
 /* Lbtd 802.3 type */
 #define MVPP2_IP_LBDT_TYPE             0xfffa
@@ -609,35 +614,64 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_TCAM_LU_BYTE                 20
 #define MVPP2_PRS_TCAM_EN_OFFS(offs)           ((offs) + 2)
 #define MVPP2_PRS_TCAM_INV_WORD                        5
+
+#define MVPP2_PRS_VID_TCAM_BYTE         2
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33:
+ * 10 VLAN ID filter entries per port,
+ * 1 default VLAN filter entry per port.
+ * It is assumed that there are 3 ports to filter, not including the loopback
+ * port.
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX                11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY   (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY  (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
 /* Tcam entries ID */
 #define MVPP2_PE_DROP_ALL              0
 #define MVPP2_PE_FIRST_FREE_TID                1
-#define MVPP2_PE_LAST_FREE_TID         (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END     (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START   (MVPP2_PE_VID_FILT_RANGE_END - \
+                                        MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID          (MVPP2_PE_VID_FILT_RANGE_START - 1)
 #define MVPP2_PE_IP6_EXT_PROTO_UN      (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
 #define MVPP2_PE_MAC_MC_IP6            (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
 #define MVPP2_PE_IP6_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
 #define MVPP2_PE_IP4_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
 #define MVPP2_PE_LAST_DEFAULT_FLOW     (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW    (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED           (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED         (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED            (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED          (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED     (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED   (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED      (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED    (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT            (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT           (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW    (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_TAGGED           (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_EDSA_UNTAGGED         (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_TAGGED            (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_DSA_UNTAGGED          (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED     (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED   (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_TAGGED      (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED    (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_MH_DEFAULT            (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_DSA_DEFAULT           (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP6_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_IP4_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_ETH_TYPE_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_FLTR_DEFAULT      (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
 #define MVPP2_PE_VLAN_DBL              (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
 #define MVPP2_PE_VLAN_NONE             (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
 #define MVPP2_PE_MAC_MC_ALL            (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
 #define MVPP2_PE_MAC_PROMISCUOUS       (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
 #define MVPP2_PE_MAC_NON_PROMISCUOUS   (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
 
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+                                        ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port)  (MVPP2_PRS_VID_PORT_FIRST(port) \
+                                        + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port)  (MVPP2_PRS_VID_PORT_FIRST(port) \
+                                        + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
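With these macros the per-port layout works out as follows; a standalone sketch assuming MVPP2_PRS_TCAM_SRAM_SIZE is 256 (that value comes from elsewhere in the driver, not from this hunk):

#include <stdio.h>

#define SRAM_SIZE	256			/* assumed */
#define FILT_MAX	11			/* 10 VIDs + 1 default */
#define RANGE_END	(SRAM_SIZE - 31)
#define RANGE_START	(RANGE_END - 33 + 1)

int main(void)
{
	int port;

	for (port = 0; port < 3; port++) {
		int first = RANGE_START + port * FILT_MAX;

		printf("port %d: VID entries %d-%d, default %d\n",
		       port, first, first + FILT_MAX - 2,
		       first + FILT_MAX - 1);
	}
	return 0;	/* port 2's default lands on RANGE_END (225) */
}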
 /* Sram structure
  * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
  */
@@ -725,6 +759,7 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT                BIT(4)
 #define MVPP2_PRS_SINGLE_VLAN_AI               0
 #define MVPP2_PRS_DBL_VLAN_AI_BIT              BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT              BIT(0)
 
 /* DSA/EDSA type */
 #define MVPP2_PRS_TAGGED               true
@@ -747,6 +782,7 @@ enum mvpp2_prs_lookup {
        MVPP2_PRS_LU_MAC,
        MVPP2_PRS_LU_DSA,
        MVPP2_PRS_LU_VLAN,
+       MVPP2_PRS_LU_VID,
        MVPP2_PRS_LU_L2,
        MVPP2_PRS_LU_PPPOE,
        MVPP2_PRS_LU_IP4,
@@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
        mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
 }
 
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+                               unsigned short vid)
+{
+       mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+       mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
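A standalone sketch of how the 12-bit VID is split across two TCAM bytes here, and reassembled later by mvpp2_prs_vid_range_find():

#include <stdio.h>

int main(void)
{
	unsigned short vid = 0x123;		/* VLAN ID 291 */
	unsigned char hi = (vid & 0xf00) >> 8;	/* stored with mask 0xf  */
	unsigned char lo = vid & 0xff;		/* stored with mask 0xff */

	printf("bytes: 0x%x 0x%02x, rebuilt: 0x%03x\n",
	       hi, lo, (hi << 8) | lo);
	return 0;
}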
 /* Set bits in sram sw entry */
 static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
                                    int val)
@@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
                mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
                pe.index = tid;
 
-               /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
-               mvpp2_prs_sram_shift_set(&pe, shift,
-                                        MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
                /* Update shadow table */
                mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
 
                if (tagged) {
                        /* Set tagged bit in DSA tag */
                        mvpp2_prs_tcam_data_byte_set(&pe, 0,
-                                                    MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
-                                                    MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
-                       /* Clear all ai bits for next iteration */
-                       mvpp2_prs_sram_ai_update(&pe, 0,
-                                                MVPP2_PRS_SRAM_AI_MASK);
-                       /* If packet is tagged continue check vlans */
-                       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+                                            MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+                                            MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+                       /* Set ai bits for next iteration */
+                       if (extend)
+                               mvpp2_prs_sram_ai_update(&pe, 1,
+                                                       MVPP2_PRS_SRAM_AI_MASK);
+                       else
+                               mvpp2_prs_sram_ai_update(&pe, 0,
+                                                       MVPP2_PRS_SRAM_AI_MASK);
+
+                       /* If the packet is tagged, continue with VID filtering */
+                       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
                } else {
+                       /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag */
+                       mvpp2_prs_sram_shift_set(&pe, shift,
+                                       MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
                        /* Set result info bits to 'no vlans' */
                        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
                                                 MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
 
                mvpp2_prs_match_etype(pe, 0, tpid);
 
-               mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
-               /* Shift 4 bytes - skip 1 vlan tag */
-               mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
-                                        MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+               /* VLAN tag detected, proceed with VID filtering */
+               mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
                /* Clear all ai bits for next iteration */
                mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
 
@@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
                mvpp2_prs_match_etype(pe, 4, tpid2);
 
                mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
-               /* Shift 8 bytes - skip 2 vlan tags */
-               mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+               /* Shift 4 bytes - skip outer vlan tag */
+               mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
                                         MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
                mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
                                         MVPP2_PRS_RI_VLAN_MASK);
@@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
        mvpp2_prs_hw_write(priv, &pe);
 }
 
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+       struct mvpp2_prs_entry pe;
+
+       memset(&pe, 0, sizeof(pe));
+
+       /* Set default vid entry */
+       pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+       /* Skip VLAN header - Set offset to 4 bytes */
+       mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+                                MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Unmask all ports */
+       mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+       /* Update shadow table and hw entry */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+
+       /* Set default vid entry for extended DSA */
+       memset(&pe, 0, sizeof(pe));
+
+       /* Set default vid entry */
+       pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+                                MVPP2_PRS_EDSA_VID_AI_BIT);
+
+       /* Skip VLAN header - Set offset to 8 bytes */
+       mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+                                MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Unmask all ports */
+       mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+       /* Update shadow table and hw entry */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Match basic ethertypes */
 static int mvpp2_prs_etype_init(struct mvpp2 *priv)
 {
@@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
        mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
        pe.index = MVPP2_PE_VLAN_DBL;
 
-       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
        /* Clear ai for next iterations */
        mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
        return 0;
 }
 
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+                                   u16 mask)
+{
+       unsigned char byte[2], enable[2];
+       struct mvpp2_prs_entry pe;
+       u16 rvid, rmask;
+       int tid;
+
+       /* Go through all the entries with MVPP2_PRS_LU_VID */
+       for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+            tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+               if (!priv->prs_shadow[tid].valid ||
+                   priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+                       continue;
+
+               pe.index = tid;
+
+               mvpp2_prs_hw_read(priv, &pe);
+               mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+               mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+               rvid = ((byte[0] & 0xf) << 8) + byte[1];
+               rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+               if (rvid != vid || rmask != mask)
+                       continue;
+
+               return tid;
+       }
+
+       return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+       unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+                                port->id * MVPP2_PRS_VLAN_FILT_MAX;
+       unsigned int mask = 0xfff, reg_val, shift;
+       struct mvpp2 *priv = port->priv;
+       struct mvpp2_prs_entry pe;
+       int tid;
+
+       /* Scan TCAM and see if entry with this <vid,port> already exists */
+       tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+       reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+       if (reg_val & MVPP2_DSA_EXTENDED)
+               shift = MVPP2_VLAN_TAG_EDSA_LEN;
+       else
+               shift = MVPP2_VLAN_TAG_LEN;
+
+       /* No such entry */
+       if (!tid) {
+               memset(&pe, 0, sizeof(pe));
+
+               /* Go through all entries from first to last in vlan range */
+               tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+                                               vid_start +
+                                               MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+               /* There isn't room for a new VID filter */
+               if (tid < 0)
+                       return tid;
+
+               mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+               pe.index = tid;
+
+               /* Mask all ports */
+               mvpp2_prs_tcam_port_map_set(&pe, 0);
+       } else {
+               mvpp2_prs_hw_read(priv, &pe);
+       }
+
+       /* Enable the current port */
+       mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+       /* Continue - set next lookup */
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Skip VLAN header - Set offset to 4 or 8 bytes */
+       mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Set match on VID */
+       mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       /* Update shadow table */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+
+       return 0;
+}
+
+/* Remove parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+       struct mvpp2 *priv = port->priv;
+       int tid;
+
+       /* Scan TCAM and see if entry with this <vid,port> already exists */
+       tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+       /* No such entry - nothing to invalidate */
+       if (!tid)
+               return;
+
+       mvpp2_prs_hw_inv(priv, tid);
+       priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+       struct mvpp2 *priv = port->priv;
+       int tid;
+
+       for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+            tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+               /* tid is a TCAM index here, not a VID, so invalidate the
+                * entry directly instead of going through
+                * mvpp2_prs_vid_entry_remove()
+                */
+               if (priv->prs_shadow[tid].valid) {
+                       mvpp2_prs_hw_inv(priv, tid);
+                       priv->prs_shadow[tid].valid = false;
+               }
+       }
+}
+
+/* Remove VID filtering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+       unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+       struct mvpp2 *priv = port->priv;
+
+       /* Invalidate the guard entry */
+       mvpp2_prs_hw_inv(priv, tid);
+
+       priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+       unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+       struct mvpp2 *priv = port->priv;
+       unsigned int reg_val, shift;
+       struct mvpp2_prs_entry pe;
+
+       if (priv->prs_shadow[tid].valid)
+               return;
+
+       memset(&pe, 0, sizeof(pe));
+
+       pe.index = tid;
+
+       reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+       if (reg_val & MVPP2_DSA_EXTENDED)
+               shift = MVPP2_VLAN_TAG_EDSA_LEN;
+       else
+               shift = MVPP2_VLAN_TAG_LEN;
+
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       /* Mask all ports */
+       mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+       /* Update port mask */
+       mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+       /* Continue - set next lookup */
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Skip VLAN header - Set offset to 4 or 8 bytes */
+       mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Drop VLAN packets that don't belong to any VIDs on this port */
+       mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+                                MVPP2_PRS_RI_DROP_MASK);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       /* Update shadow table */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Parser default initialization */
 static int mvpp2_prs_default_init(struct platform_device *pdev,
                                  struct mvpp2 *priv)
@@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
 
        mvpp2_prs_dsa_init(priv);
 
+       mvpp2_prs_vid_init(priv);
+
        err = mvpp2_prs_etype_init(priv);
        if (err)
                return err;
@@ -7137,6 +7431,7 @@ static void mvpp2_set_rx_mode(struct net_device *dev)
        int id = port->id;
        bool allmulti = dev->flags & IFF_ALLMULTI;
 
+retry:
        mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC);
        mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
        mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
@@ -7144,10 +7439,20 @@ static void mvpp2_set_rx_mode(struct net_device *dev)
        /* Remove all port->id's mcast entries */
        mvpp2_prs_mcast_del_all(priv, id);
 
-       if (allmulti && !netdev_mc_empty(dev)) {
-               netdev_for_each_mc_addr(ha, dev)
-                       mvpp2_prs_mac_da_accept(priv, id, ha->addr, true);
+       if (!allmulti) {
+               netdev_for_each_mc_addr(ha, dev) {
+                       if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) {
+                               allmulti = true;
+                               goto retry;
+                       }
+               }
        }
+
+       /* Disable VLAN filtering in promiscuous mode */
+       if (dev->flags & IFF_PROMISC)
+               mvpp2_prs_vid_disable_filtering(port);
+       else
+               mvpp2_prs_vid_enable_filtering(port);
 }
 
 static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7287,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        return ret;
 }
 
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       int ret;
+
+       ret = mvpp2_prs_vid_entry_add(port, vid);
+       if (ret)
+               netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+                          MVPP2_PRS_VLAN_FILT_MAX - 1);
+       return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       mvpp2_prs_vid_entry_remove(port, vid);
+       return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+                             netdev_features_t features)
+{
+       netdev_features_t changed = dev->features ^ features;
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+               if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+                       mvpp2_prs_vid_enable_filtering(port);
+               } else {
+                       /* Invalidate all registered VID filters for this
+                        * port
+                        */
+                       mvpp2_prs_vid_remove_all(port);
+
+                       mvpp2_prs_vid_disable_filtering(port);
+               }
+       }
+
+       return 0;
+}
+
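mvpp2_set_features() uses the standard ndo_set_features idiom: XOR the old and requested feature masks and act only on bits that actually flipped. A standalone sketch with a made-up flag value:

#include <stdio.h>

#define F_VLAN_FILTER (1ULL << 1)	/* hypothetical bit position */

int main(void)
{
	unsigned long long old = F_VLAN_FILTER;
	unsigned long long req = 0;	/* user turned filtering off */
	unsigned long long changed = old ^ req;

	if (changed & F_VLAN_FILTER)
		printf("VLAN filtering %s\n",
		       (req & F_VLAN_FILTER) ? "on" : "off");
	return 0;
}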
 /* Ethtool methods */
 
 /* Set interrupt coalescing for ethtools */
@@ -7428,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
        .ndo_change_mtu         = mvpp2_change_mtu,
        .ndo_get_stats64        = mvpp2_get_stats64,
        .ndo_do_ioctl           = mvpp2_ioctl,
+       .ndo_vlan_rx_add_vid    = mvpp2_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = mvpp2_vlan_rx_kill_vid,
+       .ndo_set_features       = mvpp2_set_features,
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7940,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
        features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
        dev->features = features | NETIF_F_RXCSUM;
-       dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+       dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+                           NETIF_F_HW_VLAN_CTAG_FILTER;
        dev->vlan_features |= features;
        dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
 
index ebc1f56..9a7a2f0 100644
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
        "rx_xdp_drop",
        "rx_xdp_tx",
        "rx_xdp_tx_full",
+
+       /* phy statistics */
+       "rx_packets_phy", "rx_bytes_phy",
+       "tx_packets_phy", "tx_bytes_phy",
 };
 
 static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
                if (bitmap_iterator_test(&it))
                        data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
 
+       for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
        for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                data[index++] = priv->tx_ring[TX][i]->packets;
                data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
                                strcpy(data + (index++) * ETH_GSTRING_LEN,
                                       main_strings[strings]);
 
+               for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
                for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                        sprintf(data + (index++) * ETH_GSTRING_LEN,
                                "tx%d_packets", i);
index 8fc51bc..e0adac4 100644
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 
        bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
        last_i += NUM_XDP_STATS;
+
+       if (!mlx4_is_slave(dev))
+               bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+       last_i += NUM_PHY_STATS;
 }
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
                mlx4_en_stop_port(dev, 1);
        }
 
-       en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
-               ts_config.rx_filter,
-               !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
        mlx4_en_safe_replace_resources(priv, tmp);
 
        if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
index 1fa4849..0158b88 100644
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
                priv->port_stats.xmit_more         += READ_ONCE(ring->xmit_more);
        }
 
-       if (mlx4_is_master(mdev->dev)) {
-               stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
-                                                  &mlx4_en_stats->RTOT_prio_1,
-                                                  NUM_PRIORITIES);
-               stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
-                                                  &mlx4_en_stats->TTOT_prio_1,
-                                                  NUM_PRIORITIES);
-               stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
-                                                &mlx4_en_stats->ROCT_prio_1,
-                                                NUM_PRIORITIES);
-               stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
-                                                &mlx4_en_stats->TOCT_prio_1,
-                                                NUM_PRIORITIES);
+       if (!mlx4_is_slave(mdev->dev)) {
+               struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+               p_stats->rx_packets_phy =
+                       en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+                                      &mlx4_en_stats->RTOT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->tx_packets_phy =
+                       en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+                                      &mlx4_en_stats->TTOT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->rx_bytes_phy =
+                       en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+                                      &mlx4_en_stats->ROCT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->tx_bytes_phy =
+                       en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+                                      &mlx4_en_stats->TOCT_prio_1,
+                                      NUM_PRIORITIES);
+               if (mlx4_is_master(mdev->dev)) {
+                       stats->rx_packets = p_stats->rx_packets_phy;
+                       stats->tx_packets = p_stats->tx_packets_phy;
+                       stats->rx_bytes = p_stats->rx_bytes_phy;
+                       stats->tx_bytes = p_stats->tx_bytes_phy;
+               }
        }
 
        /* net device stats */
index b4d144e..c2c6bd7 100644
@@ -649,6 +649,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
        return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
 int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -662,12 +668,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        int polled = 0;
        int index;
 
-       if (unlikely(!priv->port_up))
+       if (unlikely(!priv->port_up || budget <= 0))
                return 0;
 
-       if (unlikely(budget <= 0))
-               return polled;
-
        ring = priv->rx_ring[cq_ring];
 
        /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -838,12 +841,7 @@ xdp_drop_no_cnt:
                                ring->csum_ok++;
                        } else {
                                if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
-                                     (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
-                                                                MLX4_CQE_STATUS_IPV6))))
-#else
-                                                                0))))
-#endif
+                                     (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
                                        goto csum_none;
                                if (check_csum(cqe, skb, va, dev->features))
                                        goto csum_none;
index f470ae3..f7c8113 100644
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
        struct mlx4_en_flow_stats_tx tx_flowstats;
        struct mlx4_en_port_stats port_stats;
        struct mlx4_en_xdp_stats xdp_stats;
+       struct mlx4_en_phy_stats phy_stats;
        struct mlx4_en_stats_bitmap stats_bitmap;
        struct list_head mc_list;
        struct list_head curr_list;
index aab28eb..86b6051 100644
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
 #define NUM_XDP_STATS          3
 };
 
+struct mlx4_en_phy_stats {
+       unsigned long rx_packets_phy;
+       unsigned long rx_bytes_phy;
+       unsigned long tx_packets_phy;
+       unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS          4
+};
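The ethtool code walks this struct as a flat array of unsigned longs, which works because it holds nothing but NUM_PHY_STATS consecutive counters. A standalone sketch of the idiom:

#include <stdio.h>

struct phy_stats {
	unsigned long rx_packets_phy;
	unsigned long rx_bytes_phy;
	unsigned long tx_packets_phy;
	unsigned long tx_bytes_phy;
#define NUM_PHY_STATS 4
};

int main(void)
{
	struct phy_stats s = { 1, 2, 3, 4 };
	unsigned long *p = (unsigned long *)&s;
	int i;

	for (i = 0; i < NUM_PHY_STATS; i++)
		printf("stat[%d] = %lu\n", i, p[i]);
	return 0;
}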
+
 #define NUM_MAIN_STATS 21
 
 #define MLX4_NUM_PRIORITIES    8
@@ -116,7 +124,7 @@ enum {
 
 #define NUM_ALL_STATS  (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
                         NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
-                        NUM_XDP_STATS)
+                        NUM_XDP_STATS + NUM_PHY_STATS)
 
 #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
                                  sizeof(((struct net_device_stats *)0)->n))
index 47239bf..323ffe8 100644
@@ -71,19 +71,24 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
 }
 
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
-                       struct mlx5_buf *buf, int node)
+                       struct mlx5_frag_buf *buf, int node)
 {
        dma_addr_t t;
 
        buf->size = size;
        buf->npages       = 1;
        buf->page_shift   = (u8)get_order(size) + PAGE_SHIFT;
-       buf->direct.buf   = mlx5_dma_zalloc_coherent_node(dev, size,
-                                                         &t, node);
-       if (!buf->direct.buf)
+
+       buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
+       if (!buf->frags)
                return -ENOMEM;
 
-       buf->direct.map = t;
+       buf->frags->buf   = mlx5_dma_zalloc_coherent_node(dev, size,
+                                                         &t, node);
+       if (!buf->frags->buf)
+               goto err_out;
+
+       buf->frags->map = t;
 
        while (t & ((1 << buf->page_shift) - 1)) {
                --buf->page_shift;
@@ -91,18 +96,24 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
        }
 
        return 0;
+err_out:
+       kfree(buf->frags);
+       return -ENOMEM;
 }
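The while loop carried over from the old code shrinks page_shift until the DMA address is naturally aligned, doubling npages at each step. A standalone sketch with a hypothetical address:

#include <stdio.h>

int main(void)
{
	unsigned long long t = 0x3000;	/* hypothetical DMA address */
	int page_shift = 14;		/* 16K allocation */
	int npages = 1;

	while (t & ((1ULL << page_shift) - 1)) {
		--page_shift;
		npages *= 2;
	}
	printf("page_shift=%d npages=%d\n", page_shift, npages);
	return 0;	/* 0x3000 is 4K-aligned: prints 12 and 4 */
}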
 
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+                  int size, struct mlx5_frag_buf *buf)
 {
        return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
 }
-EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+EXPORT_SYMBOL(mlx5_buf_alloc);
 
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
 {
-       dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
-                         buf->direct.map);
+       dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+                         buf->frags->map);
+
+       kfree(buf->frags);
 }
 EXPORT_SYMBOL_GPL(mlx5_buf_free);
 
@@ -147,6 +158,7 @@ err_free_buf:
 err_out:
        return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
 
 void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
 {
@@ -162,6 +174,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
        }
        kfree(buf->frags);
 }
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
 
 static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
                                                 int node)
@@ -275,13 +288,13 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
 }
 EXPORT_SYMBOL_GPL(mlx5_db_free);
 
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
 {
        u64 addr;
        int i;
 
        for (i = 0; i < buf->npages; i++) {
-               addr = buf->direct.map + (i << buf->page_shift);
+               addr = buf->frags->map + (i << buf->page_shift);
 
                pas[i] = cpu_to_be64(addr);
        }
index 1016e05..669ed16 100644
@@ -58,8 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
                                 tasklet_ctx.list) {
                list_del_init(&mcq->tasklet_ctx.list);
                mcq->tasklet_ctx.comp(mcq);
-               if (refcount_dec_and_test(&mcq->refcount))
-                       complete(&mcq->free);
+               mlx5_cq_put(mcq);
                if (time_after(jiffies, end))
                        break;
        }
@@ -80,69 +79,19 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
         * still arrive.
         */
        if (list_empty_careful(&cq->tasklet_ctx.list)) {
-               refcount_inc(&cq->refcount);
+               mlx5_cq_hold(cq);
                list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
        }
        spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
 }
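mlx5_cq_hold()/mlx5_cq_put() themselves are not shown in this hunk; judging by the open-coded refcounting they replace, they presumably wrap the same two operations:

	/* sketch, inferred from the removed lines in this file */
	static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
	{
		refcount_inc(&cq->refcount);
	}

	static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
	{
		if (refcount_dec_and_test(&cq->refcount))
			complete(&cq->free);
	}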
 
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
-{
-       struct mlx5_core_cq *cq;
-       struct mlx5_cq_table *table = &dev->priv.cq_table;
-
-       spin_lock(&table->lock);
-       cq = radix_tree_lookup(&table->tree, cqn);
-       if (likely(cq))
-               refcount_inc(&cq->refcount);
-       spin_unlock(&table->lock);
-
-       if (!cq) {
-               mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
-               return;
-       }
-
-       ++cq->arm_sn;
-
-       cq->comp(cq);
-
-       if (refcount_dec_and_test(&cq->refcount))
-               complete(&cq->free);
-}
-
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
-{
-       struct mlx5_cq_table *table = &dev->priv.cq_table;
-       struct mlx5_core_cq *cq;
-
-       spin_lock(&table->lock);
-
-       cq = radix_tree_lookup(&table->tree, cqn);
-       if (cq)
-               refcount_inc(&cq->refcount);
-
-       spin_unlock(&table->lock);
-
-       if (!cq) {
-               mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
-               return;
-       }
-
-       cq->event(cq, event_type);
-
-       if (refcount_dec_and_test(&cq->refcount))
-               complete(&cq->free);
-}
-
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                        u32 *in, int inlen)
 {
-       struct mlx5_cq_table *table = &dev->priv.cq_table;
+       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+       u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
-       u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
-       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
-                          c_eqn);
        struct mlx5_eq *eq;
        int err;
 
@@ -159,7 +108,9 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        cq->cqn = MLX5_GET(create_cq_out, out, cqn);
        cq->cons_index = 0;
        cq->arm_sn     = 0;
-       refcount_set(&cq->refcount, 1);
+       cq->eq         = eq;
+       refcount_set(&cq->refcount, 0);
+       mlx5_cq_hold(cq);
        init_completion(&cq->free);
        if (!cq->comp)
                cq->comp = mlx5_add_cq_to_tasklet;
@@ -167,12 +118,16 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        cq->tasklet_ctx.priv = &eq->tasklet_ctx;
        INIT_LIST_HEAD(&cq->tasklet_ctx.list);
 
-       spin_lock_irq(&table->lock);
-       err = radix_tree_insert(&table->tree, cq->cqn, cq);
-       spin_unlock_irq(&table->lock);
+       /* Add to comp EQ CQ tree to recv comp events */
+       err = mlx5_eq_add_cq(eq, cq);
        if (err)
                goto err_cmd;
 
+       /* Add to async EQ CQ tree to recv async events */
+       err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+       if (err)
+               goto err_cq_add;
+
        cq->pid = current->pid;
        err = mlx5_debug_cq_add(dev, cq);
        if (err)
@@ -183,6 +138,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 
        return 0;
 
+err_cq_add:
+       mlx5_eq_del_cq(eq, cq);
 err_cmd:
        memset(din, 0, sizeof(din));
        memset(dout, 0, sizeof(dout));
@@ -195,23 +152,17 @@ EXPORT_SYMBOL(mlx5_core_create_cq);
 
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 {
-       struct mlx5_cq_table *table = &dev->priv.cq_table;
        u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
-       struct mlx5_core_cq *tmp;
        int err;
 
-       spin_lock_irq(&table->lock);
-       tmp = radix_tree_delete(&table->tree, cq->cqn);
-       spin_unlock_irq(&table->lock);
-       if (!tmp) {
-               mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
-               return -EINVAL;
-       }
-       if (tmp != cq) {
-               mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
-               return -EINVAL;
-       }
+       err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+       if (err)
+               return err;
+
+       err = mlx5_eq_del_cq(cq->eq, cq);
+       if (err)
+               return err;
 
        MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
        MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
@@ -222,8 +173,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
        synchronize_irq(cq->irqn);
 
        mlx5_debug_cq_remove(dev, cq);
-       if (refcount_dec_and_test(&cq->refcount))
-               complete(&cq->free);
+       mlx5_cq_put(cq);
        wait_for_completion(&cq->free);
 
        return 0;
@@ -270,21 +220,3 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
        return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
 }
 EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
-
-int mlx5_init_cq_table(struct mlx5_core_dev *dev)
-{
-       struct mlx5_cq_table *table = &dev->priv.cq_table;
-       int err;
-
-       memset(table, 0, sizeof(*table));
-       spin_lock_init(&table->lock);
-       INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-       err = mlx5_cq_debugfs_init(dev);
-
-       return err;
-}
-
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
-{
-       mlx5_cq_debugfs_cleanup(dev);
-}
index 17b7232..b994b80 100644
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
 }
 EXPORT_SYMBOL(mlx5_unregister_interface);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+       mutex_lock(&mlx5_intf_mutex);
+       mlx5_remove_dev_by_protocol(mdev, protocol);
+       mlx5_add_dev_by_protocol(mdev, protocol);
+       mutex_unlock(&mlx5_intf_mutex);
+}
+
 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
 {
        struct mlx5_priv *priv = &mdev->priv;
index 0be4575..fd50916 100644
@@ -96,10 +96,10 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p,
                                          "%pI4");
                } else if (ethertype.v == ETH_P_IPV6) {
                        static const struct in6_addr full_ones = {
-                               .in6_u.u6_addr32 = {htonl(0xffffffff),
-                                                   htonl(0xffffffff),
-                                                   htonl(0xffffffff),
-                                                   htonl(0xffffffff)},
+                               .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+                                                   __constant_htonl(0xffffffff),
+                                                   __constant_htonl(0xffffffff),
+                                                   __constant_htonl(0xffffffff)},
                        };
                        DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
                        DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
index 47bab84..da94c8c 100644
@@ -1768,13 +1768,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
        param->wq.linear = 1;
 }
 
-static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param)
+static void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+                                     struct mlx5e_rq_param *param)
 {
        void *rqc = param->rqc;
        void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
        MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
+
+       param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
 }
 
 static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
@@ -2634,6 +2637,9 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
                               struct mlx5e_cq *cq,
                               struct mlx5e_cq_param *param)
 {
+       param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+       param->wq.db_numa_node  = dev_to_node(&mdev->pdev->dev);
+
        return mlx5e_alloc_cq_common(mdev, param, cq);
 }
 
@@ -2645,7 +2651,7 @@ static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev,
        struct mlx5e_cq *cq = &drop_rq->cq;
        int err;
 
-       mlx5e_build_drop_rq_param(&rq_param);
+       mlx5e_build_drop_rq_param(mdev, &rq_param);
 
        err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
        if (err)
@@ -2994,8 +3000,8 @@ static int mlx5e_setup_tc_block(struct net_device *dev,
 }
 #endif
 
-int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
-                  void *type_data)
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+                         void *type_data)
 {
        switch (type) {
 #ifdef CONFIG_MLX5_ESWITCH
index 363d8dc..ea4b255 100644
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
        kfree(ppriv); /* mlx5e_rep_priv */
 }
 
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5e_rep_priv *rpriv;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+
+       return rpriv->netdev;
+}
+
 static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 
                rep_if.load = mlx5e_vport_rep_load;
                rep_if.unload = mlx5e_vport_rep_unload;
+               rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
                mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
        }
 }
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
 
        rep_if.load = mlx5e_nic_rep_load;
        rep_if.unload = mlx5e_nic_rep_unload;
+       rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
        rep_if.priv = rpriv;
        INIT_LIST_HEAD(&rpriv->vport_sqs_list);
        mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
index 0d4bb06..8cce90d 100644
@@ -36,6 +36,7 @@
 #include <linux/tcp.h>
 #include <linux/bpf_trace.h>
 #include <net/busy_poll.h>
+#include <net/ip6_checksum.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -52,7 +53,7 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
 static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
                                       void *data)
 {
-       u32 ci = cqcc & cq->wq.sz_m1;
+       u32 ci = cqcc & cq->wq.fbc.sz_m1;
 
        memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
 }
@@ -74,9 +75,10 @@ static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
 
 static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
 {
-       u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
-       u32 wq_sz = 1 << cq->wq.log_sz;
-       u32 ci = cqcc & cq->wq.sz_m1;
+       struct mlx5_frag_buf_ctrl *fbc = &cq->wq.fbc;
+       u8 op_own = (cqcc >> fbc->log_sz) & 1;
+       u32 wq_sz = 1 << fbc->log_sz;
+       u32 ci = cqcc & fbc->sz_m1;
        u32 ci_top = min_t(u32, wq_sz, ci + n);
 
        for (; ci < ci_top; ci++, n--) {
@@ -101,7 +103,7 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
        cq->title.byte_cnt     = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
        cq->title.check_sum    = cq->mini_arr[cq->mini_arr_idx].checksum;
        cq->title.op_own      &= 0xf0;
-       cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.log_sz);
+       cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
        cq->title.wqe_counter  = cpu_to_be16(cq->decmprs_wqe_counter);
 
        if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
@@ -546,20 +548,33 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
        return true;
 }
 
+static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
+{
+       u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+       u8 tcp_ack     = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+                        (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
+
+       tcp->check                      = 0;
+       tcp->psh                        = get_cqe_lro_tcppsh(cqe);
+
+       if (tcp_ack) {
+               tcp->ack                = 1;
+               tcp->ack_seq            = cqe->lro_ack_seq_num;
+               tcp->window             = cqe->lro_tcp_win;
+       }
+}
+
 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                                 u32 cqe_bcnt)
 {
        struct ethhdr   *eth = (struct ethhdr *)(skb->data);
        struct tcphdr   *tcp;
        int network_depth = 0;
+       __wsum check;
        __be16 proto;
        u16 tot_len;
        void *ip_p;
 
-       u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
-       u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
-               (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
-
        proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
 
        tot_len = cqe_bcnt - network_depth;
@@ -576,23 +591,30 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                ipv4->check             = 0;
                ipv4->check             = ip_fast_csum((unsigned char *)ipv4,
                                                       ipv4->ihl);
+
+               mlx5e_lro_update_tcp_hdr(cqe, tcp);
+               check = csum_partial(tcp, tcp->doff * 4,
+                                    csum_unfold((__force __sum16)cqe->check_sum));
+               /* Almost done, don't forget the pseudo header */
+               tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
+                                              tot_len - sizeof(struct iphdr),
+                                              IPPROTO_TCP, check);
        } else {
+               u16 payload_len = tot_len - sizeof(struct ipv6hdr);
                struct ipv6hdr *ipv6 = ip_p;
 
                tcp = ip_p + sizeof(struct ipv6hdr);
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 
                ipv6->hop_limit         = cqe->lro_min_ttl;
-               ipv6->payload_len       = cpu_to_be16(tot_len -
-                                                     sizeof(struct ipv6hdr));
-       }
-
-       tcp->psh = get_cqe_lro_tcppsh(cqe);
-
-       if (tcp_ack) {
-               tcp->ack                = 1;
-               tcp->ack_seq            = cqe->lro_ack_seq_num;
-               tcp->window             = cqe->lro_tcp_win;
+               ipv6->payload_len       = cpu_to_be16(payload_len);
+
+               mlx5e_lro_update_tcp_hdr(cqe, tcp);
+               check = csum_partial(tcp, tcp->doff * 4,
+                                    csum_unfold((__force __sum16)cqe->check_sum));
+               /* Almost done, don't forget the pseudo header */
+               tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
+                                            IPPROTO_TCP, check);
        }
 }
 
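
The rebuilt LRO path zeroes tcp->check, sums the TCP header seeded with the
CQE-reported payload checksum, and only then folds in the pseudo header. A
worked sketch of the IPv4 case, assuming (as the code above does) that
cqe->check_sum covers the coalesced TCP payload; the helper name is
illustrative.

    static void example_lro_csum_v4(struct iphdr *ipv4, struct tcphdr *tcp,
                                    __be32 hw_check_sum, u16 tot_len)
    {
            __wsum check;

            tcp->check = 0;         /* exclude the stale csum field itself */
            /* header bytes, seeded with the HW-reported payload checksum */
            check = csum_partial(tcp, tcp->doff * 4,
                                 csum_unfold((__force __sum16)hw_check_sum));
            /* pseudo header: saddr, daddr, protocol and L4 length */
            tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
                                           tot_len - sizeof(struct iphdr),
                                           IPPROTO_TCP, check);
    }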
index 5a46082..7079764 100644
@@ -216,7 +216,8 @@ mlx5e_test_loopback_validate(struct sk_buff *skb,
        if (iph->protocol != IPPROTO_UDP)
                goto out;
 
-       udph = udp_hdr(skb);
+       /* Don't assume skb_transport_header() was set */
+       udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
        if (udph->dest != htons(9))
                goto out;
 
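The loopback fix locates the UDP header from the IP header length field
instead of trusting skb_transport_header(), which this validate callback
cannot assume was set. The same calculation in isolation, assuming iph
points at a valid IPv4 header in the skb linear area; the helper is
hypothetical.

    static struct udphdr *example_udp_hdr_from_iph(struct iphdr *iph)
    {
            /* ihl counts 32-bit words, so L4 starts 4 * ihl bytes in */
            return (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
    }
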
index fd98b0d..fa86a14 100644
@@ -2529,7 +2529,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
                        } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
-                               if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
+                               if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
+                                   tcf_vlan_push_prio(a))
                                        return -EOPNOTSUPP;
 
                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
index 569b42a..11b4f10 100644
@@ -176,7 +176,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
        default:
                hlen = mlx5e_skb_l2_header_offset(skb);
        }
-       return min_t(u16, hlen, skb->len);
+       return min_t(u16, hlen, skb_headlen(skb));
 }
 
 static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
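
The min-inline clamp changed because skb->len also counts paged fragments,
while only skb_headlen() bytes are addressable at skb->data; inlining more
than that would read past the linear buffer. A sketch of the invariant the
fix restores, with a hypothetical helper name.

    static u16 example_inline_copy_len(const struct sk_buff *skb, u16 hlen)
    {
            /* never copy past the linear area, even if skb->len is larger */
            return min_t(u16, hlen, skb_headlen(skb));
    }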
index 25106e9..c1c9497 100644
@@ -393,6 +393,51 @@ static void general_event_handler(struct mlx5_core_dev *dev,
        }
 }
 
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+       struct mlx5_cq_table *table = &eq->cq_table;
+       struct mlx5_core_cq *cq = NULL;
+
+       spin_lock(&table->lock);
+       cq = radix_tree_lookup(&table->tree, cqn);
+       if (likely(cq))
+               mlx5_cq_hold(cq);
+       spin_unlock(&table->lock);
+
+       return cq;
+}
+
+static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+       struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+       if (unlikely(!cq)) {
+               mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+               return;
+       }
+
+       ++cq->arm_sn;
+
+       cq->comp(cq);
+
+       mlx5_cq_put(cq);
+}
+
+static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+{
+       struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+       if (unlikely(!cq)) {
+               mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+               return;
+       }
+
+       cq->event(cq, event_type);
+
+       mlx5_cq_put(cq);
+}
+
 static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 {
        struct mlx5_eq *eq = eq_ptr;
@@ -415,7 +460,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                switch (eqe->type) {
                case MLX5_EVENT_TYPE_COMP:
                        cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
-                       mlx5_cq_completion(dev, cqn);
+                       mlx5_eq_cq_completion(eq, cqn);
                        break;
                case MLX5_EVENT_TYPE_DCT_DRAINED:
                        rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
@@ -472,7 +517,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
                        mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
                                       cqn, eqe->data.cq_err.syndrome);
-                       mlx5_cq_event(dev, cqn, eqe->type);
+                       mlx5_eq_cq_event(eq, cqn, eqe->type);
                        break;
 
                case MLX5_EVENT_TYPE_PAGE_REQUEST:
@@ -567,6 +612,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       int nent, u64 mask, const char *name,
                       enum mlx5_eq_type type)
 {
+       struct mlx5_cq_table *cq_table = &eq->cq_table;
        u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        struct mlx5_priv *priv = &dev->priv;
        irq_handler_t handler;
@@ -576,6 +622,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        u32 *in;
        int err;
 
+       /* Init CQ table */
+       memset(cq_table, 0, sizeof(*cq_table));
+       spin_lock_init(&cq_table->lock);
+       INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
        eq->type = type;
        eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
        eq->cons_index = 0;
@@ -669,7 +720,6 @@ err_buf:
        mlx5_buf_free(dev, &eq->buf);
        return err;
 }
-EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
 
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
@@ -696,7 +746,40 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 
        return err;
 }
-EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+       struct mlx5_cq_table *table = &eq->cq_table;
+       int err;
+
+       spin_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree, cq->cqn, cq);
+       spin_unlock_irq(&table->lock);
+
+       return err;
+}
+
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+       struct mlx5_cq_table *table = &eq->cq_table;
+       struct mlx5_core_cq *tmp;
+
+       spin_lock_irq(&table->lock);
+       tmp = radix_tree_delete(&table->tree, cq->cqn);
+       spin_unlock_irq(&table->lock);
+
+       if (!tmp) {
+               mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
+               return -ENOENT;
+       }
+
+       if (tmp != cq) {
+               mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
+               return -EINVAL;
+       }
+
+       return 0;
+}
 
 int mlx5_eq_init(struct mlx5_core_dev *dev)
 {
@@ -840,4 +923,3 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
        MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
-EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
index 5ecf2cd..77b7272 100644
@@ -1529,6 +1529,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 
        esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
 
+       /* Create steering drop counters for ingress and egress ACLs */
+       if (vport_num && esw->mode == SRIOV_LEGACY)
+               esw_vport_create_drop_counters(vport);
+
        /* Restore old vport configuration */
        esw_apply_vport_conf(esw, vport);
 
@@ -1545,10 +1549,6 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
        if (!vport_num)
                vport->info.trusted = true;
 
-       /* create steering drop counters for ingress and egress ACLs */
-       if (vport_num && esw->mode == SRIOV_LEGACY)
-               esw_vport_create_drop_counters(vport);
-
        esw_vport_change_handle_locked(vport);
 
        esw->enabled_vports++;
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
        esw->mode = mode;
 
-       if (mode == SRIOV_LEGACY)
+       if (mode == SRIOV_LEGACY) {
                err = esw_create_legacy_fdb_table(esw, nvfs + 1);
-       else
+       } else {
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
                err = esw_offloads_init(esw, nvfs + 1);
+       }
+
        if (err)
                goto abort;
 
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 
 abort:
        esw->mode = SRIOV_NONE;
+
+       if (mode == SRIOV_OFFLOADS)
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
        return err;
 }
 
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
        struct esw_mc_addr *mc_promisc;
+       int old_mode;
        int nvports;
        int i;
 
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
        else if (esw->mode == SRIOV_OFFLOADS)
                esw_offloads_cleanup(esw, nvports);
 
+       old_mode = esw->mode;
        esw->mode = SRIOV_NONE;
+
+       if (old_mode == SRIOV_OFFLOADS)
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -2175,3 +2188,9 @@ free_out:
        kvfree(out);
        return err;
 }
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
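
Reloading the IB interface around offloads-mode transitions makes the RDMA
side re-probe against the new steering layout, and the exported
mlx5_eswitch_mode() lets such consumers check the mode first. A minimal
sketch, assuming a hypothetical IB-side caller.

    static bool example_ib_eswitch_offloaded(struct mlx5_eswitch *esw)
    {
            return esw && mlx5_eswitch_mode(esw) == SRIOV_OFFLOADS;
    }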
index 2fa0370..98d2177 100644
 #include <linux/if_link.h>
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
 #include "lib/mpfs.h"
 
-enum {
-       SRIOV_NONE,
-       SRIOV_LEGACY,
-       SRIOV_OFFLOADS
-};
-
-enum {
-       REP_ETH,
-       NUM_REP_TYPES,
-};
-
 #ifdef CONFIG_MLX5_ESWITCH
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
                        struct mlx5_flow_table *fdb;
                        struct mlx5_flow_group *send_to_vport_grp;
                        struct mlx5_flow_group *miss_grp;
-                       struct mlx5_flow_handle *miss_rule;
+                       struct mlx5_flow_handle *miss_rule_uni;
+                       struct mlx5_flow_handle *miss_rule_multi;
                        int vlan_push_pop_refcount;
                } offloads;
        };
 };
 
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
-       int                    (*load)(struct mlx5_core_dev *dev,
-                                      struct mlx5_eswitch_rep *rep);
-       void                   (*unload)(struct mlx5_eswitch_rep *rep);
-       void                    *priv;
-       bool                   valid;
-};
-
-struct mlx5_eswitch_rep {
-       struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
-       u16                    vport;
-       u8                     hw_id[ETH_ALEN];
-       u16                    vlan;
-       u32                    vlan_refcount;
-};
-
 struct mlx5_esw_offload {
        struct mlx5_flow_table *ft_offloads;
        struct mlx5_flow_group *vport_rx_group;
@@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 int vport,
                                 struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
-                                   u32 sqn);
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
 struct mlx5_flow_spec;
@@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-                                    int vport_index,
-                                    struct mlx5_eswitch_rep_if *rep_if,
-                                    u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-                                      int vport_index,
-                                      u8 rep_type);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
index 99f583a..0a8303c 100644
@@ -338,6 +338,7 @@ out:
        kvfree(spec);
        return flow_rule;
 }
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
 
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 {
@@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_handle *flow_rule = NULL;
        struct mlx5_flow_spec *spec;
+       void *headers_c;
+       void *headers_v;
        int err = 0;
+       u8 *dmac_c;
+       u8 *dmac_v;
 
        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec) {
@@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
                goto out;
        }
 
+       spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+       headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                outer_headers);
+       dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+                             outer_headers.dmac_47_16);
+       dmac_c[0] = 0x01;
+
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport_num = 0;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
                                        &flow_act, &dest, 1);
        if (IS_ERR(flow_rule)) {
                err = PTR_ERR(flow_rule);
-               esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
+               esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
                goto out;
        }
 
-       esw->fdb_table.offloads.miss_rule = flow_rule;
+       esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+       headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                outer_headers);
+       dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+                             outer_headers.dmac_47_16);
+       dmac_v[0] = 0x01;
+       flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+                                       &flow_act, &dest, 1);
+       if (IS_ERR(flow_rule)) {
+               err = PTR_ERR(flow_rule);
+               esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+               mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+               goto out;
+       }
+
+       esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
 out:
        kvfree(spec);
        return err;
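
The dmac_c[0] = 0x01 / dmac_v[0] = 0x01 pair above masks and matches only
the I/G bit of the destination MAC, so the second miss rule catches exactly
the frames that is_multicast_ether_addr() classifies as multicast. Spelled
out in plain C; the helper is hypothetical.

    #include <linux/etherdevice.h>

    static bool example_hits_multicast_miss(const u8 *dmac)
    {
            return is_multicast_ether_addr(dmac);   /* tests dmac[0] & 0x01 */
    }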
@@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 }
 
 #define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
 
 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 {
@@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        struct mlx5_flow_group *g;
        void *match_criteria;
        u32 *flow_group_in;
+       u8 *dmac;
 
        esw_debug(esw->dev, "Create offloads FDB Tables\n");
        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        if (err)
                goto fast_fdb_err;
 
-       table_size = nvports + MAX_PF_SQ + 1;
+       table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
 
        ft_attr.max_fte = table_size;
        ft_attr.prio = FDB_SLOW_PATH;
@@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
 
-       ix = nvports + MAX_PF_SQ;
+       ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
 
@@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 
        /* create miss group */
        memset(flow_group_in, 0, inlen);
-       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+                MLX5_MATCH_OUTER_HEADERS);
+       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+                                     match_criteria);
+       dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+                           outer_headers.dmac_47_16);
+       dmac[0] = 0x01;
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
-       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
 
        g = mlx5_create_flow_group(fdb, flow_group_in);
        if (IS_ERR(g)) {
@@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
                return;
 
        esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
-       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
@@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 {
        int err;
 
-       /* disable PF RoCE so missed packets don't go through RoCE steering */
-       mlx5_dev_list_lock();
-       mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
-
        err = esw_create_offloads_fdb_tables(esw, nvports);
        if (err)
-               goto create_fdb_err;
+               return err;
 
        err = esw_create_offloads_table(esw);
        if (err)
@@ -821,12 +854,6 @@ create_fg_err:
 create_ft_err:
        esw_destroy_offloads_fdb_tables(esw);
 
-create_fdb_err:
-       /* enable back PF RoCE */
-       mlx5_dev_list_lock();
-       mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
-
        return err;
 }
 
@@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
        }
 
        /* enable back PF RoCE */
-       mlx5_dev_list_lock();
-       mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
+       mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 
        return err;
 }
@@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 
        rep_if->load   = __rep_if->load;
        rep_if->unload = __rep_if->unload;
+       rep_if->get_proto_dev = __rep_if->get_proto_dev;
        rep_if->priv = __rep_if->priv;
 
        rep_if->valid = true;
 }
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
 
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
                                       int vport_index, u8 rep_type)
@@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 
        rep->rep_if[rep_type].valid = false;
 }
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
@@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
        rep = &offloads->vport_reps[UPLINK_REP_INDEX];
        return rep->rep_if[rep_type].priv;
 }
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+                                int vport,
+                                u8 rep_type)
+{
+       struct mlx5_esw_offload *offloads = &esw->offloads;
+       struct mlx5_eswitch_rep *rep;
+
+       if (vport == FDB_UPLINK_VPORT)
+               vport = UPLINK_REP_INDEX;
+
+       rep = &offloads->vport_reps[vport];
+
+       if (rep->rep_if[rep_type].valid &&
+           rep->rep_if[rep_type].get_proto_dev)
+               return rep->rep_if[rep_type].get_proto_dev(rep);
+       return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+       return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+                                               int vport)
+{
+       return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
index c025c98..31fc2cf 100644
@@ -1429,7 +1429,8 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
 
        if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP  |
                             MLX5_FLOW_CONTEXT_ACTION_ENCAP |
-                            MLX5_FLOW_CONTEXT_ACTION_DECAP))
+                            MLX5_FLOW_CONTEXT_ACTION_DECAP |
+                            MLX5_FLOW_CONTEXT_ACTION_MOD_HDR))
                return true;
 
        return false;
@@ -1758,8 +1759,11 @@ search_again_locked:
 
        /* Collect all fgs which has a matching match_criteria */
        err = build_match_list(&match_head, ft, spec);
-       if (err)
+       if (err) {
+               if (take_write)
+                       up_write_ref_node(&ft->node);
                return ERR_PTR(err);
+       }
 
        if (!take_write)
                up_read_ref_node(&ft->node);
@@ -1768,8 +1772,11 @@ search_again_locked:
                                      dest_num, version);
        free_match_list(&match_head);
        if (!IS_ERR(rule) ||
-           (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN))
+           (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
+               if (take_write)
+                       up_write_ref_node(&ft->node);
                return rule;
+       }
 
        if (!take_write) {
                nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
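
Both early returns fixed above can be reached after the table node was
write-locked under take_write, so they must drop that reference before
bailing out. The shape of the corrected error path, with illustrative names.

    static struct mlx5_flow_handle *
    example_bail_out(struct mlx5_flow_table *ft, bool take_write, int err)
    {
            /* balance nested_down_write_ref_node() taken earlier */
            if (take_write)
                    up_write_ref_node(&ft->node);
            return ERR_PTR(err);
    }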
index e159243..8570355 100644
@@ -34,6 +34,7 @@
 #include <linux/highmem.h>
 #include <rdma/mlx5-abi.h>
 #include "en.h"
+#include "clock.h"
 
 enum {
        MLX5_CYCLES_SHIFT       = 23
index 2ef641c..7142c90 100644
@@ -551,7 +551,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
                MLX5_SET(cmd_hca_cap,
                         set_hca_cap,
                         cache_line_128byte,
-                        cache_line_size() == 128 ? 1 : 0);
+                        cache_line_size() >= 128 ? 1 : 0);
 
        if (MLX5_CAP_GEN_MAX(dev, dct))
                MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
@@ -942,9 +942,9 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto out;
        }
 
-       err = mlx5_init_cq_table(dev);
+       err = mlx5_cq_debugfs_init(dev);
        if (err) {
-               dev_err(&pdev->dev, "failed to initialize cq table\n");
+               dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
                goto err_eq_cleanup;
        }
 
@@ -1002,7 +1002,7 @@ err_tables_cleanup:
        mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
-       mlx5_cleanup_cq_table(dev);
+       mlx5_cq_debugfs_cleanup(dev);
 
 err_eq_cleanup:
        mlx5_eq_cleanup(dev);
@@ -1023,7 +1023,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
        mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
-       mlx5_cleanup_cq_table(dev);
+       mlx5_cq_debugfs_cleanup(dev);
        mlx5_eq_cleanup(dev);
 }
 
index 394552f..4e25f2b 100644
 #include <linux/sched.h>
 #include <linux/if_link.h>
 #include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
 
 #define DRIVER_NAME "mlx5_core"
 #define DRIVER_VERSION "5.0-0"
 
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
-       (MLX5_CAP_GEN(mdev, vport_group_manager) && \
-       (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
-        mlx5_core_is_pf(mdev))
-
 extern uint mlx5_core_debug_mask;
 
 #define mlx5_core_dbg(__dev, format, ...)                              \
@@ -115,9 +110,29 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
                                        u32 element_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+                      int nent, u64 mask, const char *name,
+                      enum mlx5_eq_type type);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                      u32 *out, int outlen);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
 u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
 void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
 int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
                        u8 access_reg_group);
@@ -186,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
 int mlx5_lag_allow(struct mlx5_core_dev *dev);
 int mlx5_lag_forbid(struct mlx5_core_dev *dev);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
 #endif /* __MLX5_CORE_H__ */
index 6bcfc25..ea66448 100644
@@ -41,7 +41,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 {
-       return wq->sz_m1 + 1;
+       return wq->fbc.sz_m1 + 1;
 }
 
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
@@ -62,7 +62,7 @@ static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
 
 static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
 {
-       return mlx5_cqwq_get_size(wq) << wq->log_stride;
+       return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
 }
 
 static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
@@ -92,7 +92,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                goto err_db_free;
        }
 
-       wq->buf = wq_ctrl->buf.direct.buf;
+       wq->buf = wq_ctrl->buf.frags->buf;
        wq->db  = wq_ctrl->db.db;
 
        wq_ctrl->mdev = mdev;
@@ -130,7 +130,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                goto err_db_free;
        }
 
-       wq->rq.buf = wq_ctrl->buf.direct.buf;
+       wq->rq.buf = wq_ctrl->buf.frags->buf;
        wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
        wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
        wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
@@ -151,11 +151,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 {
        int err;
 
-       wq->log_stride  = 6 + MLX5_GET(cqc, cqc, cqe_sz);
-       wq->log_sz      = MLX5_GET(cqc, cqc, log_cq_size);
-       wq->sz_m1       = (1 << wq->log_sz) - 1;
-       wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
-       wq->frag_sz_m1  = (1 << wq->log_frag_strides) - 1;
+       mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
 
        err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
        if (err) {
@@ -172,7 +168,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                goto err_db_free;
        }
 
-       wq->frag_buf = wq_ctrl->frag_buf;
+       wq->fbc.frag_buf = wq_ctrl->frag_buf;
        wq->db  = wq_ctrl->db.db;
 
        wq_ctrl->mdev = mdev;
@@ -209,7 +205,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                goto err_db_free;
        }
 
-       wq->buf = wq_ctrl->buf.direct.buf;
+       wq->buf = wq_ctrl->buf.frags->buf;
        wq->db  = wq_ctrl->db.db;
 
        for (i = 0; i < wq->sz_m1; i++) {
index 718589d..fca90b9 100644
@@ -45,7 +45,7 @@ struct mlx5_wq_param {
 
 struct mlx5_wq_ctrl {
        struct mlx5_core_dev    *mdev;
-       struct mlx5_buf         buf;
+       struct mlx5_frag_buf    buf;
        struct mlx5_db          db;
 };
 
@@ -68,14 +68,9 @@ struct mlx5_wq_qp {
 };
 
 struct mlx5_cqwq {
-       struct mlx5_frag_buf    frag_buf;
-       __be32                  *db;
-       u32                     sz_m1;
-       u32                     frag_sz_m1;
-       u32                     cc; /* consumer counter */
-       u8                      log_sz;
-       u8                      log_stride;
-       u8                      log_frag_strides;
+       struct mlx5_frag_buf_ctrl fbc;
+       __be32                    *db;
+       u32                       cc; /* consumer counter */
 };
 
 struct mlx5_wq_ll {
@@ -131,20 +126,17 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
 
 static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
 {
-       return wq->cc & wq->sz_m1;
+       return wq->cc & wq->fbc.sz_m1;
 }
 
 static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-       unsigned int frag = (ix >> wq->log_frag_strides);
-
-       return wq->frag_buf.frags[frag].buf +
-               ((wq->frag_sz_m1 & ix) << wq->log_stride);
+       return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
 static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
 {
-       return wq->cc >> wq->log_sz;
+       return wq->cc >> wq->fbc.log_sz;
 }
 
 static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
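
The CQ work-queue bookkeeping moves into mlx5_frag_buf_ctrl, and
mlx5_frag_buf_get_wqe() hides the index-to-fragment arithmetic that the
removed mlx5_cqwq fields used to drive. The math spelled out, assuming the
fbc fields keep the semantics of the old per-cqwq copies (log_stride,
log_frag_strides and frag_sz_m1 as set up by mlx5_core_init_cq_frag_buf()).

    static inline void *
    example_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
    {
            /* which PAGE_SIZE fragment the entry lives in */
            unsigned int frag = ix >> fbc->log_frag_strides;

            /* offset of the entry within that fragment */
            return fbc->frag_buf.frags[frag].buf +
                   ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
    }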
index d56eea3..93d97b4 100644
@@ -78,6 +78,10 @@ config MLXSW_SPECTRUM
        depends on IPV6 || IPV6=n
        select PARMAN
        select MLXFW
+       depends on NET_IPGRE
+       depends on !(MLXSW_CORE=y && NET_IPGRE=m)
+       depends on IPV6_GRE
+       depends on !(MLXSW_CORE=y && IPV6_GRE=m)
        default m
        ---help---
          This driver supports Mellanox Technologies Spectrum Ethernet
index 9463c3f..0cadcab 100644
@@ -20,7 +20,7 @@ mlxsw_spectrum-objs           := spectrum.o spectrum_buffers.o \
                                   spectrum_cnt.o spectrum_fid.o \
                                   spectrum_ipip.o spectrum_acl_flex_actions.o \
                                   spectrum_mr.o spectrum_mr_tcam.o \
-                                  spectrum_qdisc.o
+                                  spectrum_qdisc.o spectrum_span.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)    += spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
 obj-$(CONFIG_MLXSW_MINIMAL)    += mlxsw_minimal.o
index b698fb4..ba33842 100644
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
@@ -838,7 +838,6 @@ struct mlxsw_afa_mirror {
        struct mlxsw_afa_resource resource;
        int span_id;
        u8 local_in_port;
-       u8 local_out_port;
        bool ingress;
 };
 
@@ -848,7 +847,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
 {
        block->afa->ops->mirror_del(block->afa->ops_priv,
                                    mirror->local_in_port,
-                                   mirror->local_out_port,
+                                   mirror->span_id,
                                    mirror->ingress);
        kfree(mirror);
 }
@@ -864,9 +863,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
 }
 
 static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
-                       u8 local_in_port, u8 local_out_port,
-                       bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+                       const struct net_device *out_dev, bool ingress)
 {
        struct mlxsw_afa_mirror *mirror;
        int err;
@@ -876,13 +874,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
                return ERR_PTR(-ENOMEM);
 
        err = block->afa->ops->mirror_add(block->afa->ops_priv,
-                                         local_in_port, local_out_port,
+                                         local_in_port, out_dev,
                                          ingress, &mirror->span_id);
        if (err)
                goto err_mirror_add;
 
        mirror->ingress = ingress;
-       mirror->local_out_port = local_out_port;
        mirror->local_in_port = local_in_port;
        mirror->resource.destructor = mlxsw_afa_mirror_destructor;
        mlxsw_afa_resource_add(block, &mirror->resource);
@@ -909,13 +906,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
 }
 
 int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-                             u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+                             const struct net_device *out_dev, bool ingress)
 {
        struct mlxsw_afa_mirror *mirror;
        int err;
 
-       mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+       mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
                                         ingress);
        if (IS_ERR(mirror))
                return PTR_ERR(mirror);
index 4313229..6dd6017 100644
@@ -36,6 +36,7 @@
 #define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
 
 #include <linux/types.h>
+#include <linux/netdevice.h>
 
 struct mlxsw_afa;
 struct mlxsw_afa_block;
@@ -48,9 +49,10 @@ struct mlxsw_afa_ops {
        void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
        int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
        void (*counter_index_put)(void *priv, unsigned int counter_index);
-       int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+       int (*mirror_add)(void *priv, u8 local_in_port,
+                         const struct net_device *out_dev,
                          bool ingress, int *p_span_id);
-       void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+       void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
                           bool ingress);
 };
 
@@ -70,7 +72,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
 int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
                                            u16 trap_id);
 int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-                                 u8 local_in_port, u8 local_out_port,
+                                 u8 local_in_port,
+                                 const struct net_device *out_dev,
                                  bool ingress);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
                               u8 local_port, bool in_port);
index 0e08be4..cb5f77f 100644
@@ -1,11 +1,11 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -6772,8 +6772,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
  */
 MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
 
+enum mlxsw_reg_mpat_span_type {
+       /* Local SPAN Ethernet.
+        * The original packet is not encapsulated.
+        */
+       MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+       /* Encapsulated Remote SPAN Ethernet L3 GRE.
+        * The packet is encapsulated with GRE header.
+        */
+       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+       MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+       MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+       MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
 static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
-                                      u16 system_port, bool e)
+                                      u16 system_port, bool e,
+                                      enum mlxsw_reg_mpat_span_type span_type)
 {
        MLXSW_REG_ZERO(mpat, payload);
        mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6781,6 +6877,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
        mlxsw_reg_mpat_e_set(payload, e);
        mlxsw_reg_mpat_qos_set(payload, 1);
        mlxsw_reg_mpat_be_set(payload, 1);
+       mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+       mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+                                enum mlxsw_reg_mpat_eth_rspan_version version,
+                                const char *mac,
+                                bool tp)
+{
+       mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+       mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+       mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+                                     const char *smac,
+                                     u32 sip, u32 dip)
+{
+       mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+       mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+       mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+       mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+       mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+                                     const char *smac,
+                                     struct in6_addr sip, struct in6_addr dip)
+{
+       mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+       mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+       mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+       mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+       mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
 }
 
 /* MPAR - Monitoring Port Analyzer Register
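
Taken together, the new helpers compose an encapsulated mirror entry in
three layers: the base MPAT entry with its span type, the RSPAN L2 header,
and finally the tunnel addressing. A hedged sketch of the IPv4 call
sequence; the TTL, MACs and IP addresses are placeholders.

    static void example_mpat_erspan_pack(char *mpat_pl, u8 pa_id,
                                         u16 system_port, const char *smac,
                                         const char *dmac, u32 sip, u32 dip)
    {
            mlxsw_reg_mpat_pack(mpat_pl, pa_id, system_port, true,
                                MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
            mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
                                MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
                                dmac, false /* untagged */);
            mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl, 64 /* TTL */,
                                                  smac, sip, dip);
    }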
index 3dcc58d..7c6204f 100644
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -71,6 +71,7 @@
 #include "spectrum_cnt.h"
 #include "spectrum_dpipe.h"
 #include "spectrum_acl_flex_actions.h"
+#include "spectrum_span.h"
 #include "../mlxfw/mlxfw.h"
 
 #define MLXSW_FWREV_MAJOR 13
@@ -487,327 +488,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
        return 0;
 }
 
-static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
-{
-       int i;
-
-       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
-               return -EIO;
-
-       mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
-                                                         MAX_SPAN);
-       mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
-                                        sizeof(struct mlxsw_sp_span_entry),
-                                        GFP_KERNEL);
-       if (!mlxsw_sp->span.entries)
-               return -ENOMEM;
-
-       for (i = 0; i < mlxsw_sp->span.entries_count; i++)
-               INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
-
-       return 0;
-}
-
-static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
-{
-       int i;
-
-       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
-               WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
-       }
-       kfree(mlxsw_sp->span.entries);
-}
-
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
-{
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       struct mlxsw_sp_span_entry *span_entry;
-       char mpat_pl[MLXSW_REG_MPAT_LEN];
-       u8 local_port = port->local_port;
-       int index;
-       int i;
-       int err;
-
-       /* find a free entry to use */
-       index = -1;
-       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-               if (!mlxsw_sp->span.entries[i].used) {
-                       index = i;
-                       span_entry = &mlxsw_sp->span.entries[i];
-                       break;
-               }
-       }
-       if (index < 0)
-               return NULL;
-
-       /* create a new port analayzer entry for local_port */
-       mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-       if (err)
-               return NULL;
-
-       span_entry->used = true;
-       span_entry->id = index;
-       span_entry->ref_count = 1;
-       span_entry->local_port = local_port;
-       return span_entry;
-}
-
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_span_entry *span_entry)
-{
-       u8 local_port = span_entry->local_port;
-       char mpat_pl[MLXSW_REG_MPAT_LEN];
-       int pa_id = span_entry->id;
-
-       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-       span_entry->used = false;
-}
-
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
-{
-       int i;
-
-       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
-               if (curr->used && curr->local_port == local_port)
-                       return curr;
-       }
-       return NULL;
-}
-
-static struct mlxsw_sp_span_entry
-*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
-{
-       struct mlxsw_sp_span_entry *span_entry;
-
-       span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
-                                             port->local_port);
-       if (span_entry) {
-               /* Already exists, just take a reference */
-               span_entry->ref_count++;
-               return span_entry;
-       }
-
-       return mlxsw_sp_span_entry_create(port);
-}
-
-static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
-                                  struct mlxsw_sp_span_entry *span_entry)
-{
-       WARN_ON(!span_entry->ref_count);
-       if (--span_entry->ref_count == 0)
-               mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
-       return 0;
-}
-
-static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
-{
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       struct mlxsw_sp_span_inspected_port *p;
-       int i;
-
-       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
-               list_for_each_entry(p, &curr->bound_ports_list, list)
-                       if (p->local_port == port->local_port &&
-                           p->type == MLXSW_SP_SPAN_EGRESS)
-                               return true;
-       }
-
-       return false;
-}
-
-static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
-                                        int mtu)
-{
-       return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
-}
-
-static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
-{
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       char sbib_pl[MLXSW_REG_SBIB_LEN];
-       int err;
-
-       /* If port is egress mirrored, the shared buffer size should be
-        * updated according to the mtu value
-        */
-       if (mlxsw_sp_span_is_egress_mirror(port)) {
-               u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
-
-               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
-               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-               if (err) {
-                       netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
-                       return err;
-               }
-       }
-
-       return 0;
-}
-
-static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
-                                   struct mlxsw_sp_span_entry *span_entry)
-{
-       struct mlxsw_sp_span_inspected_port *p;
-
-       list_for_each_entry(p, &span_entry->bound_ports_list, list)
-               if (port->local_port == p->local_port)
-                       return p;
-       return NULL;
-}
-
-static int
-mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
-                                 struct mlxsw_sp_span_entry *span_entry,
-                                 enum mlxsw_sp_span_type type,
-                                 bool bind)
-{
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       char mpar_pl[MLXSW_REG_MPAR_LEN];
-       int pa_id = span_entry->id;
-
-       /* bind the port to the SPAN entry */
-       mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
-                           (enum mlxsw_reg_mpar_i_e) type, bind, pa_id);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
-}
-
-static int
-mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
-                                struct mlxsw_sp_span_entry *span_entry,
-                                enum mlxsw_sp_span_type type,
-                                bool bind)
-{
-       struct mlxsw_sp_span_inspected_port *inspected_port;
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       char sbib_pl[MLXSW_REG_SBIB_LEN];
-       int err;
-
-       /* if it is an egress SPAN, bind a shared buffer to it */
-       if (type == MLXSW_SP_SPAN_EGRESS) {
-               u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
-                                                            port->dev->mtu);
-
-               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
-               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-               if (err) {
-                       netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
-                       return err;
-               }
-       }
-
-       if (bind) {
-               err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-                                                       true);
-               if (err)
-                       goto err_port_bind;
-       }
-
-       inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
-       if (!inspected_port) {
-               err = -ENOMEM;
-               goto err_inspected_port_alloc;
-       }
-       inspected_port->local_port = port->local_port;
-       inspected_port->type = type;
-       list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
-
-       return 0;
-
-err_inspected_port_alloc:
-       if (bind)
-               mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-                                                 false);
-err_port_bind:
-       if (type == MLXSW_SP_SPAN_EGRESS) {
-               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
-               mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-       }
-       return err;
-}
-
-static void
-mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
-                                struct mlxsw_sp_span_entry *span_entry,
-                                enum mlxsw_sp_span_type type,
-                                bool bind)
-{
-       struct mlxsw_sp_span_inspected_port *inspected_port;
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       char sbib_pl[MLXSW_REG_SBIB_LEN];
-
-       inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
-       if (!inspected_port)
-               return;
-
-       if (bind)
-               mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-                                                 false);
-       /* remove the SBIB buffer if it was egress SPAN */
-       if (type == MLXSW_SP_SPAN_EGRESS) {
-               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
-               mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-       }
-
-       mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-
-       list_del(&inspected_port->list);
-       kfree(inspected_port);
-}
-
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-                            struct mlxsw_sp_port *to,
-                            enum mlxsw_sp_span_type type, bool bind)
-{
-       struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
-       struct mlxsw_sp_span_entry *span_entry;
-       int err;
-
-       span_entry = mlxsw_sp_span_entry_get(to);
-       if (!span_entry)
-               return -ENOENT;
-
-       netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
-                  span_entry->id);
-
-       err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
-       if (err)
-               goto err_port_bind;
-
-       return 0;
-
-err_port_bind:
-       mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-       return err;
-}
-
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
-                             enum mlxsw_sp_span_type type, bool bind)
-{
-       struct mlxsw_sp_span_entry *span_entry;
-
-       span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
-                                             destination_port);
-       if (!span_entry) {
-               netdev_err(from->dev, "no span entry found\n");
-               return;
-       }
-
-       netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
-                  span_entry->id);
-       mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
-}
-
 static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                    bool enable, u32 rate)
 {
@@ -1360,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
                xstats->tail_drop[i] =
                        mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
        }
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+                                                 i, ppcnt_pl);
+               if (err)
+                       continue;
+
+               xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+               xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+       }
 }
 
 static void update_stats_cache(struct work_struct *work)
@@ -1578,7 +1268,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
                                      bool ingress)
 {
        enum mlxsw_sp_span_type span_type;
-       struct mlxsw_sp_port *to_port;
        struct net_device *to_dev;
 
        to_dev = tcf_mirred_dev(a);
@@ -1587,17 +1276,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
                return -EINVAL;
        }
 
-       if (!mlxsw_sp_port_dev_check(to_dev)) {
-               netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
-               return -EOPNOTSUPP;
-       }
-       to_port = netdev_priv(to_dev);
-
-       mirror->to_local_port = to_port->local_port;
        mirror->ingress = ingress;
        span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
-                                       true);
+       return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+                                       true, &mirror->span_id);
 }
 
 static void
@@ -1608,7 +1290,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 
        span_type = mirror->ingress ?
                        MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+       mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
                                 span_type, true);
 }
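
With this hunk, a matchall mirror is identified by the SPAN agent id handed back from mlxsw_sp_span_mirror_add() rather than by the destination's local port, which also lets the destination be a netdev the driver does not own. A minimal sketch of the new call pairing (hypothetical example_* wrappers; driver-internal headers assumed):

        /* Sketch only: the SPAN code now validates and resolves the
         * destination netdev itself, so callers never touch port numbers.
         */
        static int example_mirror_start(struct mlxsw_sp_port *from,
                                        const struct net_device *to_dev,
                                        bool ingress, int *p_span_id)
        {
                enum mlxsw_sp_span_type type;

                type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
                /* On success, *p_span_id identifies the SPAN agent. */
                return mlxsw_sp_span_mirror_add(from, to_dev, type, true,
                                                p_span_id);
        }

        static void example_mirror_stop(struct mlxsw_sp_port *from,
                                        int span_id, bool ingress)
        {
                enum mlxsw_sp_span_type type;

                type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
                /* Keyed by the span_id cookie, not by a destination port. */
                mlxsw_sp_span_mirror_del(from, span_id, type, true);
        }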
 
@@ -3995,14 +3677,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_afa_init;
        }
 
+       err = mlxsw_sp_span_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+               goto err_span_init;
+       }
+
+       /* Initialize router after SPAN is initialized, so that the FIB and
+        * neighbor event handlers can issue SPAN respin.
+        */
        err = mlxsw_sp_router_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
                goto err_router_init;
        }
 
-       /* Initialize netdevice notifier after router is initialized, so that
-        * the event handler can use router structures.
+       /* Initialize netdevice notifier after router and SPAN are initialized,
+        * so that the event handler can use router structures and call SPAN
+        * respin.
         */
        mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
        err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -4011,12 +3703,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_netdev_notifier;
        }
 
-       err = mlxsw_sp_span_init(mlxsw_sp);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
-               goto err_span_init;
-       }
-
        err = mlxsw_sp_acl_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -4042,12 +3728,12 @@ err_ports_create:
 err_dpipe_init:
        mlxsw_sp_acl_fini(mlxsw_sp);
 err_acl_init:
-       mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
        mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+       mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
        mlxsw_sp_afa_fini(mlxsw_sp);
 err_afa_init:
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -4073,9 +3759,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_ports_remove(mlxsw_sp);
        mlxsw_sp_dpipe_fini(mlxsw_sp);
        mlxsw_sp_acl_fini(mlxsw_sp);
-       mlxsw_sp_span_fini(mlxsw_sp);
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
        mlxsw_sp_router_fini(mlxsw_sp);
+       mlxsw_sp_span_fini(mlxsw_sp);
        mlxsw_sp_afa_fini(mlxsw_sp);
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
        mlxsw_sp_switchdev_fini(mlxsw_sp);
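
The reordering above keeps bring-up and teardown as mirror images: SPAN now initializes before the router, whose FIB and neighbor handlers may respin SPAN, and both the error unwind of mlxsw_sp_init() and mlxsw_sp_fini() are adjusted to tear down in exactly the reverse order. A standalone sketch of the idiom, with hypothetical subsystems a, b and c:

        #include <stdio.h>

        static int a_init(void) { puts("a up"); return 0; }
        static void a_fini(void) { puts("a down"); }
        static int b_init(void) { puts("b up"); return 0; }  /* depends on a */
        static void b_fini(void) { puts("b down"); }
        static int c_init(void) { puts("c up"); return 0; }  /* depends on a, b */
        static void c_fini(void) { puts("c down"); }

        static int example_init(void)
        {
                int err;

                err = a_init();
                if (err)
                        return err;
                err = b_init();
                if (err)
                        goto err_b_init;
                err = c_init();
                if (err)
                        goto err_c_init;
                return 0;

        err_c_init:     /* unwind labels mirror the init order exactly */
                b_fini();
        err_b_init:
                a_fini();
                return err;
        }

        static void example_fini(void)
        {
                c_fini();       /* teardown is init, reversed */
                b_fini();
                a_fini();
        }

        int main(void)
        {
                if (example_init() == 0)
                        example_fini();
                return 0;
        }
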
@@ -4118,70 +3804,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
        .resource_query_enable          = 1,
 };
 
-static bool
-mlxsw_sp_resource_kvd_granularity_validate(struct netlink_ext_ack *extack,
-                                          u64 size)
-{
-       const struct mlxsw_config_profile *profile;
-
-       profile = &mlxsw_sp_config_profile;
-       if (size % profile->kvd_hash_granularity) {
-               NL_SET_ERR_MSG_MOD(extack, "resource set with wrong granularity");
-               return false;
-       }
-       return true;
-}
-
-static int
-mlxsw_sp_resource_kvd_size_validate(struct devlink *devlink, u64 size,
-                                   struct netlink_ext_ack *extack)
-{
-       NL_SET_ERR_MSG_MOD(extack, "kvd size cannot be changed");
-       return -EINVAL;
-}
-
-static int
-mlxsw_sp_resource_kvd_linear_size_validate(struct devlink *devlink, u64 size,
-                                          struct netlink_ext_ack *extack)
-{
-       if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
-               return -EINVAL;
-
-       return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_single_size_validate(struct devlink *devlink, u64 size,
-                                               struct netlink_ext_ack *extack)
-{
-       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
-       if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
-               return -EINVAL;
-
-       if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE)) {
-               NL_SET_ERR_MSG_MOD(extack, "hash single size is smaller than minimum");
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_double_size_validate(struct devlink *devlink, u64 size,
-                                               struct netlink_ext_ack *extack)
-{
-       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
-       if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
-               return -EINVAL;
-
-       if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE)) {
-               NL_SET_ERR_MSG_MOD(extack, "hash double size is smaller than minimum");
-               return -EINVAL;
-       }
-       return 0;
-}
-
 static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
 {
        struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -4190,23 +3812,10 @@ static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
        return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
 }
 
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_ops = {
-       .size_validate = mlxsw_sp_resource_kvd_size_validate,
-};
-
 static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
-       .size_validate = mlxsw_sp_resource_kvd_linear_size_validate,
        .occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
 };
 
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_single_ops = {
-       .size_validate = mlxsw_sp_resource_kvd_hash_single_size_validate,
-};
-
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_double_ops = {
-       .size_validate = mlxsw_sp_resource_kvd_hash_double_size_validate,
-};
-
 static struct devlink_resource_size_params mlxsw_sp_kvd_size_params;
 static struct devlink_resource_size_params mlxsw_sp_linear_size_params;
 static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params;
@@ -4268,7 +3877,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
                                        MLXSW_SP_RESOURCE_KVD,
                                        DEVLINK_RESOURCE_ID_PARENT_TOP,
                                        &mlxsw_sp_kvd_size_params,
-                                       &mlxsw_sp_resource_kvd_ops);
+                                       NULL);
        if (err)
                return err;
 
@@ -4282,6 +3891,10 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
        if (err)
                return err;
 
+       err = mlxsw_sp_kvdl_resources_register(devlink);
+       if (err)
+               return err;
+
        double_size = kvd_size - linear_size;
        double_size *= profile->kvd_hash_double_parts;
        double_size /= profile->kvd_hash_double_parts +
@@ -4292,7 +3905,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
                                        MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
                                        MLXSW_SP_RESOURCE_KVD,
                                        &mlxsw_sp_hash_double_size_params,
-                                       &mlxsw_sp_resource_kvd_hash_double_ops);
+                                       NULL);
        if (err)
                return err;
 
@@ -4302,7 +3915,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
                                        MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
                                        MLXSW_SP_RESOURCE_KVD,
                                        &mlxsw_sp_hash_single_size_params,
-                                       &mlxsw_sp_resource_kvd_hash_single_ops);
+                                       NULL);
        if (err)
                return err;
 
@@ -4556,13 +4169,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
        u16 lag_id;
 
        if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
-               NL_SET_ERR_MSG(extack,
-                              "spectrum: Exceeded number of supported LAG devices");
+               NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
                return false;
        }
        if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
-               NL_SET_ERR_MSG(extack,
-                              "spectrum: LAG device using unsupported Tx type");
+               NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
                return false;
        }
        return true;
@@ -4804,8 +4415,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                    !netif_is_lag_master(upper_dev) &&
                    !netif_is_bridge_master(upper_dev) &&
                    !netif_is_ovs_master(upper_dev)) {
-                       NL_SET_ERR_MSG(extack,
-                                      "spectrum: Unknown upper device type");
+                       NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
                        return -EINVAL;
                }
                if (!info->linking)
@@ -4814,8 +4424,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                    (!netif_is_bridge_master(upper_dev) ||
                     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
                                                          upper_dev))) {
-                       NL_SET_ERR_MSG(extack,
-                                      "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+                       NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
                        return -EINVAL;
                }
                if (netif_is_lag_master(upper_dev) &&
@@ -4823,24 +4432,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                                               info->upper_info, extack))
                        return -EINVAL;
                if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
-                       NL_SET_ERR_MSG(extack,
-                                      "spectrum: Master device is a LAG master and this device has a VLAN");
+                       NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN");
                        return -EINVAL;
                }
                if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
                    !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
-                       NL_SET_ERR_MSG(extack,
-                                      "spectrum: Can not put a VLAN on a LAG port");
+                       NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
                        return -EINVAL;
                }
                if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
-                       NL_SET_ERR_MSG(extack,
-                                      "spectrum: Master device is an OVS master and this device has a VLAN");
+                       NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
                        return -EINVAL;
                }
                if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
-                       NL_SET_ERR_MSG(extack,
-                                      "spectrum: Can not put a VLAN on an OVS port");
+                       NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
                        return -EINVAL;
                }
                break;
@@ -4953,7 +4558,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
        case NETDEV_PRECHANGEUPPER:
                upper_dev = info->upper_dev;
                if (!netif_is_bridge_master(upper_dev)) {
-                       NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers");
+                       NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
                        return -EINVAL;
                }
                if (!info->linking)
@@ -4962,7 +4567,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
                    (!netif_is_bridge_master(upper_dev) ||
                     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
                                                          upper_dev))) {
-                       NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+                       NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
                        return -EINVAL;
                }
                break;
@@ -5040,10 +4645,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct mlxsw_sp_span_entry *span_entry;
        struct mlxsw_sp *mlxsw_sp;
        int err = 0;
 
        mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+       if (event == NETDEV_UNREGISTER) {
+               span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+               if (span_entry)
+                       mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+       }
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
                err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
                                                       event, ptr);
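
The order of the two new steps in the notifier matters: an entry bound to the unregistering netdev is invalidated first, so the unconditional respin that follows re-resolves only entries whose destinations still exist. The same hunk, restated with the reasoning as comments:

        if (event == NETDEV_UNREGISTER) {
                /* Detach any SPAN agent whose destination is the dying
                 * netdev; it must not survive into the respin below.
                 */
                span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
                if (span_entry)
                        mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
        }
        /* Re-resolve the remaining SPAN entries against the new topology. */
        mlxsw_sp_span_respin(mlxsw_sp);
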
index bdd8f94..d5e711d 100644 (file)
 #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
 #define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
 #define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
 
 enum mlxsw_sp_resource_id {
        MLXSW_SP_RESOURCE_KVD,
        MLXSW_SP_RESOURCE_KVD_LINEAR,
        MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
        MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+       MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+       MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+       MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
 };
 
 struct mlxsw_sp_port;
 struct mlxsw_sp_rif;
+struct mlxsw_sp_span_entry;
 
 struct mlxsw_sp_upper {
        struct net_device *dev;
@@ -111,32 +118,13 @@ struct mlxsw_sp_mid {
        unsigned long *ports_in_mid; /* bits array */
 };
 
-enum mlxsw_sp_span_type {
-       MLXSW_SP_SPAN_EGRESS,
-       MLXSW_SP_SPAN_INGRESS
-};
-
-struct mlxsw_sp_span_inspected_port {
-       struct list_head list;
-       enum mlxsw_sp_span_type type;
-       u8 local_port;
-};
-
-struct mlxsw_sp_span_entry {
-       u8 local_port;
-       bool used;
-       struct list_head bound_ports_list;
-       int ref_count;
-       int id;
-};
-
 enum mlxsw_sp_port_mall_action_type {
        MLXSW_SP_PORT_MALL_MIRROR,
        MLXSW_SP_PORT_MALL_SAMPLE,
 };
 
 struct mlxsw_sp_port_mall_mirror_tc_entry {
-       u8 to_local_port;
+       int span_id;
        bool ingress;
 };
 
@@ -222,6 +210,8 @@ struct mlxsw_sp_port_xstats {
        u64 wred_drop[TC_MAX_QUEUE];
        u64 tail_drop[TC_MAX_QUEUE];
        u64 backlog[TC_MAX_QUEUE];
+       u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+       u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
 };
 
 struct mlxsw_sp_port {
@@ -259,6 +249,7 @@ struct mlxsw_sp_port {
        struct mlxsw_sp_port_sample *sample;
        struct list_head vlans_list;
        struct mlxsw_sp_qdisc *root_qdisc;
+       struct mlxsw_sp_qdisc *tclass_qdiscs;
        unsigned acl_rule_count;
        struct mlxsw_sp_acl_block *ing_acl_block;
        struct mlxsw_sp_acl_block *eg_acl_block;
@@ -396,16 +387,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
 void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
 struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-                            struct mlxsw_sp_port *to,
-                            enum mlxsw_sp_span_type type,
-                            bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from,
-                             u8 destination_port,
-                             enum mlxsw_sp_span_type type,
-                             bool bind);
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
 
 /* spectrum_dcb.c */
 #ifdef CONFIG_MLXSW_SPECTRUM_DCB
@@ -461,6 +442,7 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
                                   unsigned int entry_count,
                                   unsigned int *p_alloc_size);
 u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
 
 struct mlxsw_sp_acl_rule_info {
        unsigned int priority;
index 0897a54..21ed27a 100644 (file)
@@ -572,7 +572,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
                                  struct net_device *out_dev)
 {
        struct mlxsw_sp_acl_block_binding *binding;
-       struct mlxsw_sp_port *out_port;
        struct mlxsw_sp_port *in_port;
 
        if (!list_is_singular(&block->binding_list))
@@ -581,16 +580,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
        binding = list_first_entry(&block->binding_list,
                                   struct mlxsw_sp_acl_block_binding, list);
        in_port = binding->mlxsw_sp_port;
-       if (!mlxsw_sp_port_dev_check(out_dev))
-               return -EINVAL;
-
-       out_port = netdev_priv(out_dev);
-       if (out_port->mlxsw_sp != mlxsw_sp)
-               return -EINVAL;
 
        return mlxsw_afa_block_append_mirror(rulei->act_block,
                                             in_port->local_port,
-                                            out_port->local_port,
+                                            out_dev,
                                             binding->ingress);
 }
 
index 6ca6894..510ce48 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
  *
@@ -35,6 +35,7 @@
 
 #include "spectrum_acl_flex_actions.h"
 #include "core_acl_flex_actions.h"
+#include "spectrum_span.h"
 
 #define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
 
@@ -125,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
 }
 
 static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+                       const struct net_device *out_dev,
                        bool ingress, int *p_span_id)
 {
-       struct mlxsw_sp_port *in_port, *out_port;
-       struct mlxsw_sp_span_entry *span_entry;
+       struct mlxsw_sp_port *in_port;
        struct mlxsw_sp *mlxsw_sp = priv;
        enum mlxsw_sp_span_type type;
-       int err;
 
        type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       out_port = mlxsw_sp->ports[local_out_port];
        in_port = mlxsw_sp->ports[local_in_port];
 
-       err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
-       if (err)
-               return err;
-
-       span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
-       if (!span_entry) {
-               err = -ENOENT;
-               goto err_span_entry_find;
-       }
-
-       *p_span_id = span_entry->id;
-       return 0;
-
-err_span_entry_find:
-       mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
-       return err;
+       return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+                                       false, p_span_id);
 }
 
 static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
-                       bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
 {
        struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_port *in_port;
@@ -167,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
        type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
        in_port = mlxsw_sp->ports[local_in_port];
 
-       mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+       mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
 }
 
 static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
index 7502e53..98d896c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  */
 
 #include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
 
 #include "spectrum_ipip.h"
 
 struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev)
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
 {
        struct ip_tunnel *tun = netdev_priv(ol_dev);
 
        return tun->parms;
 }
 
-static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms)
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+       struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+       return tun->parms;
+}
+
+static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
 {
        return !!(parms.i_flags & TUNNEL_KEY);
 }
 
-static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms)
+static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
 {
        return !!(parms.o_flags & TUNNEL_KEY);
 }
 
-static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
 {
-       return mlxsw_sp_ipip_parms_has_ikey(parms) ?
+       return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
                be32_to_cpu(parms.i_key) : 0;
 }
 
-static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
 {
-       return mlxsw_sp_ipip_parms_has_okey(parms) ?
+       return mlxsw_sp_ipip_parms4_has_okey(parms) ?
                be32_to_cpu(parms.o_key) : 0;
 }
 
-static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
 {
-       return parms.iph.saddr;
+       return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
 }
 
 static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto,
-                         struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
 {
-       switch (proto) {
-       case MLXSW_SP_L3_PROTO_IPV4:
-               return (union mlxsw_sp_l3addr) {
-                       .addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
-               };
-       case MLXSW_SP_L3_PROTO_IPV6:
-               break;
-       }
-
-       WARN_ON(1);
-       return (union mlxsw_sp_l3addr) {
-               .addr4 = 0,
-       };
+       return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
 }
 
-static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
 {
-       return parms.iph.daddr;
+       return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
 }
 
 static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
-                         struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+       return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+                          const struct net_device *ol_dev)
 {
+       struct ip_tunnel_parm parms4;
+       struct __ip6_tnl_parm parms6;
+
        switch (proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
-               return (union mlxsw_sp_l3addr) {
-                       .addr4 = mlxsw_sp_ipip_parms_daddr4(parms),
-               };
+               parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+               return mlxsw_sp_ipip_parms4_saddr(parms4);
        case MLXSW_SP_L3_PROTO_IPV6:
-               break;
+               parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+               return mlxsw_sp_ipip_parms6_saddr(parms6);
        }
 
        WARN_ON(1);
-       return (union mlxsw_sp_l3addr) {
-               .addr4 = 0,
-       };
-}
-
-static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
-{
-       return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+       return (union mlxsw_sp_l3addr) {0};
 }
 
-static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
 {
-       return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
 
-static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
-{
-       return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+       struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
 
-static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
-{
-       return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+       return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
 }
 
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev)
 {
-       return mlxsw_sp_ipip_parms_saddr(proto,
-                                        mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+       struct ip_tunnel_parm parms4;
+       struct __ip6_tnl_parm parms6;
 
-static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
-{
-       return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
+       switch (proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+               return mlxsw_sp_ipip_parms4_daddr(parms4);
+       case MLXSW_SP_L3_PROTO_IPV6:
+               parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+               return mlxsw_sp_ipip_parms6_daddr(parms6);
+       }
+
+       WARN_ON(1);
+       return (union mlxsw_sp_l3addr) {0};
 }
 
-static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
-                          const struct net_device *ol_dev)
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
 {
-       return mlxsw_sp_ipip_parms_daddr(proto,
-                                        mlxsw_sp_ipip_netdev_parms(ol_dev));
+       union mlxsw_sp_l3addr naddr = {0};
+
+       return !memcmp(&addr, &naddr, sizeof(naddr));
 }
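
mlxsw_sp_l3addr_is_zero() compares the whole union against an all-zero instance, so it works for both address families without dispatching on the protocol. A standalone lookalike (union l3addr is a stand-in for union mlxsw_sp_l3addr; the demo zero-fills explicitly so the memcmp is well defined):

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        /* Stand-in for union mlxsw_sp_l3addr. */
        union l3addr {
                uint32_t addr4;
                uint8_t addr6[16];
        };

        static bool l3addr_is_zero(union l3addr addr)
        {
                union l3addr naddr;

                memset(&naddr, 0, sizeof(naddr));
                /* Whole-union compare, exactly like the new helper. */
                return !memcmp(&addr, &naddr, sizeof(naddr));
        }

        int main(void)
        {
                union l3addr unset, set;

                memset(&unset, 0, sizeof(unset));
                memset(&set, 0, sizeof(set));
                set.addr4 = 0x0100007fU;

                /* prints "1 0" */
                printf("%d %d\n", l3addr_is_zero(unset), l3addr_is_zero(set));
                return 0;
        }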
 
 static int
@@ -176,12 +175,17 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
                                     u32 tunnel_index,
                                     struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-       bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
        u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
-       u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
        char rtdp_pl[MLXSW_REG_RTDP_LEN];
+       struct ip_tunnel_parm parms;
        unsigned int type_check;
+       bool has_ikey;
        u32 daddr4;
+       u32 ikey;
+
+       parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
+       has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
+       ikey = mlxsw_sp_ipip_parms4_ikey(parms);
 
        mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
 
@@ -243,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
 {
        union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
        union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
-       union mlxsw_sp_l3addr naddr = {0};
 
        /* Tunnels with unset local or remote address are valid in Linux and
         * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
         * (NBMA) tunnels. In principle these can be offloaded, but the driver
         * currently doesn't support this. So punt.
         */
-       return memcmp(&saddr, &naddr, sizeof(naddr)) &&
-              memcmp(&daddr, &naddr, sizeof(naddr));
+       return !mlxsw_sp_l3addr_is_zero(saddr) &&
+              !mlxsw_sp_l3addr_is_zero(daddr);
 }
 
 static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
@@ -273,14 +276,15 @@ static struct mlxsw_sp_rif_ipip_lb_config
 mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
                                      const struct net_device *ol_dev)
 {
+       struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
        enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
 
-       lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+       lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
                MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
                MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
        return (struct mlxsw_sp_rif_ipip_lb_config){
                .lb_ipipt = lb_ipipt,
-               .okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
+               .okey = mlxsw_sp_ipip_parms4_okey(parms),
                .ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
                .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
                                                    ol_dev),
@@ -300,16 +304,12 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
        bool update_nhs = false;
        int err = 0;
 
-       new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev);
+       new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
 
-       new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
-                                             new_parms);
-       old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
-                                             ipip_entry->parms);
-       new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
-                                             new_parms);
-       old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
-                                             ipip_entry->parms);
+       new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
+       old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
+       new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
+       old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
 
        if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
                u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
@@ -326,14 +326,14 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
                }
 
                update_tunnel = true;
-       } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) !=
-                   mlxsw_sp_ipip_parms_okey(new_parms)) ||
-                  ipip_entry->parms.link != new_parms.link) {
+       } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
+                   mlxsw_sp_ipip_parms4_okey(new_parms)) ||
+                  ipip_entry->parms4.link != new_parms.link) {
                update_tunnel = true;
        } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
                update_nhs = true;
-       } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) !=
-                  mlxsw_sp_ipip_parms_ikey(new_parms)) {
+       } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
+                  mlxsw_sp_ipip_parms4_ikey(new_parms)) {
                update_decap = true;
        }
 
@@ -350,7 +350,7 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
                                                          false, false, false,
                                                          extack);
 
-       ipip_entry->parms = new_parms;
+       ipip_entry->parms4 = new_parms;
        return err;
 }
 
index 04b08d9..6909d86 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 
 #include "spectrum_router.h"
 #include <net/ip_fib.h>
+#include <linux/if_tunnel.h>
 
 struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
 
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev);
 
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
 enum mlxsw_sp_ipip_type {
        MLXSW_SP_IPIP_TYPE_GRE4,
        MLXSW_SP_IPIP_TYPE_MAX,
@@ -56,7 +61,9 @@ struct mlxsw_sp_ipip_entry {
        struct mlxsw_sp_rif_ipip_lb *ol_lb;
        struct mlxsw_sp_fib_entry *decap_fib_entry;
        struct list_head ipip_list_node;
-       struct ip_tunnel_parm parms;
+       union {
+               struct ip_tunnel_parm parms4;
+       };
 };
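
Renaming parms to parms4 and wrapping it in an anonymous union changes nothing today but reserves the slot for an IPv6 variant; a sketch of the intended growth (the parms6 member is an assumption, not part of this patch):

        union {
                struct ip_tunnel_parm parms4;
                struct __ip6_tnl_parm parms6;   /* hypothetical future member */
        };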
 
 struct mlxsw_sp_ipip_ops {
index 55f9d2d..059eb32 100644 (file)
@@ -67,7 +67,7 @@ struct mlxsw_sp_kvdl_part_info {
 
 struct mlxsw_sp_kvdl_part {
        struct list_head list;
-       const struct mlxsw_sp_kvdl_part_info *info;
+       struct mlxsw_sp_kvdl_part_info *info;
        unsigned long usage[0]; /* Entries */
 };
 
@@ -188,21 +188,27 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
        return 0;
 }
 
+enum mlxsw_sp_kvdl_part_id {
+       MLXSW_SP_KVDL_PART_SINGLE,
+       MLXSW_SP_KVDL_PART_CHUNKS,
+       MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
+};
+
 static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
        {
-               .part_index     = 0,
+               .part_index     = MLXSW_SP_KVDL_PART_SINGLE,
                .start_index    = MLXSW_SP_KVDL_SINGLE_BASE,
                .end_index      = MLXSW_SP_KVDL_SINGLE_END,
                .alloc_size     = 1,
        },
        {
-               .part_index     = 1,
+               .part_index     = MLXSW_SP_KVDL_PART_CHUNKS,
                .start_index    = MLXSW_SP_KVDL_CHUNKS_BASE,
                .end_index      = MLXSW_SP_KVDL_CHUNKS_END,
                .alloc_size     = MLXSW_SP_CHUNK_MAX,
        },
        {
-               .part_index     = 2,
+               .part_index     = MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
                .start_index    = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
                .end_index      = MLXSW_SP_KVDL_LARGE_CHUNKS_END,
                .alloc_size     = MLXSW_SP_LARGE_CHUNK_MAX,
@@ -222,27 +228,76 @@ mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
        return NULL;
 }
 
+static void
+mlxsw_sp_kvdl_part_update(struct mlxsw_sp *mlxsw_sp,
+                         struct mlxsw_sp_kvdl_part *part, unsigned int size)
+{
+       struct mlxsw_sp_kvdl_part_info *info = part->info;
+
+       if (list_is_last(&part->list, &mlxsw_sp->kvdl->parts_list)) {
+               info->end_index = size - 1;
+       } else {
+               struct mlxsw_sp_kvdl_part *last_part;
+
+               last_part = list_next_entry(part, list);
+               info->start_index = last_part->info->end_index + 1;
+               info->end_index = info->start_index + size - 1;
+       }
+}
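
mlxsw_sp_kvdl_part_update() recomputes a part's index range from its new size: the part at the tail of the parts list keeps start index 0 and gets end_index = size - 1, and any other part starts right after its list neighbour's end. A standalone arithmetic check with hypothetical sizes:

        #include <stdio.h>

        int main(void)
        {
                unsigned int size0 = 1024, size1 = 512; /* hypothetical sizes */
                unsigned int start0, end0, start1, end1;

                start0 = 0;                     /* list-tail part starts at 0 */
                end0 = size0 - 1;               /* end_index = size - 1 */

                start1 = end0 + 1;              /* neighbour's end_index + 1 */
                end1 = start1 + size1 - 1;      /* start_index + size - 1 */

                /* part0 [0, 1023], part1 [1024, 1535] */
                printf("part0 [%u, %u], part1 [%u, %u]\n",
                       start0, end0, start1, end1);
                return 0;
        }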
+
 static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
                                   unsigned int part_index)
 {
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
        const struct mlxsw_sp_kvdl_part_info *info;
+       enum mlxsw_sp_resource_id resource_id;
        struct mlxsw_sp_kvdl_part *part;
+       bool need_update = true;
        unsigned int nr_entries;
        size_t usage_size;
+       u64 resource_size;
+       int err;
 
        info = &kvdl_parts_info[part_index];
 
-       nr_entries = (info->end_index - info->start_index + 1) /
-                    info->alloc_size;
+       switch (part_index) {
+       case MLXSW_SP_KVDL_PART_SINGLE:
+               resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE;
+               break;
+       case MLXSW_SP_KVDL_PART_CHUNKS:
+               resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS;
+               break;
+       case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
+               resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       err = devlink_resource_size_get(devlink, resource_id, &resource_size);
+       if (err) {
+               need_update = false;
+               resource_size = info->end_index - info->start_index + 1;
+       }
+
+       nr_entries = div_u64(resource_size, info->alloc_size);
        usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
        part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
        if (!part)
                return -ENOMEM;
 
-       part->info = info;
-       list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+       part->info = kmemdup(info, sizeof(*part->info), GFP_KERNEL);
+       if (!part->info)
+               goto err_part_info_alloc;
 
+       list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+       if (need_update)
+               mlxsw_sp_kvdl_part_update(mlxsw_sp, part, resource_size);
        return 0;
+
+err_part_info_alloc:
+       kfree(part);
+       return -ENOMEM;
 }
 
 static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
@@ -255,6 +310,7 @@ static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
                return;
 
        list_del(&part->list);
+       kfree(part->info);
        kfree(part);
 }
 
@@ -312,6 +368,123 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
        return occ;
 }
 
+u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+{
+       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+       struct mlxsw_sp_kvdl_part *part;
+
+       part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_SINGLE);
+       if (!part)
+               return -EINVAL;
+
+       return mlxsw_sp_kvdl_part_occ(part);
+}
+
+u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+{
+       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+       struct mlxsw_sp_kvdl_part *part;
+
+       part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_CHUNKS);
+       if (!part)
+               return -EINVAL;
+
+       return mlxsw_sp_kvdl_part_occ(part);
+}
+
+u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+{
+       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+       struct mlxsw_sp_kvdl_part *part;
+
+       part = mlxsw_sp_kvdl_part_find(mlxsw_sp,
+                                      MLXSW_SP_KVDL_PART_LARGE_CHUNKS);
+       if (!part)
+               return -EINVAL;
+
+       return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
+       .occ_get = mlxsw_sp_kvdl_single_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
+       .occ_get = mlxsw_sp_kvdl_chunks_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
+       .occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
+       .size_min = 0,
+       .size_granularity = 1,
+       .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_chunks_size_params = {
+       .size_min = 0,
+       .size_granularity = MLXSW_SP_CHUNK_MAX,
+       .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_large_chunks_size_params = {
+       .size_min = 0,
+       .size_granularity = MLXSW_SP_LARGE_CHUNK_MAX,
+       .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static void
+mlxsw_sp_kvdl_resource_size_params_prepare(struct devlink *devlink)
+{
+       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+       u32 kvdl_max_size;
+
+       kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+                       MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+                       MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+       mlxsw_sp_kvdl_single_size_params.size_max = kvdl_max_size;
+       mlxsw_sp_kvdl_chunks_size_params.size_max = kvdl_max_size;
+       mlxsw_sp_kvdl_large_chunks_size_params.size_max = kvdl_max_size;
+}
+
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
+{
+       int err;
+
+       mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
+       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+                                       false, MLXSW_SP_KVDL_SINGLE_SIZE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                       &mlxsw_sp_kvdl_single_size_params,
+                                       &mlxsw_sp_kvdl_single_ops);
+       if (err)
+               return err;
+
+       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+                                       false, MLXSW_SP_KVDL_CHUNKS_SIZE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                       &mlxsw_sp_kvdl_chunks_size_params,
+                                       &mlxsw_sp_kvdl_chunks_ops);
+       if (err)
+               return err;
+
+       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+                                       false, MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                       &mlxsw_sp_kvdl_large_chunks_size_params,
+                                       &mlxsw_sp_kvdl_chunks_large_ops);
+       return err;
+}
+
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 {
        struct mlxsw_sp_kvdl *kvdl;
index d20b143..978a3c7 100644 (file)
@@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
 
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
-               ivif = mr_route->mfc4->mfc_parent;
-               return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+               ivif = mr_route->mfc4->_c.mfc_parent;
+               return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
        default:
@@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
        mr_route->mfc4 = mfc;
        mr_route->mr_table = mr_table;
        for (i = 0; i < MAXVIFS; i++) {
-               if (mfc->mfc_un.res.ttls[i] != 255) {
+               if (mfc->_c.mfc_un.res.ttls[i] != 255) {
                        err = mlxsw_sp_mr_route_evif_link(mr_route,
                                                          &mr_table->vifs[i]);
                        if (err)
@@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
                                mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
                }
        }
-       mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+       mlxsw_sp_mr_route_ivif_link(mr_route,
+                                   &mr_table->vifs[mfc->_c.mfc_parent]);
 
        mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
        return mr_route;
@@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                if (offload)
-                       mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+                       mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
                else
-                       mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+                       mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
@@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
 
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
-               if (mr_route->mfc4->mfc_un.res.pkt != packets)
-                       mr_route->mfc4->mfc_un.res.lastuse = jiffies;
-               mr_route->mfc4->mfc_un.res.pkt = packets;
-               mr_route->mfc4->mfc_un.res.bytes = bytes;
+               if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+                       mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+               mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+               mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
index 0b76704..91262b0 100644 (file)
@@ -42,6 +42,8 @@
 #include "reg.h"
 
 #define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+       MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
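
MLXSW_SP_PRIO_CHILD_TO_TCLASS() maps a 1-based child minor from a qdisc parent handle onto the same reversed band numbering as MLXSW_SP_PRIO_BAND_TO_TCLASS(): band 0, the highest-priority band, lands on the highest traffic class. A standalone check, with MAX_TCS standing in for IEEE_8021QAZ_MAX_TCS:

        #include <stdio.h>

        #define MAX_TCS 8       /* stands in for IEEE_8021QAZ_MAX_TCS */
        #define BAND_TO_TCLASS(band)   (MAX_TCS - (band) - 1)
        #define CHILD_TO_TCLASS(child) BAND_TO_TCLASS((child) - 1)

        int main(void)
        {
                int band, child;

                for (band = 0; band < MAX_TCS; band++)     /* 0->7 ... 7->0 */
                        printf("band %d -> tclass %d\n",
                               band, BAND_TO_TCLASS(band));
                for (child = 1; child <= MAX_TCS; child++) /* 1->7 ... 8->0 */
                        printf("child %d -> tclass %d\n",
                               child, CHILD_TO_TCLASS(child));
                return 0;
        }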
 
 enum mlxsw_sp_qdisc_type {
        MLXSW_SP_QDISC_NO_QDISC,
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
 struct mlxsw_sp_qdisc {
        u32 handle;
        u8 tclass_num;
+       u8 prio_bitmap;
        union {
                struct red_stats red;
        } xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
               mlxsw_sp_qdisc->handle == handle;
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+                   bool root_only)
+{
+       int tclass, child_index;
+
+       if (parent == TC_H_ROOT)
+               return mlxsw_sp_port->root_qdisc;
+
+       if (root_only || !mlxsw_sp_port->root_qdisc ||
+           !mlxsw_sp_port->root_qdisc->ops ||
+           TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+           TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+               return NULL;
+
+       child_index = TC_H_MIN(parent);
+       tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+       return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+       int i;
+
+       if (mlxsw_sp_port->root_qdisc->handle == handle)
+               return mlxsw_sp_port->root_qdisc;
+
+       if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+               return NULL;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+               if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+                       return &mlxsw_sp_port->tclass_qdiscs[i];
+
+       return NULL;
+}
+
 static int
 mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
                       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
        return -EOPNOTSUPP;
 }
 
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+                                      u8 prio_bitmap, u64 *tx_packets,
+                                      u64 *tx_bytes)
+{
+       int i;
+
+       *tx_packets = 0;
+       *tx_bytes = 0;
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (prio_bitmap & BIT(i)) {
+                       *tx_packets += xstats->tx_packets[i];
+                       *tx_bytes += xstats->tx_bytes[i];
+               }
+       }
+}
+
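A band's bstats are the sum of the per-priority counters for exactly the priorities in its prio_bitmap, which is why the PPCNT hunk earlier started collecting tx_packets/tx_bytes per priority. A standalone run of the same aggregation with hypothetical counter values:

        #include <stdint.h>
        #include <stdio.h>

        #define MAX_TCS 8       /* stands in for IEEE_8021QAZ_MAX_TCS */
        #define BIT(n) (1u << (n))

        int main(void)
        {
                /* Hypothetical per-priority TX counters, as read via PPCNT. */
                uint64_t tx_packets[MAX_TCS] = { 10, 20, 30, 40,
                                                 50, 60, 70, 80 };
                uint8_t prio_bitmap = BIT(0) | BIT(1);  /* prios 0 and 1 */
                uint64_t sum = 0;
                int i;

                for (i = 0; i < MAX_TCS; i++)
                        if (prio_bitmap & BIT(i))
                                sum += tx_packets[i];

                printf("%llu\n", (unsigned long long)sum);      /* 30 */
                return 0;
        }
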
 static int
 mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
                                  int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
        struct mlxsw_sp_qdisc_stats *stats_base;
        struct mlxsw_sp_port_xstats *xstats;
-       struct rtnl_link_stats64 *stats;
        struct red_stats *red_base;
 
        xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-       stats = &mlxsw_sp_port->periodic_hw_stats.stats;
        stats_base = &mlxsw_sp_qdisc->stats_base;
        red_base = &mlxsw_sp_qdisc->xstats_base.red;
 
-       stats_base->tx_packets = stats->tx_packets;
-       stats_base->tx_bytes = stats->tx_bytes;
-
+       mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+                                              mlxsw_sp_qdisc->prio_bitmap,
+                                              &stats_base->tx_packets,
+                                              &stats_base->tx_bytes);
        red_base->prob_mark = xstats->ecn;
        red_base->prob_drop = xstats->wred_drop[tclass_num];
        red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
 mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
+       struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+       if (root_qdisc != mlxsw_sp_qdisc)
+               root_qdisc->stats_base.backlog -=
+                                       mlxsw_sp_qdisc->stats_base.backlog;
+
        return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
                                                  mlxsw_sp_qdisc->tclass_num);
 }
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
        backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
                                       mlxsw_sp_qdisc->stats_base.backlog);
        p->qstats->backlog -= backlog;
+       mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
 
 static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
        struct mlxsw_sp_qdisc_stats *stats_base;
        struct mlxsw_sp_port_xstats *xstats;
-       struct rtnl_link_stats64 *stats;
 
        xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-       stats = &mlxsw_sp_port->periodic_hw_stats.stats;
        stats_base = &mlxsw_sp_qdisc->stats_base;
 
-       tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
-       tx_packets = stats->tx_packets - stats_base->tx_packets;
+       mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+                                              mlxsw_sp_qdisc->prio_bitmap,
+                                              &tx_packets, &tx_bytes);
+       tx_bytes = tx_bytes - stats_base->tx_bytes;
+       tx_packets = tx_packets - stats_base->tx_packets;
+
        overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
                     stats_base->overlimits;
        drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-       if (p->parent != TC_H_ROOT)
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+       if (!mlxsw_sp_qdisc)
                return -EOPNOTSUPP;
 
-       mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
        if (p->command == TC_RED_REPLACE)
                return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
                                              mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        int i;
 
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
                                          MLXSW_SP_PORT_DEFAULT_TCLASS);
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+                                      &mlxsw_sp_port->tclass_qdiscs[i]);
+               mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+       }
 
        return 0;
 }
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
                            void *params)
 {
        struct tc_prio_qopt_offload_params *p = params;
-       int tclass, i;
+       struct mlxsw_sp_qdisc *child_qdisc;
+       int tclass, i, band, backlog;
+       u8 old_priomap;
        int err;
 
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
-               err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
-               if (err)
-                       return err;
+       for (band = 0; band < p->bands; band++) {
+               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+               child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+               old_priomap = child_qdisc->prio_bitmap;
+               child_qdisc->prio_bitmap = 0;
+               for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+                       if (p->priomap[i] == band) {
+                               child_qdisc->prio_bitmap |= BIT(i);
+                               if (BIT(i) & old_priomap)
+                                       continue;
+                               err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+                                                               i, tclass);
+                               if (err)
+                                       return err;
+                       }
+               }
+               if (old_priomap != child_qdisc->prio_bitmap &&
+                   child_qdisc->ops && child_qdisc->ops->clean_stats) {
+                       backlog = child_qdisc->stats_base.backlog;
+                       child_qdisc->ops->clean_stats(mlxsw_sp_port,
+                                                     child_qdisc);
+                       child_qdisc->stats_base.backlog = backlog;
+               }
+       }
+       for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+               child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+               child_qdisc->prio_bitmap = 0;
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
        }
-
        return 0;
 }
 
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                drops += xstats->tail_drop[i];
+               drops += xstats->wred_drop[i];
                backlog += xstats->backlog[i];
        }
        drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        stats_base->tx_bytes = stats->tx_bytes;
 
        stats_base->drops = 0;
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                stats_base->drops += xstats->tail_drop[i];
+               stats_base->drops += xstats->wred_drop[i];
+       }
 
        mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
        .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
 };
 
+/* Grafting is not supported in mlxsw. A graft request therefore results in
+ * un-offloading of both the grafted qdisc and the qdisc at the graft's new
+ * location. (However, a graft to the location the qdisc already occupies is
+ * ignored completely and does not cause un-offloading.)
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+                         struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+                         struct tc_prio_qopt_offload_graft_params *p)
+{
+       int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+       struct mlxsw_sp_qdisc *old_qdisc;
+
+       /* Check whether the grafted qdisc is already in its "new" location.
+        * If so, nothing needs to be done.
+        */
+       if (p->band < IEEE_8021QAZ_MAX_TCS &&
+           mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+               return 0;
+
+       /* See if the grafted qdisc is already offloaded on any tclass. If so,
+        * unoffload it.
+        */
+       old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+                                                 p->child_handle);
+       if (old_qdisc)
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+       mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+                              &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+       return -EOPNOTSUPP;
+}
+
 int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct tc_prio_qopt_offload *p)
 {
        struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-       if (p->parent != TC_H_ROOT)
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+       if (!mlxsw_sp_qdisc)
                return -EOPNOTSUPP;
 
-       mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
        if (p->command == TC_PRIO_REPLACE)
                return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
                                              mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
        case TC_PRIO_STATS:
                return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
                                                &p->stats);
+       case TC_PRIO_GRAFT:
+               return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+                                                &p->graft_params);
        default:
                return -EOPNOTSUPP;
        }
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-       mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
-                                           GFP_KERNEL);
-       if (!mlxsw_sp_port->root_qdisc)
-               return -ENOMEM;
+       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+       int i;
 
+       mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+       if (!mlxsw_sp_qdisc)
+               goto err_root_qdisc_init;
+
+       mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+       mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
        mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
 
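+       /* Allocate one potential child qdisc per traffic class, for use when
+        * a PRIO root maps its bands onto the traffic classes.
+        */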
+       mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+                                GFP_KERNEL);
+       if (!mlxsw_sp_qdisc)
+               goto err_tclass_qdiscs_init;
+
+       mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+               mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
        return 0;
+
+err_tclass_qdiscs_init:
+       kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+       return -ENOMEM;
 }
 
 void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+       kfree(mlxsw_sp_port->tclass_qdiscs);
        kfree(mlxsw_sp_port->root_qdisc);
 }
index f0b25ba..a8a5786 100644
@@ -1,10 +1,10 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -70,6 +70,7 @@
 #include "spectrum_mr.h"
 #include "spectrum_mr_tcam.h"
 #include "spectrum_router.h"
+#include "spectrum_span.h"
 
 struct mlxsw_sp_fib;
 struct mlxsw_sp_vr;
@@ -788,37 +789,41 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
                                              u32 tb_id,
                                              struct netlink_ext_ack *extack)
 {
+       struct mlxsw_sp_mr_table *mr4_table;
+       struct mlxsw_sp_fib *fib4;
+       struct mlxsw_sp_fib *fib6;
        struct mlxsw_sp_vr *vr;
        int err;
 
        vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
        if (!vr) {
-               NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
+               NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
                return ERR_PTR(-EBUSY);
        }
-       vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
-       if (IS_ERR(vr->fib4))
-               return ERR_CAST(vr->fib4);
-       vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
-       if (IS_ERR(vr->fib6)) {
-               err = PTR_ERR(vr->fib6);
+       fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+       if (IS_ERR(fib4))
+               return ERR_CAST(fib4);
+       fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
+       if (IS_ERR(fib6)) {
+               err = PTR_ERR(fib6);
                goto err_fib6_create;
        }
-       vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
-                                                MLXSW_SP_L3_PROTO_IPV4);
-       if (IS_ERR(vr->mr4_table)) {
-               err = PTR_ERR(vr->mr4_table);
+       mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+                                            MLXSW_SP_L3_PROTO_IPV4);
+       if (IS_ERR(mr4_table)) {
+               err = PTR_ERR(mr4_table);
                goto err_mr_table_create;
        }
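+       /* Commit to the VR only after all tables were created successfully,
+        * so the error paths never leave a half-initialized VR behind.
+        */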
+       vr->fib4 = fib4;
+       vr->fib6 = fib6;
+       vr->mr4_table = mr4_table;
        vr->tb_id = tb_id;
        return vr;
 
 err_mr_table_create:
-       mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
-       vr->fib6 = NULL;
+       mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
 err_fib6_create:
-       mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
-       vr->fib4 = NULL;
+       mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
        return ERR_PTR(err);
 }
 
@@ -1020,9 +1025,11 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
                          enum mlxsw_sp_ipip_type ipipt,
                          struct net_device *ol_dev)
 {
+       const struct mlxsw_sp_ipip_ops *ipip_ops;
        struct mlxsw_sp_ipip_entry *ipip_entry;
        struct mlxsw_sp_ipip_entry *ret = NULL;
 
+       ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
        ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
        if (!ipip_entry)
                return ERR_PTR(-ENOMEM);
@@ -1036,7 +1043,15 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
 
        ipip_entry->ipipt = ipipt;
        ipip_entry->ol_dev = ol_dev;
-       ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
+
+       switch (ipip_ops->ul_proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+               break;
+       case MLXSW_SP_L3_PROTO_IPV6:
+               WARN_ON(1);
+               break;
+       }
 
        return ipip_entry;
 
@@ -2316,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
        read_unlock_bh(&n->lock);
 
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!entry_connected && !neigh_entry)
@@ -2413,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                mlxsw_core_schedule_work(&net_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
-       case NETEVENT_MULTIPATH_HASH_UPDATE:
+       case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+       case NETEVENT_IPV6_MPATH_HASH_UPDATE:
                net = ptr;
 
                if (!net_eq(net, &init_net))
@@ -3790,6 +3808,9 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
        struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
        int i;
 
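+       /* Only clear the nexthops' offload indication once the last FIB entry
+        * using this group is unset.
+        */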
+       if (!list_is_singular(&nh_grp->fib_list))
+               return;
+
        for (i = 0; i < nh_grp->count; i++) {
                struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
 
@@ -5572,6 +5593,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 
        /* Protect internal structures from changes */
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5614,6 +5637,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
        int err;
 
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_ADD:
@@ -5786,7 +5811,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
        }
 
        if (err < 0)
-               NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
+               NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
 
        return err;
 }
@@ -6025,7 +6050,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 
        err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
        if (err) {
-               NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
+               NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
                goto err_rif_index_alloc;
        }
 
@@ -7006,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
 
 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
 {
+       bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy;
+
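+       /* With the default policy (L3 only), hash on the flow label; with the
+        * L4 policy, hash on TCP/UDP ports instead. Addresses and the next
+        * header field are hashed in either case.
+        */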
        mlxsw_sp_mp_hash_header_set(recr2_pl,
                                    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
        mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
        mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
        mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
-       mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
        mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+       if (only_l3) {
+               mlxsw_sp_mp_hash_field_set(recr2_pl,
+                                          MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+       } else {
+               mlxsw_sp_mp_hash_header_set(recr2_pl,
+                                           MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+               mlxsw_sp_mp_hash_field_set(recr2_pl,
+                                          MLXSW_REG_RECR2_TCP_UDP_SPORT);
+               mlxsw_sp_mp_hash_field_set(recr2_pl,
+                                          MLXSW_REG_RECR2_TCP_UDP_DPORT);
+       }
 }
 
 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
new file mode 100644
index 0000000..f537e1d
--- /dev/null
@@ -0,0 +1,796 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
+
+#include "spectrum.h"
+#include "spectrum_span.h"
+#include "spectrum_ipip.h"
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
+{
+       int i;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
+               return -EIO;
+
+       mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                                         MAX_SPAN);
+       mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
+                                        sizeof(struct mlxsw_sp_span_entry),
+                                        GFP_KERNEL);
+       if (!mlxsw_sp->span.entries)
+               return -ENOMEM;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               INIT_LIST_HEAD(&curr->bound_ports_list);
+               curr->id = i;
+       }
+
+       return 0;
+}
+
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       int i;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
+       }
+       kfree(mlxsw_sp->span.entries);
+}
+
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+                              struct mlxsw_sp_span_parms *sparmsp)
+{
+       sparmsp->dest_port = netdev_priv(to_dev);
+       return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+                                  struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+                                      enum mlxsw_reg_mpat_span_type span_type)
+{
+       struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                           MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+       .can_handle = mlxsw_sp_port_dev_check,
+       .parms = mlxsw_sp_span_entry_phys_parms,
+       .configure = mlxsw_sp_span_entry_phys_configure,
+       .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+                           __be32 *saddrp, __be32 *daddrp)
+{
+       struct ip_tunnel *tun = netdev_priv(to_dev);
+       struct net_device *dev = NULL;
+       struct ip_tunnel_parm parms;
+       struct rtable *rt = NULL;
+       struct flowi4 fl4;
+
+       /* We assume "dev" stays valid after rt is put. */
+       ASSERT_RTNL();
+
+       parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+       ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+                           0, 0, parms.link, tun->fwmark);
+
+       rt = ip_route_output_key(tun->net, &fl4);
+       if (IS_ERR(rt))
+               return NULL;
+
+       if (rt->rt_type != RTN_UNICAST)
+               goto out;
+
+       dev = rt->dst.dev;
+       *saddrp = fl4.saddr;
+       *daddrp = rt->rt_gateway;
+
+out:
+       ip_rt_put(rt);
+       return dev;
+}
+
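+/* Resolve the MAC address of the underlay gateway: look up (or create) the
+ * neighbour entry for the given address on the egress device, kick neighbour
+ * resolution, and copy the hardware address out if it is currently valid.
+ */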
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+                             const void *pkey,
+                             struct net_device *l3edev,
+                             unsigned char dmac[ETH_ALEN])
+{
+       struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+       int err = 0;
+
+       if (!neigh) {
+               neigh = neigh_create(tbl, pkey, l3edev);
+               if (IS_ERR(neigh))
+                       return PTR_ERR(neigh);
+       }
+
+       neigh_event_send(neigh, NULL);
+
+       read_lock_bh(&neigh->lock);
+       if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+               memcpy(dmac, neigh->ha, ETH_ALEN);
+       else
+               err = -ENOENT;
+       read_unlock_bh(&neigh->lock);
+
+       neigh_release(neigh);
+       return err;
+}
+
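+/* An entry whose destination cannot be offloaded keeps dest_port == NULL;
+ * no mirroring is then programmed for it until a respin resolves usable
+ * parameters.
+ */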
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+       sparmsp->dest_port = NULL;
+       return 0;
+}
+
+static int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+                                       union mlxsw_sp_l3addr saddr,
+                                       union mlxsw_sp_l3addr daddr,
+                                       union mlxsw_sp_l3addr gw,
+                                       __u8 ttl,
+                                       struct neigh_table *tbl,
+                                       struct mlxsw_sp_span_parms *sparmsp)
+{
+       unsigned char dmac[ETH_ALEN];
+
+       if (mlxsw_sp_l3addr_is_zero(gw))
+               gw = daddr;
+
+       if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+           mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       sparmsp->dest_port = netdev_priv(l3edev);
+       sparmsp->ttl = ttl;
+       memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+       memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+       sparmsp->saddr = saddr;
+       sparmsp->daddr = daddr;
+       return 0;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+                                 struct mlxsw_sp_span_parms *sparmsp)
+{
+       struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+       union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+       union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+       bool inherit_tos = tparm.iph.tos & 0x1;
+       bool inherit_ttl = !tparm.iph.ttl;
+       union mlxsw_sp_l3addr gw = daddr;
+       struct net_device *l3edev;
+
+       if (!(to_dev->flags & IFF_UP) ||
+           /* Reject tunnels with GRE keys, checksums, etc. */
+           tparm.i_flags || tparm.o_flags ||
+           /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+           inherit_ttl || !inherit_tos ||
+           /* A destination address may not be "any". */
+           mlxsw_sp_l3addr_is_zero(daddr))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+       return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+                                                      tparm.iph.ttl,
+                                                      &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+                                     struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+                                   sparms.dmac, false);
+       mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+                                             sparms.ttl, sparms.smac,
+                                             be32_to_cpu(sparms.saddr.addr4),
+                                             be32_to_cpu(sparms.daddr.addr4));
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+       .can_handle = is_gretap_dev,
+       .parms = mlxsw_sp_span_entry_gretap4_parms,
+       .configure = mlxsw_sp_span_entry_gretap4_configure,
+       .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+                           struct in6_addr *saddrp,
+                           struct in6_addr *daddrp)
+{
+       struct ip6_tnl *t = netdev_priv(to_dev);
+       struct flowi6 fl6 = t->fl.u.ip6;
+       struct net_device *dev = NULL;
+       struct dst_entry *dst;
+       struct rt6_info *rt6;
+
+       /* We assume "dev" stays valid after dst is released. */
+       ASSERT_RTNL();
+
+       fl6.flowi6_mark = t->parms.fwmark;
+       if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+               return NULL;
+
+       dst = ip6_route_output(t->net, NULL, &fl6);
+       if (!dst || dst->error)
+               goto out;
+
+       rt6 = container_of(dst, struct rt6_info, dst);
+
+       dev = dst->dev;
+       *saddrp = fl6.saddr;
+       *daddrp = rt6->rt6i_gateway;
+
+out:
+       dst_release(dst);
+       return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+                                 struct mlxsw_sp_span_parms *sparmsp)
+{
+       struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+       bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+       union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+       union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+       bool inherit_ttl = !tparm.hop_limit;
+       union mlxsw_sp_l3addr gw = daddr;
+       struct net_device *l3edev;
+
+       if (!(to_dev->flags & IFF_UP) ||
+           /* Reject tunnels with GRE keys, checksums, etc. */
+           tparm.i_flags || tparm.o_flags ||
+           /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+           inherit_ttl || !inherit_tos ||
+           /* A destination address may not be "any". */
+           mlxsw_sp_l3addr_is_zero(daddr))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+       return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+                                                      tparm.hop_limit,
+                                                      &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+                                     struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+                                   sparms.dmac, false);
+       mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+                                             sparms.saddr.addr6,
+                                             sparms.daddr.addr6);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+       .can_handle = is_ip6gretap_dev,
+       .parms = mlxsw_sp_span_entry_gretap6_parms,
+       .configure = mlxsw_sp_span_entry_gretap6_configure,
+       .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+       &mlxsw_sp_span_entry_ops_phys,
+       &mlxsw_sp_span_entry_ops_gretap4,
+       &mlxsw_sp_span_entry_ops_gretap6,
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+                             struct mlxsw_sp_span_parms *sparmsp)
+{
+       return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+                                 struct mlxsw_sp_span_parms sparms)
+{
+       return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+       .parms = mlxsw_sp_span_entry_nop_parms,
+       .configure = mlxsw_sp_span_entry_nop_configure,
+       .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_span_entry *span_entry,
+                             struct mlxsw_sp_span_parms sparms)
+{
+       if (sparms.dest_port) {
+               if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+                       netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+                                  sparms.dest_port->dev->name);
+                       sparms.dest_port = NULL;
+               } else if (span_entry->ops->configure(span_entry, sparms)) {
+                       netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+                                  sparms.dest_port->dev->name);
+                       sparms.dest_port = NULL;
+               }
+       }
+
+       span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       if (span_entry->parms.dest_port)
+               span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+                          const struct net_device *to_dev,
+                          const struct mlxsw_sp_span_entry_ops *ops,
+                          struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_span_entry *span_entry = NULL;
+       int i;
+
+       /* find a free entry to use */
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               if (!mlxsw_sp->span.entries[i].ref_count) {
+                       span_entry = &mlxsw_sp->span.entries[i];
+                       break;
+               }
+       }
+       if (!span_entry)
+               return NULL;
+
+       span_entry->ops = ops;
+       span_entry->ref_count = 1;
+       span_entry->to_dev = to_dev;
+       mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
+       return span_entry;
+}
+
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure(span_entry);
+}
+
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *to_dev)
+{
+       int i;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               if (curr->ref_count && curr->to_dev == to_dev)
+                       return curr;
+       }
+       return NULL;
+}
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure(span_entry);
+       span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+       int i;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               if (curr->ref_count && curr->id == span_id)
+                       return curr;
+       }
+       return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+                       const struct net_device *to_dev,
+                       const struct mlxsw_sp_span_entry_ops *ops,
+                       struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_span_entry *span_entry;
+
+       span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
+       if (span_entry) {
+               /* Already exists, just take a reference */
+               span_entry->ref_count++;
+               return span_entry;
+       }
+
+       return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
+}
+
+static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_span_entry *span_entry)
+{
+       WARN_ON(!span_entry->ref_count);
+       if (--span_entry->ref_count == 0)
+               mlxsw_sp_span_entry_destroy(span_entry);
+       return 0;
+}
+
+static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
+{
+       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+       struct mlxsw_sp_span_inspected_port *p;
+       int i;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               list_for_each_entry(p, &curr->bound_ports_list, list)
+                       if (p->local_port == port->local_port &&
+                           p->type == MLXSW_SP_SPAN_EGRESS)
+                               return true;
+       }
+
+       return false;
+}
+
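+/* Size the egress mirror buffer to two and a half MTUs worth of cells, plus
+ * one cell of slack. For example, an MTU of 1518 yields 3795 bytes, which is
+ * converted to whole cells, plus one extra cell.
+ */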
+static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
+                                        int mtu)
+{
+       return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
+}
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
+{
+       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+       char sbib_pl[MLXSW_REG_SBIB_LEN];
+       int err;
+
+       /* If the port is egress-mirrored, the shared buffer size must be
+        * updated according to the new MTU.
+        */
+       if (mlxsw_sp_span_is_egress_mirror(port)) {
+               u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
+
+               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+               if (err) {
+                       netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
+static struct mlxsw_sp_span_inspected_port *
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
+                                   struct mlxsw_sp_span_entry *span_entry)
+{
+       struct mlxsw_sp_span_inspected_port *p;
+
+       list_for_each_entry(p, &span_entry->bound_ports_list, list)
+               if (port->local_port == p->local_port)
+                       return p;
+       return NULL;
+}
+
+static int
+mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
+                                 struct mlxsw_sp_span_entry *span_entry,
+                                 enum mlxsw_sp_span_type type,
+                                 bool bind)
+{
+       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+       char mpar_pl[MLXSW_REG_MPAR_LEN];
+       int pa_id = span_entry->id;
+
+       /* bind the port to the SPAN entry */
+       mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+                           (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
+}
+
+static int
+mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
+                                struct mlxsw_sp_span_entry *span_entry,
+                                enum mlxsw_sp_span_type type,
+                                bool bind)
+{
+       struct mlxsw_sp_span_inspected_port *inspected_port;
+       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+       char sbib_pl[MLXSW_REG_SBIB_LEN];
+       int err;
+
+       /* if it is an egress SPAN, bind a shared buffer to it */
+       if (type == MLXSW_SP_SPAN_EGRESS) {
+               u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
+                                                            port->dev->mtu);
+
+               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+               if (err) {
+                       netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
+                       return err;
+               }
+       }
+
+       if (bind) {
+               err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+                                                       true);
+               if (err)
+                       goto err_port_bind;
+       }
+
+       inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
+       if (!inspected_port) {
+               err = -ENOMEM;
+               goto err_inspected_port_alloc;
+       }
+       inspected_port->local_port = port->local_port;
+       inspected_port->type = type;
+       list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
+
+       return 0;
+
+err_inspected_port_alloc:
+       if (bind)
+               mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+                                                 false);
+err_port_bind:
+       if (type == MLXSW_SP_SPAN_EGRESS) {
+               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+               mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+       }
+       return err;
+}
+
+static void
+mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
+                                struct mlxsw_sp_span_entry *span_entry,
+                                enum mlxsw_sp_span_type type,
+                                bool bind)
+{
+       struct mlxsw_sp_span_inspected_port *inspected_port;
+       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+       char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+       inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
+       if (!inspected_port)
+               return;
+
+       if (bind)
+               mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+                                                 false);
+       /* remove the SBIB buffer if it was egress SPAN */
+       if (type == MLXSW_SP_SPAN_EGRESS) {
+               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+               mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+       }
+
+       mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+
+       list_del(&inspected_port->list);
+       kfree(inspected_port);
+}
+
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+                       const struct net_device *to_dev)
+{
+       size_t i;
+
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+               if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+                       return mlxsw_sp_span_entry_types[i];
+
+       return NULL;
+}
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+                            const struct net_device *to_dev,
+                            enum mlxsw_sp_span_type type, bool bind,
+                            int *p_span_id)
+{
+       struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+       const struct mlxsw_sp_span_entry_ops *ops;
+       struct mlxsw_sp_span_parms sparms = {0};
+       struct mlxsw_sp_span_entry *span_entry;
+       int err;
+
+       ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+       if (!ops) {
+               netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+               return -EOPNOTSUPP;
+       }
+
+       err = ops->parms(to_dev, &sparms);
+       if (err)
+               return err;
+
+       span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
+       if (!span_entry)
+               return -ENOENT;
+
+       netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
+                  span_entry->id);
+
+       err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
+       if (err)
+               goto err_port_bind;
+
+       *p_span_id = span_entry->id;
+       return 0;
+
+err_port_bind:
+       mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+       return err;
+}
+
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+                             enum mlxsw_sp_span_type type, bool bind)
+{
+       struct mlxsw_sp_span_entry *span_entry;
+
+       span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
+       if (!span_entry) {
+               netdev_err(from->dev, "no span entry found\n");
+               return;
+       }
+
+       netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
+                  span_entry->id);
+       mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
+}
+
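+/* Re-resolve the parameters of every active SPAN entry after a routing or
+ * neighbour update, and reprogram any entry whose resolved parameters have
+ * changed. The router event handlers call this under RTNL.
+ */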
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+       int i;
+       int err;
+
+       ASSERT_RTNL();
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+               struct mlxsw_sp_span_parms sparms = {0};
+
+               if (!curr->ref_count)
+                       continue;
+
+               err = curr->ops->parms(curr->to_dev, &sparms);
+               if (err)
+                       continue;
+
+               if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+                       mlxsw_sp_span_entry_deconfigure(curr);
+                       mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+               }
+       }
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
new file mode 100644
index 0000000..948aceb
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_SPAN_H
+#define _MLXSW_SPECTRUM_SPAN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+
+enum mlxsw_sp_span_type {
+       MLXSW_SP_SPAN_EGRESS,
+       MLXSW_SP_SPAN_INGRESS
+};
+
+struct mlxsw_sp_span_inspected_port {
+       struct list_head list;
+       enum mlxsw_sp_span_type type;
+       u8 local_port;
+};
+
+struct mlxsw_sp_span_parms {
+       struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+       unsigned int ttl;
+       unsigned char dmac[ETH_ALEN];
+       unsigned char smac[ETH_ALEN];
+       union mlxsw_sp_l3addr daddr;
+       union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
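+/* A single SPAN agent: the mirror destination netdevice, the parameters it
+ * was last configured with, and the list of ports currently bound to it.
+ */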
+struct mlxsw_sp_span_entry {
+       const struct net_device *to_dev;
+       const struct mlxsw_sp_span_entry_ops *ops;
+       struct mlxsw_sp_span_parms parms;
+       struct list_head bound_ports_list;
+       int ref_count;
+       int id;
+};
+
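+/* Per-destination-type operations: can_handle tests whether a netdevice is
+ * a suitable mirror destination of this type, parms resolves the offload
+ * parameters (possibly marking the entry unoffloadable), and configure /
+ * deconfigure program the hardware accordingly.
+ */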
+struct mlxsw_sp_span_entry_ops {
+       bool (*can_handle)(const struct net_device *to_dev);
+       int (*parms)(const struct net_device *to_dev,
+                    struct mlxsw_sp_span_parms *sparmsp);
+       int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+                        struct mlxsw_sp_span_parms sparms);
+       void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+                            const struct net_device *to_dev,
+                            enum mlxsw_sp_span_type type,
+                            bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+                             enum mlxsw_sp_span_type type, bool bind);
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_span_entry *span_entry);
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
+
+#endif
index 593ad31..917663a 100644
@@ -1819,7 +1819,7 @@ mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 
        if (is_vlan_dev(bridge_port->dev)) {
-               NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge");
+               NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
                return -EINVAL;
        }
 
@@ -1882,20 +1882,16 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
                                struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+       struct net_device *dev = bridge_port->dev;
        u16 vid;
 
-       if (!is_vlan_dev(bridge_port->dev)) {
-               NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge");
-               return -EINVAL;
-       }
-       vid = vlan_dev_vlan_id(bridge_port->dev);
-
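+       /* A port enslaved directly, not through a VLAN upper, is represented
+        * by its default VID 1.
+        */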
+       vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
        if (WARN_ON(!mlxsw_sp_port_vlan))
                return -EINVAL;
 
        if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
-               NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port");
+               NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port");
                return -EINVAL;
        }
 
@@ -1912,8 +1908,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
                                 struct mlxsw_sp_port *mlxsw_sp_port)
 {
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-       u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+       struct net_device *dev = bridge_port->dev;
+       u16 vid;
 
+       vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
        if (WARN_ON(!mlxsw_sp_port_vlan))
                return;
index d5b2888..51fa82b 100644
@@ -60,14 +60,6 @@ do {                                                                 \
        *((volatile unsigned int *)dev->base_addr+(reg)) = (val);               \
 } while (0)
 
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer dependent.
@@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = {
 
 static int sonic_probe1(struct net_device *dev)
 {
-       static unsigned version_printed;
        unsigned int silicon_revision;
        unsigned int val;
        struct sonic_local *lp = netdev_priv(dev);
@@ -133,26 +124,17 @@ static int sonic_probe1(struct net_device *dev)
         * the expected location.
         */
        silicon_revision = SONIC_READ(SONIC_SR);
-       if (sonic_debug > 1)
-               printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
        i = 0;
        while (known_revisions[i] != 0xffff &&
               known_revisions[i] != silicon_revision)
                i++;
 
        if (known_revisions[i] == 0xffff) {
-               printk("SONIC ethernet controller not found (0x%4x)\n",
-                      silicon_revision);
+               pr_info("SONIC ethernet controller not found (0x%4x)\n",
+                       silicon_revision);
                goto out;
        }
 
-       if (sonic_debug  &&  version_printed++ == 0)
-               printk(version);
-
-       printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
-              dev_name(lp->device), dev->base_addr);
-
        /*
         * Put the sonic into software reset, then
         * retrieve and print the ethernet address.
@@ -245,12 +227,16 @@ static int jazz_sonic_probe(struct platform_device *pdev)
        err = sonic_probe1(dev);
        if (err)
                goto out;
+
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
+       sonic_msg_init(dev);
+
        err = register_netdev(dev);
        if (err)
                goto out1;
 
-       printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
        return 0;
 
 out1:
@@ -262,8 +248,6 @@ out:
 }
 
 MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
 MODULE_ALIAS("platform:jazzsonic");
 
 #include "sonic.c"
index b922ab5..0937fc2 100644
@@ -60,8 +60,6 @@
 #include <asm/macints.h>
 #include <asm/mac_via.h>
 
-static char mac_sonic_string[] = "macsonic";
-
 #include "sonic.h"
 
 /* These should basically be bus-size and endian independent (since
@@ -72,15 +70,6 @@ static char mac_sonic_string[] = "macsonic";
 #define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
              + lp->reg_offset))
 
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
 /* For onboard SONIC */
 #define ONBOARD_SONIC_REGISTERS        0x50F0A000
 #define ONBOARD_SONIC_PROM_BASE        0x50f08000
@@ -313,11 +302,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        int sr;
        bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
 
-       if (!MACH_IS_MAC)
-               return -ENODEV;
-
-       printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
        /* Bogus probing, on the models which may or may not have
           Ethernet (BTW, the Ethernet *is* always at the same
           address, and nothing else lives there, at least if Apple's
@@ -327,13 +311,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 
                card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
                if (!card_present) {
-                       printk("none.\n");
+                       pr_info("Onboard/comm-slot SONIC not found\n");
                        return -ENODEV;
                }
        }
 
-       printk("yes\n");
-
        /* Danger!  My arms are flailing wildly!  You *must* set lp->reg_offset
         * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
        dev->base_addr = ONBOARD_SONIC_REGISTERS;
@@ -342,18 +324,10 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        else
                dev->irq = IRQ_NUBUS_9;
 
-       if (!sonic_version_printed) {
-               printk(KERN_INFO "%s", version);
-               sonic_version_printed = 1;
-       }
-       printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
-              dev_name(lp->device), dev->base_addr);
-
        /* The PowerBook's SONIC is 16 bit always. */
        if (macintosh_config->ident == MAC_MODEL_PB520) {
                lp->reg_offset = 0;
                lp->dma_bitmode = SONIC_BITMODE16;
-               sr = SONIC_READ(SONIC_SR);
        } else if (commslot) {
                /* Some of the comm-slot cards are 16 bit.  But some
                   of them are not.  The 32-bit cards use offset 2 and
@@ -370,22 +344,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
                else {
                        lp->dma_bitmode = SONIC_BITMODE16;
                        lp->reg_offset = 0;
-                       sr = SONIC_READ(SONIC_SR);
                }
        } else {
                /* All onboard cards are at offset 2 with 32 bit DMA. */
                lp->reg_offset = 2;
                lp->dma_bitmode = SONIC_BITMODE32;
-               sr = SONIC_READ(SONIC_SR);
        }
-       printk(KERN_INFO
-              "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-              dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+       pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+               SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+               lp->reg_offset);
+
+       /* This is sometimes useful to find out how MacOS configured the card */
+       pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+                SONIC_READ(SONIC_DCR) & 0xffff,
+                SONIC_READ(SONIC_DCR2) & 0xffff);
 
        /* Software reset, then initialize control registers. */
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -406,11 +379,14 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        /* Now look for the MAC address. */
        mac_onboard_sonic_ethernet_addr(dev);
 
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
        /* Shared init code */
        return macsonic_init(dev);
 }
 
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
                                         unsigned long prom_addr, int id)
 {
        int i;
@@ -449,70 +425,49 @@ static int macsonic_ident(struct nubus_rsrc *fres)
        return -1;
 }
 
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+                                      struct net_device *dev)
 {
-       static int slots;
-       struct nubus_rsrc *ndev = NULL;
        struct sonic_local* lp = netdev_priv(dev);
        unsigned long base_addr, prom_addr;
        u16 sonic_dcr;
-       int id = -1;
        int reg_offset, dma_bitmode;
 
-       /* Find the first SONIC that hasn't been initialized already */
-       for_each_func_rsrc(ndev) {
-               if (ndev->category != NUBUS_CAT_NETWORK ||
-                   ndev->type != NUBUS_TYPE_ETHERNET)
-                       continue;
-
-               /* Have we seen it already? */
-               if (slots & (1<<ndev->board->slot))
-                       continue;
-               slots |= 1<<ndev->board->slot;
-
-               /* Is it one of ours? */
-               if ((id = macsonic_ident(ndev)) != -1)
-                       break;
-       }
-
-       if (ndev == NULL)
-               return -ENODEV;
-
        switch (id) {
        case MACSONIC_DUODOCK:
-               base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
                            SONIC_DCR_TFT0;
                reg_offset = 2;
                dma_bitmode = SONIC_BITMODE32;
                break;
        case MACSONIC_APPLE:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE32;
                break;
        case MACSONIC_APPLE16:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
                            SONIC_DCR_PO1 | SONIC_DCR_BMS;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE16;
                break;
        case MACSONIC_DAYNALINK:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
                sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
                            SONIC_DCR_PO1 | SONIC_DCR_BMS;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE16;
                break;
        case MACSONIC_DAYNA:
-               base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+               base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
                sonic_dcr = SONIC_DCR_BMS |
                            SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
                reg_offset = 0;
@@ -528,21 +483,16 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
        dev->base_addr = base_addr;
        lp->reg_offset = reg_offset;
        lp->dma_bitmode = dma_bitmode;
-       dev->irq = SLOT2IRQ(ndev->board->slot);
+       dev->irq = SLOT2IRQ(board->slot);
 
-       if (!sonic_version_printed) {
-               printk(KERN_INFO "%s", version);
-               sonic_version_printed = 1;
-       }
-       printk(KERN_INFO "%s: %s in slot %X\n",
-              dev_name(lp->device), ndev->board->name, ndev->board->slot);
-       printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-              dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
+       dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+                board->name, SONIC_READ(SONIC_SR),
+                lp->dma_bitmode ? 32 : 16, lp->reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+       /* This is sometimes useful to find out how MacOS configured the card */
+       dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+               SONIC_READ(SONIC_DCR) & 0xffff,
+               SONIC_READ(SONIC_DCR2) & 0xffff);
 
        /* Software reset, then initialize control registers. */
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -557,14 +507,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
        SONIC_WRITE(SONIC_ISR, 0x7fff);
 
        /* Now look for the MAC address. */
-       if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+       if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
                return -ENODEV;
 
+       dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+                dev->base_addr, dev->dev_addr, dev->irq);
+
        /* Shared init code */
        return macsonic_init(dev);
 }
 
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
 {
        struct net_device *dev;
        struct sonic_local *lp;
@@ -579,22 +532,16 @@ static int mac_sonic_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
        platform_set_drvdata(pdev, dev);
 
-       /* This will catch fatal stuff like -ENOMEM as well as success */
        err = mac_onboard_sonic_probe(dev);
-       if (err == 0)
-               goto found;
-       if (err != -ENODEV)
-               goto out;
-       err = mac_nubus_sonic_probe(dev);
        if (err)
                goto out;
-found:
+
+       sonic_msg_init(dev);
+
        err = register_netdev(dev);
        if (err)
                goto out;
 
-       printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
        return 0;
 
 out:
@@ -604,13 +551,11 @@ out:
 }
 
 MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
 MODULE_ALIAS("platform:macsonic");
 
 #include "sonic.c"
 
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct sonic_local* lp = netdev_priv(dev);
@@ -623,12 +568,105 @@ static int mac_sonic_device_remove(struct platform_device *pdev)
        return 0;
 }
 
-static struct platform_driver mac_sonic_driver = {
-       .probe  = mac_sonic_probe,
-       .remove = mac_sonic_device_remove,
-       .driver = {
-               .name   = mac_sonic_string,
+static struct platform_driver mac_sonic_platform_driver = {
+       .probe  = mac_sonic_platform_probe,
+       .remove = mac_sonic_platform_remove,
+       .driver = {
+               .name = "macsonic",
+       },
+};
+
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+       struct net_device *ndev;
+       struct sonic_local *lp;
+       struct nubus_rsrc *fres;
+       int id = -1;
+       int err;
+
+       /* The platform driver will handle a PDS or Comm Slot card (even if
+        * it has a pseudoslot declaration ROM).
+        */
+       if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+               return -ENODEV;
+
+       for_each_board_func_rsrc(board, fres) {
+               if (fres->category != NUBUS_CAT_NETWORK ||
+                   fres->type != NUBUS_TYPE_ETHERNET)
+                       continue;
+
+               id = macsonic_ident(fres);
+               if (id != -1)
+                       break;
+       }
+       if (!fres)
+               return -ENODEV;
+
+       ndev = alloc_etherdev(sizeof(struct sonic_local));
+       if (!ndev)
+               return -ENOMEM;
+
+       lp = netdev_priv(ndev);
+       lp->device = &board->dev;
+       SET_NETDEV_DEV(ndev, &board->dev);
+
+       err = mac_sonic_nubus_probe_board(board, id, ndev);
+       if (err)
+               goto out;
+
+       sonic_msg_init(ndev);
+
+       err = register_netdev(ndev);
+       if (err)
+               goto out;
+
+       nubus_set_drvdata(board, ndev);
+
+       return 0;
+
+out:
+       free_netdev(ndev);
+       return err;
+}
+
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+       struct net_device *ndev = nubus_get_drvdata(board);
+       struct sonic_local *lp = netdev_priv(ndev);
+
+       unregister_netdev(ndev);
+       dma_free_coherent(lp->device,
+                         SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                         lp->descriptors, lp->descriptors_laddr);
+       free_netdev(ndev);
+
+       return 0;
+}
+
+static struct nubus_driver mac_sonic_nubus_driver = {
+       .probe  = mac_sonic_nubus_probe,
+       .remove = mac_sonic_nubus_remove,
+       .driver = {
+               .name = "macsonic-nubus",
+               .owner = THIS_MODULE,
        },
 };
 
-module_platform_driver(mac_sonic_driver);
+static int perr, nerr;
+
+static int __init mac_sonic_init(void)
+{
+       perr = platform_driver_register(&mac_sonic_platform_driver);
+       nerr = nubus_driver_register(&mac_sonic_nubus_driver);
+       return 0;
+}
+module_init(mac_sonic_init);
+
+static void __exit mac_sonic_exit(void)
+{
+       if (!perr)
+               platform_driver_unregister(&mac_sonic_platform_driver);
+       if (!nerr)
+               nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);
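
The init/exit pair above deliberately returns 0 even when one registration fails, so a failure on one bus does not disable the driver on the other; the exit path then unwinds only what actually registered. A stricter variant (a sketch, not what this patch does) would fail the module load only when both registrations fail:

    static int __init mac_sonic_init_strict(void)
    {
            perr = platform_driver_register(&mac_sonic_platform_driver);
            nerr = nubus_driver_register(&mac_sonic_nubus_driver);

            /* Load succeeds if at least one bus driver registered */
            return (perr && nerr) ? perr : 0;
    }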
index 612c7a4..7ed0848 100644 (file)
  * the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
  */
 
+static unsigned int version_printed;
 
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+static void sonic_msg_init(struct net_device *dev)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+
+       lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+       if (version_printed++ == 0)
+               netif_dbg(lp, drv, dev, "%s", version);
+}
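
For reference, a minimal sketch of the netif_msg pattern these conversions adopt — a hypothetical driver, not code from this patch, assuming only the standard <linux/netdevice.h> helpers:

    #include <linux/module.h>
    #include <linux/netdevice.h>

    struct example_priv {
            int msg_enable;
    };

    static int debug = -1;                  /* -1 selects the default mask */
    module_param(debug, int, 0);

    static void example_msg_init(struct net_device *dev)
    {
            struct example_priv *priv = netdev_priv(dev);

            /* Turn the module parameter into a bitmask of NETIF_MSG_* bits */
            priv->msg_enable = netif_msg_init(debug, 0);

            /* Emitted only when NETIF_MSG_IFUP is set in priv->msg_enable */
            netif_dbg(priv, ifup, dev, "msg_enable %#x\n", priv->msg_enable);
    }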
 
 /*
  * Open/initialize the SONIC controller.
@@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev)
        struct sonic_local *lp = netdev_priv(dev);
        int i;
 
-       if (sonic_debug > 2)
-               printk("sonic_open: initializing sonic driver.\n");
+       netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
 
        for (i = 0; i < SONIC_NUM_RRS; i++) {
                struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
@@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev)
 
        netif_start_queue(dev);
 
-       if (sonic_debug > 2)
-               printk("sonic_open: Initialization done.\n");
+       netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
 
        return 0;
 }
@@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev)
        struct sonic_local *lp = netdev_priv(dev);
        int i;
 
-       if (sonic_debug > 2)
-               printk("sonic_close\n");
+       netif_dbg(lp, ifdown, dev, "%s\n", __func__);
 
        netif_stop_queue(dev);
 
@@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
        int length;
        int entry = lp->next_tx;
 
-       if (sonic_debug > 2)
-               printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+       netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
 
        length = skb->len;
        if (length < ETH_ZLEN) {
@@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
        lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
        if (lp->tx_skb[lp->next_tx] != NULL) {
                /* The ring is full, the ISR has yet to process the next TD. */
-               if (sonic_debug > 3)
-                       printk("%s: stopping queue\n", dev->name);
+               netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
                netif_stop_queue(dev);
                /* after this packet, wait for ISR to free up some TDAs */
        } else netif_start_queue(dev);
 
-       if (sonic_debug > 2)
-               printk("sonic_send_packet: issuing Tx command\n");
+       netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
 
        SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
 
@@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
        do {
                if (status & SONIC_INT_PKTRX) {
-                       if (sonic_debug > 2)
-                               printk("%s: packet rx\n", dev->name);
+                       netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
                        sonic_rx(dev);  /* got packet(s) */
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
                }
@@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
                         *   still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
                         */
 
-                       if (sonic_debug > 2)
-                               printk("%s: tx done\n", dev->name);
+                       netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
 
                        while (lp->tx_skb[entry] != NULL) {
                                if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
@@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
                 * check error conditions
                 */
                if (status & SONIC_INT_RFO) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx fifo overrun\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+                                 __func__);
                        lp->stats.rx_fifo_errors++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
                }
                if (status & SONIC_INT_RDE) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx descriptors exhausted\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+                                 __func__);
                        lp->stats.rx_dropped++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
                }
                if (status & SONIC_INT_RBAE) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx buffer area exceeded\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+                                 __func__);
                        lp->stats.rx_dropped++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
                }
@@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
                /* transmit error */
                if (status & SONIC_INT_TXER) {
-                       if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
-                               printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+                       if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+                               netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+                                         __func__);
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
                }
 
@@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev)
                        if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
                        SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
                        if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
-                               if (sonic_debug > 2)
-                                       printk("%s: rx buffer exhausted\n", dev->name);
+                               netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+                                         __func__);
                                SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
                        }
                } else
@@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev)
                    (netdev_mc_count(dev) > 15)) {
                        rcr |= SONIC_RCR_AMC;
                } else {
-                       if (sonic_debug > 2)
-                               printk("sonic_multicast_list: mc_count %d\n",
-                                      netdev_mc_count(dev));
+                       netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+                                 netdev_mc_count(dev));
                        sonic_set_cam_enable(dev, 1);  /* always enable our own address */
                        i = 1;
                        netdev_for_each_mc_addr(ha, dev) {
@@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev)
                }
        }
 
-       if (sonic_debug > 2)
-               printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+       netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
 
        SONIC_WRITE(SONIC_RCR, rcr);
 }
@@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev)
        /*
         * initialize the receive resource area
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize receive resource area\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+                 __func__);
 
        for (i = 0; i < SONIC_NUM_RRS; i++) {
                u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
@@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev)
        SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
 
        /* load the resource pointers */
-       if (sonic_debug > 3)
-               printk("sonic_init: issuing RRRA command\n");
+       netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
 
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
        i = 0;
@@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev)
                        break;
        }
 
-       if (sonic_debug > 2)
-               printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+       netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+                 SONIC_READ(SONIC_CMD), i);
 
        /*
         * Initialize the receive descriptors so that they
         * become a circular linked list, ie. let the last
         * descriptor point to the first again.
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize receive descriptors\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+                 __func__);
+
        for (i=0; i<SONIC_NUM_RDS; i++) {
                sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
                sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
@@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev)
        /*
         * initialize transmit descriptors
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize transmit descriptors\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+                 __func__);
+
        for (i = 0; i < SONIC_NUM_TDS; i++) {
                sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
                sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
@@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev)
                if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
                        break;
        }
-       if (sonic_debug > 2) {
-               printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
-                      SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
-       }
+       netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+                 SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
 
        /*
         * enable receiver, disable loopback
@@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev)
        if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
                printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
 
-       if (sonic_debug > 2)
-               printk("sonic_init: new status=%x\n",
-                      SONIC_READ(SONIC_CMD));
+       netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+                 SONIC_READ(SONIC_CMD));
 
        return 0;
 }
index 421b1a2..2b27f70 100644 (file)
@@ -319,6 +319,7 @@ struct sonic_local {
        unsigned int eol_rx;
        unsigned int eol_tx;           /* last unacked transmit packet */
        unsigned int next_tx;          /* next free TD */
+       int msg_enable;
        struct device *device;         /* generic device */
        struct net_device_stats stats;
 };
@@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev);
 static void sonic_multicast_list(struct net_device *dev);
 static int sonic_init(struct net_device *dev);
 static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
 
 /* Internal inlines for reading/writing DMA buffers.  Note that bus
    size and endianness matter here, whereas they don't for registers,
index 1817dee..e1b886e 100644 (file)
@@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf);
 #define SONIC_WRITE(reg,val) \
        *((volatile unsigned int *)dev->base_addr+reg) = val
 
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer dependent.
@@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
 
 static int __init sonic_probe1(struct net_device *dev)
 {
-       static unsigned version_printed = 0;
        unsigned int silicon_revision;
        struct sonic_local *lp = netdev_priv(dev);
        unsigned int base_addr = dev->base_addr;
@@ -146,23 +137,17 @@ static int __init sonic_probe1(struct net_device *dev)
         * the expected location.
         */
        silicon_revision = SONIC_READ(SONIC_SR);
-       if (sonic_debug > 1)
-               printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
        i = 0;
        while ((known_revisions[i] != 0xffff) &&
                        (known_revisions[i] != silicon_revision))
                i++;
 
        if (known_revisions[i] == 0xffff) {
-               printk("SONIC ethernet controller not found (0x%4x)\n",
-                               silicon_revision);
+               pr_info("SONIC ethernet controller not found (0x%4x)\n",
+                       silicon_revision);
                return -ENODEV;
        }
 
-       if (sonic_debug  &&  version_printed++ == 0)
-               printk(version);
-
        /*
         * Put the sonic into software reset, then retrieve ethernet address.
         * Note: we are assuming that the boot-loader has initialized the cam.
@@ -273,12 +258,15 @@ int xtsonic_probe(struct platform_device *pdev)
 
        if ((err = sonic_probe1(dev)))
                goto out;
+
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
+       sonic_msg_init(dev);
+
        if ((err = register_netdev(dev)))
                goto out1;
 
-       printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
-              dev->base_addr, dev->dev_addr, dev->irq);
-
        return 0;
 
 out1:
@@ -290,8 +278,6 @@ out:
 }
 
 MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
 
 #include "sonic.c"
 
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/Makefile b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
new file mode 100644 (file)
index 0000000..805fa28
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/flower/Makefile b/drivers/net/ethernet/netronome/nfp/flower/Makefile
new file mode 100644 (file)
index 0000000..805fa28
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
index adfe474..28c1cd5 100644 (file)
 #define NFP_FLOWER_MASK_MPLS_BOS       BIT(8)
 #define NFP_FLOWER_MASK_MPLS_Q         BIT(0)
 
+/* Compressed HW representation of TCP Flags */
+#define NFP_FL_TCP_FLAG_URG            BIT(4)
+#define NFP_FL_TCP_FLAG_PSH            BIT(3)
+#define NFP_FL_TCP_FLAG_RST            BIT(2)
+#define NFP_FL_TCP_FLAG_SYN            BIT(1)
+#define NFP_FL_TCP_FLAG_FIN            BIT(0)
+
 #define NFP_FL_SC_ACT_DROP             0x80000000
 #define NFP_FL_SC_ACT_USER             0x7D000000
 #define NFP_FL_SC_ACT_POPV             0x6A000000
@@ -257,7 +264,7 @@ struct nfp_flower_tp_ports {
  *    3                   2                   1
  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |    DSCP   |ECN|   protocol    |           reserved            |
+ * |    DSCP   |ECN|   protocol    |      ttl      |     flags     |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |                        ipv4_addr_src                          |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -268,7 +275,7 @@ struct nfp_flower_ipv4 {
        u8 tos;
        u8 proto;
        u8 ttl;
-       u8 reserved;
+       u8 flags;
        __be32 ipv4_src;
        __be32 ipv4_dst;
 };
index 332ff0f..c5cebf6 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/time64.h>
 #include <linux/types.h>
 #include <net/pkt_cls.h>
+#include <net/tcp.h>
 #include <linux/workqueue.h>
 
 struct net_device;
index 37c2eca..b3bc827 100644 (file)
@@ -181,6 +181,26 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
                frame->tos = flow_ip->tos;
                frame->ttl = flow_ip->ttl;
        }
+
+       if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+               struct flow_dissector_key_tcp *tcp;
+               u32 tcp_flags;
+
+               tcp = skb_flow_dissector_target(flow->dissector,
+                                               FLOW_DISSECTOR_KEY_TCP, target);
+               tcp_flags = be16_to_cpu(tcp->flags);
+
+               if (tcp_flags & TCPHDR_FIN)
+                       frame->flags |= NFP_FL_TCP_FLAG_FIN;
+               if (tcp_flags & TCPHDR_SYN)
+                       frame->flags |= NFP_FL_TCP_FLAG_SYN;
+               if (tcp_flags & TCPHDR_RST)
+                       frame->flags |= NFP_FL_TCP_FLAG_RST;
+               if (tcp_flags & TCPHDR_PSH)
+                       frame->flags |= NFP_FL_TCP_FLAG_PSH;
+               if (tcp_flags & TCPHDR_URG)
+                       frame->flags |= NFP_FL_TCP_FLAG_URG;
+       }
 }
 
 static void
index eb5c13d..f3586c5 100644 (file)
 #include "../nfp_net.h"
 #include "../nfp_port.h"
 
+#define NFP_FLOWER_SUPPORTED_TCPFLAGS \
+       (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+        TCPHDR_PSH | TCPHDR_URG)
+
 #define NFP_FLOWER_WHITELIST_DISSECTOR \
        (BIT(FLOW_DISSECTOR_KEY_CONTROL) | \
         BIT(FLOW_DISSECTOR_KEY_BASIC) | \
         BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \
         BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \
+        BIT(FLOW_DISSECTOR_KEY_TCP) | \
         BIT(FLOW_DISSECTOR_KEY_PORTS) | \
         BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
         BIT(FLOW_DISSECTOR_KEY_VLAN) | \
@@ -288,6 +293,35 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
                }
        }
 
+       if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+               struct flow_dissector_key_tcp *tcp;
+               u32 tcp_flags;
+
+               tcp = skb_flow_dissector_target(flow->dissector,
+                                               FLOW_DISSECTOR_KEY_TCP,
+                                               flow->key);
+               tcp_flags = be16_to_cpu(tcp->flags);
+
+               if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
+                       return -EOPNOTSUPP;
+
+               /* We only support PSH and URG flags when either
+                * FIN, SYN or RST is present as well.
+                */
+               if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
+                   !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
+                       return -EOPNOTSUPP;
+
+               /* We need to store TCP flags in the IPv4 key space, thus
+                * we need to ensure we include an IPv4 key layer if we have
+                * not done so already.
+                */
+               if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) {
+                       key_layer |= NFP_FLOWER_LAYER_IPV4;
+                       key_size += sizeof(struct nfp_flower_ipv4);
+               }
+       }
+
        ret_key_ls->key_layer = key_layer;
        ret_key_ls->key_layer_two = key_layer_two;
        ret_key_ls->key_size = key_size;
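
Read together, the TCP-flags checks above amount to a single offload predicate; a hedged restatement (the helper name is hypothetical, not part of this patch):

    /* Sketch: true when a flower TCP-flags match can be offloaded.
     * PSH and URG are only representable alongside FIN, SYN or RST.
     */
    static bool nfp_fl_tcp_flags_offloadable(u32 tcp_flags)
    {
            if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
                    return false;
            if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
                !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
                    return false;
            return true;
    }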
index ab301d5..c4b1f34 100644 (file)
@@ -645,6 +645,7 @@ MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_4x10_1x40.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_8x10.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x10.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x25.nffw");
+MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw");
 
 MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
 MODULE_LICENSE("GPL");
index 4499a73..bb63c11 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
+ * Copyright (C) 2015-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
  * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
-#define NFP_NET_CFG_BAR_SZ              (32 * 1024)
+#define NFP_NET_CFG_BAR_SZ             (32 * 1024)
 
 /**
  * Offset in Freelist buffer where packet starts on RX
  */
-#define NFP_NET_RX_OFFSET               32
+#define NFP_NET_RX_OFFSET              32
 
 /**
  * LSO parameters
 #define NFP_NET_META_PORTID            5
 #define NFP_NET_META_CSUM              6 /* checksum complete type */
 
-#define        NFP_META_PORT_ID_CTRL           ~0U
+#define NFP_META_PORT_ID_CTRL          ~0U
 
 /**
  * Hash type prepended when an RSS hash was computed
  */
-#define NFP_NET_RSS_NONE                0
-#define NFP_NET_RSS_IPV4                1
-#define NFP_NET_RSS_IPV6                2
-#define NFP_NET_RSS_IPV6_EX             3
-#define NFP_NET_RSS_IPV4_TCP            4
-#define NFP_NET_RSS_IPV6_TCP            5
-#define NFP_NET_RSS_IPV6_EX_TCP         6
-#define NFP_NET_RSS_IPV4_UDP            7
-#define NFP_NET_RSS_IPV6_UDP            8
-#define NFP_NET_RSS_IPV6_EX_UDP         9
+#define NFP_NET_RSS_NONE               0
+#define NFP_NET_RSS_IPV4               1
+#define NFP_NET_RSS_IPV6               2
+#define NFP_NET_RSS_IPV6_EX            3
+#define NFP_NET_RSS_IPV4_TCP           4
+#define NFP_NET_RSS_IPV6_TCP           5
+#define NFP_NET_RSS_IPV6_EX_TCP                6
+#define NFP_NET_RSS_IPV4_UDP           7
+#define NFP_NET_RSS_IPV6_UDP           8
+#define NFP_NET_RSS_IPV6_EX_UDP                9
 
 /**
  * Ring counts
- * %NFP_NET_TXR_MAX:         Maximum number of TX rings
- * %NFP_NET_RXR_MAX:         Maximum number of RX rings
+ * %NFP_NET_TXR_MAX:        Maximum number of TX rings
+ * %NFP_NET_RXR_MAX:        Maximum number of RX rings
  */
-#define NFP_NET_TXR_MAX                 64
-#define NFP_NET_RXR_MAX                 64
+#define NFP_NET_TXR_MAX                        64
+#define NFP_NET_RXR_MAX                        64
 
 /**
  * Read/Write config words (0x0000 - 0x002c)
- * %NFP_NET_CFG_CTRL:        Global control
+ * %NFP_NET_CFG_CTRL:       Global control
  * %NFP_NET_CFG_UPDATE:      Indicate which fields are updated
  * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
  * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
- * %NFP_NET_CFG_MTU:         Set MTU size
+ * %NFP_NET_CFG_MTU:        Set MTU size
  * %NFP_NET_CFG_FLBUFSZ:     Set freelist buffer size (must be larger than MTU)
- * %NFP_NET_CFG_EXN:         MSI-X table entry for exceptions
- * %NFP_NET_CFG_LSC:         MSI-X table entry for link state changes
+ * %NFP_NET_CFG_EXN:        MSI-X table entry for exceptions
+ * %NFP_NET_CFG_LSC:        MSI-X table entry for link state changes
  * %NFP_NET_CFG_MACADDR:     MAC address
  *
  * TODO:
  * - define Error details in UPDATE
  */
-#define NFP_NET_CFG_CTRL                0x0000
-#define   NFP_NET_CFG_CTRL_ENABLE         (0x1 <<  0) /* Global enable */
-#define   NFP_NET_CFG_CTRL_PROMISC        (0x1 <<  1) /* Enable Promisc mode */
-#define   NFP_NET_CFG_CTRL_L2BC           (0x1 <<  2) /* Allow L2 Broadcast */
-#define   NFP_NET_CFG_CTRL_L2MC           (0x1 <<  3) /* Allow L2 Multicast */
-#define   NFP_NET_CFG_CTRL_RXCSUM         (0x1 <<  4) /* Enable RX Checksum */
-#define   NFP_NET_CFG_CTRL_TXCSUM         (0x1 <<  5) /* Enable TX Checksum */
-#define   NFP_NET_CFG_CTRL_RXVLAN         (0x1 <<  6) /* Enable VLAN strip */
-#define   NFP_NET_CFG_CTRL_TXVLAN         (0x1 <<  7) /* Enable VLAN insert */
-#define   NFP_NET_CFG_CTRL_SCATTER        (0x1 <<  8) /* Scatter DMA */
-#define   NFP_NET_CFG_CTRL_GATHER         (0x1 <<  9) /* Gather DMA */
-#define   NFP_NET_CFG_CTRL_LSO            (0x1 << 10) /* LSO/TSO (version 1) */
+#define NFP_NET_CFG_CTRL               0x0000
+#define   NFP_NET_CFG_CTRL_ENABLE        (0x1 <<  0) /* Global enable */
+#define   NFP_NET_CFG_CTRL_PROMISC       (0x1 <<  1) /* Enable Promisc mode */
+#define   NFP_NET_CFG_CTRL_L2BC                  (0x1 <<  2) /* Allow L2 Broadcast */
+#define   NFP_NET_CFG_CTRL_L2MC                  (0x1 <<  3) /* Allow L2 Multicast */
+#define   NFP_NET_CFG_CTRL_RXCSUM        (0x1 <<  4) /* Enable RX Checksum */
+#define   NFP_NET_CFG_CTRL_TXCSUM        (0x1 <<  5) /* Enable TX Checksum */
+#define   NFP_NET_CFG_CTRL_RXVLAN        (0x1 <<  6) /* Enable VLAN strip */
+#define   NFP_NET_CFG_CTRL_TXVLAN        (0x1 <<  7) /* Enable VLAN insert */
+#define   NFP_NET_CFG_CTRL_SCATTER       (0x1 <<  8) /* Scatter DMA */
+#define   NFP_NET_CFG_CTRL_GATHER        (0x1 <<  9) /* Gather DMA */
+#define   NFP_NET_CFG_CTRL_LSO           (0x1 << 10) /* LSO/TSO (version 1) */
 #define   NFP_NET_CFG_CTRL_CTAG_FILTER   (0x1 << 11) /* VLAN CTAG filtering */
-#define   NFP_NET_CFG_CTRL_RINGCFG        (0x1 << 16) /* Ring runtime changes */
+#define   NFP_NET_CFG_CTRL_RINGCFG       (0x1 << 16) /* Ring runtime changes */
 #define   NFP_NET_CFG_CTRL_RSS           (0x1 << 17) /* RSS (version 1) */
-#define   NFP_NET_CFG_CTRL_IRQMOD         (0x1 << 18) /* Interrupt moderation */
-#define   NFP_NET_CFG_CTRL_RINGPRIO       (0x1 << 19) /* Ring priorities */
-#define   NFP_NET_CFG_CTRL_MSIXAUTO       (0x1 << 20) /* MSI-X auto-masking */
-#define   NFP_NET_CFG_CTRL_TXRWB          (0x1 << 21) /* Write-back of TX ring*/
-#define   NFP_NET_CFG_CTRL_L2SWITCH       (0x1 << 22) /* L2 Switch */
+#define   NFP_NET_CFG_CTRL_IRQMOD        (0x1 << 18) /* Interrupt moderation */
+#define   NFP_NET_CFG_CTRL_RINGPRIO      (0x1 << 19) /* Ring priorities */
+#define   NFP_NET_CFG_CTRL_MSIXAUTO      (0x1 << 20) /* MSI-X auto-masking */
+#define   NFP_NET_CFG_CTRL_TXRWB         (0x1 << 21) /* Write-back of TX ring*/
+#define   NFP_NET_CFG_CTRL_L2SWITCH      (0x1 << 22) /* L2 Switch */
 #define   NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
 #define   NFP_NET_CFG_CTRL_VXLAN         (0x1 << 24) /* VXLAN tunnel support */
 #define   NFP_NET_CFG_CTRL_NVGRE         (0x1 << 25) /* NVGRE tunnel support */
 #define NFP_NET_CFG_CTRL_CHAIN_META    (NFP_NET_CFG_CTRL_RSS2 | \
                                         NFP_NET_CFG_CTRL_CSUM_COMPLETE)
 
-#define NFP_NET_CFG_UPDATE              0x0004
-#define   NFP_NET_CFG_UPDATE_GEN          (0x1 <<  0) /* General update */
-#define   NFP_NET_CFG_UPDATE_RING         (0x1 <<  1) /* Ring config change */
-#define   NFP_NET_CFG_UPDATE_RSS          (0x1 <<  2) /* RSS config change */
-#define   NFP_NET_CFG_UPDATE_TXRPRIO      (0x1 <<  3) /* TX Ring prio change */
-#define   NFP_NET_CFG_UPDATE_RXRPRIO      (0x1 <<  4) /* RX Ring prio change */
-#define   NFP_NET_CFG_UPDATE_MSIX         (0x1 <<  5) /* MSI-X change */
-#define   NFP_NET_CFG_UPDATE_L2SWITCH     (0x1 <<  6) /* Switch changes */
-#define   NFP_NET_CFG_UPDATE_RESET        (0x1 <<  7) /* Update due to FLR */
-#define   NFP_NET_CFG_UPDATE_IRQMOD       (0x1 <<  8) /* IRQ mod change */
+#define NFP_NET_CFG_UPDATE             0x0004
+#define   NFP_NET_CFG_UPDATE_GEN         (0x1 <<  0) /* General update */
+#define   NFP_NET_CFG_UPDATE_RING        (0x1 <<  1) /* Ring config change */
+#define   NFP_NET_CFG_UPDATE_RSS         (0x1 <<  2) /* RSS config change */
+#define   NFP_NET_CFG_UPDATE_TXRPRIO     (0x1 <<  3) /* TX Ring prio change */
+#define   NFP_NET_CFG_UPDATE_RXRPRIO     (0x1 <<  4) /* RX Ring prio change */
+#define   NFP_NET_CFG_UPDATE_MSIX        (0x1 <<  5) /* MSI-X change */
+#define   NFP_NET_CFG_UPDATE_L2SWITCH    (0x1 <<  6) /* Switch changes */
+#define   NFP_NET_CFG_UPDATE_RESET       (0x1 <<  7) /* Update due to FLR */
+#define   NFP_NET_CFG_UPDATE_IRQMOD      (0x1 <<  8) /* IRQ mod change */
 #define   NFP_NET_CFG_UPDATE_VXLAN       (0x1 <<  9) /* VXLAN port change */
 #define   NFP_NET_CFG_UPDATE_BPF         (0x1 << 10) /* BPF program load */
 #define   NFP_NET_CFG_UPDATE_MACADDR     (0x1 << 11) /* MAC address change */
 #define   NFP_NET_CFG_UPDATE_MBOX        (0x1 << 12) /* Mailbox update */
 #define   NFP_NET_CFG_UPDATE_VF                  (0x1 << 13) /* VF settings change */
-#define   NFP_NET_CFG_UPDATE_ERR          (0x1 << 31) /* An error occurred */
-#define NFP_NET_CFG_TXRS_ENABLE         0x0008
-#define NFP_NET_CFG_RXRS_ENABLE         0x0010
-#define NFP_NET_CFG_MTU                 0x0018
-#define NFP_NET_CFG_FLBUFSZ             0x001c
-#define NFP_NET_CFG_EXN                 0x001f
-#define NFP_NET_CFG_LSC                 0x0020
-#define NFP_NET_CFG_MACADDR             0x0024
+#define   NFP_NET_CFG_UPDATE_ERR         (0x1 << 31) /* An error occurred */
+#define NFP_NET_CFG_TXRS_ENABLE                0x0008
+#define NFP_NET_CFG_RXRS_ENABLE                0x0010
+#define NFP_NET_CFG_MTU                        0x0018
+#define NFP_NET_CFG_FLBUFSZ            0x001c
+#define NFP_NET_CFG_EXN                        0x001f
+#define NFP_NET_CFG_LSC                        0x0020
+#define NFP_NET_CFG_MACADDR            0x0024
 
 /**
  * Read-only words (0x0030 - 0x0050):
  * %NFP_NET_CFG_VERSION:     Firmware version number
- * %NFP_NET_CFG_STS:         Status
- * %NFP_NET_CFG_CAP:         Capabilities (same bits as %NFP_NET_CFG_CTRL)
+ * %NFP_NET_CFG_STS:        Status
+ * %NFP_NET_CFG_CAP:        Capabilities (same bits as %NFP_NET_CFG_CTRL)
  * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
  * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
  * %NFP_NET_CFG_MAX_MTU:     Maximum supported MTU
  * TODO:
  * - define more STS bits
  */
-#define NFP_NET_CFG_VERSION             0x0030
+#define NFP_NET_CFG_VERSION            0x0030
 #define   NFP_NET_CFG_VERSION_RESERVED_MASK    (0xff << 24)
 #define   NFP_NET_CFG_VERSION_CLASS_MASK  (0xff << 16)
-#define   NFP_NET_CFG_VERSION_CLASS(x)    (((x) & 0xff) << 16)
+#define   NFP_NET_CFG_VERSION_CLASS(x)   (((x) & 0xff) << 16)
 #define   NFP_NET_CFG_VERSION_CLASS_GENERIC    0
 #define   NFP_NET_CFG_VERSION_MAJOR_MASK  (0xff <<  8)
-#define   NFP_NET_CFG_VERSION_MAJOR(x)    (((x) & 0xff) <<  8)
+#define   NFP_NET_CFG_VERSION_MAJOR(x)   (((x) & 0xff) <<  8)
 #define   NFP_NET_CFG_VERSION_MINOR_MASK  (0xff <<  0)
-#define   NFP_NET_CFG_VERSION_MINOR(x)    (((x) & 0xff) <<  0)
-#define NFP_NET_CFG_STS                 0x0034
-#define   NFP_NET_CFG_STS_LINK            (0x1 << 0) /* Link up or down */
+#define   NFP_NET_CFG_VERSION_MINOR(x)   (((x) & 0xff) <<  0)
+#define NFP_NET_CFG_STS                        0x0034
+#define   NFP_NET_CFG_STS_LINK           (0x1 << 0) /* Link up or down */
 /* Link rate */
 #define   NFP_NET_CFG_STS_LINK_RATE_SHIFT 1
 #define   NFP_NET_CFG_STS_LINK_RATE_MASK  0xF
-#define   NFP_NET_CFG_STS_LINK_RATE       \
+#define   NFP_NET_CFG_STS_LINK_RATE      \
        (NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT)
 #define   NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED   0
-#define   NFP_NET_CFG_STS_LINK_RATE_UNKNOWN       1
-#define   NFP_NET_CFG_STS_LINK_RATE_1G            2
-#define   NFP_NET_CFG_STS_LINK_RATE_10G           3
-#define   NFP_NET_CFG_STS_LINK_RATE_25G           4
-#define   NFP_NET_CFG_STS_LINK_RATE_40G           5
-#define   NFP_NET_CFG_STS_LINK_RATE_50G           6
-#define   NFP_NET_CFG_STS_LINK_RATE_100G          7
-#define NFP_NET_CFG_CAP                 0x0038
-#define NFP_NET_CFG_MAX_TXRINGS         0x003c
-#define NFP_NET_CFG_MAX_RXRINGS         0x0040
-#define NFP_NET_CFG_MAX_MTU             0x0044
+#define   NFP_NET_CFG_STS_LINK_RATE_UNKNOWN      1
+#define   NFP_NET_CFG_STS_LINK_RATE_1G           2
+#define   NFP_NET_CFG_STS_LINK_RATE_10G                  3
+#define   NFP_NET_CFG_STS_LINK_RATE_25G                  4
+#define   NFP_NET_CFG_STS_LINK_RATE_40G                  5
+#define   NFP_NET_CFG_STS_LINK_RATE_50G                  6
+#define   NFP_NET_CFG_STS_LINK_RATE_100G         7
+#define NFP_NET_CFG_CAP                        0x0038
+#define NFP_NET_CFG_MAX_TXRINGS                0x003c
+#define NFP_NET_CFG_MAX_RXRINGS                0x0040
+#define NFP_NET_CFG_MAX_MTU            0x0044
 /* Next two words are being used by VFs for solving THB350 issue */
-#define NFP_NET_CFG_START_TXQ           0x0048
-#define NFP_NET_CFG_START_RXQ           0x004c
+#define NFP_NET_CFG_START_TXQ          0x0048
+#define NFP_NET_CFG_START_RXQ          0x004c
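
As a usage note, a hedged sketch of decoding the status word with the masks above; nn_readl() stands in for the driver's BAR read helper and is an assumption here:

    u32 sts = nn_readl(nn, NFP_NET_CFG_STS);        /* assumed helper */
    bool link_up = sts & NFP_NET_CFG_STS_LINK;
    unsigned int rate = (sts >> NFP_NET_CFG_STS_LINK_RATE_SHIFT) &
                        NFP_NET_CFG_STS_LINK_RATE_MASK;

    if (link_up && rate == NFP_NET_CFG_STS_LINK_RATE_25G)
            pr_info("link up at 25G\n");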
 
 /**
  * Prepend configuration
 /**
  * 40B reserved for future use (0x0098 - 0x00c0)
  */
-#define NFP_NET_CFG_RESERVED            0x0098
-#define NFP_NET_CFG_RESERVED_SZ         0x0028
+#define NFP_NET_CFG_RESERVED           0x0098
+#define NFP_NET_CFG_RESERVED_SZ                0x0028
 
 /**
  * RSS configuration (0x0100 - 0x01ac):
  * %NFP_NET_CFG_RSS_KEY:     RSS "secret" key
  * %NFP_NET_CFG_RSS_ITBL:    RSS indirection table
  */
-#define NFP_NET_CFG_RSS_BASE            0x0100
-#define NFP_NET_CFG_RSS_CTRL            NFP_NET_CFG_RSS_BASE
-#define   NFP_NET_CFG_RSS_MASK            (0x7f)
-#define   NFP_NET_CFG_RSS_MASK_of(_x)     ((_x) & 0x7f)
-#define   NFP_NET_CFG_RSS_IPV4            (1 <<  8) /* RSS for IPv4 */
-#define   NFP_NET_CFG_RSS_IPV6            (1 <<  9) /* RSS for IPv6 */
-#define   NFP_NET_CFG_RSS_IPV4_TCP        (1 << 10) /* RSS for IPv4/TCP */
-#define   NFP_NET_CFG_RSS_IPV4_UDP        (1 << 11) /* RSS for IPv4/UDP */
-#define   NFP_NET_CFG_RSS_IPV6_TCP        (1 << 12) /* RSS for IPv6/TCP */
-#define   NFP_NET_CFG_RSS_IPV6_UDP        (1 << 13) /* RSS for IPv6/UDP */
+#define NFP_NET_CFG_RSS_BASE           0x0100
+#define NFP_NET_CFG_RSS_CTRL           NFP_NET_CFG_RSS_BASE
+#define   NFP_NET_CFG_RSS_MASK           (0x7f)
+#define   NFP_NET_CFG_RSS_MASK_of(_x)    ((_x) & 0x7f)
+#define   NFP_NET_CFG_RSS_IPV4           (1 <<  8) /* RSS for IPv4 */
+#define   NFP_NET_CFG_RSS_IPV6           (1 <<  9) /* RSS for IPv6 */
+#define   NFP_NET_CFG_RSS_IPV4_TCP       (1 << 10) /* RSS for IPv4/TCP */
+#define   NFP_NET_CFG_RSS_IPV4_UDP       (1 << 11) /* RSS for IPv4/UDP */
+#define   NFP_NET_CFG_RSS_IPV6_TCP       (1 << 12) /* RSS for IPv6/TCP */
+#define   NFP_NET_CFG_RSS_IPV6_UDP       (1 << 13) /* RSS for IPv6/UDP */
 #define   NFP_NET_CFG_RSS_HFUNC                  0xff000000
-#define   NFP_NET_CFG_RSS_TOEPLITZ        (1 << 24) /* Use Toeplitz hash */
+#define   NFP_NET_CFG_RSS_TOEPLITZ       (1 << 24) /* Use Toeplitz hash */
 #define   NFP_NET_CFG_RSS_XOR            (1 << 25) /* Use XOR as hash */
 #define   NFP_NET_CFG_RSS_CRC32                  (1 << 26) /* Use CRC32 as hash */
 #define   NFP_NET_CFG_RSS_HFUNCS         3
-#define NFP_NET_CFG_RSS_KEY             (NFP_NET_CFG_RSS_BASE + 0x4)
-#define NFP_NET_CFG_RSS_KEY_SZ          0x28
-#define NFP_NET_CFG_RSS_ITBL            (NFP_NET_CFG_RSS_BASE + 0x4 + \
+#define NFP_NET_CFG_RSS_KEY            (NFP_NET_CFG_RSS_BASE + 0x4)
+#define NFP_NET_CFG_RSS_KEY_SZ         0x28
+#define NFP_NET_CFG_RSS_ITBL           (NFP_NET_CFG_RSS_BASE + 0x4 + \
                                         NFP_NET_CFG_RSS_KEY_SZ)
-#define NFP_NET_CFG_RSS_ITBL_SZ         0x80
+#define NFP_NET_CFG_RSS_ITBL_SZ                0x80
 
 /**
  * TX ring configuration (0x200 - 0x800)
  * %NFP_NET_CFG_TXR_PRIO:    Per TX ring priority (1B entries)
  * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
  */
-#define NFP_NET_CFG_TXR_BASE            0x0200
-#define NFP_NET_CFG_TXR_ADDR(_x)        (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_TXR_WB_ADDR(_x)     (NFP_NET_CFG_TXR_BASE + 0x200 + \
+#define NFP_NET_CFG_TXR_BASE           0x0200
+#define NFP_NET_CFG_TXR_ADDR(_x)       (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_TXR_WB_ADDR(_x)    (NFP_NET_CFG_TXR_BASE + 0x200 + \
                                         ((_x) * 0x8))
-#define NFP_NET_CFG_TXR_SZ(_x)          (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
-#define NFP_NET_CFG_TXR_VEC(_x)         (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
-#define NFP_NET_CFG_TXR_PRIO(_x)        (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
+#define NFP_NET_CFG_TXR_SZ(_x)         (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
+#define NFP_NET_CFG_TXR_VEC(_x)                (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
+#define NFP_NET_CFG_TXR_PRIO(_x)       (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
 #define NFP_NET_CFG_TXR_IRQ_MOD(_x)    (NFP_NET_CFG_TXR_BASE + 0x500 + \
                                         ((_x) * 0x4))
 
  * %NFP_NET_CFG_RXR_PRIO:    Per RX ring priority (1B entries)
  * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
  */
-#define NFP_NET_CFG_RXR_BASE            0x0800
-#define NFP_NET_CFG_RXR_ADDR(_x)        (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_RXR_SZ(_x)          (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
-#define NFP_NET_CFG_RXR_VEC(_x)         (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
-#define NFP_NET_CFG_RXR_PRIO(_x)        (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
+#define NFP_NET_CFG_RXR_BASE           0x0800
+#define NFP_NET_CFG_RXR_ADDR(_x)       (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_RXR_SZ(_x)         (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
+#define NFP_NET_CFG_RXR_VEC(_x)                (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
+#define NFP_NET_CFG_RXR_PRIO(_x)       (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
 #define NFP_NET_CFG_RXR_IRQ_MOD(_x)    (NFP_NET_CFG_RXR_BASE + 0x300 + \
                                         ((_x) * 0x4))
 
  * the MSI-X entry and the host driver must clear the register to
  * re-enable the interrupt.
  */
-#define NFP_NET_CFG_ICR_BASE            0x0c00
-#define NFP_NET_CFG_ICR(_x)             (NFP_NET_CFG_ICR_BASE + (_x))
-#define   NFP_NET_CFG_ICR_UNMASKED      0x0
-#define   NFP_NET_CFG_ICR_RXTX          0x1
-#define   NFP_NET_CFG_ICR_LSC           0x2
+#define NFP_NET_CFG_ICR_BASE           0x0c00
+#define NFP_NET_CFG_ICR(_x)            (NFP_NET_CFG_ICR_BASE + (_x))
+#define   NFP_NET_CFG_ICR_UNMASKED     0x0
+#define   NFP_NET_CFG_ICR_RXTX         0x1
+#define   NFP_NET_CFG_ICR_LSC          0x2
 
 /**
  * General device stats (0x0d00 - 0x0d90)
  * all counters are 64bit.
  */
-#define NFP_NET_CFG_STATS_BASE          0x0d00
-#define NFP_NET_CFG_STATS_RX_DISCARDS   (NFP_NET_CFG_STATS_BASE + 0x00)
-#define NFP_NET_CFG_STATS_RX_ERRORS     (NFP_NET_CFG_STATS_BASE + 0x08)
-#define NFP_NET_CFG_STATS_RX_OCTETS     (NFP_NET_CFG_STATS_BASE + 0x10)
-#define NFP_NET_CFG_STATS_RX_UC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x18)
-#define NFP_NET_CFG_STATS_RX_MC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x20)
-#define NFP_NET_CFG_STATS_RX_BC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x28)
-#define NFP_NET_CFG_STATS_RX_FRAMES     (NFP_NET_CFG_STATS_BASE + 0x30)
-#define NFP_NET_CFG_STATS_RX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x38)
-#define NFP_NET_CFG_STATS_RX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x40)
-
-#define NFP_NET_CFG_STATS_TX_DISCARDS   (NFP_NET_CFG_STATS_BASE + 0x48)
-#define NFP_NET_CFG_STATS_TX_ERRORS     (NFP_NET_CFG_STATS_BASE + 0x50)
-#define NFP_NET_CFG_STATS_TX_OCTETS     (NFP_NET_CFG_STATS_BASE + 0x58)
-#define NFP_NET_CFG_STATS_TX_UC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x60)
-#define NFP_NET_CFG_STATS_TX_MC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x68)
-#define NFP_NET_CFG_STATS_TX_BC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x70)
-#define NFP_NET_CFG_STATS_TX_FRAMES     (NFP_NET_CFG_STATS_BASE + 0x78)
-#define NFP_NET_CFG_STATS_TX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x80)
-#define NFP_NET_CFG_STATS_TX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x88)
+#define NFP_NET_CFG_STATS_BASE         0x0d00
+#define NFP_NET_CFG_STATS_RX_DISCARDS  (NFP_NET_CFG_STATS_BASE + 0x00)
+#define NFP_NET_CFG_STATS_RX_ERRORS    (NFP_NET_CFG_STATS_BASE + 0x08)
+#define NFP_NET_CFG_STATS_RX_OCTETS    (NFP_NET_CFG_STATS_BASE + 0x10)
+#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
+#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
+#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
+#define NFP_NET_CFG_STATS_RX_FRAMES    (NFP_NET_CFG_STATS_BASE + 0x30)
+#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
+#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
+
+#define NFP_NET_CFG_STATS_TX_DISCARDS  (NFP_NET_CFG_STATS_BASE + 0x48)
+#define NFP_NET_CFG_STATS_TX_ERRORS    (NFP_NET_CFG_STATS_BASE + 0x50)
+#define NFP_NET_CFG_STATS_TX_OCTETS    (NFP_NET_CFG_STATS_BASE + 0x58)
+#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
+#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
+#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
+#define NFP_NET_CFG_STATS_TX_FRAMES    (NFP_NET_CFG_STATS_BASE + 0x78)
+#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
+#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
 
 #define NFP_NET_CFG_STATS_APP0_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x90)
 #define NFP_NET_CFG_STATS_APP0_BYTES   (NFP_NET_CFG_STATS_BASE + 0x98)
  * %NFP_NET_CFG_TXR_STATS:   TX ring statistics (Packet and Byte count)
  * %NFP_NET_CFG_RXR_STATS:   RX ring statistics (Packet and Byte count)
  */
-#define NFP_NET_CFG_TXR_STATS_BASE      0x1000
-#define NFP_NET_CFG_TXR_STATS(_x)       (NFP_NET_CFG_TXR_STATS_BASE + \
+#define NFP_NET_CFG_TXR_STATS_BASE     0x1000
+#define NFP_NET_CFG_TXR_STATS(_x)      (NFP_NET_CFG_TXR_STATS_BASE + \
                                         ((_x) * 0x10))
-#define NFP_NET_CFG_RXR_STATS_BASE      0x1400
-#define NFP_NET_CFG_RXR_STATS(_x)       (NFP_NET_CFG_RXR_STATS_BASE + \
+#define NFP_NET_CFG_RXR_STATS_BASE     0x1400
+#define NFP_NET_CFG_RXR_STATS(_x)      (NFP_NET_CFG_RXR_STATS_BASE + \
                                         ((_x) * 0x10))
 
 /**
  * %NFP_NET_CFG_TLV_TYPE:      Offset of type within the TLV
  * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
  * %NFP_NET_CFG_TLV_LENGTH:    Offset of length within the TLV
- * %NFP_NET_CFG_TLV_LENGTH_INC:        TLV length increments
+ * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
  * %NFP_NET_CFG_TLV_VALUE:     Offset of value within the TLV
  *
  * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE.
  * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH.
  */
 #define NFP_NET_CFG_TLV_TYPE           0x00
-#define   NFP_NET_CFG_TLV_TYPE_REQUIRED          0x8000
+#define   NFP_NET_CFG_TLV_TYPE_REQUIRED   0x8000
 #define NFP_NET_CFG_TLV_LENGTH         0x02
 #define   NFP_NET_CFG_TLV_LENGTH_INC     4
 #define NFP_NET_CFG_TLV_VALUE          0x04
 
-#define NFP_NET_CFG_TLV_HEADER_REQUIRED        0x80000000
+#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
 #define NFP_NET_CFG_TLV_HEADER_TYPE    0x7fff0000
 #define NFP_NET_CFG_TLV_HEADER_LENGTH  0x0000ffff
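
A minimal sketch of walking the TLV area using only the constants above; nn_readl(), tlv_area_end and known_type() are assumptions, not part of this patch:

    u32 off = NFP_NET_CFG_TLV_BASE;

    while (off < tlv_area_end) {
            u32 hdr = nn_readl(nn, off);
            u16 type = (hdr & NFP_NET_CFG_TLV_HEADER_TYPE) >> 16;
            u16 length = hdr & NFP_NET_CFG_TLV_HEADER_LENGTH;

            if ((hdr & NFP_NET_CFG_TLV_HEADER_REQUIRED) && !known_type(type))
                    return -EINVAL; /* required TLVs must be understood */

            /* The 4-byte header is not counted in the TLV length */
            off += NFP_NET_CFG_TLV_VALUE + length;
    }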
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
new file mode 100644 (file)
index 0000000..805fa28
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
new file mode 100644 (file)
index 0000000..805fa28
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nic/Makefile b/drivers/net/ethernet/netronome/nfp/nic/Makefile
new file mode 100644 (file)
index 0000000..805fa28
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
index ca4a81d..03ad4ee 100644 (file)
@@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type {
 /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
 #define QED_IWARP_MAX_BDS_PER_FPDU 3
 
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
        "QED_IWARP_MPA_PKT_PACKED",
        "QED_IWARP_MPA_PKT_PARTIAL",
        "QED_IWARP_MPA_PKT_UNALIGNED"
index 7e7704d..c494918 100644 (file)
 
 /* Local Definitions and Declarations */
 
-struct rmnet_walk_data {
-       struct net_device *real_dev;
-       struct list_head *head;
-       struct rmnet_port *port;
-};
-
 static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
 {
        return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler;
@@ -112,17 +106,14 @@ static int rmnet_register_real_device(struct net_device *real_dev)
 static void rmnet_unregister_bridge(struct net_device *dev,
                                    struct rmnet_port *port)
 {
-       struct net_device *rmnet_dev, *bridge_dev;
        struct rmnet_port *bridge_port;
+       struct net_device *bridge_dev;
 
        if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
                return;
 
        /* bridge slave handling */
        if (!port->nr_rmnet_devs) {
-               rmnet_dev = netdev_master_upper_dev_get_rcu(dev);
-               netdev_upper_dev_unlink(dev, rmnet_dev);
-
                bridge_dev = port->bridge_ep;
 
                bridge_port = rmnet_get_port_rtnl(bridge_dev);
@@ -132,9 +123,6 @@ static void rmnet_unregister_bridge(struct net_device *dev,
                bridge_dev = port->bridge_ep;
 
                bridge_port = rmnet_get_port_rtnl(bridge_dev);
-               rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev);
-               netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
-
                rmnet_unregister_real_device(bridge_dev, bridge_port);
        }
 }
@@ -173,10 +161,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
        if (err)
                goto err1;
 
-       err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack);
-       if (err)
-               goto err2;
-
        port->rmnet_mode = mode;
 
        hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
@@ -193,8 +177,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 
        return 0;
 
-err2:
-       rmnet_vnd_dellink(mux_id, port, ep);
 err1:
        rmnet_unregister_real_device(real_dev, port);
 err0:
@@ -204,14 +186,13 @@ err0:
 
 static void rmnet_dellink(struct net_device *dev, struct list_head *head)
 {
+       struct rmnet_priv *priv = netdev_priv(dev);
        struct net_device *real_dev;
        struct rmnet_endpoint *ep;
        struct rmnet_port *port;
        u8 mux_id;
 
-       rcu_read_lock();
-       real_dev = netdev_master_upper_dev_get_rcu(dev);
-       rcu_read_unlock();
+       real_dev = priv->real_dev;
 
        if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
                return;
@@ -219,7 +200,6 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
        port = rmnet_get_port_rtnl(real_dev);
 
        mux_id = rmnet_vnd_get_mux(dev);
-       netdev_upper_dev_unlink(dev, real_dev);
 
        ep = rmnet_get_endpoint(port, mux_id);
        if (ep) {
@@ -233,30 +213,13 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
        unregister_netdevice_queue(dev, head);
 }
 
-static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data)
-{
-       struct rmnet_walk_data *d = data;
-       struct rmnet_endpoint *ep;
-       u8 mux_id;
-
-       mux_id = rmnet_vnd_get_mux(rmnet_dev);
-       ep = rmnet_get_endpoint(d->port, mux_id);
-       if (ep) {
-               hlist_del_init_rcu(&ep->hlnode);
-               rmnet_vnd_dellink(mux_id, d->port, ep);
-               kfree(ep);
-       }
-       netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
-       unregister_netdevice_queue(rmnet_dev, d->head);
-
-       return 0;
-}
-
 static void rmnet_force_unassociate_device(struct net_device *dev)
 {
        struct net_device *real_dev = dev;
-       struct rmnet_walk_data d;
+       struct hlist_node *tmp_ep;
+       struct rmnet_endpoint *ep;
        struct rmnet_port *port;
+       unsigned long bkt_ep;
        LIST_HEAD(list);
 
        if (!rmnet_is_real_dev_registered(real_dev))
@@ -264,16 +227,19 @@ static void rmnet_force_unassociate_device(struct net_device *dev)
 
        ASSERT_RTNL();
 
-       d.real_dev = real_dev;
-       d.head = &list;
-
        port = rmnet_get_port_rtnl(dev);
-       d.port = port;
 
        rcu_read_lock();
        rmnet_unregister_bridge(dev, port);
 
-       netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d);
+       hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
+               unregister_netdevice_queue(ep->egress_dev, &list);
+               rmnet_vnd_dellink(ep->mux_id, port, ep);
+
+               hlist_del_init_rcu(&ep->hlnode);
+               kfree(ep);
+       }
+
        rcu_read_unlock();
        unregister_netdevice_many(&list);
 
@@ -422,11 +388,6 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
        if (err)
                return -EBUSY;
 
-       err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
-                                          extack);
-       if (err)
-               return -EINVAL;
-
        slave_port = rmnet_get_port(slave_dev);
        slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
        slave_port->bridge_ep = real_dev;
@@ -449,7 +410,6 @@ int rmnet_del_bridge(struct net_device *rmnet_dev,
        port->rmnet_mode = RMNET_EPMODE_VND;
        port->bridge_ep = NULL;
 
-       netdev_upper_dev_unlink(slave_dev, rmnet_dev);
        slave_port = rmnet_get_port(slave_dev);
        rmnet_unregister_real_device(slave_dev, slave_port);
 
index 6bc328f..b0dbca0 100644
@@ -38,6 +38,11 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
        }
 
        ep = rmnet_get_endpoint(port, mux_id);
+       if (!ep) {
+               kfree_skb(skb);
+               return RX_HANDLER_CONSUMED;
+       }
+
        vnd = ep->egress_dev;
 
        ip_family = cmd->flow_control.ip_family;
index 570a227..346d310 100644
@@ -121,7 +121,7 @@ static void rmnet_get_stats64(struct net_device *dev,
        memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
 
        for_each_possible_cpu(cpu) {
-               pcpu_ptr = this_cpu_ptr(priv->pcpu_stats);
+               pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
 
                do {
                        start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
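
The stats change is a real fix, not a cleanup: this_cpu_ptr() always resolves to the CPU executing the loop, so the old code summed one CPU's counters once per possible CPU, while per_cpu_ptr(ptr, cpu) selects each CPU's instance in turn. The full aggregation shape for one counter, as a sketch (rx_pkts and the struct layout are assumed from context):

	u64 total_rx_pkts = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct rmnet_pcpu_stats *s = per_cpu_ptr(priv->pcpu_stats, cpu);
		unsigned int start;
		u64 rx_pkts;

		do {	/* reread if a writer updated the counters mid-snapshot */
			start = u64_stats_fetch_begin_irq(&s->syncp);
			rx_pkts = s->stats.rx_pkts;
		} while (u64_stats_fetch_retry_irq(&s->syncp, start));

		total_rx_pkts += rx_pkts;
	}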
index 0bf7d17..7055db1 100644
@@ -99,12 +99,12 @@ static const int multicast_filter_limit = 32;
 #define RTL8169_PHY_TIMEOUT    (10*HZ)
 
 /* write/read MMIO register */
-#define RTL_W8(reg, val8)      writeb ((val8), ioaddr + (reg))
-#define RTL_W16(reg, val16)    writew ((val16), ioaddr + (reg))
-#define RTL_W32(reg, val32)    writel ((val32), ioaddr + (reg))
-#define RTL_R8(reg)            readb (ioaddr + (reg))
-#define RTL_R16(reg)           readw (ioaddr + (reg))
-#define RTL_R32(reg)           readl (ioaddr + (reg))
+#define RTL_W8(tp, reg, val8)  writeb((val8), tp->mmio_addr + (reg))
+#define RTL_W16(tp, reg, val16)        writew((val16), tp->mmio_addr + (reg))
+#define RTL_W32(tp, reg, val32)        writel((val32), tp->mmio_addr + (reg))
+#define RTL_R8(tp, reg)                readb(tp->mmio_addr + (reg))
+#define RTL_R16(tp, reg)               readw(tp->mmio_addr + (reg))
+#define RTL_R32(tp, reg)               readl(tp->mmio_addr + (reg))
 
 enum mac_version {
        RTL_GIGA_MAC_VER_01 = 0,
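
The macro rework threads the driver private struct through every MMIO access instead of relying on a function-local ioaddr being in scope; a trivial hedged sketch of the resulting calling style (demo function only; IntrMask and ChipCmd are register offsets used later in this patch):

	static void demo_mask_and_flush(struct rtl8169_private *tp)
	{
		RTL_W16(tp, IntrMask, 0);	/* writew(0, tp->mmio_addr + IntrMask) */
		RTL_R8(tp, ChipCmd);		/* dummy read to flush the posted write */
	}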
@@ -735,12 +735,6 @@ struct ring_info {
        u8              __pad[sizeof(void *) - sizeof(u32)];
 };
 
-enum features {
-       RTL_FEATURE_WOL         = (1 << 0),
-       RTL_FEATURE_MSI         = (1 << 1),
-       RTL_FEATURE_GMII        = (1 << 2),
-};
-
 struct rtl8169_counters {
        __le64  tx_packets;
        __le64  rx_packets;
@@ -829,7 +823,7 @@ struct rtl8169_private {
        void (*phy_reset_enable)(struct rtl8169_private *tp);
        void (*hw_start)(struct net_device *);
        unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
-       unsigned int (*link_ok)(void __iomem *);
+       unsigned int (*link_ok)(struct rtl8169_private *tp);
        int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
        bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
 
@@ -984,56 +978,46 @@ static bool rtl_ocp_reg_failure(struct rtl8169_private *tp, u32 reg)
 
 DECLARE_RTL_COND(rtl_ocp_gphy_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(GPHY_OCP) & OCPAR_FLAG;
+       return RTL_R32(tp, GPHY_OCP) & OCPAR_FLAG;
 }
 
 static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return;
 
-       RTL_W32(GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
+       RTL_W32(tp, GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
 
        rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
 }
 
 static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return 0;
 
-       RTL_W32(GPHY_OCP, reg << 15);
+       RTL_W32(tp, GPHY_OCP, reg << 15);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
-               (RTL_R32(GPHY_OCP) & 0xffff) : ~0;
+               (RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0;
 }
 
 static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return;
 
-       RTL_W32(OCPDR, OCPAR_FLAG | (reg << 15) | data);
+       RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
 }
 
 static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return 0;
 
-       RTL_W32(OCPDR, reg << 15);
+       RTL_W32(tp, OCPDR, reg << 15);
 
-       return RTL_R32(OCPDR);
+       return RTL_R32(tp, OCPDR);
 }
 
 #define OCP_STD_PHY_BASE       0xa400
@@ -1076,16 +1060,12 @@ static int mac_mcu_read(struct rtl8169_private *tp, int reg)
 
 DECLARE_RTL_COND(rtl_phyar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(PHYAR) & 0x80000000;
+       return RTL_R32(tp, PHYAR) & 0x80000000;
 }
 
 static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
+       RTL_W32(tp, PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
 
        rtl_udelay_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
        /*
@@ -1097,13 +1077,12 @@ static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
 
 static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        int value;
 
-       RTL_W32(PHYAR, 0x0 | (reg & 0x1f) << 16);
+       RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
 
        value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
-               RTL_R32(PHYAR) & 0xffff : ~0;
+               RTL_R32(tp, PHYAR) & 0xffff : ~0;
 
        /*
         * According to hardware specs a 20us delay is required after read
@@ -1116,18 +1095,14 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
 
 DECLARE_RTL_COND(rtl_ocpar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(OCPAR) & OCPAR_FLAG;
+       return RTL_R32(tp, OCPAR) & OCPAR_FLAG;
 }
 
 static void r8168dp_1_mdio_access(struct rtl8169_private *tp, int reg, u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
-       RTL_W32(OCPAR, OCPAR_GPHY_WRITE_CMD);
-       RTL_W32(EPHY_RXER_NUM, 0);
+       RTL_W32(tp, OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
+       RTL_W32(tp, OCPAR, OCPAR_GPHY_WRITE_CMD);
+       RTL_W32(tp, EPHY_RXER_NUM, 0);
 
        rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
 }
@@ -1140,51 +1115,46 @@ static void r8168dp_1_mdio_write(struct rtl8169_private *tp, int reg, int value)
 
 static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r8168dp_1_mdio_access(tp, reg, OCPDR_READ_CMD);
 
        mdelay(1);
-       RTL_W32(OCPAR, OCPAR_GPHY_READ_CMD);
-       RTL_W32(EPHY_RXER_NUM, 0);
+       RTL_W32(tp, OCPAR, OCPAR_GPHY_READ_CMD);
+       RTL_W32(tp, EPHY_RXER_NUM, 0);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
-               RTL_R32(OCPDR) & OCPDR_DATA_MASK : ~0;
+               RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0;
 }
 
 #define R8168DP_1_MDIO_ACCESS_BIT      0x00020000
 
-static void r8168dp_2_mdio_start(void __iomem *ioaddr)
+static void r8168dp_2_mdio_start(struct rtl8169_private *tp)
 {
-       RTL_W32(0xd0, RTL_R32(0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
+       RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
 }
 
-static void r8168dp_2_mdio_stop(void __iomem *ioaddr)
+static void r8168dp_2_mdio_stop(struct rtl8169_private *tp)
 {
-       RTL_W32(0xd0, RTL_R32(0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
+       RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
 }
 
 static void r8168dp_2_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       r8168dp_2_mdio_start(ioaddr);
+       r8168dp_2_mdio_start(tp);
 
        r8169_mdio_write(tp, reg, value);
 
-       r8168dp_2_mdio_stop(ioaddr);
+       r8168dp_2_mdio_stop(tp);
 }
 
 static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        int value;
 
-       r8168dp_2_mdio_start(ioaddr);
+       r8168dp_2_mdio_start(tp);
 
        value = r8169_mdio_read(tp, reg);
 
-       r8168dp_2_mdio_stop(ioaddr);
+       r8168dp_2_mdio_stop(tp);
 
        return value;
 }
@@ -1229,16 +1199,12 @@ static int rtl_mdio_read(struct net_device *dev, int phy_id, int location)
 
 DECLARE_RTL_COND(rtl_ephyar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(EPHYAR) & EPHYAR_FLAG;
+       return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
 }
 
 static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
+       RTL_W32(tp, EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
                (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
        rtl_udelay_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
@@ -1248,41 +1214,33 @@ static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
 
 static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
+       RTL_W32(tp, EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
-               RTL_R32(EPHYAR) & EPHYAR_DATA_MASK : ~0;
+               RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0;
 }
 
 DECLARE_RTL_COND(rtl_eriar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(ERIAR) & ERIAR_FLAG;
+       return RTL_R32(tp, ERIAR) & ERIAR_FLAG;
 }
 
 static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
                          u32 val, int type)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        BUG_ON((addr & 3) || (mask == 0));
-       RTL_W32(ERIDR, val);
-       RTL_W32(ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
+       RTL_W32(tp, ERIDR, val);
+       RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
 
        rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
 }
 
 static u32 rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
+       RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
-               RTL_R32(ERIDR) : ~0;
+               RTL_R32(tp, ERIDR) : ~0;
 }
 
 static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
@@ -1296,11 +1254,9 @@ static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
 
 static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+       RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
        return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
-               RTL_R32(OCPDR) : ~0;
+               RTL_R32(tp, OCPDR) : ~0;
 }
 
 static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
@@ -1328,10 +1284,8 @@ static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
                              u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(OCPDR, data);
-       RTL_W32(OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+       RTL_W32(tp, OCPDR, data);
+       RTL_W32(tp, OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
        rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
 }
 
@@ -1393,19 +1347,15 @@ DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
 
 DECLARE_RTL_COND(rtl_ocp_tx_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(IBISR0) & 0x20;
+       return RTL_R8(tp, IBISR0) & 0x20;
 }
 
 static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
+       RTL_W8(tp, IBCR2, RTL_R8(tp, IBCR2) & ~0x01);
        rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
-       RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
-       RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
+       RTL_W8(tp, IBISR0, RTL_R8(tp, IBISR0) | 0x20);
+       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
 }
 
 static void rtl8168dp_driver_start(struct rtl8169_private *tp)
@@ -1473,19 +1423,19 @@ static void rtl8168_driver_stop(struct rtl8169_private *tp)
        }
 }
 
-static int r8168dp_check_dash(struct rtl8169_private *tp)
+static bool r8168dp_check_dash(struct rtl8169_private *tp)
 {
        u16 reg = rtl8168_get_ocp_reg(tp);
 
-       return (ocp_read(tp, 0x0f, reg) & 0x00008000) ? 1 : 0;
+       return !!(ocp_read(tp, 0x0f, reg) & 0x00008000);
 }
 
-static int r8168ep_check_dash(struct rtl8169_private *tp)
+static bool r8168ep_check_dash(struct rtl8169_private *tp)
 {
-       return (ocp_read(tp, 0x0f, 0x128) & 0x00000001) ? 1 : 0;
+       return !!(ocp_read(tp, 0x0f, 0x128) & 0x00000001);
 }
 
-static int r8168_check_dash(struct rtl8169_private *tp)
+static bool r8168_check_dash(struct rtl8169_private *tp)
 {
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_27:
@@ -1497,7 +1447,7 @@ static int r8168_check_dash(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_51:
                return r8168ep_check_dash(tp);
        default:
-               return 0;
+               return false;
        }
 }
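
Converting the DASH probes to bool with !! keeps the raw register mask from leaking through the return value; with a bool return type the conversion is implicit anyway, so !! mostly documents intent and stays correct if the type ever reverts to int. A tiny hedged illustration:

	static bool demo_bit15_set(u32 reg)
	{
		return !!(reg & 0x8000);	/* evaluates to 1 or 0, never 0x8000 */
	}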
 
@@ -1518,49 +1468,37 @@ static void rtl_write_exgmac_batch(struct rtl8169_private *tp,
 
 DECLARE_RTL_COND(rtl_efusear_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(EFUSEAR) & EFUSEAR_FLAG;
+       return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG;
 }
 
 static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
+       RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
-               RTL_R32(EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
+               RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
 }
 
 static u16 rtl_get_events(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R16(IntrStatus);
+       return RTL_R16(tp, IntrStatus);
 }
 
 static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W16(IntrStatus, bits);
+       RTL_W16(tp, IntrStatus, bits);
        mmiowb();
 }
 
 static void rtl_irq_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W16(IntrMask, 0);
+       RTL_W16(tp, IntrMask, 0);
        mmiowb();
 }
 
 static void rtl_irq_enable(struct rtl8169_private *tp, u16 bits)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W16(IntrMask, bits);
+       RTL_W16(tp, IntrMask, bits);
 }
 
 #define RTL_EVENT_NAPI_RX      (RxOK | RxErr)
@@ -1574,18 +1512,14 @@ static void rtl_irq_enable_all(struct rtl8169_private *tp)
 
 static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_irq_disable(tp);
        rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
-       RTL_R8(ChipCmd);
+       RTL_R8(tp, ChipCmd);
 }
 
 static unsigned int rtl8169_tbi_reset_pending(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(TBICSR) & TBIReset;
+       return RTL_R32(tp, TBICSR) & TBIReset;
 }
 
 static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
@@ -1593,21 +1527,19 @@ static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
        return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
 }
 
-static unsigned int rtl8169_tbi_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_tbi_link_ok(struct rtl8169_private *tp)
 {
-       return RTL_R32(TBICSR) & TBILinkOk;
+       return RTL_R32(tp, TBICSR) & TBILinkOk;
 }
 
-static unsigned int rtl8169_xmii_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_xmii_link_ok(struct rtl8169_private *tp)
 {
-       return RTL_R8(PHYstatus) & LinkStatus;
+       return RTL_R8(tp, PHYstatus) & LinkStatus;
 }
 
 static void rtl8169_tbi_reset_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(TBICSR, RTL_R32(TBICSR) | TBIReset);
+       RTL_W32(tp, TBICSR, RTL_R32(tp, TBICSR) | TBIReset);
 }
 
 static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
@@ -1620,7 +1552,6 @@ static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
 
 static void rtl_link_chg_patch(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct net_device *dev = tp->dev;
 
        if (!netif_running(dev))
@@ -1628,12 +1559,12 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
            tp->mac_version == RTL_GIGA_MAC_VER_38) {
-               if (RTL_R8(PHYstatus) & _1000bpsF) {
+               if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
                                      ERIAR_EXGMAC);
-               } else if (RTL_R8(PHYstatus) & _100bps) {
+               } else if (RTL_R8(tp, PHYstatus) & _100bps) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1651,7 +1582,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
                             ERIAR_EXGMAC);
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
                   tp->mac_version == RTL_GIGA_MAC_VER_36) {
-               if (RTL_R8(PHYstatus) & _1000bpsF) {
+               if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1663,7 +1594,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
                                      ERIAR_EXGMAC);
                }
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
-               if (RTL_R8(PHYstatus) & _10bps) {
+               if (RTL_R8(tp, PHYstatus) & _10bps) {
                        rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
@@ -1676,10 +1607,9 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 }
 
 static void rtl8169_check_link_status(struct net_device *dev,
-                                     struct rtl8169_private *tp,
-                                     void __iomem *ioaddr)
+                                     struct rtl8169_private *tp)
 {
-       if (tp->link_ok(ioaddr)) {
+       if (tp->link_ok(tp)) {
                rtl_link_chg_patch(tp);
                /* This is to cancel a scheduled suspend if there's one. */
                pm_request_resume(&tp->pci_dev->dev);
@@ -1697,15 +1627,14 @@ static void rtl8169_check_link_status(struct net_device *dev,
 
 static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u8 options;
        u32 wolopts = 0;
 
-       options = RTL_R8(Config1);
+       options = RTL_R8(tp, Config1);
        if (!(options & PMEnable))
                return 0;
 
-       options = RTL_R8(Config3);
+       options = RTL_R8(tp, Config3);
        if (options & LinkUp)
                wolopts |= WAKE_PHY;
        switch (tp->mac_version) {
@@ -1735,7 +1664,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
                break;
        }
 
-       options = RTL_R8(Config5);
+       options = RTL_R8(tp, Config5);
        if (options & UWF)
                wolopts |= WAKE_UCAST;
        if (options & BWF)
@@ -1768,7 +1697,6 @@ static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        unsigned int i, tmp;
        static const struct {
                u32 opt;
@@ -1784,7 +1712,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
        };
        u8 options;
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_34:
@@ -1826,28 +1754,28 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
        }
 
        for (i = 0; i < tmp; i++) {
-               options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
+               options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
                if (wolopts & cfg[i].opt)
                        options |= cfg[i].mask;
-               RTL_W8(cfg[i].reg, options);
+               RTL_W8(tp, cfg[i].reg, options);
        }
 
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
-               options = RTL_R8(Config1) & ~PMEnable;
+               options = RTL_R8(tp, Config1) & ~PMEnable;
                if (wolopts)
                        options |= PMEnable;
-               RTL_W8(Config1, options);
+               RTL_W8(tp, Config1, options);
                break;
        default:
-               options = RTL_R8(Config2) & ~PME_SIGNAL;
+               options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
                if (wolopts)
                        options |= PME_SIGNAL;
-               RTL_W8(Config2, options);
+               RTL_W8(tp, Config2, options);
                break;
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -1859,10 +1787,6 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
        rtl_lock_work(tp);
 
-       if (wol->wolopts)
-               tp->features |= RTL_FEATURE_WOL;
-       else
-               tp->features &= ~RTL_FEATURE_WOL;
        if (pm_runtime_active(d))
                __rtl8169_set_wol(tp, wol->wolopts);
        else
@@ -1906,16 +1830,15 @@ static int rtl8169_set_speed_tbi(struct net_device *dev,
                                 u8 autoneg, u16 speed, u8 duplex, u32 ignored)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        int ret = 0;
        u32 reg;
 
-       reg = RTL_R32(TBICSR);
+       reg = RTL_R32(tp, TBICSR);
        if ((autoneg == AUTONEG_DISABLE) && (speed == SPEED_1000) &&
            (duplex == DUPLEX_FULL)) {
-               RTL_W32(TBICSR, reg & ~(TBINwEnable | TBINwRestart));
+               RTL_W32(tp, TBICSR, reg & ~(TBINwEnable | TBINwRestart));
        } else if (autoneg == AUTONEG_ENABLE)
-               RTL_W32(TBICSR, reg | TBINwEnable | TBINwRestart);
+               RTL_W32(tp, TBICSR, reg | TBINwEnable | TBINwRestart);
        else {
                netif_warn(tp, link, dev,
                           "incorrect speed setting refused in TBI mode\n");
@@ -2040,16 +1963,15 @@ static void __rtl8169_set_features(struct net_device *dev,
                                   netdev_features_t features)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 rx_config;
 
-       rx_config = RTL_R32(RxConfig);
+       rx_config = RTL_R32(tp, RxConfig);
        if (features & NETIF_F_RXALL)
                rx_config |= (AcceptErr | AcceptRunt);
        else
                rx_config &= ~(AcceptErr | AcceptRunt);
 
-       RTL_W32(RxConfig, rx_config);
+       RTL_W32(tp, RxConfig, rx_config);
 
        if (features & NETIF_F_RXCSUM)
                tp->cp_cmd |= RxChkSum;
@@ -2061,10 +1983,10 @@ static void __rtl8169_set_features(struct net_device *dev,
        else
                tp->cp_cmd &= ~RxVlan;
 
-       tp->cp_cmd |= RTL_R16(CPlusCmd) & ~(RxVlan | RxChkSum);
+       tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
 
-       RTL_W16(CPlusCmd, tp->cp_cmd);
-       RTL_R16(CPlusCmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+       RTL_R16(tp, CPlusCmd);
 }
 
 static int rtl8169_set_features(struct net_device *dev,
@@ -2101,7 +2023,6 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
                                          struct ethtool_link_ksettings *cmd)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 status;
        u32 supported, advertising;
 
@@ -2109,7 +2030,7 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
                SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
        cmd->base.port = PORT_FIBRE;
 
-       status = RTL_R32(TBICSR);
+       status = RTL_R32(tp, TBICSR);
        advertising = (status & TBINwEnable) ?  ADVERTISED_Autoneg : 0;
        cmd->base.autoneg = !!(status & TBINwEnable);
 
@@ -2224,23 +2145,20 @@ static int rtl8169_get_sset_count(struct net_device *dev, int sset)
 
 DECLARE_RTL_COND(rtl_counters_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
+       return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
 }
 
 static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        dma_addr_t paddr = tp->counters_phys_addr;
        u32 cmd;
 
-       RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
-       RTL_R32(CounterAddrHigh);
+       RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+       RTL_R32(tp, CounterAddrHigh);
        cmd = (u64)paddr & DMA_BIT_MASK(32);
-       RTL_W32(CounterAddrLow, cmd);
-       RTL_W32(CounterAddrLow, cmd | counter_cmd);
+       RTL_W32(tp, CounterAddrLow, cmd);
+       RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
 
        return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
 }
@@ -2262,13 +2180,12 @@ static bool rtl8169_reset_counters(struct net_device *dev)
 static bool rtl8169_update_counters(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
 
        /*
         * Some chips are unable to dump tally counters when the receiver
         * is disabled.
         */
-       if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
+       if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
                return true;
 
        return rtl8169_do_counters(dev, CounterDump);
@@ -2448,7 +2365,6 @@ static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
 static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        const struct rtl_coalesce_info *ci;
        const struct rtl_coalesce_scale *scale;
        struct {
@@ -2468,10 +2384,10 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
        if (IS_ERR(ci))
                return PTR_ERR(ci);
 
-       scale = &ci->scalev[RTL_R16(CPlusCmd) & 3];
+       scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
 
        /* read IntrMitigate and adjust according to scale */
-       for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+       for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
                *p->max_frames = (w & RTL_COALESCE_MASK) << 2;
                w >>= RTL_COALESCE_SHIFT;
                *p->usecs = w & RTL_COALESCE_MASK;
@@ -2518,7 +2434,6 @@ static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
 static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        const struct rtl_coalesce_scale *scale;
        struct {
                u32 frames;
@@ -2566,11 +2481,11 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
        rtl_lock_work(tp);
 
-       RTL_W16(IntrMitigate, swab16(w));
+       RTL_W16(tp, IntrMitigate, swab16(w));
 
        tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
-       RTL_W16(CPlusCmd, tp->cp_cmd);
-       RTL_R16(CPlusCmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+       RTL_R16(tp, CPlusCmd);
 
        rtl_unlock_work(tp);
 
@@ -2600,17 +2515,16 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 static void rtl8169_get_mac_version(struct rtl8169_private *tp,
                                    struct net_device *dev, u8 default_version)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        /*
         * The driver currently handles the 8168Bf and the 8168Be identically
         * but they can be identified more specifically through the test below
         * if needed:
         *
-        * (RTL_R32(TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
+        * (RTL_R32(tp, TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
         *
         * Same thing for the 8101Eb and the 8101Ec:
         *
-        * (RTL_R32(TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
+        * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
         */
        static const struct rtl_mac_info {
                u32 mask;
@@ -2708,7 +2622,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
        const struct rtl_mac_info *p = mac_info;
        u32 reg;
 
-       reg = RTL_R32(TxConfig);
+       reg = RTL_R32(tp, TxConfig);
        while ((reg & p->mask) != p->val)
                p++;
        tp->mac_version = p->mac_version;
@@ -3805,8 +3719,6 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
        rtl_writephy(tp, 0x1f, 0x0005);
        rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000);
        rtl_writephy(tp, 0x1f, 0x0000);
-       /* soft-reset phy */
-       rtl_writephy(tp, MII_BMCR, BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
 
        /* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
        rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
@@ -4591,7 +4503,6 @@ static void rtl_hw_phy_config(struct net_device *dev)
 static void rtl_phy_work(struct rtl8169_private *tp)
 {
        struct timer_list *timer = &tp->timer;
-       void __iomem *ioaddr = tp->mmio_addr;
        unsigned long timeout = RTL8169_PHY_TIMEOUT;
 
        assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
@@ -4605,7 +4516,7 @@ static void rtl_phy_work(struct rtl8169_private *tp)
                goto out_mod_timer;
        }
 
-       if (tp->link_ok(ioaddr))
+       if (tp->link_ok(tp))
                return;
 
        netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
@@ -4643,21 +4554,17 @@ static void rtl8169_phy_reset(struct net_device *dev,
 
 static bool rtl_tbi_enabled(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
-           (RTL_R8(PHYstatus) & TBI_Enable);
+           (RTL_R8(tp, PHYstatus) & TBI_Enable);
 }
 
 static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_hw_phy_config(dev);
 
        if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
                dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
-               RTL_W8(0x82, 0x01);
+               RTL_W8(tp, 0x82, 0x01);
        }
 
        pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
@@ -4667,7 +4574,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
                dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
-               RTL_W8(0x82, 0x01);
+               RTL_W8(tp, 0x82, 0x01);
                dprintk("Set PHY Reg 0x0bh = 0x00h\n");
                rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
        }
@@ -4687,22 +4594,20 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 
 static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_lock_work(tp);
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-       RTL_W32(MAC4, addr[4] | addr[5] << 8);
-       RTL_R32(MAC4);
+       RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
+       RTL_R32(tp, MAC4);
 
-       RTL_W32(MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
-       RTL_R32(MAC0);
+       RTL_W32(tp, MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
+       RTL_R32(tp, MAC0);
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_34)
                rtl_rar_exgmac_set(tp, addr);
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
        rtl_unlock_work(tp);
 }
@@ -4822,8 +4727,6 @@ static void rtl_speed_down(struct rtl8169_private *tp)
 
 static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_25:
        case RTL_GIGA_MAC_VER_26:
@@ -4847,7 +4750,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_49:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W32(RxConfig, RTL_R32(RxConfig) |
+               RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
                        AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
                break;
        default:
@@ -4880,8 +4783,6 @@ static void r810x_phy_power_up(struct rtl8169_private *tp)
 
 static void r810x_pll_power_down(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_wol_pll_power_down(tp))
                return;
 
@@ -4896,15 +4797,13 @@ static void r810x_pll_power_down(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_16:
                break;
        default:
-               RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
                break;
        }
 }
 
 static void r810x_pll_power_up(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r810x_phy_power_up(tp);
 
        switch (tp->mac_version) {
@@ -4917,10 +4816,10 @@ static void r810x_pll_power_up(struct rtl8169_private *tp)
                break;
        case RTL_GIGA_MAC_VER_47:
        case RTL_GIGA_MAC_VER_48:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
                break;
        default:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
                break;
        }
 }
@@ -4987,21 +4886,12 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
 
 static void r8168_pll_power_down(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-           r8168_check_dash(tp)) {
+       if (r8168_check_dash(tp))
                return;
-       }
 
        if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
             tp->mac_version == RTL_GIGA_MAC_VER_24) &&
-           (RTL_R16(CPlusCmd) & ASF)) {
+           (RTL_R16(tp, CPlusCmd) & ASF)) {
                return;
        }
 
@@ -5027,22 +4917,20 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_46:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
                break;
        case RTL_GIGA_MAC_VER_40:
        case RTL_GIGA_MAC_VER_41:
        case RTL_GIGA_MAC_VER_49:
                rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000,
                             0xfc000000, ERIAR_EXGMAC);
-               RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
                break;
        }
 }
 
 static void r8168_pll_power_up(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_25:
        case RTL_GIGA_MAC_VER_26:
@@ -5051,19 +4939,19 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_31:
        case RTL_GIGA_MAC_VER_32:
        case RTL_GIGA_MAC_VER_33:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
                break;
        case RTL_GIGA_MAC_VER_44:
        case RTL_GIGA_MAC_VER_45:
        case RTL_GIGA_MAC_VER_46:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
                break;
        case RTL_GIGA_MAC_VER_40:
        case RTL_GIGA_MAC_VER_41:
        case RTL_GIGA_MAC_VER_49:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
                rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000,
                             0x00000000, ERIAR_EXGMAC);
                break;
@@ -5153,8 +5041,6 @@ static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
 
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_01:
        case RTL_GIGA_MAC_VER_02:
@@ -5170,7 +5056,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_15:
        case RTL_GIGA_MAC_VER_16:
        case RTL_GIGA_MAC_VER_17:
-               RTL_W32(RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
+               RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
                break;
        case RTL_GIGA_MAC_VER_18:
        case RTL_GIGA_MAC_VER_19:
@@ -5181,7 +5067,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_24:
        case RTL_GIGA_MAC_VER_34:
        case RTL_GIGA_MAC_VER_35:
-               RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
                break;
        case RTL_GIGA_MAC_VER_40:
        case RTL_GIGA_MAC_VER_41:
@@ -5195,10 +5081,10 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_49:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
                break;
        default:
-               RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST);
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
                break;
        }
 }
@@ -5210,71 +5096,55 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 
 static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
        rtl_generic_op(tp, tp->jumbo_ops.enable);
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
        rtl_generic_op(tp, tp->jumbo_ops.disable);
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) | Jumbo_En1);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
        rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) & ~Jumbo_En1);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
        rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
 static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
 }
 
 static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 }
 
 static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(MaxTxPacketSize, 0x3f);
-       RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) | 0x01);
+       RTL_W8(tp, MaxTxPacketSize, 0x3f);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
        rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(MaxTxPacketSize, 0x0c);
-       RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) & ~0x01);
+       RTL_W8(tp, MaxTxPacketSize, 0x0c);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
        rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
@@ -5292,20 +5162,16 @@ static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
 
 static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r8168b_0_hw_jumbo_enable(tp);
 
-       RTL_W8(Config4, RTL_R8(Config4) | (1 << 0));
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
 }
 
 static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r8168b_0_hw_jumbo_disable(tp);
 
-       RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
 static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
@@ -5372,16 +5238,12 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
 
 DECLARE_RTL_COND(rtl_chipcmd_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(ChipCmd) & CmdReset;
+       return RTL_R8(tp, ChipCmd) & CmdReset;
 }
 
 static void rtl_hw_reset(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(ChipCmd, CmdReset);
+       RTL_W8(tp, ChipCmd, CmdReset);
 
        rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
@@ -5432,29 +5294,21 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
 
 static void rtl_rx_close(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(RxConfig, RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
+       RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
 }
 
 DECLARE_RTL_COND(rtl_npq_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(TxPoll) & NPQ;
+       return RTL_R8(tp, TxPoll) & NPQ;
 }
 
 DECLARE_RTL_COND(rtl_txcfg_empty_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(TxConfig) & TXCFG_EMPTY;
+       return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
 }
 
 static void rtl8169_hw_reset(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* Disable interrupts */
        rtl8169_irq_mask_and_ack(tp);
 
@@ -5481,10 +5335,10 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
                   tp->mac_version == RTL_GIGA_MAC_VER_49 ||
                   tp->mac_version == RTL_GIGA_MAC_VER_50 ||
                   tp->mac_version == RTL_GIGA_MAC_VER_51) {
-               RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+               RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
                rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
        } else {
-               RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+               RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
                udelay(100);
        }
 
@@ -5493,10 +5347,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 
 static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* Set DMA burst size and Interframe Gap Time */
-       RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) |
+       RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
                (InterFrameGap << TxInterFrameGapShift));
 }
 
@@ -5509,36 +5361,35 @@ static void rtl_hw_start(struct net_device *dev)
        rtl_irq_enable_all(tp);
 }
 
-static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp,
-                                        void __iomem *ioaddr)
+static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 {
        /*
         * Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
         * register to be written before TxDescAddrLow to work.
         * Switching from MMIO to I/O access fixes the issue as well.
         */
-       RTL_W32(TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
-       RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
-       RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
-       RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+       RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
+       RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
+       RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
+       RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
 }
 
-static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
+static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
 {
        u16 cmd;
 
-       cmd = RTL_R16(CPlusCmd);
-       RTL_W16(CPlusCmd, cmd);
+       cmd = RTL_R16(tp, CPlusCmd);
+       RTL_W16(tp, CPlusCmd, cmd);
        return cmd;
 }
 
-static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
 {
        /* Low hurts. Let's disable the filtering. */
-       RTL_W16(RxMaxSize, rx_buf_sz + 1);
+       RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
 }
 
-static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
+static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
 {
        static const struct rtl_cfg2_info {
                u32 mac_version;
@@ -5554,10 +5405,10 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
        unsigned int i;
        u32 clk;
 
-       clk = RTL_R8(Config2) & PCI_Clock_66MHz;
+       clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
        for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
                if ((p->mac_version == mac_version) && (p->clk == clk)) {
-                       RTL_W32(0x7c, p->val);
+                       RTL_W32(tp, 0x7c, p->val);
                        break;
                }
        }
@@ -5566,7 +5417,6 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
 static void rtl_set_rx_mode(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 mc_filter[2];       /* Multicast hash filter */
        int rx_mode;
        u32 tmp = 0;
@@ -5598,7 +5448,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
        if (dev->features & NETIF_F_RXALL)
                rx_mode |= (AcceptErr | AcceptRunt);
 
-       tmp = (RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
+       tmp = (RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
 
        if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
                u32 data = mc_filter[0];
@@ -5610,35 +5460,34 @@ static void rtl_set_rx_mode(struct net_device *dev)
        if (tp->mac_version == RTL_GIGA_MAC_VER_35)
                mc_filter[1] = mc_filter[0] = 0xffffffff;
 
-       RTL_W32(MAR0 + 4, mc_filter[1]);
-       RTL_W32(MAR0 + 0, mc_filter[0]);
+       RTL_W32(tp, MAR0 + 4, mc_filter[1]);
+       RTL_W32(tp, MAR0 + 0, mc_filter[0]);
 
-       RTL_W32(RxConfig, tmp);
+       RTL_W32(tp, RxConfig, tmp);
 }
 
 static void rtl_hw_start_8169(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
-               RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | PCIMulRW);
+               RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
                pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
        if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
            tp->mac_version == RTL_GIGA_MAC_VER_02 ||
            tp->mac_version == RTL_GIGA_MAC_VER_03 ||
            tp->mac_version == RTL_GIGA_MAC_VER_04)
-               RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+               RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
        rtl_init_rxcfg(tp);
 
-       RTL_W8(EarlyTxThres, NoEarlyTx);
+       RTL_W8(tp, EarlyTxThres, NoEarlyTx);
 
-       rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+       rtl_set_rx_max_size(tp, rx_buf_sz);
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
            tp->mac_version == RTL_GIGA_MAC_VER_02 ||
@@ -5646,7 +5495,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
            tp->mac_version == RTL_GIGA_MAC_VER_04)
                rtl_set_rx_tx_config_registers(tp);
 
-       tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
+       tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
            tp->mac_version == RTL_GIGA_MAC_VER_03) {
@@ -5655,37 +5504,37 @@ static void rtl_hw_start_8169(struct net_device *dev)
                tp->cp_cmd |= (1 << 14);
        }
 
-       RTL_W16(CPlusCmd, tp->cp_cmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-       rtl8169_set_magic_reg(ioaddr, tp->mac_version);
+       rtl8169_set_magic_reg(tp, tp->mac_version);
 
        /*
         * Undocumented corner. Supposedly:
         * (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets
         */
-       RTL_W16(IntrMitigate, 0x0000);
+       RTL_W16(tp, IntrMitigate, 0x0000);
 
-       rtl_set_rx_tx_desc_registers(tp, ioaddr);
+       rtl_set_rx_tx_desc_registers(tp);
 
        if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
            tp->mac_version != RTL_GIGA_MAC_VER_02 &&
            tp->mac_version != RTL_GIGA_MAC_VER_03 &&
            tp->mac_version != RTL_GIGA_MAC_VER_04) {
-               RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+               RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
                rtl_set_rx_tx_config_registers(tp);
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
        /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
-       RTL_R8(IntrMask);
+       RTL_R8(tp, IntrMask);
 
-       RTL_W32(RxMissed, 0);
+       RTL_W32(tp, RxMissed, 0);
 
        rtl_set_rx_mode(dev);
 
        /* no early-rx interrupts */
-       RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
@@ -5719,17 +5568,13 @@ static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
 
 DECLARE_RTL_COND(rtl_csiar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(CSIAR) & CSIAR_FLAG;
+       return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
 }
 
 static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIDR, value);
-       RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIDR, value);
+       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
@@ -5737,21 +5582,17 @@ static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(CSIDR) : ~0;
+               RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIDR, value);
-       RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIDR, value);
+       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
                CSIAR_FUNC_NIC);
 
@@ -5760,21 +5601,17 @@ static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
+       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(CSIDR) : ~0;
+               RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIDR, value);
-       RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIDR, value);
+       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
                CSIAR_FUNC_NIC2);
 
@@ -5783,13 +5620,11 @@ static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
+       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(CSIDR) : ~0;
+               RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void rtl_init_csi_ops(struct rtl8169_private *tp)
@@ -5865,17 +5700,16 @@ static void rtl_enable_clock_request(struct pci_dev *pdev)
 
 static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u8 data;
 
-       data = RTL_R8(Config3);
+       data = RTL_R8(tp, Config3);
 
        if (enable)
                data |= Rdy_to_L23;
        else
                data &= ~Rdy_to_L23;
 
-       RTL_W8(Config3, data);
+       RTL_W8(tp, Config3, data);
 }
 
 #define R8168_CPCMD_QUIRK_MASK (\
@@ -5891,12 +5725,11 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 
        if (tp->dev->mtu <= ETH_DATA_LEN) {
                rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) |
@@ -5906,30 +5739,27 @@ static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_hw_start_8168bb(tp);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
 static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
-       RTL_W8(Config1, RTL_R8(Config1) | Speed_down);
+       RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
        rtl_disable_clock_request(pdev);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -5951,42 +5781,39 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
        /* Magic. */
-       RTL_W8(DBG_REG, 0x20);
+       RTL_W8(tp, DBG_REG, 0x20);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168c_1[] = {
                { 0x02, 0x0800, 0x1000 },
                { 0x03, 0,      0x0002 },
@@ -5995,7 +5822,7 @@ static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
+       RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
 
        rtl_ephy_init(tp, e_info_8168c_1, ARRAY_SIZE(e_info_8168c_1));
 
@@ -6030,24 +5857,22 @@ static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        rtl_csi_access_enable_2(tp);
 
        rtl_disable_clock_request(pdev);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        rtl_csi_access_enable_1(tp);
@@ -6055,14 +5880,13 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        rtl_disable_clock_request(pdev);
 }
 
 static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8168d_4[] = {
                { 0x0b, 0x0000, 0x0048 },
@@ -6074,7 +5898,7 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
        rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
 
@@ -6083,7 +5907,6 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8168e_1[] = {
                { 0x00, 0x0200, 0x0100 },
@@ -6108,20 +5931,19 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        rtl_disable_clock_request(pdev);
 
        /* Reset tx FIFO pointer */
-       RTL_W32(MISC, RTL_R32(MISC) | TXPLA_RST);
-       RTL_W32(MISC, RTL_R32(MISC) & ~TXPLA_RST);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST);
 
-       RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8168e_2[] = {
                { 0x09, 0x0000, 0x0080 },
@@ -6144,24 +5966,23 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
        rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
        rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
 
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_disable_clock_request(pdev);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
-       RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-       RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        rtl_csi_access_enable_2(tp);
@@ -6179,20 +6000,19 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060, ERIAR_EXGMAC);
 
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_disable_clock_request(pdev);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
-       RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
-       RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-       RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168f_1[] = {
                { 0x06, 0x00c0, 0x0020 },
                { 0x08, 0x0001, 0x0002 },
@@ -6207,7 +6027,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
        rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 }
 
 static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -6229,10 +6049,9 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
        rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6247,14 +6066,14 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
        rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC);
 
-       RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
        rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
        rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
@@ -6264,7 +6083,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168g_1[] = {
                { 0x00, 0x0000, 0x0008 },
                { 0x0c, 0x37d0, 0x0820 },
@@ -6275,14 +6093,13 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
 }
 
 static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168g_2[] = {
                { 0x00, 0x0000, 0x0008 },
                { 0x0c, 0x3df0, 0x0200 },
@@ -6293,14 +6110,13 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2));
 }
 
 static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8411_2[] = {
                { 0x00, 0x0000, 0x0008 },
                { 0x0c, 0x3df0, 0x0200 },
@@ -6312,14 +6128,13 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
 }
 
 static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        int rg_saw_cnt;
        u32 data;
@@ -6333,11 +6148,11 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
        rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6357,19 +6172,19 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
        rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
 
-       RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-       RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+       RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
        rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
 
@@ -6417,12 +6232,11 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        rtl8168ep_stop_cmac(tp);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
        rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
@@ -6440,25 +6254,24 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
        rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
 
-       RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
        rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
        rtl_pcie_state_l2l3_enable(tp, false);
 }
 
 static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168ep_1[] = {
                { 0x00, 0xffff, 0x10ab },
                { 0x06, 0xffff, 0xf030 },
@@ -6468,8 +6281,8 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
 
        rtl_hw_start_8168ep(tp);
@@ -6477,7 +6290,6 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168ep_2[] = {
                { 0x00, 0xffff, 0x10a3 },
                { 0x19, 0xffff, 0xfc00 },
@@ -6485,19 +6297,18 @@ static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
 
        rtl_hw_start_8168ep(tp);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-       RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+       RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 }
 
 static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 data;
        static const struct ephy_info e_info_8168ep_3[] = {
                { 0x00, 0xffff, 0x10a3 },
@@ -6507,14 +6318,14 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
 
        rtl_hw_start_8168ep(tp);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-       RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+       RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
        data = r8168_mac_ocp_read(tp, 0xd3e2);
        data &= 0xf000;
@@ -6533,19 +6344,18 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 static void rtl_hw_start_8168(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+       rtl_set_rx_max_size(tp, rx_buf_sz);
 
-       tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
+       tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
 
-       RTL_W16(CPlusCmd, tp->cp_cmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-       RTL_W16(IntrMitigate, 0x5151);
+       RTL_W16(tp, IntrMitigate, 0x5151);
 
        /* Workaround for RxFIFO overflow. */
        if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
@@ -6553,11 +6363,11 @@ static void rtl_hw_start_8168(struct net_device *dev)
                tp->event_slow &= ~RxOverflow;
        }
 
-       rtl_set_rx_tx_desc_registers(tp, ioaddr);
+       rtl_set_rx_tx_desc_registers(tp);
 
        rtl_set_rx_tx_config_registers(tp);
 
-       RTL_R8(IntrMask);
+       RTL_R8(tp, IntrMask);
 
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_11:
@@ -6663,13 +6473,13 @@ static void rtl_hw_start_8168(struct net_device *dev)
                break;
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
-       RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+       RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
        rtl_set_rx_mode(dev);
 
-       RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 #define R810X_CPCMD_QUIRK_MASK (\
@@ -6685,7 +6495,6 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8102e_1[] = {
                { 0x01, 0, 0x6e65 },
@@ -6701,32 +6510,31 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(DBG_REG, FIX_NAK_1);
+       RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
        rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(Config1,
+       RTL_W8(tp, Config1,
               LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-       cfg1 = RTL_R8(Config1);
+       cfg1 = RTL_R8(tp, Config1);
        if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
-               RTL_W8(Config1, cfg1 & ~LEDS0);
+               RTL_W8(tp, Config1, cfg1 & ~LEDS0);
 
        rtl_ephy_init(tp, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
 }
 
 static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        rtl_csi_access_enable_2(tp);
 
        rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 }
 
 static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
@@ -6738,7 +6546,6 @@ static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8105e_1[] = {
                { 0x07, 0, 0x4000 },
                { 0x19, 0, 0x0200 },
@@ -6751,13 +6558,13 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
        };
 
        /* Force LAN exit from ASPM if Rx/Tx are not idle */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
        /* Disable Early Tally Counter */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) & ~0x010000);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) & ~0x010000);
 
-       RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
-       RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
 
        rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
 
@@ -6772,7 +6579,6 @@ static void rtl_hw_start_8105e_2(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8402(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8402[] = {
                { 0x19, 0xffff, 0xff64 },
                { 0x1e, 0, 0x4000 }
@@ -6781,10 +6587,10 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
        rtl_csi_access_enable_2(tp);
 
        /* Force LAN exit from ASPM if Rx/Tx are not idle */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
        rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
 
@@ -6803,14 +6609,12 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8106(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* Force LAN exit from ASPM if Rx/Tx are not idle */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
-       RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
-       RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
+       RTL_W32(tp, MISC, (RTL_R32(tp, MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 
        rtl_pcie_state_l2l3_enable(tp, false);
 }
@@ -6818,7 +6622,6 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
 static void rtl_hw_start_8101(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
@@ -6829,16 +6632,16 @@ static void rtl_hw_start_8101(struct net_device *dev)
                pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
                                         PCI_EXP_DEVCTL_NOSNOOP_EN);
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+       rtl_set_rx_max_size(tp, rx_buf_sz);
 
        tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
-       RTL_W16(CPlusCmd, tp->cp_cmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-       rtl_set_rx_tx_desc_registers(tp, ioaddr);
+       rtl_set_rx_tx_desc_registers(tp);
 
        rtl_set_rx_tx_config_registers(tp);
 
@@ -6878,17 +6681,17 @@ static void rtl_hw_start_8101(struct net_device *dev)
                break;
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
-       RTL_W16(IntrMitigate, 0x0000);
+       RTL_W16(tp, IntrMitigate, 0x0000);
 
-       RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+       RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
        rtl_set_rx_mode(dev);
 
-       RTL_R8(IntrMask);
+       RTL_R8(tp, IntrMask);
 
-       RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -7098,7 +6901,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
        napi_enable(&tp->napi);
        rtl_hw_start(dev);
        netif_wake_queue(dev);
-       rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+       rtl8169_check_link_status(dev, tp);
 }
 
 static void rtl8169_tx_timeout(struct net_device *dev)
@@ -7346,7 +7149,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
        struct rtl8169_private *tp = netdev_priv(dev);
        unsigned int entry = tp->cur_tx % NUM_TX_DESC;
        struct TxDesc *txd = tp->TxDescArray + entry;
-       void __iomem *ioaddr = tp->mmio_addr;
        struct device *d = &tp->pci_dev->dev;
        dma_addr_t mapping;
        u32 status, len;
@@ -7406,7 +7208,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 
        tp->cur_tx += frags + 1;
 
-       RTL_W8(TxPoll, NPQ);
+       RTL_W8(tp, TxPoll, NPQ);
 
        mmiowb();
 
@@ -7477,11 +7279,9 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
 
        /* The infamous DAC f*ckup only happens at boot time */
        if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) {
-               void __iomem *ioaddr = tp->mmio_addr;
-
                netif_info(tp, intr, dev, "disabling PCI DAC\n");
                tp->cp_cmd &= ~PCIDAC;
-               RTL_W16(CPlusCmd, tp->cp_cmd);
+               RTL_W16(tp, CPlusCmd, tp->cp_cmd);
                dev->features &= ~NETIF_F_HIGHDMA;
        }
 
@@ -7547,11 +7347,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
                 * of start_xmit activity is detected (if it is not detected,
                 * it is slow enough). -- FR
                 */
-               if (tp->cur_tx != dirty_tx) {
-                       void __iomem *ioaddr = tp->mmio_addr;
-
-                       RTL_W8(TxPoll, NPQ);
-               }
+               if (tp->cur_tx != dirty_tx)
+                       RTL_W8(tp, TxPoll, NPQ);
        }
 }
 
@@ -7732,7 +7529,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp)
                rtl8169_pcierr_interrupt(dev);
 
        if (status & LinkChg)
-               rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+               rtl8169_check_link_status(dev, tp);
 
        rtl_irq_enable_all(tp);
 }
@@ -7804,21 +7601,20 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
        return work_done;
 }
 
-static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
+static void rtl8169_rx_missed(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
        if (tp->mac_version > RTL_GIGA_MAC_VER_06)
                return;
 
-       dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
-       RTL_W32(RxMissed, 0);
+       dev->stats.rx_missed_errors += RTL_R32(tp, RxMissed) & 0xffffff;
+       RTL_W32(tp, RxMissed, 0);
 }
 
 static void rtl8169_down(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
 
        del_timer_sync(&tp->timer);
 
@@ -7831,7 +7627,7 @@ static void rtl8169_down(struct net_device *dev)
         * as netif_running is not true (rtl8169_interrupt, rtl8169_reset_task)
         * and napi is disabled (rtl8169_poll).
         */
-       rtl8169_rx_missed(dev, ioaddr);
+       rtl8169_rx_missed(dev);
 
        /* Give a racing hard_start_xmit a few cycles to complete. */
        synchronize_sched();
@@ -7861,7 +7657,7 @@ static int rtl8169_close(struct net_device *dev)
 
        cancel_work_sync(&tp->wk.work);
 
-       free_irq(pdev->irq, dev);
+       pci_free_irq(pdev, 0, dev);
 
        dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
                          tp->RxPhyAddr);
@@ -7880,14 +7676,13 @@ static void rtl8169_netpoll(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       rtl8169_interrupt(tp->pci_dev->irq, dev);
+       rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
 }
 #endif
 
 static int rtl_open(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        int retval = -ENOMEM;
 
@@ -7917,9 +7712,8 @@ static int rtl_open(struct net_device *dev)
 
        rtl_request_firmware(tp);
 
-       retval = request_irq(pdev->irq, rtl8169_interrupt,
-                            (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
-                            dev->name, dev);
+       retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+                                dev->name);
        if (retval < 0)
                goto err_release_fw_2;
 
@@ -7947,7 +7741,7 @@ static int rtl_open(struct net_device *dev)
        tp->saved_wolopts = 0;
        pm_runtime_put_sync(&pdev->dev);
 
-       rtl8169_check_link_status(dev, tp, ioaddr);
+       rtl8169_check_link_status(dev, tp);
 out:
        return retval;
 
@@ -7971,7 +7765,6 @@ static void
 rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        struct rtl8169_counters *counters = tp->counters;
        unsigned int start;
@@ -7979,7 +7772,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
        pm_runtime_get_noresume(&pdev->dev);
 
        if (netif_running(dev) && pm_runtime_active(&pdev->dev))
-               rtl8169_rx_missed(dev, ioaddr);
+               rtl8169_rx_missed(dev);
 
        do {
                start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
@@ -8102,7 +7895,7 @@ static int rtl8169_runtime_suspend(struct device *device)
        rtl8169_net_suspend(dev);
 
        /* Update counters before going into runtime suspend */
-       rtl8169_rx_missed(dev, tp->mmio_addr);
+       rtl8169_rx_missed(dev);
        rtl8169_update_counters(dev);
 
        return 0;
@@ -8163,8 +7956,6 @@ static const struct dev_pm_ops rtl8169_pm_ops = {
 
 static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* WoL fails with 8168b when the receiver is disabled. */
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_11:
@@ -8172,9 +7963,9 @@ static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_17:
                pci_clear_master(tp->pci_dev);
 
-               RTL_W8(ChipCmd, CmdRxEnb);
+               RTL_W8(tp, ChipCmd, CmdRxEnb);
                /* PCI commit */
-               RTL_R8(ChipCmd);
+               RTL_R8(tp, ChipCmd);
                break;
        default:
                break;
@@ -8209,15 +8000,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
        struct net_device *dev = pci_get_drvdata(pdev);
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-           r8168_check_dash(tp)) {
+       if (r8168_check_dash(tp))
                rtl8168_driver_stop(tp);
-       }
 
        netif_napi_del(&tp->napi);
 
@@ -8256,7 +8040,7 @@ static const struct rtl_cfg_info {
        unsigned int region;
        unsigned int align;
        u16 event_slow;
-       unsigned features;
+       unsigned int has_gmii:1;
        const struct rtl_coalesce_info *coalesce_info;
        u8 default_ver;
 } rtl_cfg_infos [] = {
@@ -8265,7 +8049,7 @@ static const struct rtl_cfg_info {
                .region         = 1,
                .align          = 0,
                .event_slow     = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
-               .features       = RTL_FEATURE_GMII,
+               .has_gmii       = 1,
                .coalesce_info  = rtl_coalesce_info_8169,
                .default_ver    = RTL_GIGA_MAC_VER_01,
        },
@@ -8274,7 +8058,7 @@ static const struct rtl_cfg_info {
                .region         = 2,
                .align          = 8,
                .event_slow     = SYSErr | LinkChg | RxOverflow,
-               .features       = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+               .has_gmii       = 1,
                .coalesce_info  = rtl_coalesce_info_8168_8136,
                .default_ver    = RTL_GIGA_MAC_VER_11,
        },
@@ -8284,56 +8068,44 @@ static const struct rtl_cfg_info {
                .align          = 8,
                .event_slow     = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
                                  PCSTimeout,
-               .features       = RTL_FEATURE_MSI,
                .coalesce_info  = rtl_coalesce_info_8168_8136,
                .default_ver    = RTL_GIGA_MAC_VER_13,
        }
 };
 
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
-                           const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       unsigned msi = 0;
-       u8 cfg2;
+       unsigned int flags;
 
-       cfg2 = RTL_R8(Config2) & ~MSIEnable;
-       if (cfg->features & RTL_FEATURE_MSI) {
-               if (pci_enable_msi(tp->pci_dev)) {
-                       netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
-               } else {
-                       cfg2 |= MSIEnable;
-                       msi = RTL_FEATURE_MSI;
-               }
+       if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+               RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+               RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
+               RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+               flags = PCI_IRQ_LEGACY;
+       } else {
+               flags = PCI_IRQ_ALL_TYPES;
        }
-       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
-               RTL_W8(Config2, cfg2);
-       return msi;
+
+       return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
 }
 
 DECLARE_RTL_COND(rtl_link_list_ready_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(MCU) & LINK_LIST_RDY;
+       return RTL_R8(tp, MCU) & LINK_LIST_RDY;
 }
 
 DECLARE_RTL_COND(rtl_rxtx_empty_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return (RTL_R8(MCU) & RXTX_EMPTY) == RXTX_EMPTY;
+       return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
 }
 
 static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 data;
 
        tp->ocp_base = OCP_STD_PHY_BASE;
 
-       RTL_W32(MISC, RTL_R32(MISC) | RXDV_GATED_EN);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 
        if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
                return;
@@ -8341,9 +8113,9 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
        if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
                return;
 
-       RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+       RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
        msleep(1);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
        data = r8168_mac_ocp_read(tp, 0xe8de);
        data &= ~(1 << 14);
@@ -8397,7 +8169,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct rtl8169_private *tp;
        struct mii_if_info *mii;
        struct net_device *dev;
-       void __iomem *ioaddr;
        int chipset, i;
        int rc;
 
@@ -8423,7 +8194,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        mii->mdio_write = rtl_mdio_write;
        mii->phy_id_mask = 0x1f;
        mii->reg_num_mask = 0x1f;
-       mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+       mii->supports_gmii = cfg->has_gmii;
 
        /* disable ASPM completely as that causes random devices to stop
         * working, as well as full system hangs, for some PCIe device users */
@@ -8455,20 +8226,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                return -ENODEV;
        }
 
-       rc = pci_request_regions(pdev, MODULENAME);
+       rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
        if (rc < 0) {
-               netif_err(tp, probe, dev, "could not request regions\n");
+               netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
                return rc;
        }
 
-       /* ioremap MMIO region */
-       ioaddr = devm_ioremap(&pdev->dev, pci_resource_start(pdev, region),
-                             R8169_REGS_SIZE);
-       if (!ioaddr) {
-               netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
-               return -EIO;
-       }
-       tp->mmio_addr = ioaddr;
+       tp->mmio_addr = pcim_iomap_table(pdev)[region];
 
        if (!pci_is_pcie(pdev))
                netif_info(tp, probe, dev, "not PCI Express\n");
@@ -8518,41 +8282,14 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        chipset = tp->mac_version;
        tp->txd_version = rtl_chip_infos[chipset].txd_version;
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
-       RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
-       RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
-       switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_34:
-       case RTL_GIGA_MAC_VER_35:
-       case RTL_GIGA_MAC_VER_36:
-       case RTL_GIGA_MAC_VER_37:
-       case RTL_GIGA_MAC_VER_38:
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
-               if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
-                       tp->features |= RTL_FEATURE_WOL;
-               if ((RTL_R8(Config3) & LinkUp) != 0)
-                       tp->features |= RTL_FEATURE_WOL;
-               break;
-       default:
-               if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
-                       tp->features |= RTL_FEATURE_WOL;
-               break;
+       rc = rtl_alloc_irq(tp);
+       if (rc < 0) {
+               netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+               return rc;
        }
-       if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
-               tp->features |= RTL_FEATURE_WOL;
-       tp->features |= rtl_try_msi(tp, cfg);
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+
+       /* override BIOS settings, use userspace tools to enable WOL */
+       __rtl8169_set_wol(tp, 0);
 
        if (rtl_tbi_enabled(tp)) {
                tp->set_speed = rtl8169_set_speed_tbi;
@@ -8600,7 +8337,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                        rtl_rar_set(tp, (u8 *)mac_addr);
        }
        for (i = 0; i < ETH_ALEN; i++)
-               dev->dev_addr[i] = RTL_R8(MAC0 + i);
+               dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
 
        dev->ethtool_ops = &rtl8169_ethtool_ops;
        dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
@@ -8667,8 +8404,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_drvdata(pdev, dev);
 
        netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
-                  rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
-                  (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+                  rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
+                  (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
+                  pci_irq_vector(pdev, 0));
        if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
                netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
                           "tx checksumming: %s]\n",
@@ -8676,15 +8414,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                           rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
        }
 
-       if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-           r8168_check_dash(tp)) {
+       if (r8168_check_dash(tp))
                rtl8168_driver_start(tp);
-       }
 
        netif_carrier_off(dev);
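The r8169 hunks above follow two themes: the RTL_R*/RTL_W* register
accessors now take the driver-private struct instead of relying on a
local 'ioaddr' variable in every caller, and the hand-rolled MSI setup
is replaced by the managed pci_alloc_irq_vectors()/pci_request_irq()
API. A minimal sketch of the accessor shape the conversion implies (the
real macro definitions live earlier in r8169.c, outside this excerpt,
so treat the bodies below as an assumption):

	#include <linux/io.h>

	struct rtl8169_private {
		void __iomem *mmio_addr;	/* memory-mapped register window */
		/* ... */
	};

	/* Before: every caller needed a local 'ioaddr' in scope. */
	#define RTL_W8_OLD(reg, val8)	writeb((val8), ioaddr + (reg))
	#define RTL_R8_OLD(reg)		readb(ioaddr + (reg))

	/* After: the base address is derived from the private struct. */
	#define RTL_W8(tp, reg, val8)	writeb((val8), (tp)->mmio_addr + (reg))
	#define RTL_R8(tp, reg)		readb((tp)->mmio_addr + (reg))

On the interrupt side, callers stop caching pdev->irq: rtl_alloc_irq()
requests exactly one vector (legacy-only on chips up to
RTL_GIGA_MAC_VER_06, where MSIEnable is cleared in Config2), and
pci_request_irq()/pci_free_irq()/pci_irq_vector(pdev, 0) replace the
open-coded request_irq()/free_irq()/pdev->irq usage.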
 
index 96a27b0..b81f4fa 100644
@@ -1018,6 +1018,7 @@ struct ravb_private {
        u32 dirty_rx[NUM_RX_QUEUE];     /* Producer ring indices */
        u32 cur_tx[NUM_TX_QUEUE];
        u32 dirty_tx[NUM_TX_QUEUE];
+       u32 rx_buf_sz;                  /* Based on MTU+slack. */
        struct napi_struct napi[NUM_RX_QUEUE];
        struct work_struct work;
        /* MII transceiver section. */
index c87f57c..54a6265 100644
@@ -238,7 +238,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
                                               le32_to_cpu(desc->dptr)))
                                dma_unmap_single(ndev->dev.parent,
                                                 le32_to_cpu(desc->dptr),
-                                                PKT_BUF_SZ,
+                                                priv->rx_buf_sz,
                                                 DMA_FROM_DEVICE);
                }
                ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -300,9 +300,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
        for (i = 0; i < priv->num_rx_ring[q]; i++) {
                /* RX descriptor */
                rx_desc = &priv->rx_ring[q][i];
-               rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+               rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
                dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
-                                         PKT_BUF_SZ,
+                                         priv->rx_buf_sz,
                                          DMA_FROM_DEVICE);
                /* We just set the data size to 0 for a failed mapping which
                 * should prevent DMA from happening...
@@ -346,6 +346,10 @@ static int ravb_ring_init(struct net_device *ndev, int q)
        int ring_size;
        int i;
 
+       /* +16 gets room for the status from the card. */
+       priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+               ETH_HLEN + VLAN_HLEN;
+
        /* Allocate RX and TX skb rings */
        priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
                                  sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -355,7 +359,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
                goto error;
 
        for (i = 0; i < priv->num_rx_ring[q]; i++) {
-               skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1);
+               skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
                if (!skb)
                        goto error;
                ravb_set_buffer_align(skb);
@@ -586,7 +590,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
                        skb = priv->rx_skb[q][entry];
                        priv->rx_skb[q][entry] = NULL;
                        dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
-                                        PKT_BUF_SZ,
+                                        priv->rx_buf_sz,
                                         DMA_FROM_DEVICE);
                        get_ts &= (q == RAVB_NC) ?
                                        RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -619,11 +623,12 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
        for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
                entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
                desc = &priv->rx_ring[q][entry];
-               desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+               desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
 
                if (!priv->rx_skb[q][entry]) {
                        skb = netdev_alloc_skb(ndev,
-                                              PKT_BUF_SZ + RAVB_ALIGN - 1);
+                                              priv->rx_buf_sz +
+                                              RAVB_ALIGN - 1);
                        if (!skb)
                                break;  /* Better luck next round. */
                        ravb_set_buffer_align(skb);
@@ -1854,6 +1859,17 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
        return phy_mii_ioctl(phydev, req, cmd);
 }
 
+static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
+{
+       if (netif_running(ndev))
+               return -EBUSY;
+
+       ndev->mtu = new_mtu;
+       netdev_update_features(ndev);
+
+       return 0;
+}
+
 static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
 {
        struct ravb_private *priv = netdev_priv(ndev);
@@ -1895,6 +1911,7 @@ static const struct net_device_ops ravb_netdev_ops = {
        .ndo_set_rx_mode        = ravb_set_rx_mode,
        .ndo_tx_timeout         = ravb_tx_timeout,
        .ndo_do_ioctl           = ravb_do_ioctl,
+       .ndo_change_mtu         = ravb_change_mtu,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_set_features       = ravb_set_features,
@@ -2117,6 +2134,9 @@ static int ravb_probe(struct platform_device *pdev)
                goto out_release;
        }
 
+       ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+       ndev->min_mtu = ETH_MIN_MTU;
+
        /* Set function */
        ndev->netdev_ops = &ravb_netdev_ops;
        ndev->ethtool_ops = &ravb_ethtool_ops;
@@ -2255,9 +2275,6 @@ static int ravb_wol_setup(struct net_device *ndev)
        /* Enable MagicPacket */
        ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
 
-       /* Increased clock usage so device won't be suspended */
-       clk_enable(priv->clk);
-
        return enable_irq_wake(priv->emac_irq);
 }
 
@@ -2276,9 +2293,6 @@ static int ravb_wol_restore(struct net_device *ndev)
        if (ret < 0)
                return ret;
 
-       /* Restore clock usage count */
-       clk_disable(priv->clk);
-
        return disable_irq_wake(priv->emac_irq);
 }
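The ravb hunks above size the RX buffers from the configured MTU
instead of the fixed PKT_BUF_SZ, reject MTU changes while the interface
is running (the rings are sized at open time, so ravb_change_mtu() can
simply return -EBUSY), and bound max_mtu so a maximal frame still fits
the 2048-byte descriptor budget. A self-contained sketch of the
arithmetic, assuming PKT_BUF_SZ keeps the driver's 1538-byte default
(the constant is defined in ravb.h, not shown in this excerpt):

	#include <stdio.h>

	#define ETH_HLEN	14	/* Ethernet header */
	#define VLAN_HLEN	4	/* 802.1Q tag */
	#define ETH_FCS_LEN	4	/* frame checksum */
	#define PKT_BUF_SZ	1538	/* assumed ravb.h default */

	/* Mirrors the sizing added to ravb_ring_init() above. */
	static int ravb_rx_buf_sz(int mtu)
	{
		return (mtu <= 1492 ? PKT_BUF_SZ : mtu) + ETH_HLEN + VLAN_HLEN;
	}

	int main(void)
	{
		/* Default MTU 1500 exceeds 1492: 1500 + 14 + 4 = 1518 bytes,
		 * plus RAVB_ALIGN - 1 alignment slack at skb allocation time.
		 */
		printf("rx_buf_sz(1500) = %d\n", ravb_rx_buf_sz(1500));

		/* Probe-time bound: 2048 - (14 + 4 + 4) = 2026 bytes. */
		printf("max_mtu = %d\n",
		       2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN));
		return 0;
	}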
 
index a197e11..d3e1bc0 100644
@@ -40,7 +40,6 @@
 #include <linux/slab.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
-#include <linux/clk.h>
 #include <linux/sh_eth.h>
 #include <linux/of_mdio.h>
 
@@ -124,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
        [TSU_FWSL0]     = 0x0030,
        [TSU_FWSL1]     = 0x0034,
        [TSU_FWSLC]     = 0x0038,
-       [TSU_QTAG0]     = 0x0040,
-       [TSU_QTAG1]     = 0x0044,
+       [TSU_QTAGM0]    = 0x0040,
+       [TSU_QTAGM1]    = 0x0044,
        [TSU_FWSR]      = 0x0050,
        [TSU_FWINMK]    = 0x0054,
        [TSU_ADQT0]     = 0x0048,
@@ -753,6 +752,7 @@ static struct sh_eth_cpu_data sh7757_data = {
        .rpadir         = 1,
        .rpadir_value   = 2 << 16,
        .rtrate         = 1,
+       .dual_port      = 1,
 };
 
 #define SH_GIGA_ETH_BASE       0xfee00000UL
@@ -831,6 +831,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
        .no_trimd       = 1,
        .no_ade         = 1,
        .tsu            = 1,
+       .dual_port      = 1,
 };
 
 /* SH7734 */
@@ -901,6 +902,7 @@ static struct sh_eth_cpu_data sh7763_data = {
        .tsu            = 1,
        .irq_flags      = IRQF_SHARED,
        .magic          = 1,
+       .dual_port      = 1,
 };
 
 static struct sh_eth_cpu_data sh7619_data = {
@@ -933,6 +935,7 @@ static struct sh_eth_cpu_data sh771x_data = {
                          EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
        .tsu            = 1,
+       .dual_port      = 1,
 };
 
 static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
@@ -962,20 +965,16 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
 
 static int sh_eth_check_reset(struct net_device *ndev)
 {
-       int ret = 0;
-       int cnt = 100;
+       int cnt;
 
-       while (cnt > 0) {
+       for (cnt = 100; cnt > 0; cnt--) {
                if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
-                       break;
+                       return 0;
                mdelay(1);
-               cnt--;
        }
-       if (cnt <= 0) {
-               netdev_err(ndev, "Device reset failed\n");
-               ret = -ETIMEDOUT;
-       }
-       return ret;
+
+       netdev_err(ndev, "Device reset failed\n");
+       return -ETIMEDOUT;
 }
 
 static int sh_eth_reset(struct net_device *ndev)
@@ -2102,8 +2101,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
                add_tsu_reg(TSU_FWSL0);
                add_tsu_reg(TSU_FWSL1);
                add_tsu_reg(TSU_FWSLC);
-               add_tsu_reg(TSU_QTAG0);
-               add_tsu_reg(TSU_QTAG1);
                add_tsu_reg(TSU_QTAGM0);
                add_tsu_reg(TSU_QTAGM1);
                add_tsu_reg(TSU_FWSR);
@@ -2304,7 +2301,7 @@ static void sh_eth_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
        wol->supported = 0;
        wol->wolopts = 0;
 
-       if (mdp->cd->magic && mdp->clk) {
+       if (mdp->cd->magic) {
                wol->supported = WAKE_MAGIC;
                wol->wolopts = mdp->wol_enabled ? WAKE_MAGIC : 0;
        }
@@ -2314,7 +2311,7 @@ static int sh_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
 
-       if (!mdp->cd->magic || !mdp->clk || wol->wolopts & ~WAKE_MAGIC)
+       if (!mdp->cd->magic || wol->wolopts & ~WAKE_MAGIC)
                return -EOPNOTSUPP;
 
        mdp->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
@@ -2922,7 +2919,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev,
 /* SuperH's TSU register init function */
 static void sh_eth_tsu_init(struct sh_eth_private *mdp)
 {
-       if (sh_eth_is_rz_fast_ether(mdp)) {
+       if (!mdp->cd->dual_port) {
                sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
                sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
                                 TSU_FWSLC);    /* Enable POST registers */
@@ -2939,13 +2936,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
        sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
        sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
        sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
-       if (sh_eth_is_gether(mdp)) {
-               sh_eth_tsu_write(mdp, 0, TSU_QTAG0);    /* Disable QTAG(0->1) */
-               sh_eth_tsu_write(mdp, 0, TSU_QTAG1);    /* Disable QTAG(1->0) */
-       } else {
-               sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);   /* Disable QTAG(0->1) */
-               sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);   /* Disable QTAG(1->0) */
-       }
+       sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);   /* Disable QTAG(0->1) */
+       sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);   /* Disable QTAG(1->0) */
        sh_eth_tsu_write(mdp, 0, TSU_FWSR);     /* all interrupt status clear */
        sh_eth_tsu_write(mdp, 0, TSU_FWINMK);   /* Disable all interrupt */
        sh_eth_tsu_write(mdp, 0, TSU_TEN);      /* Disable all CAM entry */
@@ -3153,11 +3145,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
                goto out_release;
        }
 
-       /* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
-       mdp->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(mdp->clk))
-               mdp->clk = NULL;
-
        ndev->base_addr = res->start;
 
        spin_lock_init(&mdp->lock);
@@ -3278,7 +3265,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
        if (ret)
                goto out_napi_del;
 
-       if (mdp->cd->magic && mdp->clk)
+       if (mdp->cd->magic)
                device_set_wakeup_capable(&pdev->dev, 1);
 
        /* print device information */
@@ -3331,9 +3318,6 @@ static int sh_eth_wol_setup(struct net_device *ndev)
        /* Enable MagicPacket */
        sh_eth_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
 
-       /* Increased clock usage so device won't be suspended */
-       clk_enable(mdp->clk);
-
        return enable_irq_wake(ndev->irq);
 }
 
@@ -3359,9 +3343,6 @@ static int sh_eth_wol_restore(struct net_device *ndev)
        if (ret < 0)
                return ret;
 
-       /* Restore clock usage count */
-       clk_disable(mdp->clk);
-
        return disable_irq_wake(ndev->irq);
 }
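
The sh_eth_check_reset() rewrite above is a pure control-flow cleanup of a bounded poll: return success the moment the hardware reports done, and let the loop fall through to a single timeout path, removing the ret/cnt bookkeeping. The shape, with a hypothetical hw_reset_done() standing in for the EDMR read:

    static int poll_reset_done(struct net_device *ndev)
    {
            int cnt;

            for (cnt = 100; cnt > 0; cnt--) {
                    if (hw_reset_done(ndev))
                            return 0;       /* success exits immediately */
                    mdelay(1);
            }

            netdev_err(ndev, "Device reset failed\n");
            return -ETIMEDOUT;              /* single failure path */
    }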
 
index a6753cc..5bbaf9e 100644
@@ -118,8 +118,8 @@ enum {
        TSU_FWSL0,
        TSU_FWSL1,
        TSU_FWSLC,
-       TSU_QTAG0,
-       TSU_QTAG1,
+       TSU_QTAG0,                      /* Same as TSU_QTAGM0 */
+       TSU_QTAG1,                      /* Same as TSU_QTAGM1 */
        TSU_QTAGM0,
        TSU_QTAGM1,
        TSU_FWSR,
@@ -509,6 +509,7 @@ struct sh_eth_cpu_data {
        unsigned rmiimode:1;    /* EtherC has RMIIMODE register */
        unsigned rtrate:1;      /* EtherC has RTRATE register */
        unsigned magic:1;       /* EtherC has ECMR.MPDE and ECSR.MPD */
+       unsigned dual_port:1;   /* Dual EtherC/E-DMAC */
 };
 
 struct sh_eth_private {
index 30a1136..4824fcf 100644
@@ -81,7 +81,6 @@ enum ef4_loopback_mode {
                            (1 << LOOPBACK_XAUI) |              \
                            (1 << LOOPBACK_GMII) |              \
                            (1 << LOOPBACK_SGMII) |             \
-                           (1 << LOOPBACK_SGMII) |             \
                            (1 << LOOPBACK_XGBR) |              \
                            (1 << LOOPBACK_XFI) |               \
                            (1 << LOOPBACK_XAUI_FAR) |          \
index 63aca9f..4c2f612 100644
@@ -20,7 +20,7 @@ if NET_VENDOR_SMSC
 
 config SMC9194
        tristate "SMC 9194 support"
-       depends on (ISA || MAC && BROKEN)
+       depends on ISA
        select CRC32
        ---help---
          This is support for the SMC9xxx based Ethernet cards. Choose this
index 5270d26..2d5d4ae 100644
 #define MUX_CLK_NUM_PARENTS            2
 
 struct meson8b_dwmac {
-       struct platform_device  *pdev;
-
+       struct device           *dev;
        void __iomem            *regs;
-
        phy_interface_t         phy_mode;
+       struct clk              *rgmii_tx_clk;
+       u32                     tx_delay_ns;
+};
 
+struct meson8b_dwmac_clk_configs {
        struct clk_mux          m250_mux;
-       struct clk              *m250_mux_clk;
-       struct clk              *m250_mux_parent[MUX_CLK_NUM_PARENTS];
-
        struct clk_divider      m250_div;
-       struct clk              *m250_div_clk;
-
        struct clk_fixed_factor fixed_div2;
-       struct clk              *fixed_div2_clk;
-
        struct clk_gate         rgmii_tx_en;
-       struct clk              *rgmii_tx_en_clk;
-
-       u32                     tx_delay_ns;
 };
 
 static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
@@ -82,106 +74,99 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
        writel(data, dwmac->regs + reg);
 }
 
-static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
+                                             const char *name_suffix,
+                                             const char **parent_names,
+                                             int num_parents,
+                                             const struct clk_ops *ops,
+                                             struct clk_hw *hw)
 {
        struct clk_init_data init;
-       int i, ret;
-       struct device *dev = &dwmac->pdev->dev;
        char clk_name[32];
-       const char *clk_div_parents[1];
-       const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
+
+       snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dwmac->dev),
+                name_suffix);
+
+       init.name = clk_name;
+       init.ops = ops;
+       init.flags = CLK_SET_RATE_PARENT;
+       init.parent_names = parent_names;
+       init.num_parents = num_parents;
+
+       hw->init = &init;
+
+       return devm_clk_register(dwmac->dev, hw);
+}
+
+static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+{
+       int i, ret;
+       struct clk *clk;
+       struct device *dev = dwmac->dev;
+       const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+       struct meson8b_dwmac_clk_configs *clk_configs;
+
+       clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+       if (!clk_configs)
+               return -ENOMEM;
 
        /* get the mux parents from DT */
        for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
                char name[16];
 
                snprintf(name, sizeof(name), "clkin%d", i);
-               dwmac->m250_mux_parent[i] = devm_clk_get(dev, name);
-               if (IS_ERR(dwmac->m250_mux_parent[i])) {
-                       ret = PTR_ERR(dwmac->m250_mux_parent[i]);
+               clk = devm_clk_get(dev, name);
+               if (IS_ERR(clk)) {
+                       ret = PTR_ERR(clk);
                        if (ret != -EPROBE_DEFER)
                                dev_err(dev, "Missing clock %s\n", name);
                        return ret;
                }
 
-               mux_parent_names[i] =
-                       __clk_get_name(dwmac->m250_mux_parent[i]);
+               mux_parent_names[i] = __clk_get_name(clk);
        }
 
-       /* create the m250_mux */
-       snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev));
-       init.name = clk_name;
-       init.ops = &clk_mux_ops;
-       init.flags = CLK_SET_RATE_PARENT;
-       init.parent_names = mux_parent_names;
-       init.num_parents = MUX_CLK_NUM_PARENTS;
-
-       dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0;
-       dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
-       dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
-       dwmac->m250_mux.flags = 0;
-       dwmac->m250_mux.table = NULL;
-       dwmac->m250_mux.hw.init = &init;
-
-       dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw);
-       if (WARN_ON(IS_ERR(dwmac->m250_mux_clk)))
-               return PTR_ERR(dwmac->m250_mux_clk);
-
-       /* create the m250_div */
-       snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
-       init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
-       init.ops = &clk_divider_ops;
-       init.flags = CLK_SET_RATE_PARENT;
-       clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
-       init.parent_names = clk_div_parents;
-       init.num_parents = ARRAY_SIZE(clk_div_parents);
-
-       dwmac->m250_div.reg = dwmac->regs + PRG_ETH0;
-       dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
-       dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
-       dwmac->m250_div.hw.init = &init;
-       dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+       clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0;
+       clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
+       clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
+       clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names,
+                                        MUX_CLK_NUM_PARENTS, &clk_mux_ops,
+                                        &clk_configs->m250_mux.hw);
+       if (WARN_ON(IS_ERR(clk)))
+               return PTR_ERR(clk);
+
+       parent_name = __clk_get_name(clk);
+       clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+       clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+       clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+       clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
                                CLK_DIVIDER_ALLOW_ZERO |
                                CLK_DIVIDER_ROUND_CLOSEST;
-
-       dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw);
-       if (WARN_ON(IS_ERR(dwmac->m250_div_clk)))
-               return PTR_ERR(dwmac->m250_div_clk);
-
-       /* create the fixed_div2 */
-       snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev));
-       init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
-       init.ops = &clk_fixed_factor_ops;
-       init.flags = CLK_SET_RATE_PARENT;
-       clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk);
-       init.parent_names = clk_div_parents;
-       init.num_parents = ARRAY_SIZE(clk_div_parents);
-
-       dwmac->fixed_div2.mult = 1;
-       dwmac->fixed_div2.div = 2;
-       dwmac->fixed_div2.hw.init = &init;
-
-       dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw);
-       if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk)))
-               return PTR_ERR(dwmac->fixed_div2_clk);
-
-       /* create the rgmii_tx_en */
-       init.name = devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en",
-                                  dev_name(dev));
-       init.ops = &clk_gate_ops;
-       init.flags = CLK_SET_RATE_PARENT;
-       clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk);
-       init.parent_names = clk_div_parents;
-       init.num_parents = ARRAY_SIZE(clk_div_parents);
-
-       dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
-       dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
-       dwmac->rgmii_tx_en.hw.init = &init;
-
-       dwmac->rgmii_tx_en_clk = devm_clk_register(dev,
-                                                  &dwmac->rgmii_tx_en.hw);
-       if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk)))
-               return PTR_ERR(dwmac->rgmii_tx_en_clk);
+       clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+                                        &clk_divider_ops,
+                                        &clk_configs->m250_div.hw);
+       if (WARN_ON(IS_ERR(clk)))
+               return PTR_ERR(clk);
+
+       parent_name = __clk_get_name(clk);
+       clk_configs->fixed_div2.mult = 1;
+       clk_configs->fixed_div2.div = 2;
+       clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1,
+                                        &clk_fixed_factor_ops,
+                                        &clk_configs->fixed_div2.hw);
+       if (WARN_ON(IS_ERR(clk)))
+               return PTR_ERR(clk);
+
+       parent_name = __clk_get_name(clk);
+       clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
+       clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
+       clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1,
+                                        &clk_gate_ops,
+                                        &clk_configs->rgmii_tx_en.hw);
+       if (WARN_ON(IS_ERR(clk)))
+               return PTR_ERR(clk);
+
+       dwmac->rgmii_tx_clk = clk;
 
        return 0;
 }
@@ -219,19 +204,23 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
                 * a register) based on the line-speed (125MHz for Gbit speeds,
                 * 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s).
                 */
-               ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000);
+               ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000);
                if (ret) {
-                       dev_err(&dwmac->pdev->dev,
+                       dev_err(dwmac->dev,
                                "failed to set RGMII TX clock\n");
                        return ret;
                }
 
-               ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk);
+               ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
                if (ret) {
-                       dev_err(&dwmac->pdev->dev,
+                       dev_err(dwmac->dev,
                                "failed to enable the RGMII TX clock\n");
                        return ret;
                }
+
+               ret = devm_add_action_or_reset(dwmac->dev,
+                                       (void(*)(void *))clk_disable_unprepare,
+                                       dwmac->rgmii_tx_clk);
+               if (ret)
+                       return ret;
                break;
 
        case PHY_INTERFACE_MODE_RMII:
@@ -251,7 +240,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
                break;
 
        default:
-               dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n",
+               dev_err(dwmac->dev, "unsupported phy-mode %s\n",
                        phy_modes(dwmac->phy_mode));
                return -EINVAL;
        }
@@ -292,7 +281,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
                goto err_remove_config_dt;
        }
 
-       dwmac->pdev = pdev;
+       dwmac->dev = &pdev->dev;
        dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
        if (dwmac->phy_mode < 0) {
                dev_err(&pdev->dev, "missing phy-mode property\n");
@@ -317,29 +306,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 
        ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
        if (ret)
-               goto err_clk_disable;
+               goto err_remove_config_dt;
 
        return 0;
 
-err_clk_disable:
-       if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
-               clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
 err_remove_config_dt:
        stmmac_remove_config_dt(pdev, plat_dat);
 
        return ret;
 }
 
-static int meson8b_dwmac_remove(struct platform_device *pdev)
-{
-       struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-
-       if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
-               clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
-
-       return stmmac_pltfr_remove(pdev);
-}
-
 static const struct of_device_id meson8b_dwmac_match[] = {
        { .compatible = "amlogic,meson8b-dwmac" },
        { .compatible = "amlogic,meson-gxbb-dwmac" },
@@ -349,7 +325,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
 
 static struct platform_driver meson8b_dwmac_driver = {
        .probe  = meson8b_dwmac_probe,
-       .remove = meson8b_dwmac_remove,
+       .remove = stmmac_pltfr_remove,
        .driver = {
                .name           = "meson8b-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
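
The meson8b rework above collapses four near-identical clock registrations into one helper, moves the clk_* bookkeeping structs out of the long-lived meson8b_dwmac struct, and hands cleanup to devm_add_action_or_reset(), which is what lets the driver-specific .remove callback be replaced by plain stmmac_pltfr_remove. A condensed sketch of the helper idiom (names mirror the diff; treat it as illustrative, not the complete driver):

    /* Sketch: shared init code for registering one step of a clock chain */
    static struct clk *register_clk_step(struct device *dev, const char *suffix,
                                         const char **parents, int num_parents,
                                         const struct clk_ops *ops,
                                         struct clk_hw *hw)
    {
            struct clk_init_data init;
            char name[32];

            snprintf(name, sizeof(name), "%s#%s", dev_name(dev), suffix);
            init.name = name;
            init.ops = ops;
            init.flags = CLK_SET_RATE_PARENT;
            init.parent_names = parents;
            init.num_parents = num_parents;
            hw->init = &init;

            /* devm_clk_register() ties the clock's lifetime to the device,
             * so no explicit unregister is needed in .remove */
            return devm_clk_register(dev, hw);
    }

Because the enable is undone by the devm action (clk_disable_unprepare on unbind or probe failure), the error path in probe shrinks to stmmac_remove_config_dt() alone.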
index 63795ec..46b9ae2 100644
@@ -120,7 +120,7 @@ static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
        writel(value, ioaddr + base_register);
 }
 
-static void dwmac4_tx_queue_routing(struct mac_device_info *hw,
+static void dwmac4_rx_queue_routing(struct mac_device_info *hw,
                                    u8 packet, u32 queue)
 {
        void __iomem *ioaddr = hw->pcsr;
@@ -713,7 +713,7 @@ static const struct stmmac_ops dwmac4_ops = {
        .rx_queue_enable = dwmac4_rx_queue_enable,
        .rx_queue_prio = dwmac4_rx_queue_priority,
        .tx_queue_prio = dwmac4_tx_queue_priority,
-       .rx_queue_routing = dwmac4_tx_queue_routing,
+       .rx_queue_routing = dwmac4_rx_queue_routing,
        .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
        .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
        .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
@@ -744,7 +744,7 @@ static const struct stmmac_ops dwmac410_ops = {
        .rx_queue_enable = dwmac4_rx_queue_enable,
        .rx_queue_prio = dwmac4_rx_queue_priority,
        .tx_queue_prio = dwmac4_tx_queue_priority,
-       .rx_queue_routing = dwmac4_tx_queue_routing,
+       .rx_queue_routing = dwmac4_rx_queue_routing,
        .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
        .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
        .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
index c728ffa..2a6521d 100644
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
 
 static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
 {
+       p->des0 = 0;
+       p->des1 = 0;
        p->des2 = 0;
        p->des3 = 0;
 }
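
For context on the two-line change above (reasoning inferred from the hunk; the field layout is from the diff): on dwmac4, des0/des1 of a TX descriptor can still hold buffer addresses from a previous use, e.g. a TSO frame, so releasing a descriptor must scrub all four words or stale addresses leak into its next use:

    static void release_tx_desc(struct dma_desc *p)
    {
            p->des0 = 0;    /* may hold a stale buffer address */
            p->des1 = 0;
            p->des2 = 0;
            p->des3 = 0;    /* status/own word */
    }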
index a916e13..75161e1 100644
@@ -58,6 +58,7 @@ struct stmmac_tx_queue {
        unsigned int dirty_tx;
        dma_addr_t dma_tx_phy;
        u32 tx_tail_addr;
+       u32 mss;
 };
 
 struct stmmac_rx_queue {
@@ -138,7 +139,6 @@ struct stmmac_priv {
        spinlock_t ptp_lock;
        void __iomem *mmcaddr;
        void __iomem *ptpaddr;
-       u32 mss;
 
 #ifdef CONFIG_DEBUG_FS
        struct dentry *dbgfs_dir;
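
Moving mss from stmmac_priv into stmmac_tx_queue above makes the cached MSS context per TX queue instead of global, so TSO streams on different queues can no longer clobber each other's cached value. The resets added in init_dma_tx_desc_rings(), stmmac_tx_err() and stmmac_reset_queues_param() further down keep the cache honest across ring re-initialisation. The comparison site then becomes (names simplified from the diff):

    /* Sketch: emit an MSS context descriptor only when this queue's MSS changes */
    if (mss != tx_q->mss) {
            set_mss(mss_desc, mss);         /* context descriptor ahead of the data */
            tx_q->mss = mss;                /* cached per queue, not per device */
    }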
index 7ad8414..a9856a8 100644
@@ -1355,6 +1355,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
 
                tx_q->dirty_tx = 0;
                tx_q->cur_tx = 0;
+               tx_q->mss = 0;
 
                netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
        }
@@ -1843,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
                if (unlikely(status & tx_dma_own))
                        break;
 
+               /* Make sure descriptor fields are read after reading
+                * the own bit.
+                */
+               dma_rmb();
+
                /* Just consider the last segment and ...*/
                if (likely(!(status & tx_not_ls))) {
                        /* ... verify the status error condition */
@@ -1946,6 +1952,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
                                                     (i == DMA_TX_SIZE - 1));
        tx_q->dirty_tx = 0;
        tx_q->cur_tx = 0;
+       tx_q->mss = 0;
        netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
        stmmac_start_tx_dma(priv, chan);
 
@@ -2430,7 +2437,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
                        continue;
 
                packet = priv->plat->rx_queues_cfg[queue].pkt_route;
-               priv->hw->mac->rx_queue_prio(priv->hw, packet, queue);
+               priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
        }
 }
 
@@ -2632,7 +2639,6 @@ static int stmmac_open(struct net_device *dev)
 
        priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
        priv->rx_copybreak = STMMAC_RX_COPYBREAK;
-       priv->mss = 0;
 
        ret = alloc_dma_desc_resources(priv);
        if (ret < 0) {
@@ -2793,6 +2799,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
 
        while (tmp_len > 0) {
                tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+               WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
                desc = tx_q->dma_tx + tx_q->cur_tx;
 
                desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
@@ -2872,11 +2879,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        mss = skb_shinfo(skb)->gso_size;
 
        /* set new MSS value if needed */
-       if (mss != priv->mss) {
+       if (mss != tx_q->mss) {
                mss_desc = tx_q->dma_tx + tx_q->cur_tx;
                priv->hw->desc->set_mss(mss_desc, mss);
-               priv->mss = mss;
+               tx_q->mss = mss;
                tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+               WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
        }
 
        if (netif_msg_tx_queued(priv)) {
@@ -2887,6 +2895,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        first_entry = tx_q->cur_tx;
+       WARN_ON(tx_q->tx_skbuff[first_entry]);
 
        desc = tx_q->dma_tx + first_entry;
        first = desc;
@@ -2926,7 +2935,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
                tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
                tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
-               tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
                tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
        }
 
@@ -2980,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                        tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
 
        /* If context desc is used to change MSS */
-       if (mss_desc)
+       if (mss_desc) {
+               /* Make sure that first descriptor has been completely
+                * written, including its own bit. This is because MSS is
+                * actually before first descriptor, so we need to make
+                * sure that MSS's own bit is the last thing written.
+                */
+               dma_wmb();
                priv->hw->desc->set_tx_owner(mss_desc);
+       }
 
        /* The own bit must be the latest setting done when prepare the
         * descriptor and then barrier is needed to make sure that
         * all is coherent before granting the DMA engine.
         */
-       dma_wmb();
+       wmb();
 
        if (netif_msg_pktdata(priv)) {
                pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3062,6 +3077,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        entry = tx_q->cur_tx;
        first_entry = entry;
+       WARN_ON(tx_q->tx_skbuff[first_entry]);
 
        csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
 
@@ -3090,6 +3106,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                bool last_segment = (i == (nfrags - 1));
 
                entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+               WARN_ON(tx_q->tx_skbuff[entry]);
 
                if (likely(priv->extend_desc))
                        desc = (struct dma_desc *)(tx_q->dma_etx + entry);
@@ -3101,8 +3118,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                if (dma_mapping_error(priv->device, des))
                        goto dma_map_err; /* should reuse desc w/o issues */
 
-               tx_q->tx_skbuff[entry] = NULL;
-
                tx_q->tx_skbuff_dma[entry].buf = des;
                if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
                        desc->des0 = cpu_to_le32(des);
@@ -3211,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                 * descriptor and then barrier is needed to make sure that
                 * all is coherent before granting the DMA engine.
                 */
-               dma_wmb();
+               wmb();
        }
 
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
@@ -4436,6 +4451,7 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv)
 
                tx_q->cur_tx = 0;
                tx_q->dirty_tx = 0;
+               tx_q->mss = 0;
        }
 }
 
@@ -4481,11 +4497,6 @@ int stmmac_resume(struct device *dev)
 
        stmmac_reset_queues_param(priv);
 
-       /* reset private mss value to force mss context settings at
-        * next tso xmit (only used for gmac4).
-        */
-       priv->mss = 0;
-
        stmmac_clear_descriptors(priv);
 
        stmmac_hw_setup(ndev, false);
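
The barrier changes above implement a three-point ordering protocol for the descriptor rings shared with the DMA engine. The dma_rmb()/dma_wmb()/wmb() calls are the real kernel primitives; the surrounding code here is a compressed, illustrative sketch:

    /* Reclaim: never read descriptor fields until after the own bit */
    if (status & tx_dma_own)
            return;                 /* hardware still owns it */
    dma_rmb();                      /* order field reads after the own-bit read */
    /* ... safe to read length/status fields now ... */

    /* Submit (TSO): the MSS context descriptor sits *before* the first data
     * descriptor, so its own bit must be the very last descriptor store */
    dma_wmb();
    set_tx_owner(mss_desc);

    /* Doorbell: descriptor stores go to coherent memory, the tail pointer
     * is MMIO, so a full wmb() orders the two domains */
    wmb();
    enable_dma_transmission();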
index 05f122b..ebd3e5f 100644
@@ -135,13 +135,14 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
  * stmmac_mtl_setup - parse DT parameters for multiple queues configuration
  * @pdev: platform device
  */
-static void stmmac_mtl_setup(struct platform_device *pdev,
-                            struct plat_stmmacenet_data *plat)
+static int stmmac_mtl_setup(struct platform_device *pdev,
+                           struct plat_stmmacenet_data *plat)
 {
        struct device_node *q_node;
        struct device_node *rx_node;
        struct device_node *tx_node;
        u8 queue = 0;
+       int ret = 0;
 
        /* For backwards-compatibility with device trees that don't have any
         * snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
@@ -159,12 +160,12 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
 
        rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
        if (!rx_node)
-               return;
+               return ret;
 
        tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
        if (!tx_node) {
                of_node_put(rx_node);
-               return;
+               return ret;
        }
 
        /* Processing RX queues common config */
@@ -220,6 +221,11 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
 
                queue++;
        }
+       if (queue != plat->rx_queues_to_use) {
+               ret = -EINVAL;
+               dev_err(&pdev->dev, "Not all RX queues were configured\n");
+               goto out;
+       }
 
        /* Processing TX queues common config */
        if (of_property_read_u32(tx_node, "snps,tx-queues-to-use",
@@ -281,10 +287,18 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
 
                queue++;
        }
+       if (queue != plat->tx_queues_to_use) {
+               ret = -EINVAL;
+               dev_err(&pdev->dev, "Not all TX queues were configured\n");
+               goto out;
+       }
 
+out:
        of_node_put(rx_node);
        of_node_put(tx_node);
        of_node_put(q_node);
+
+       return ret;
 }
 
 /**
@@ -376,6 +390,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
        struct device_node *np = pdev->dev.of_node;
        struct plat_stmmacenet_data *plat;
        struct stmmac_dma_cfg *dma_cfg;
+       int rc;
 
        plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
        if (!plat)
@@ -402,8 +417,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
                dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
 
        /* To Configure PHY by using all device-tree supported properties */
-       if (stmmac_dt_phy(plat, np, &pdev->dev))
-               return ERR_PTR(-ENODEV);
+       rc = stmmac_dt_phy(plat, np, &pdev->dev);
+       if (rc)
+               return ERR_PTR(rc);
 
        of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size);
 
@@ -499,7 +515,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 
        plat->axi = stmmac_axi_setup(pdev);
 
-       stmmac_mtl_setup(pdev, plat);
+       rc = stmmac_mtl_setup(pdev, plat);
+       if (rc) {
+               stmmac_remove_config_dt(pdev, plat);
+               return ERR_PTR(rc);
+       }
 
        /* clock setup */
        plat->stmmac_clk = devm_clk_get(&pdev->dev,
index b919e89..516dd59 100644
@@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = {
        .exit_batch = geneve_exit_batch_net,
        .id   = &geneve_net_id,
        .size = sizeof(struct geneve_net),
+       .async = true,
 };
 
 static int __init geneve_init_module(void)
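
The .async = true lines added here and in the gtp hunk below opt these pernet_operations into this kernel generation's parallel net-namespace setup/teardown: their init/exit methods no longer serialize on the global net_mutex. Setting the flag is effectively an assertion that the ops only touch their own per-net state. Shape of such a registration (fields other than .async are illustrative):

    static struct pernet_operations example_net_ops = {
            .init   = example_init_net,     /* touches only its own per-net data */
            .exit   = example_exit_net,
            .id     = &example_net_id,
            .size   = sizeof(struct example_net),
            .async  = true,                 /* may run in parallel with other pernet ops */
    };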
index f38e32a..127edd2 100644
@@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = {
        .exit   = gtp_net_exit,
        .id     = &gtp_net_id,
        .size   = sizeof(struct gtp_net),
+       .async  = true,
 };
 
 static int __init gtp_init(void)
index 303ba41..8782f56 100644
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
          exposes a debugfs node for each CA8210 instance which allows
          direct use of the Cascoda API, exposing the 802.15.4 MAC
          management entities.
+
+config IEEE802154_MCR20A
+       tristate "MCR20A transceiver driver"
+       depends on IEEE802154_DRIVERS && MAC802154
+       depends on SPI
+       ---help---
+         Say Y here to enable the MCR20A SPI 802.15.4 wireless
+         controller.
+
+         This driver can also be built as a module. To do so, say M here.
+         The module will be called 'mcr20a'.
index bea1de5..104744d 100644
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
 obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
 obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
 obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644
index 0000000..d9eb22a
--- /dev/null
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define        SPI_COMMAND_BUFFER              3
+
+#define REGISTER_READ                  BIT(7)
+#define REGISTER_WRITE                 (0 << 7)
+#define REGISTER_ACCESS                        (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS       BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS                BIT(5)
+
+#define MCR20A_WRITE_REG(x)            (x)
+#define MCR20A_READ_REG(x)             (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF   (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF  (0x40)
+
+#define MCR20A_CMD_REG         0x80
+#define MCR20A_CMD_REG_MASK    0x3f
+#define MCR20A_CMD_WRITE       0x40
+#define MCR20A_CMD_FB          0x20
+
+/* Number of Interrupt Request Status Register */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+       MCR20A_CCA_ED,    // energy detect - CCA bit not active,
+                         // not to be used for T and CCCA sequences
+       MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+       MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+       MCR20A_CCA_MODE3
+};
+
+enum {
+       MCR20A_XCVSEQ_IDLE      = 0x00,
+       MCR20A_XCVSEQ_RX        = 0x01,
+       MCR20A_XCVSEQ_TX        = 0x02,
+       MCR20A_XCVSEQ_CCA       = 0x03,
+       MCR20A_XCVSEQ_TR        = 0x04,
+       MCR20A_XCVSEQ_CCCA      = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define        MCR20A_MIN_CHANNEL      (11)
+#define        MCR20A_MAX_CHANNEL      (26)
+#define        MCR20A_CHANNEL_SPACING  (5)
+
+/* MCR20A CCA Threshold constants */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
+static const u8  PLL_INT[16] = {
+       /* 2405 */ 0x0B,        /* 2410 */ 0x0B,        /* 2415 */ 0x0B,
+       /* 2420 */ 0x0B,        /* 2425 */ 0x0B,        /* 2430 */ 0x0B,
+       /* 2435 */ 0x0C,        /* 2440 */ 0x0C,        /* 2445 */ 0x0C,
+       /* 2450 */ 0x0C,        /* 2455 */ 0x0C,        /* 2460 */ 0x0C,
+       /* 2465 */ 0x0D,        /* 2470 */ 0x0D,        /* 2475 */ 0x0D,
+       /* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+       /* 2405 */ 0x28,        /* 2410 */ 0x50,        /* 2415 */ 0x78,
+       /* 2420 */ 0xA0,        /* 2425 */ 0xC8,        /* 2430 */ 0xF0,
+       /* 2435 */ 0x18,        /* 2440 */ 0x40,        /* 2445 */ 0x68,
+       /* 2450 */ 0x90,        /* 2455 */ 0xB8,        /* 2460 */ 0xE0,
+       /* 2465 */ 0x08,        /* 2470 */ 0x30,        /* 2475 */ 0x58,
+       /* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+       { IAR_MISC_PAD_CTRL,    0x02 },
+       { IAR_VCO_CTRL1,        0xB3 },
+       { IAR_VCO_CTRL2,        0x07 },
+       { IAR_PA_TUNING,        0x71 },
+       { IAR_CHF_IBUF,         0x2F },
+       { IAR_CHF_QBUF,         0x2F },
+       { IAR_CHF_IRIN,         0x24 },
+       { IAR_CHF_QRIN,         0x24 },
+       { IAR_CHF_IL,           0x24 },
+       { IAR_CHF_QL,           0x24 },
+       { IAR_CHF_CC1,          0x32 },
+       { IAR_CHF_CCL,          0x1D },
+       { IAR_CHF_CC2,          0x2D },
+       { IAR_CHF_IROUT,        0x24 },
+       { IAR_CHF_QROUT,        0x24 },
+       { IAR_PA_CAL,           0x28 },
+       { IAR_AGC_THR1,         0x55 },
+       { IAR_AGC_THR2,         0x2D },
+       { IAR_ATT_RSSI1,        0x5F },
+       { IAR_ATT_RSSI2,        0x8F },
+       { IAR_RSSI_OFFSET,      0x61 },
+       { IAR_CHF_PMA_GAIN,     0x03 },
+       { IAR_CCA1_THRESH,      0x50 },
+       { IAR_CORR_NVAL,        0x13 },
+       { IAR_ACKDELAY,         0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+       int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF         (127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE       0x01
+#define MCR20A_DAR_READ                0x00
+#define MCR20A_DAR_NUMREGS     0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS      0x80
+#define MCR20A_IAR_NUMREGS     0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg)  ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg)   (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg)  (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers  */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
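+/* The callbacks below drive the regmap cache (REGCACHE_RBTREE): only
+ * registers marked writeable/readable are accepted, volatile ones are
+ * always fetched from the bus because the radio changes them at runtime,
+ * and the precious IRQ status registers are protected from incidental
+ * reads (e.g. via debugfs), which would clear the interrupt line.
+ */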
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case DAR_IRQ_STS1:
+       case DAR_IRQ_STS2:
+       case DAR_IRQ_STS3:
+       case DAR_PHY_CTRL1:
+       case DAR_PHY_CTRL2:
+       case DAR_PHY_CTRL3:
+       case DAR_PHY_CTRL4:
+       case DAR_SRC_CTRL:
+       case DAR_SRC_ADDRS_SUM_LSB:
+       case DAR_SRC_ADDRS_SUM_MSB:
+       case DAR_T3CMP_LSB:
+       case DAR_T3CMP_MSB:
+       case DAR_T3CMP_USB:
+       case DAR_T2PRIMECMP_LSB:
+       case DAR_T2PRIMECMP_MSB:
+       case DAR_T1CMP_LSB:
+       case DAR_T1CMP_MSB:
+       case DAR_T1CMP_USB:
+       case DAR_T2CMP_LSB:
+       case DAR_T2CMP_MSB:
+       case DAR_T2CMP_USB:
+       case DAR_T4CMP_LSB:
+       case DAR_T4CMP_MSB:
+       case DAR_T4CMP_USB:
+       case DAR_PLL_INT0:
+       case DAR_PLL_FRAC0_LSB:
+       case DAR_PLL_FRAC0_MSB:
+       case DAR_PA_PWR:
+       /* no DAR_ACM */
+       case DAR_OVERWRITE_VER:
+       case DAR_CLK_OUT_CTRL:
+       case DAR_PWR_MODES:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+       bool rc;
+
+       /* all writeable are also readable */
+       rc = mcr20a_dar_writeable(dev, reg);
+       if (rc)
+               return rc;
+
+       /* readonly regs */
+       switch (reg) {
+       case DAR_RX_FRM_LEN:
+       case DAR_CCA1_ED_FNL:
+       case DAR_EVENT_TMR_LSB:
+       case DAR_EVENT_TMR_MSB:
+       case DAR_EVENT_TMR_USB:
+       case DAR_TIMESTAMP_LSB:
+       case DAR_TIMESTAMP_MSB:
+       case DAR_TIMESTAMP_USB:
+       case DAR_SEQ_STATE:
+       case DAR_LQI_VALUE:
+       case DAR_RSSI_CCA_CONT:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+       /* can be changed during runtime */
+       switch (reg) {
+       case DAR_IRQ_STS1:
+       case DAR_IRQ_STS2:
+       case DAR_IRQ_STS3:
+       /* also read directly via spi_async, so keep them uncached */
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+       /* don't clear irq line on read */
+       switch (reg) {
+       case DAR_IRQ_STS1:
+       case DAR_IRQ_STS2:
+       case DAR_IRQ_STS3:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static const struct regmap_config mcr20a_dar_regmap = {
+       .name                   = "mcr20a_dar",
+       .reg_bits               = 8,
+       .val_bits               = 8,
+       .write_flag_mask        = REGISTER_ACCESS | REGISTER_WRITE,
+       .read_flag_mask         = REGISTER_ACCESS | REGISTER_READ,
+       .cache_type             = REGCACHE_RBTREE,
+       .writeable_reg          = mcr20a_dar_writeable,
+       .readable_reg           = mcr20a_dar_readable,
+       .volatile_reg           = mcr20a_dar_volatile,
+       .precious_reg           = mcr20a_dar_precious,
+       .fast_io                = true,
+       .can_multi_write        = true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case IAR_XTAL_TRIM:
+       case IAR_PMC_LP_TRIM:
+       case IAR_MACPANID0_LSB:
+       case IAR_MACPANID0_MSB:
+       case IAR_MACSHORTADDRS0_LSB:
+       case IAR_MACSHORTADDRS0_MSB:
+       case IAR_MACLONGADDRS0_0:
+       case IAR_MACLONGADDRS0_8:
+       case IAR_MACLONGADDRS0_16:
+       case IAR_MACLONGADDRS0_24:
+       case IAR_MACLONGADDRS0_32:
+       case IAR_MACLONGADDRS0_40:
+       case IAR_MACLONGADDRS0_48:
+       case IAR_MACLONGADDRS0_56:
+       case IAR_RX_FRAME_FILTER:
+       case IAR_PLL_INT1:
+       case IAR_PLL_FRAC1_LSB:
+       case IAR_PLL_FRAC1_MSB:
+       case IAR_MACPANID1_LSB:
+       case IAR_MACPANID1_MSB:
+       case IAR_MACSHORTADDRS1_LSB:
+       case IAR_MACSHORTADDRS1_MSB:
+       case IAR_MACLONGADDRS1_0:
+       case IAR_MACLONGADDRS1_8:
+       case IAR_MACLONGADDRS1_16:
+       case IAR_MACLONGADDRS1_24:
+       case IAR_MACLONGADDRS1_32:
+       case IAR_MACLONGADDRS1_40:
+       case IAR_MACLONGADDRS1_48:
+       case IAR_MACLONGADDRS1_56:
+       case IAR_DUAL_PAN_CTRL:
+       case IAR_DUAL_PAN_DWELL:
+       case IAR_CCA1_THRESH:
+       case IAR_CCA1_ED_OFFSET_COMP:
+       case IAR_LQI_OFFSET_COMP:
+       case IAR_CCA_CTRL:
+       case IAR_CCA2_CORR_PEAKS:
+       case IAR_CCA2_CORR_THRESH:
+       case IAR_TMR_PRESCALE:
+       case IAR_ANT_PAD_CTRL:
+       case IAR_MISC_PAD_CTRL:
+       case IAR_BSM_CTRL:
+       case IAR_RNG:
+       case IAR_RX_WTR_MARK:
+       case IAR_SOFT_RESET:
+       case IAR_TXDELAY:
+       case IAR_ACKDELAY:
+       case IAR_CORR_NVAL:
+       case IAR_ANT_AGC_CTRL:
+       case IAR_AGC_THR1:
+       case IAR_AGC_THR2:
+       case IAR_PA_CAL:
+       case IAR_ATT_RSSI1:
+       case IAR_ATT_RSSI2:
+       case IAR_RSSI_OFFSET:
+       case IAR_XTAL_CTRL:
+       case IAR_CHF_PMA_GAIN:
+       case IAR_CHF_IBUF:
+       case IAR_CHF_QBUF:
+       case IAR_CHF_IRIN:
+       case IAR_CHF_QRIN:
+       case IAR_CHF_IL:
+       case IAR_CHF_QL:
+       case IAR_CHF_CC1:
+       case IAR_CHF_CCL:
+       case IAR_CHF_CC2:
+       case IAR_CHF_IROUT:
+       case IAR_CHF_QROUT:
+       case IAR_PA_TUNING:
+       case IAR_VCO_CTRL1:
+       case IAR_VCO_CTRL2:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+       bool rc;
+
+       /* all writeable are also readable */
+       rc = mcr20a_iar_writeable(dev, reg);
+       if (rc)
+               return rc;
+
+       /* readonly regs */
+       switch (reg) {
+       case IAR_PART_ID:
+       case IAR_DUAL_PAN_STS:
+       case IAR_RX_BYTE_COUNT:
+       case IAR_FILTERFAIL_CODE1:
+       case IAR_FILTERFAIL_CODE2:
+       case IAR_RSSI:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+       /* can be changed during runtime */
+       switch (reg) {
+       case IAR_DUAL_PAN_STS:
+       case IAR_RX_BYTE_COUNT:
+       case IAR_FILTERFAIL_CODE1:
+       case IAR_FILTERFAIL_CODE2:
+       case IAR_RSSI:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static const struct regmap_config mcr20a_iar_regmap = {
+       .name                   = "mcr20a_iar",
+       .reg_bits               = 16,
+       .val_bits               = 8,
+       .write_flag_mask        = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+       .read_flag_mask         = REGISTER_ACCESS | REGISTER_READ  | IAR_INDEX,
+       .cache_type             = REGCACHE_RBTREE,
+       .writeable_reg          = mcr20a_iar_writeable,
+       .readable_reg           = mcr20a_iar_readable,
+       .volatile_reg           = mcr20a_iar_volatile,
+       .fast_io                = true,
+};
+
+struct mcr20a_local {
+       struct spi_device *spi;
+
+       struct ieee802154_hw *hw;
+       struct mcr20a_platform_data *pdata;
+       struct regmap *regmap_dar;
+       struct regmap *regmap_iar;
+
+       u8 *buf;
+
+       bool is_tx;
+
+       /* for writing tx buffer */
+       struct spi_message tx_buf_msg;
+       u8 tx_header[1];
+       /* burst buffer write command */
+       struct spi_transfer tx_xfer_header;
+       u8 tx_len[1];
+       /* len of tx packet */
+       struct spi_transfer tx_xfer_len;
+       /* data of tx packet */
+       struct spi_transfer tx_xfer_buf;
+       struct sk_buff *tx_skb;
+
+       /* for read length rxfifo */
+       struct spi_message reg_msg;
+       u8 reg_cmd[1];
+       u8 reg_data[MCR20A_IRQSTS_NUM];
+       struct spi_transfer reg_xfer_cmd;
+       struct spi_transfer reg_xfer_data;
+
+       /* receive handling */
+       struct spi_message rx_buf_msg;
+       u8 rx_header[1];
+       struct spi_transfer rx_xfer_header;
+       u8 rx_lqi[1];
+       struct spi_transfer rx_xfer_lqi;
+       u8 rx_buf[MCR20A_MAX_BUF];
+       struct spi_transfer rx_xfer_buf;
+
+       /* isr handling for reading intstat */
+       struct spi_message irq_msg;
+       u8 irq_header[1];
+       u8 irq_data[MCR20A_IRQSTS_NUM];
+       struct spi_transfer irq_xfer_data;
+       struct spi_transfer irq_xfer_header;
+};
+
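+/* Note on the TX path: it is built as a chain of asynchronous SPI
+ * messages so that it can run entirely from completion/IRQ context.
+ * The packet-buffer write completes into this callback, which then
+ * issues one more async register write to start the TX sequence.
+ */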
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       lp->reg_msg.complete = NULL;
+       lp->reg_cmd[0]  = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+       lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+       lp->reg_xfer_data.len = 1;
+
+       ret = spi_async(lp->spi, &lp->reg_msg);
+       if (ret)
+               dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+       struct mcr20a_local *lp = hw->priv;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       lp->tx_skb = skb;
+
+       print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+                            skb->data, skb->len, 0);
+
+       lp->is_tx = 1;
+
+       lp->reg_msg.complete    = NULL;
+       lp->reg_cmd[0]          = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+       lp->reg_data[0]         = MCR20A_XCVSEQ_IDLE;
+       lp->reg_xfer_data.len   = 1;
+
+       return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+       WARN_ON(!level);
+       *level = 0xbe;
+       return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* frequency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+       if (ret)
+               return ret;
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+       if (ret)
+               return ret;
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+                          PLL_FRAC[channel - 11]);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* No slotted operation */
+       dev_dbg(printdev(lp), "no slotted operation\n");
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_SLOTTED, 0x0);
+       if (ret < 0)
+               return ret;
+
+       /* enable irq */
+       enable_irq(lp->spi->irq);
+
+       /* Unmask SEQ interrupt */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+                                DAR_PHY_CTRL2_SEQMSK, 0x0);
+       if (ret < 0)
+               return ret;
+
+       /* Start the RX sequence */
+       dev_dbg(printdev(lp), "start the RX sequence\n");
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+       struct mcr20a_local *lp = hw->priv;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* stop all running sequence */
+       regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                          DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+       /* disable irq */
+       disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+                       struct ieee802154_hw_addr_filt *filt,
+                       unsigned long changed)
+{
+       struct mcr20a_local *lp = hw->priv;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+               u16 addr = le16_to_cpu(filt->short_addr);
+
+               regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+               regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+       }
+
+       if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+               u16 pan = le16_to_cpu(filt->pan_id);
+
+               regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+               regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+       }
+
+       if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+               u8 addr[8], i;
+
+               memcpy(addr, &filt->ieee_addr, 8);
+               for (i = 0; i < 8; i++)
+                       regmap_write(lp->regmap_iar,
+                                    IAR_MACLONGADDRS0_0 + i, addr[i]);
+       }
+
+       if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+               if (filt->pan_coord) {
+                       regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                          DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+               } else {
+                       regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                          DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+               }
+       }
+
+       return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+       -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+       -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+       struct mcr20a_local *lp = hw->priv;
+       u32 i;
+
+       dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+       for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) {
+               if (lp->hw->phy->supported.tx_powers[i] == mbm)
+                       return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+                                           ((i + 8) & 0x1F));
+       }
+
+       return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+                   const struct wpan_phy_cca *cca)
+{
+       struct mcr20a_local *lp = hw->priv;
+       unsigned int cca_mode = 0xff;
+       bool cca_mode_and = false;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* mapping 802.15.4 to driver spec */
+       switch (cca->mode) {
+       case NL802154_CCA_ENERGY:
+               cca_mode = MCR20A_CCA_MODE1;
+               break;
+       case NL802154_CCA_CARRIER:
+               cca_mode = MCR20A_CCA_MODE2;
+               break;
+       case NL802154_CCA_ENERGY_CARRIER:
+               switch (cca->opt) {
+               case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+                       cca_mode = MCR20A_CCA_MODE3;
+                       cca_mode_and = true;
+                       break;
+               case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+                       cca_mode = MCR20A_CCA_MODE3;
+                       cca_mode_and = false;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
+       default:
+               return -EINVAL;
+       }
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                DAR_PHY_CTRL4_CCATYPE_MASK,
+                                cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+       if (ret < 0)
+               return ret;
+
+       if (cca_mode == MCR20A_CCA_MODE3) {
+               if (cca_mode_and) {
+                       ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+                                                IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+                                                0x08);
+               } else {
+                       ret = regmap_update_bits(lp->regmap_iar,
+                                                IAR_CCA_CTRL,
+                                                IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+                                                0x00);
+               }
+               if (ret < 0)
+                       return ret;
+       }
+
+       return ret;
+}
+
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+       struct mcr20a_local *lp = hw->priv;
+       u32 i;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+               if (hw->phy->supported.cca_ed_levels[i] == mbm)
+                       return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+       }
+
+       return -EINVAL;
+}
+
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+       u8 rx_frame_filter_reg = 0x0;
+
+       dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+       if (on) {
+               /* All frame types accepted; the PROMISCUOUS bit itself is
+                * set via regmap_update_bits() below.
+                */
+               rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+               rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+                                 IAR_RX_FRAME_FLT_NS_FT);
+
+               ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                        DAR_PHY_CTRL4_PROMISCUOUS,
+                                        DAR_PHY_CTRL4_PROMISCUOUS);
+               if (ret < 0)
+                       return ret;
+
+               ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                                  rx_frame_filter_reg);
+               if (ret < 0)
+                       return ret;
+       } else {
+               ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                        DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+               if (ret < 0)
+                       return ret;
+
+               ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                                  IAR_RX_FRAME_FLT_FRM_VER |
+                                  IAR_RX_FRAME_FLT_BEACON_FT |
+                                  IAR_RX_FRAME_FLT_DATA_FT |
+                                  IAR_RX_FRAME_FLT_CMD_FT);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static const struct ieee802154_ops mcr20a_hw_ops = {
+       .owner                  = THIS_MODULE,
+       .xmit_async             = mcr20a_xmit,
+       .ed                     = mcr20a_ed,
+       .set_channel            = mcr20a_set_channel,
+       .start                  = mcr20a_start,
+       .stop                   = mcr20a_stop,
+       .set_hw_addr_filt       = mcr20a_set_hw_addr_filt,
+       .set_txpower            = mcr20a_set_txpower,
+       .set_cca_mode           = mcr20a_set_cca_mode,
+       .set_cca_ed_level       = mcr20a_set_cca_ed_level,
+       .set_promiscuous_mode   = mcr20a_set_promiscuous_mode,
+};
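+
+/* mac802154 drives the transceiver solely through this ops table. Only the
+ * asynchronous transmit hook is provided, so TX completion is signalled
+ * later via ieee802154_xmit_complete() in mcr20a_handle_tx_complete().
+ */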
+
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* Start the RX sequence */
+       regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+       return 0;
+}
+
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+       struct sk_buff *skb;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       dev_dbg(printdev(lp), "RX is done\n");
+
+       if (!ieee802154_is_valid_psdu_len(len)) {
+               dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+               len = IEEE802154_MTU;
+       }
+
+       len = len - 2;  /* strip the 2-byte frame check sequence (FCS) */
+
+       skb = dev_alloc_skb(len);
+       if (!skb)
+               return;
+
+       memcpy(skb_put(skb, len), lp->rx_buf, len);
+       ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+       print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+                            lp->rx_buf, len, 0);
+       pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+       /* start RX sequence */
+       mcr20a_request_rx(lp);
+}
+
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 len;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* get the length of received frame */
+       len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+       dev_dbg(printdev(lp), "frame len: %d\n", len);
+
+       /* prepare to read the rx buf */
+       lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+       lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+       lp->rx_xfer_buf.len = len;
+
+       ret = spi_async(lp->spi, &lp->rx_buf_msg);
+       if (ret)
+               dev_err(printdev(lp), "failed to read rx buffer\n");
+}
+
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+       lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+       lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+       lp->reg_xfer_data.len   = 1;
+
+       return spi_async(lp->spi, &lp->reg_msg);
+}
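+
+/* Reception is a chain of asynchronous SPI transactions: mcr20a_handle_rx()
+ * reads DAR_RX_FRM_LEN, mcr20a_handle_rx_read_len_complete() burst-reads the
+ * packet buffer plus the trailing LQI byte, and
+ * mcr20a_handle_rx_read_buf_complete() hands the frame to the stack and
+ * re-arms the receiver through mcr20a_request_rx().
+ */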
+
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+       return mcr20a_request_rx(lp);
+}
+
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* write tx buffer */
+       lp->tx_header[0]        = MCR20A_BURST_WRITE_PACKET_BUF;
+       /* add 2 bytes of FCS */
+       lp->tx_len[0]           = lp->tx_skb->len + 2;
+       lp->tx_xfer_buf.tx_buf  = lp->tx_skb->data;
+       /* add 1 byte psduLength */
+       lp->tx_xfer_buf.len     = lp->tx_skb->len + 1;
+
+       ret = spi_async(lp->spi, &lp->tx_buf_msg);
+       if (ret) {
+               dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+               return ret;
+       }
+
+       return 0;
+}
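+
+/* The TX burst write is <command byte><psduLength byte><PSDU>. psduLength
+ * counts the 2-byte FCS, which the transceiver appends by itself - hence
+ * IEEE802154_HW_TX_OMIT_CKSUM in mcr20a_hw_setup().
+ */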
+
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       enable_irq(lp->spi->irq);
+
+       dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+               lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
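+       /* The reused XCVSEQ mask is numerically the low three IRQSTS1 bits,
+        * so the cases below decode to 0x01 = SEQIRQ, 0x03 = SEQIRQ | TXIRQ,
+        * 0x05 = SEQIRQ | RXIRQ and 0x07 = SEQIRQ | TXIRQ | RXIRQ.
+        */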
+       switch (seq_state) {
+       /* TX IRQ, RX IRQ and SEQ IRQ */
+       case (0x03):
+               if (lp->is_tx) {
+                       lp->is_tx = 0;
+                       dev_dbg(printdev(lp), "TX is done. No ACK\n");
+                       mcr20a_handle_tx_complete(lp);
+               }
+               break;
+       case (0x05):
+               /* rx is starting */
+               dev_dbg(printdev(lp), "RX is starting\n");
+               mcr20a_handle_rx(lp);
+               break;
+       case (0x07):
+               if (lp->is_tx) {
+                       /* tx is done */
+                       lp->is_tx = 0;
+                       dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+                       mcr20a_handle_tx_complete(lp);
+               } else {
+                       /* rx is starting */
+                       dev_dbg(printdev(lp), "RX is starting\n");
+                       mcr20a_handle_rx(lp);
+               }
+               break;
+       case (0x01):
+               if (lp->is_tx) {
+                       dev_dbg(printdev(lp), "TX is starting\n");
+                       mcr20a_handle_tx(lp);
+               } else {
+                       dev_dbg(printdev(lp), "MCR20A is stopped\n");
+               }
+               break;
+       }
+}
+
+static void mcr20a_irq_status_complete(void *context)
+{
+       int ret;
+       struct mcr20a_local *lp = context;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+       regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+       lp->reg_msg.complete = mcr20a_irq_clean_complete;
+       lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+       memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+       lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+       ret = spi_async(lp->spi, &lp->reg_msg);
+
+       if (ret)
+               dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+       struct mcr20a_local *lp = data;
+       int ret;
+
+       disable_irq_nosync(irq);
+
+       lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+       /* read IRQSTSx */
+       ret = spi_async(lp->spi, &lp->irq_msg);
+       if (ret) {
+               enable_irq(irq);
+               return IRQ_NONE;
+       }
+
+       return IRQ_HANDLED;
+}
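+
+/* The hard IRQ handler only masks the line and starts an asynchronous read
+ * of the IRQSTS registers; mcr20a_irq_status_complete() then forces the
+ * sequence manager to idle and writes the latched status back (clearing the
+ * flags), and mcr20a_irq_clean_complete() re-enables the interrupt and
+ * dispatches on the result.
+ */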
+
+static int mcr20a_get_platform_data(struct spi_device *spi,
+                                   struct mcr20a_platform_data *pdata)
+{
+       int ret = 0;
+
+       if (!spi->dev.of_node)
+               return -EINVAL;
+
+       pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0);
+       dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+       return ret;
+}
+
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+       u8 i;
+       struct ieee802154_hw *hw = lp->hw;
+       struct wpan_phy *phy = lp->hw->phy;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       phy->symbol_duration = 16;
+       phy->lifs_period = 40;
+       phy->sifs_period = 12;
+
+       hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+                       IEEE802154_HW_AFILT |
+                       IEEE802154_HW_PROMISCUOUS;
+
+       phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+                       WPAN_PHY_FLAG_CCA_MODE;
+
+       phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+               BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+       phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+               BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+       /* initialize cca_ed_levels */
+       for (i = MCR20A_MAX_CCA_THRESHOLD; i <= MCR20A_MIN_CCA_THRESHOLD;
+             ++i) {
+               mcr20a_ed_levels[i] = -i * 100;
+       }
+
+       phy->supported.cca_ed_levels = mcr20a_ed_levels;
+       phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+       phy->cca.mode = NL802154_CCA_ENERGY;
+
+       phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+       phy->current_page = 0;
+       /* MCR20A default reset value */
+       phy->current_channel = 20;
+       phy->supported.tx_powers = mcr20a_powers;
+       phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
+       phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+       phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
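+
+/* Resulting defaults: channel 20 on page 0 (the chip reset value), a CCA ED
+ * level of mcr20a_ed_levels[75] = -7500 mbm (-75 dBm, matching the
+ * IAR_CCA1_THRESH default in mcr20a_phy_init()) and mcr20a_powers[0x0F] as
+ * transmit power.
+ */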
+
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->tx_buf_msg);
+       lp->tx_buf_msg.context = lp;
+       lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+       lp->tx_xfer_header.len = 1;
+       lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+       lp->tx_xfer_len.len = 1;
+       lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+       spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+       spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+       spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->reg_msg);
+       lp->reg_msg.context = lp;
+
+       lp->reg_xfer_cmd.len = 1;
+       lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+       lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+       lp->reg_xfer_data.rx_buf = lp->reg_data;
+       lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+       spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+       spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+       spi_message_init(&lp->rx_buf_msg);
+       lp->rx_buf_msg.context = lp;
+       lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+       lp->rx_xfer_header.len = 1;
+       lp->rx_xfer_header.tx_buf = lp->rx_header;
+       lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+       lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+       lp->rx_xfer_lqi.len = 1;
+       lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+       spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+       spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+       spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->irq_msg);
+       lp->irq_msg.context             = lp;
+       lp->irq_msg.complete    = mcr20a_irq_status_complete;
+       lp->irq_xfer_header.len = 1;
+       lp->irq_xfer_header.tx_buf = lp->irq_header;
+       lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+       lp->irq_xfer_data.len   = MCR20A_IRQSTS_NUM;
+       lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+       spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+       spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+       u8 index;
+       unsigned int phy_reg = 0;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* Disable Tristate on COCO MISO for SPI reads */
+       ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+       if (ret)
+               goto err_ret;
+
+       /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+        * immediately after init
+        */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+       if (ret)
+               goto err_ret;
+
+       /* Clear all PP IRQ bits in IRQSTS2 */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+                          DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+                          DAR_IRQSTS2_WAKE_IRQ);
+       if (ret)
+               goto err_ret;
+
+       /* Disable all timer interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+       if (ret)
+               goto err_ret;
+
+       /*  PHY_CTRL1 : default HW settings + AUTOACK enabled */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+       if (ret)
+               goto err_ret;
+
+       /*  PHY_CTRL2 : disable all interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+       if (ret)
+               goto err_ret;
+
+       /* PHY_CTRL3 : disable all timers and remaining interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+                          DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+                          DAR_PHY_CTRL3_WAKE_MSK);
+       if (ret)
+               goto err_ret;
+
+       /* SRC_CTRL: enable Acknowledge Frame Pending and
+        * Source Address Matching
+        */
+       ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+                          DAR_SRC_CTRL_ACK_FRM_PND |
+                          (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+       if (ret)
+               goto err_ret;
+
+       /*  RX_FRAME_FILTER */
+       /*  FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+       ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                          IAR_RX_FRAME_FLT_FRM_VER |
+                          IAR_RX_FRAME_FLT_BEACON_FT |
+                          IAR_RX_FRAME_FLT_DATA_FT |
+                          IAR_RX_FRAME_FLT_CMD_FT);
+       if (ret)
+               goto err_ret;
+
+       dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+                MCR20A_OVERWRITE_VERSION);
+
+       /* Apply direct register overwrites */
+       ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+                          MCR20A_OVERWRITE_VERSION);
+       if (ret)
+               goto err_ret;
+
+       /* Apply indirect register overwrites */
+       ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+                                    ARRAY_SIZE(mar20a_iar_overwrites));
+       if (ret)
+               goto err_ret;
+
+       /* Clear HW indirect queue */
+       dev_dbg(printdev(lp), "clear HW indirect queue\n");
+       for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+               phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+                              DAR_SRC_CTRL_INDEX_SHIFT)
+                             | (DAR_SRC_CTRL_SRCADDR_EN)
+                             | (DAR_SRC_CTRL_INDEX_DISABLE));
+               ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+               if (ret)
+                       goto err_ret;
+               phy_reg = 0;
+       }
+
+       /* Assign HW Indirect hash table to PAN0 */
+       ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+       if (ret)
+               goto err_ret;
+
+       /* Clear current lvl */
+       phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+       /* Set new lvl */
+       phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+               IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+       ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+       if (ret)
+               goto err_ret;
+
+       /* Set CCA threshold to -75 dBm */
+       ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+       if (ret)
+               goto err_ret;
+
+       /* Set the prescaler to obtain a 1-symbol (16us) timebase */
+       ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+       if (ret)
+               goto err_ret;
+
+       /* Enable autodoze mode. */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+                                DAR_PWR_MODES_AUTODOZE,
+                                DAR_PWR_MODES_AUTODOZE);
+       if (ret)
+               goto err_ret;
+
+       /* Disable clk_out */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+                                DAR_CLK_OUT_CTRL_EN, 0x0);
+       if (ret)
+               goto err_ret;
+
+       return 0;
+
+err_ret:
+       return ret;
+}
+
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+       struct ieee802154_hw *hw;
+       struct mcr20a_local *lp;
+       struct mcr20a_platform_data *pdata;
+       int irq_type;
+       int ret = -ENOMEM;
+
+       dev_dbg(&spi->dev, "%s\n", __func__);
+
+       if (!spi->irq) {
+               dev_err(&spi->dev, "no IRQ specified\n");
+               return -EINVAL;
+       }
+
+       pdata = devm_kmalloc(&spi->dev, sizeof(*pdata), GFP_KERNEL);
+       if (!pdata)
+               return -ENOMEM;
+
+       /* set mcr20a platform data */
+       ret = mcr20a_get_platform_data(spi, pdata);
+       if (ret < 0) {
+               dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+               return ret;
+       }
+
+       /* init reset gpio */
+       if (gpio_is_valid(pdata->rst_gpio)) {
+               ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+                                           GPIOF_OUT_INIT_HIGH, "reset");
+               if (ret)
+                       return ret;
+       }
+
+       /* reset mcr20a */
+       if (gpio_is_valid(pdata->rst_gpio)) {
+               usleep_range(10, 20);
+               gpio_set_value_cansleep(pdata->rst_gpio, 0);
+               usleep_range(10, 20);
+               gpio_set_value_cansleep(pdata->rst_gpio, 1);
+               usleep_range(120, 240);
+       }
+
+       /* allocate ieee802154_hw and private data */
+       hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+       if (!hw) {
+               dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+               return -ENOMEM;
+       }
+
+       /* init mcr20a local data */
+       lp = hw->priv;
+       lp->hw = hw;
+       lp->spi = spi;
+       lp->spi->dev.platform_data = pdata;
+       lp->pdata = pdata;
+
+       /* init ieee802154_hw */
+       hw->parent = &spi->dev;
+       ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+       /* init buf */
+       lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+       if (!lp->buf) {
+               ret = -ENOMEM;
+               goto free_dev;
+       }
+
+       mcr20a_setup_tx_spi_messages(lp);
+       mcr20a_setup_rx_spi_messages(lp);
+       mcr20a_setup_irq_spi_messages(lp);
+
+       /* setup regmap */
+       lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+       if (IS_ERR(lp->regmap_dar)) {
+               ret = PTR_ERR(lp->regmap_dar);
+               dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+                       ret);
+               goto free_dev;
+       }
+
+       lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+       if (IS_ERR(lp->regmap_iar)) {
+               ret = PTR_ERR(lp->regmap_iar);
+               dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+               goto free_dev;
+       }
+
+       mcr20a_hw_setup(lp);
+
+       spi_set_drvdata(spi, lp);
+
+       ret = mcr20a_phy_init(lp);
+       if (ret < 0) {
+               dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+               goto free_dev;
+       }
+
+       irq_type = irq_get_trigger_type(spi->irq);
+       if (!irq_type)
+               irq_type = IRQF_TRIGGER_FALLING;
+
+       ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+                              irq_type, dev_name(&spi->dev), lp);
+       if (ret) {
+               dev_err(&spi->dev, "could not request IRQ for mcr20a\n");
+               ret = -ENODEV;
+               goto free_dev;
+       }
+
+       /* Keep the IRQ disabled until the hardware is started */
+       disable_irq(spi->irq);
+
+       ret = ieee802154_register_hw(hw);
+       if (ret) {
+               dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+               goto free_dev;
+       }
+
+       return ret;
+
+free_dev:
+       ieee802154_free_hw(lp->hw);
+
+       return ret;
+}
+
+static int mcr20a_remove(struct spi_device *spi)
+{
+       struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+       dev_dbg(&spi->dev, "%s\n", __func__);
+
+       ieee802154_unregister_hw(lp->hw);
+       ieee802154_free_hw(lp->hw);
+
+       return 0;
+}
+
+static const struct of_device_id mcr20a_of_match[] = {
+       { .compatible = "nxp,mcr20a", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+       { .name = "mcr20a", },
+       { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+       .id_table = mcr20a_device_id,
+       .driver = {
+               .of_match_table = of_match_ptr(mcr20a_of_match),
+               .name   = "mcr20a",
+       },
+       .probe      = mcr20a_probe,
+       .remove     = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail.com>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644 (file)
index 0000000..6da4fd0
--- /dev/null
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Access Registers */
+#define DAR_IRQ_STS1           0x00
+#define DAR_IRQ_STS2           0x01
+#define DAR_IRQ_STS3           0x02
+#define DAR_PHY_CTRL1          0x03
+#define DAR_PHY_CTRL2          0x04
+#define DAR_PHY_CTRL3          0x05
+#define DAR_RX_FRM_LEN         0x06
+#define DAR_PHY_CTRL4          0x07
+#define DAR_SRC_CTRL           0x08
+#define DAR_SRC_ADDRS_SUM_LSB  0x09
+#define DAR_SRC_ADDRS_SUM_MSB  0x0A
+#define DAR_CCA1_ED_FNL                0x0B
+#define DAR_EVENT_TMR_LSB      0x0C
+#define DAR_EVENT_TMR_MSB      0x0D
+#define DAR_EVENT_TMR_USB      0x0E
+#define DAR_TIMESTAMP_LSB      0x0F
+#define DAR_TIMESTAMP_MSB      0x10
+#define DAR_TIMESTAMP_USB      0x11
+#define DAR_T3CMP_LSB          0x12
+#define DAR_T3CMP_MSB          0x13
+#define DAR_T3CMP_USB          0x14
+#define DAR_T2PRIMECMP_LSB     0x15
+#define DAR_T2PRIMECMP_MSB     0x16
+#define DAR_T1CMP_LSB          0x17
+#define DAR_T1CMP_MSB          0x18
+#define DAR_T1CMP_USB          0x19
+#define DAR_T2CMP_LSB          0x1A
+#define DAR_T2CMP_MSB          0x1B
+#define DAR_T2CMP_USB          0x1C
+#define DAR_T4CMP_LSB          0x1D
+#define DAR_T4CMP_MSB          0x1E
+#define DAR_T4CMP_USB          0x1F
+#define DAR_PLL_INT0           0x20
+#define DAR_PLL_FRAC0_LSB      0x21
+#define DAR_PLL_FRAC0_MSB      0x22
+#define DAR_PA_PWR             0x23
+#define DAR_SEQ_STATE          0x24
+#define DAR_LQI_VALUE          0x25
+#define DAR_RSSI_CCA_CONT      0x26
+/*------------------            0x27 */
+#define DAR_ASM_CTRL1          0x28
+#define DAR_ASM_CTRL2          0x29
+#define DAR_ASM_DATA_0         0x2A
+#define DAR_ASM_DATA_1         0x2B
+#define DAR_ASM_DATA_2         0x2C
+#define DAR_ASM_DATA_3         0x2D
+#define DAR_ASM_DATA_4         0x2E
+#define DAR_ASM_DATA_5         0x2F
+#define DAR_ASM_DATA_6         0x30
+#define DAR_ASM_DATA_7         0x31
+#define DAR_ASM_DATA_8         0x32
+#define DAR_ASM_DATA_9         0x33
+#define DAR_ASM_DATA_A         0x34
+#define DAR_ASM_DATA_B         0x35
+#define DAR_ASM_DATA_C         0x36
+#define DAR_ASM_DATA_D         0x37
+#define DAR_ASM_DATA_E         0x38
+#define DAR_ASM_DATA_F         0x39
+/*-----------------------       0x3A */
+#define DAR_OVERWRITE_VER      0x3B
+#define DAR_CLK_OUT_CTRL       0x3C
+#define DAR_PWR_MODES          0x3D
+#define IAR_INDEX              0x3E
+#define IAR_DATA               0x3F
+
+/* Indirect Register Memory */
+#define IAR_PART_ID            0x00
+#define IAR_XTAL_TRIM          0x01
+#define IAR_PMC_LP_TRIM                0x02
+#define IAR_MACPANID0_LSB      0x03
+#define IAR_MACPANID0_MSB      0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0    0x07
+#define IAR_MACLONGADDRS0_8    0x08
+#define IAR_MACLONGADDRS0_16   0x09
+#define IAR_MACLONGADDRS0_24   0x0A
+#define IAR_MACLONGADDRS0_32   0x0B
+#define IAR_MACLONGADDRS0_40   0x0C
+#define IAR_MACLONGADDRS0_48   0x0D
+#define IAR_MACLONGADDRS0_56   0x0E
+#define IAR_RX_FRAME_FILTER    0x0F
+#define IAR_PLL_INT1           0x10
+#define IAR_PLL_FRAC1_LSB      0x11
+#define IAR_PLL_FRAC1_MSB      0x12
+#define IAR_MACPANID1_LSB      0x13
+#define IAR_MACPANID1_MSB      0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0    0x17
+#define IAR_MACLONGADDRS1_8    0x18
+#define IAR_MACLONGADDRS1_16   0x19
+#define IAR_MACLONGADDRS1_24   0x1A
+#define IAR_MACLONGADDRS1_32   0x1B
+#define IAR_MACLONGADDRS1_40   0x1C
+#define IAR_MACLONGADDRS1_48   0x1D
+#define IAR_MACLONGADDRS1_56   0x1E
+#define IAR_DUAL_PAN_CTRL      0x1F
+#define IAR_DUAL_PAN_DWELL     0x20
+#define IAR_DUAL_PAN_STS       0x21
+#define IAR_CCA1_THRESH                0x22
+#define IAR_CCA1_ED_OFFSET_COMP        0x23
+#define IAR_LQI_OFFSET_COMP    0x24
+#define IAR_CCA_CTRL           0x25
+#define IAR_CCA2_CORR_PEAKS    0x26
+#define IAR_CCA2_CORR_THRESH   0x27
+#define IAR_TMR_PRESCALE       0x28
+/*--------------------          0x29 */
+#define IAR_GPIO_DATA          0x2A
+#define IAR_GPIO_DIR           0x2B
+#define IAR_GPIO_PUL_EN                0x2C
+#define IAR_GPIO_PUL_SEL       0x2D
+#define IAR_GPIO_DS            0x2E
+/*------------------            0x2F */
+#define IAR_ANT_PAD_CTRL       0x30
+#define IAR_MISC_PAD_CTRL      0x31
+#define IAR_BSM_CTRL           0x32
+/*-------------------           0x33 */
+#define IAR_RNG                        0x34
+#define IAR_RX_BYTE_COUNT      0x35
+#define IAR_RX_WTR_MARK                0x36
+#define IAR_SOFT_RESET         0x37
+#define IAR_TXDELAY            0x38
+#define IAR_ACKDELAY           0x39
+#define IAR_SEQ_MGR_CTRL       0x3A
+#define IAR_SEQ_MGR_STS                0x3B
+#define IAR_SEQ_T_STS          0x3C
+#define IAR_ABORT_STS          0x3D
+#define IAR_CCCA_BUSY_CNT      0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1     0x41
+#define IAR_SRC_TBL_VALID2     0x42
+#define IAR_FILTERFAIL_CODE1   0x43
+#define IAR_FILTERFAIL_CODE2   0x44
+#define IAR_SLOT_PRELOAD       0x45
+/*--------------------          0x46 */
+#define IAR_CORR_VT            0x47
+#define IAR_SYNC_CTRL          0x48
+#define IAR_PN_LSB_0           0x49
+#define IAR_PN_LSB_1           0x4A
+#define IAR_PN_MSB_0           0x4B
+#define IAR_PN_MSB_1           0x4C
+#define IAR_CORR_NVAL          0x4D
+#define IAR_TX_MODE_CTRL       0x4E
+#define IAR_SNF_THR            0x4F
+#define IAR_FAD_THR            0x50
+#define IAR_ANT_AGC_CTRL       0x51
+#define IAR_AGC_THR1           0x52
+#define IAR_AGC_THR2           0x53
+#define IAR_AGC_HYS            0x54
+#define IAR_AFC                        0x55
+/*-------------------           0x56 */
+/*-------------------           0x57 */
+#define IAR_PHY_STS            0x58
+#define IAR_RX_MAX_CORR                0x59
+#define IAR_RX_MAX_PREAMBLE    0x5A
+#define IAR_RSSI               0x5B
+/*-------------------           0x5C */
+/*-------------------           0x5D */
+#define IAR_PLL_DIG_CTRL       0x5E
+#define IAR_VCO_CAL            0x5F
+#define IAR_VCO_BEST_DIFF      0x60
+#define IAR_VCO_BIAS           0x61
+#define IAR_KMOD_CTRL          0x62
+#define IAR_KMOD_CAL           0x63
+#define IAR_PA_CAL             0x64
+#define IAR_PA_PWRCAL          0x65
+#define IAR_ATT_RSSI1          0x66
+#define IAR_ATT_RSSI2          0x67
+#define IAR_RSSI_OFFSET                0x68
+#define IAR_RSSI_SLOPE         0x69
+#define IAR_RSSI_CAL1          0x6A
+#define IAR_RSSI_CAL2          0x6B
+/*-------------------           0x6C */
+/*-------------------           0x6D */
+#define IAR_XTAL_CTRL          0x6E
+#define IAR_XTAL_COMP_MIN      0x6F
+#define IAR_XTAL_COMP_MAX      0x70
+#define IAR_XTAL_GM            0x71
+/*-------------------           0x72 */
+/*-------------------           0x73 */
+#define IAR_LNA_TUNE           0x74
+#define IAR_LNA_AGCGAIN                0x75
+/*-------------------           0x76 */
+/*-------------------           0x77 */
+#define IAR_CHF_PMA_GAIN       0x78
+#define IAR_CHF_IBUF           0x79
+#define IAR_CHF_QBUF           0x7A
+#define IAR_CHF_IRIN           0x7B
+#define IAR_CHF_QRIN           0x7C
+#define IAR_CHF_IL             0x7D
+#define IAR_CHF_QL             0x7E
+#define IAR_CHF_CC1            0x7F
+#define IAR_CHF_CCL            0x80
+#define IAR_CHF_CC2            0x81
+#define IAR_CHF_IROUT          0x82
+#define IAR_CHF_QROUT          0x83
+/*-------------------           0x84 */
+/*-------------------           0x85 */
+#define IAR_RSSI_CTRL          0x86
+/*-------------------           0x87 */
+/*-------------------           0x88 */
+#define IAR_PA_BIAS            0x89
+#define IAR_PA_TUNING          0x8A
+/*-------------------           0x8B */
+/*-------------------           0x8C */
+#define IAR_PMC_HP_TRIM                0x8D
+#define IAR_VREGA_TRIM         0x8E
+/*-------------------           0x8F */
+/*-------------------           0x90 */
+#define IAR_VCO_CTRL1          0x91
+#define IAR_VCO_CTRL2          0x92
+/*-------------------           0x93 */
+/*-------------------           0x94 */
+#define IAR_ANA_SPARE_OUT1     0x95
+#define IAR_ANA_SPARE_OUT2     0x96
+#define IAR_ANA_SPARE_IN       0x97
+#define IAR_MISCELLANEOUS      0x98
+/*-------------------           0x99 */
+#define IAR_SEQ_MGR_OVRD0      0x9A
+#define IAR_SEQ_MGR_OVRD1      0x9B
+#define IAR_SEQ_MGR_OVRD2      0x9C
+#define IAR_SEQ_MGR_OVRD3      0x9D
+#define IAR_SEQ_MGR_OVRD4      0x9E
+#define IAR_SEQ_MGR_OVRD5      0x9F
+#define IAR_SEQ_MGR_OVRD6      0xA0
+#define IAR_SEQ_MGR_OVRD7      0xA1
+/*-------------------           0xA2 */
+#define IAR_TESTMODE_CTRL      0xA3
+#define IAR_DTM_CTRL1          0xA4
+#define IAR_DTM_CTRL2          0xA5
+#define IAR_ATM_CTRL1          0xA6
+#define IAR_ATM_CTRL2          0xA7
+#define IAR_ATM_CTRL3          0xA8
+/*-------------------           0xA9 */
+#define IAR_LIM_FE_TEST_CTRL   0xAA
+#define IAR_CHF_TEST_CTRL      0xAB
+#define IAR_VCO_TEST_CTRL      0xAC
+#define IAR_PLL_TEST_CTRL      0xAD
+#define IAR_PA_TEST_CTRL       0xAE
+#define IAR_PMC_TEST_CTRL      0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND                BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ     BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ     BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ                BIT(4)
+#define DAR_IRQSTS1_CCAIRQ             BIT(3)
+#define DAR_IRQSTS1_RXIRQ              BIT(2)
+#define DAR_IRQSTS1_TXIRQ              BIT(1)
+#define DAR_IRQSTS1_SEQIRQ             BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID           BIT(7)
+#define DAR_IRQSTS2_CCA                        BIT(6)
+#define DAR_IRQSTS2_SRCADDR            BIT(5)
+#define DAR_IRQSTS2_PI                 BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS          BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ            BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ         BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ           BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK            BIT(7)
+#define DAR_IRQSTS3_TMR3MSK            BIT(6)
+#define DAR_IRQSTS3_TMR2MSK            BIT(5)
+#define DAR_IRQSTS3_TMR1MSK            BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ            BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ            BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ            BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ            BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN                BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED          BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX         BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT   5
+#define DAR_PHY_CTRL1_RXACKRQD         BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK          BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK      0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK          BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK   BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK   BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK      BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK           BIT(3)
+#define DAR_PHY_CTRL2_RXMSK            BIT(2)
+#define DAR_PHY_CTRL2_TXMSK            BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK           BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN       BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN       BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN       BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN       BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK          BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK       BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK         BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK       (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK         BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT         BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0      BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE          (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT    (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK     (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD          BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS      BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN      BIT(0)
+
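+/* CCATYPE is the two-bit field at bits 4:3 (mask 0x18, shift 3) into which
+ * mcr20a_set_cca_mode() writes one of the MCR20A_CCA_MODEx values.
+ */
+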
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX             (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT       (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND       BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN                BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN          BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE     BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR            BIT(7)
+#define DAR_ASM_CTRL1_START            BIT(6)
+#define DAR_ASM_CTRL1_SELFTST          BIT(5)
+#define DAR_ASM_CTRL1_CTR              BIT(4)
+#define DAR_ASM_CTRL1_CBC              BIT(3)
+#define DAR_ASM_CTRL1_AES              BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC         BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL                (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT  (5)
+#define DAR_ASM_CTRL2_TSTPAS                   BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND                BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ           BIT(6)
+#define DAR_CLK_OUT_CTRL_SR            BIT(5)
+#define DAR_CLK_OUT_CTRL_DS            BIT(4)
+#define DAR_CLK_OUT_CTRL_EN            BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV           (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY       BIT(5)
+#define DAR_PWR_MODES_XTALEN           BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN       BIT(3)
+#define DAR_PWR_MODES_AUTODOZE         BIT(1)
+#define DAR_PWR_MODES_PMC_MODE         BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER               (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT         (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS    BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT                 BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT                        BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT                        BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT               BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT             BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT       (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK      BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1          BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO                BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK       BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1          BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0          BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN       (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN                        BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN              BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR   BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED  BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1        BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL      (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT        (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ       BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN       (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN  BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD     BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS  BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR    (1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT  (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK   (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT    (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK     BIT(IAR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN                (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST         BIT(7)
+#define IAR_SOFT_RESET_REGS_RST                BIT(4)
+#define IAR_SOFT_RESET_PLL_RST         BIT(3)
+#define IAR_SOFT_RESET_TX_RST          BIT(2)
+#define IAR_SOFT_RESET_RX_RST          BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST     BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL                (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT  (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE         BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE                BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH        BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT   BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS          BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD                BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED    BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE                        BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING     BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT                BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE               BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL          (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED      BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED      BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED       BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL   BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8       (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK         BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR       BIT(6)
+#define IAR_PHY_STS_PLL_LOCK           BIT(5)
+#define IAR_PHY_STS_CRCVALID           BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL        BIT(2)
+#define IAR_PHY_STS_SFD_DET            BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET       BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT             BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN       BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN                BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN       BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN             BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED       BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN           BIT(6)
+#define IAR_DTM_CTRL1_PAGE5            BIT(5)
+#define IAR_DTM_CTRL1_PAGE4            BIT(4)
+#define IAR_DTM_CTRL1_PAGE3            BIT(3)
+#define IAR_DTM_CTRL1_PAGE2            BIT(2)
+#define IAR_DTM_CTRL1_PAGE1            BIT(1)
+#define IAR_DTM_CTRL1_PAGE0            BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV                BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN         BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2          BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1          BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0          BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK  (7)
+
+#endif /* _MCR20A_H */
index 5166575..a115f12 100644 (file)
@@ -74,6 +74,7 @@ struct ipvl_dev {
        DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
        netdev_features_t       sfeatures;
        u32                     msg_enable;
+       spinlock_t              addrs_lock;
 };
 
 struct ipvl_addr {
index c1f008f..1a8132e 100644 (file)
@@ -35,6 +35,7 @@ void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
 }
 EXPORT_SYMBOL_GPL(ipvlan_count_rx);
 
+#if IS_ENABLED(CONFIG_IPV6)
 static u8 ipvlan_get_v6_hash(const void *iaddr)
 {
        const struct in6_addr *ip6_addr = iaddr;
@@ -42,6 +43,12 @@ static u8 ipvlan_get_v6_hash(const void *iaddr)
        return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
               IPVLAN_HASH_MASK;
 }
+#else
+static u8 ipvlan_get_v6_hash(const void *iaddr)
+{
+       return 0;
+}
+#endif
 
 static u8 ipvlan_get_v4_hash(const void *iaddr)
 {
@@ -51,6 +58,23 @@ static u8 ipvlan_get_v4_hash(const void *iaddr)
               IPVLAN_HASH_MASK;
 }
 
+static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
+{
+       if (!is_v6 && addr->atype == IPVL_IPV4) {
+               struct in_addr *i4addr = (struct in_addr *)iaddr;
+
+               return addr->ip4addr.s_addr == i4addr->s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+       } else if (is_v6 && addr->atype == IPVL_IPV6) {
+               struct in6_addr *i6addr = (struct in6_addr *)iaddr;
+
+               return ipv6_addr_equal(&addr->ip6addr, i6addr);
+#endif
+       }
+
+       return false;
+}
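+
+/* addr_equal() consolidates the IPv4/IPv6 comparisons that were previously
+ * open-coded in ipvlan_ht_addr_lookup() and ipvlan_find_addr(); the IPv6
+ * branch compiles away when CONFIG_IPV6 is disabled.
+ */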
+
 static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
                                               const void *iaddr, bool is_v6)
 {
@@ -59,15 +83,9 @@ static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
 
        hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
               ipvlan_get_v4_hash(iaddr);
-       hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
-               if (is_v6 && addr->atype == IPVL_IPV6 &&
-                   ipv6_addr_equal(&addr->ip6addr, iaddr))
-                       return addr;
-               else if (!is_v6 && addr->atype == IPVL_IPV4 &&
-                        addr->ip4addr.s_addr ==
-                               ((struct in_addr *)iaddr)->s_addr)
+       hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
+               if (addr_equal(is_v6, addr, iaddr))
                        return addr;
-       }
        return NULL;
 }
 
@@ -91,29 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
                                   const void *iaddr, bool is_v6)
 {
-       struct ipvl_addr *addr;
+       struct ipvl_addr *addr, *ret = NULL;
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode) {
-               if ((is_v6 && addr->atype == IPVL_IPV6 &&
-                   ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
-                   (!is_v6 && addr->atype == IPVL_IPV4 &&
-                   addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
-                       return addr;
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+               if (addr_equal(is_v6, addr, iaddr)) {
+                       ret = addr;
+                       break;
+               }
        }
-       return NULL;
+       rcu_read_unlock();
+       return ret;
 }
 
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
 {
        struct ipvl_dev *ipvlan;
+       bool ret = false;
 
-       ASSERT_RTNL();
-
-       list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-               if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
-                       return true;
+       rcu_read_lock();
+       list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+               if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+                       ret = true;
+                       break;
+               }
        }
-       return false;
+       rcu_read_unlock();
+       return ret;
 }
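+
+/* ipvlan_find_addr() and ipvlan_addr_busy() walk the address lists under
+ * RCU rather than asserting RTNL, so they can also be called from the
+ * inet6addr validator notifier in softirq context.
+ */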
 
 static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -150,6 +172,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
                lyr3h = ip4h;
                break;
        }
+#if IS_ENABLED(CONFIG_IPV6)
        case htons(ETH_P_IPV6): {
                struct ipv6hdr *ip6h;
 
@@ -188,6 +211,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
                }
                break;
        }
+#endif
        default:
                return NULL;
        }
@@ -337,14 +361,18 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
 {
        struct ipvl_addr *addr = NULL;
 
-       if (addr_type == IPVL_IPV6) {
+       switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+       case IPVL_IPV6: {
                struct ipv6hdr *ip6h;
                struct in6_addr *i6addr;
 
                ip6h = (struct ipv6hdr *)lyr3h;
                i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
                addr = ipvlan_ht_addr_lookup(port, i6addr, true);
-       } else if (addr_type == IPVL_ICMPV6) {
+               break;
+       }
+       case IPVL_ICMPV6: {
                struct nd_msg *ndmh;
                struct in6_addr *i6addr;
 
@@ -356,14 +384,19 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
                        i6addr = &ndmh->target;
                        addr = ipvlan_ht_addr_lookup(port, i6addr, true);
                }
-       } else if (addr_type == IPVL_IPV4) {
+               break;
+       }
+#endif
+       case IPVL_IPV4: {
                struct iphdr *ip4h;
                __be32 *i4addr;
 
                ip4h = (struct iphdr *)lyr3h;
                i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
                addr = ipvlan_ht_addr_lookup(port, i4addr, false);
-       } else if (addr_type == IPVL_ARP) {
+               break;
+       }
+       case IPVL_ARP: {
                struct arphdr *arph;
                unsigned char *arp_ptr;
                __be32 dip;
@@ -377,6 +410,8 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
 
                memcpy(&dip, arp_ptr, 4);
                addr = ipvlan_ht_addr_lookup(port, &dip, false);
+               break;
+       }
        }
 
        return addr;
@@ -420,6 +455,7 @@ out:
        return ret;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 {
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -456,6 +492,12 @@ err:
 out:
        return ret;
 }
+#else
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+       return NET_XMIT_DROP;
+}
+#endif
 
 static int ipvlan_process_outbound(struct sk_buff *skb)
 {
@@ -464,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 
        /* In this mode we dont care about multicast and broadcast traffic */
        if (is_multicast_ether_addr(ethh->h_dest)) {
-               pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
-                                   ntohs(skb->protocol));
+               pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+                                    ntohs(skb->protocol));
                kfree_skb(skb);
                goto out;
        }
@@ -759,6 +801,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
                        goto out;
                break;
        }
+#if IS_ENABLED(CONFIG_IPV6)
        case AF_INET6:
        {
                struct dst_entry *dst;
@@ -774,10 +817,12 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
                };
 
                skb_dst_drop(skb);
-               dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+               dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+                                            skb, flags);
                skb_dst_set(skb, dst);
                break;
        }
+#endif
        default:
                break;
        }
index 2469df1..4cbe9e2 100644 (file)
@@ -22,12 +22,14 @@ static const struct nf_hook_ops ipvl_nfops[] = {
                .hooknum  = NF_INET_LOCAL_IN,
                .priority = INT_MAX,
        },
+#if IS_ENABLED(CONFIG_IPV6)
        {
                .hook     = ipvlan_nf_input,
                .pf       = NFPROTO_IPV6,
                .hooknum  = NF_INET_LOCAL_IN,
                .priority = INT_MAX,
        },
+#endif
 };
 
 static const struct l3mdev_ops ipvl_l3mdev_ops = {
@@ -176,7 +178,7 @@ static int ipvlan_init(struct net_device *dev)
        dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
                     (phy_dev->state & IPVLAN_STATE_MASK);
        dev->features = phy_dev->features & IPVLAN_FEATURES;
-       dev->features |= NETIF_F_LLTX;
+       dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED;
        dev->gso_max_size = phy_dev->gso_max_size;
        dev->gso_max_segs = phy_dev->gso_max_segs;
        dev->hard_header_len = phy_dev->hard_header_len;
@@ -225,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
        else
                dev->flags &= ~IFF_NOARP;
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
                ipvlan_ht_addr_add(ipvlan, addr);
+       rcu_read_unlock();
 
        return dev_uc_add(phy_dev, phy_dev->dev_addr);
 }
@@ -242,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
 
        dev_uc_del(phy_dev, phy_dev->dev_addr);
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
                ipvlan_ht_addr_del(addr);
+       rcu_read_unlock();
 
        return 0;
 }
@@ -586,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
        ipvlan->sfeatures = IPVLAN_FEATURES;
        ipvlan_adjust_mtu(ipvlan, phy_dev);
        INIT_LIST_HEAD(&ipvlan->addrs);
+       spin_lock_init(&ipvlan->addrs_lock);
 
        /* TODO Probably put random address here to be presented to the
         * world but keep using the physical-dev address for the outgoing
@@ -663,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
        struct ipvl_dev *ipvlan = netdev_priv(dev);
        struct ipvl_addr *addr, *next;
 
+       spin_lock_bh(&ipvlan->addrs_lock);
        list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
                ipvlan_ht_addr_del(addr);
-               list_del(&addr->anode);
+               list_del_rcu(&addr->anode);
                kfree_rcu(addr, rcu);
        }
+       spin_unlock_bh(&ipvlan->addrs_lock);
 
        ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
        list_del_rcu(&ipvlan->pnode);
@@ -758,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
                if (dev->reg_state != NETREG_UNREGISTERING)
                        break;
 
-               list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
-                                        pnode)
+               list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
                        ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
                                                            &lst_kill);
                unregister_netdevice_many(&lst_kill);
@@ -791,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
        return NOTIFY_DONE;
 }
 
+/* the caller must hold addrs_lock */
 static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
        struct ipvl_addr *addr;
@@ -800,14 +809,17 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
                return -ENOMEM;
 
        addr->master = ipvlan;
-       if (is_v6) {
-               memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
-               addr->atype = IPVL_IPV6;
-       } else {
+       if (!is_v6) {
                memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
                addr->atype = IPVL_IPV4;
+#if IS_ENABLED(CONFIG_IPV6)
+       } else {
+               memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
+               addr->atype = IPVL_IPV6;
+#endif
        }
-       list_add_tail(&addr->anode, &ipvlan->addrs);
+
+       list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
 
        /* If the interface is not up, the address will be added to the hash
         * list by ipvlan_open.
@@ -822,32 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
        struct ipvl_addr *addr;
 
+       spin_lock_bh(&ipvlan->addrs_lock);
        addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
-       if (!addr)
+       if (!addr) {
+               spin_unlock_bh(&ipvlan->addrs_lock);
                return;
+       }
 
        ipvlan_ht_addr_del(addr);
-       list_del(&addr->anode);
+       list_del_rcu(&addr->anode);
+       spin_unlock_bh(&ipvlan->addrs_lock);
        kfree_rcu(addr, rcu);
-
-       return;
-}
-
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
-       if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
-               netif_err(ipvlan, ifup, ipvlan->dev,
-                         "Failed to add IPv6=%pI6c addr for %s intf\n",
-                         ip6_addr, ipvlan->dev->name);
-               return -EINVAL;
-       }
-
-       return ipvlan_add_addr(ipvlan, ip6_addr, true);
-}
-
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
-       return ipvlan_del_addr(ipvlan, ip6_addr, true);
 }
 
 static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -863,6 +860,27 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
        return true;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+       int ret = -EINVAL;
+
+       spin_lock_bh(&ipvlan->addrs_lock);
+       if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
+               netif_err(ipvlan, ifup, ipvlan->dev,
+                         "Failed to add IPv6=%pI6c addr for %s intf\n",
+                         ip6_addr, ipvlan->dev->name);
+       else
+               ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+       spin_unlock_bh(&ipvlan->addrs_lock);
+       return ret;
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+       return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
 static int ipvlan_addr6_event(struct notifier_block *unused,
                              unsigned long event, void *ptr)
 {
@@ -894,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
        struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
        struct ipvl_dev *ipvlan = netdev_priv(dev);
 
-       /* FIXME IPv6 autoconf calls us from bh without RTNL */
-       if (in_softirq())
-               return NOTIFY_DONE;
-
        if (!ipvlan_is_valid_dev(dev))
                return NOTIFY_DONE;
 
@@ -913,17 +927,21 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 
        return NOTIFY_OK;
 }
+#endif
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-       if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+       int ret = -EINVAL;
+
+       spin_lock_bh(&ipvlan->addrs_lock);
+       if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
                netif_err(ipvlan, ifup, ipvlan->dev,
                          "Failed to add IPv4=%pI4 on %s intf.\n",
                          ip4_addr, ipvlan->dev->name);
-               return -EINVAL;
-       }
-
-       return ipvlan_add_addr(ipvlan, ip4_addr, false);
+       else
+               ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+       spin_unlock_bh(&ipvlan->addrs_lock);
+       return ret;
 }
 
 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
@@ -993,6 +1011,7 @@ static struct notifier_block ipvlan_notifier_block __read_mostly = {
        .notifier_call = ipvlan_device_event,
 };
 
+#if IS_ENABLED(CONFIG_IPV6)
 static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
        .notifier_call = ipvlan_addr6_event,
 };
@@ -1000,6 +1019,7 @@ static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
 static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
        .notifier_call = ipvlan_addr6_validator_event,
 };
+#endif
 
 static void ipvlan_ns_exit(struct net *net)
 {
@@ -1016,6 +1036,7 @@ static struct pernet_operations ipvlan_net_ops = {
        .id = &ipvlan_netid,
        .size = sizeof(struct ipvlan_netns),
        .exit = ipvlan_ns_exit,
+       .async = true,
 };
 
 static int __init ipvlan_init_module(void)
@@ -1024,9 +1045,11 @@ static int __init ipvlan_init_module(void)
 
        ipvlan_init_secret();
        register_netdevice_notifier(&ipvlan_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
        register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
        register_inet6addr_validator_notifier(
            &ipvlan_addr6_vtor_notifier_block);
+#endif
        register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
        register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
 
@@ -1045,9 +1068,11 @@ error:
        unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
        unregister_inetaddr_validator_notifier(
            &ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
        unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
        unregister_inet6addr_validator_notifier(
            &ipvlan_addr6_vtor_notifier_block);
+#endif
        unregister_netdevice_notifier(&ipvlan_notifier_block);
        return err;
 }
@@ -1060,9 +1085,11 @@ static void __exit ipvlan_cleanup_module(void)
        unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
        unregister_inetaddr_validator_notifier(
            &ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
        unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
        unregister_inet6addr_validator_notifier(
            &ipvlan_addr6_vtor_notifier_block);
+#endif
 }
 
 module_init(ipvlan_init_module);
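
The ipvlan hunks above serialize address updates with a per-device addrs_lock spinlock and convert the address list to RCU, so lookups on the packet path can run without taking the lock. A rough sketch of the resulting writer/reader split; the reader loop is illustrative and assumes the driver's existing ipvlan->addrs list and anode linkage:

/* Writer: mutate the list under the spinlock, free after a grace period. */
spin_lock_bh(&ipvlan->addrs_lock);
list_del_rcu(&addr->anode);
spin_unlock_bh(&ipvlan->addrs_lock);
kfree_rcu(addr, rcu);

/* Reader: traverse locklessly; entries stay valid until the grace period. */
rcu_read_lock();
list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
        /* compare addr against the lookup key here */
}
rcu_read_unlock();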
index 3061249..b97a907 100644 (file)
@@ -230,4 +230,5 @@ out:
 /* Registered in net/core/dev.c */
 struct pernet_operations __net_initdata loopback_net_ops = {
        .init = loopback_net_init,
+       .async = true,
 };
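
The .async = true lines added to pernet_operations here and elsewhere in this series (ipvlan, ppp, pppoe) opt those ops into the asynchronous net namespace create/destroy path, declaring that their init/exit methods do not rely on the global net_mutex for serialization. A minimal sketch of the shape of such an ops structure; the foo_* names are hypothetical:

static int __net_init foo_net_init(struct net *net)
{
        /* per-netns setup that touches only this netns */
        return 0;
}

static void __net_exit foo_net_exit(struct net *net)
{
        /* per-netns teardown, again self-contained */
}

static struct pernet_operations foo_net_ops = {
        .init  = foo_net_init,
        .exit  = foo_net_exit,
        .async = true,  /* may run without net_mutex serialization */
};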
index a0f2be8..8fc02d9 100644 (file)
@@ -1451,7 +1451,7 @@ destroy_macvlan_port:
        /* the macvlan port may be freed by macvlan_uninit when registration
         * fails, so we destroy the macvlan port only when it's valid.
         */
-       if (create && macvlan_port_get_rtnl(dev))
+       if (create && macvlan_port_get_rtnl(lowerdev))
                macvlan_port_destroy(port->dev);
        return err;
 }
index e8ae50e..319edc9 100644 (file)
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
        return 0;
 }
 
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
-       int reg;
-
-       reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-       return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
 static int aquantia_config_intr(struct phy_device *phydev)
 {
        int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQ1202",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQ2104",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR105",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR106",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR107",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR405",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
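
All six aquantia entries switch from a driver-private aneg_done to the generic Clause 45 helper. The removed function read MDIO_STAT1 in the autonegotiation MMD and tested BMSR_ANEGCOMPLETE; genphy_c45_aneg_done performs the equivalent test (MDIO_AN_STAT1_COMPLETE is the same bit value), roughly:

/* Sketch of the generic helper's logic, not its verbatim body. */
static int c45_aneg_done_sketch(struct phy_device *phydev)
{
        int val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);

        return val < 0 ? val : !!(val & MDIO_AN_STAT1_COMPLETE);
}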
index 421feb8..29b1c88 100644 (file)
@@ -565,7 +565,7 @@ static int bcm7xxx_28nm_set_tunable(struct phy_device *phydev,
        if (ret)
                return ret;
 
-       /* Disable EEE advertisment since this prevents the PHY
+       /* Disable EEE advertisement since this prevents the PHY
         * from successfully linking up; trigger an auto-negotiation restart
         * to let the MAC decide what to do.
         */
index 9442db2..8022cd3 100644 (file)
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
                            MII_ADDR_C45 | regnum);
 }
 
-static int cortina_config_aneg(struct phy_device *phydev)
-{
-       phydev->supported = SUPPORTED_10000baseT_Full;
-       phydev->advertising = SUPPORTED_10000baseT_Full;
-
-       return 0;
-}
-
 static int cortina_read_status(struct phy_device *phydev)
 {
        int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ err:
        return ret;
 }
 
-static int cortina_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int cortina_probe(struct phy_device *phydev)
 {
        u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
        .phy_id         = PHY_ID_CS4340,
        .phy_id_mask    = 0xffffffff,
        .name           = "Cortina CS4340",
-       .config_aneg    = cortina_config_aneg,
+       .config_init    = gen10g_config_init,
+       .config_aneg    = gen10g_config_aneg,
        .read_status    = cortina_read_status,
-       .soft_reset     = cortina_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .probe          = cortina_probe,
 },
 };
index ab58224..b393577 100644 (file)
@@ -75,6 +75,8 @@
 
 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX    0x0
 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN    0x1f
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK      (0x1f << 8)
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT     8
 
 /* CFG4 bits */
 #define DP83867_CFG4_PORT_MIRROR_EN              BIT(0)
@@ -92,6 +94,7 @@ struct dp83867_private {
        int io_impedance;
        int port_mirroring;
        bool rxctrl_strap_quirk;
+       int clk_output_sel;
 };
 
 static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -160,6 +163,14 @@ static int dp83867_of_init(struct phy_device *phydev)
        dp83867->io_impedance = -EINVAL;
 
        /* Optional configuration */
+       ret = of_property_read_u32(of_node, "ti,clk-output-sel",
+                                  &dp83867->clk_output_sel);
+       if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
+               /* Keep the default value if ti,clk-output-sel is not set
+                * or too high
+                */
+               dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
+
        if (of_property_read_bool(of_node, "ti,max-output-impedance"))
                dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
        else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
@@ -295,6 +306,14 @@ static int dp83867_config_init(struct phy_device *phydev)
        if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP)
                dp83867_config_port_mirroring(phydev);
 
+       /* Clock output selection if muxing property is set */
+       if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) {
+               val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG);
+               val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
+               val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
+               phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val);
+       }
+
        return 0;
 }
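
The clock-output selection is a plain masked read-modify-write of the 5-bit CLK_O_SEL field at bits 12:8 of IO_MUX_CFG. The bit manipulation, demonstrated as standalone C (the register value in main() is made up):

#include <stdint.h>
#include <stdio.h>

#define CLK_O_SEL_SHIFT 8
#define CLK_O_SEL_MASK  (0x1fu << CLK_O_SEL_SHIFT)

/* Clear the field, then install the new selector; the driver does the
 * same via phy_read_mmd()/phy_write_mmd() on DP83867_IO_MUX_CFG. */
static uint16_t set_clk_o_sel(uint16_t reg, uint16_t sel)
{
        reg &= ~CLK_O_SEL_MASK;
        reg |= (uint16_t)(sel << CLK_O_SEL_SHIFT) & CLK_O_SEL_MASK;
        return reg;
}

int main(void)
{
        printf("0x%04x\n", set_clk_o_sel(0x0b32, 0x0c)); /* prints 0x0c32 */
        return 0;
}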
 
index 22d9bc9..98fd6b7 100644 (file)
@@ -860,7 +860,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
                        return err;
 
                /* There appears to be a bug in the 88e1512 when used in
-                * SGMII to copper mode, where the AN advertisment register
+                * SGMII to copper mode, where the AN advertisement register
                 * clears the pause bits each time a negotiation occurs.
                 * This means we can never be truly sure what was advertised,
                 * so disable Pause support.
index 8a0bd98..9564916 100644 (file)
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
        return 0;
 }
 
-/*
- * Resetting the MV88X3310 causes it to become non-responsive.  Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int mv3310_config_init(struct phy_device *phydev)
 {
        __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -317,7 +308,7 @@ static int mv3310_read_status(struct phy_device *phydev)
                if (val < 0)
                        return val;
 
-               /* Read the link partner's 1G advertisment */
+               /* Read the link partner's 1G advertisement */
                val = phy_read_mmd(phydev, MDIO_MMD_AN, MV_AN_STAT1000);
                if (val < 0)
                        return val;
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
                                  SUPPORTED_10000baseT_Full |
                                  SUPPORTED_Backplane,
                .probe          = mv3310_probe,
-               .soft_reset     = mv3310_soft_reset,
+               .soft_reset     = gen10g_no_soft_reset,
                .config_init    = mv3310_config_init,
                .config_aneg    = mv3310_config_aneg,
                .aneg_done      = mv3310_aneg_done,
index a457685..e122554 100644 (file)
@@ -163,11 +163,11 @@ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
 EXPORT_SYMBOL_GPL(genphy_c45_read_link);
 
 /**
- * genphy_c45_read_lpa - read the link partner advertisment and pause
+ * genphy_c45_read_lpa - read the link partner advertisement and pause
  * @phydev: target phy_device struct
  *
  * Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,
- * filling in the link partner advertisment, pause and asym_pause members
+ * filling in the link partner advertisement, pause and asym_pause members
  * in @phydev.  This assumes that the auto-negotiation MMD is present, and
  * the backplane bit (7.48.0) is clear.  Clause 45 PHY drivers are expected
  * to fill in the remainder of the link partner advert from vendor registers.
@@ -176,7 +176,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
 {
        int val;
 
-       /* Read the link partner's base page advertisment */
+       /* Read the link partner's base page advertisement */
        val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
        if (val < 0)
                return val;
@@ -185,7 +185,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
        phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0;
        phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0;
 
-       /* Read the link partner's 10G advertisment */
+       /* Read the link partner's 10G advertisement */
        val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
        if (val < 0)
                return val;
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
 
 /* The gen10g_* functions are the old Clause 45 stubs */
 
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
 {
        u32 mmd_mask = phydev->c45_ids.devices_in_package;
        int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_read_status);
 
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
 {
        /* Do nothing for now */
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
 
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
 {
        /* Temporarily just say we support everything */
        phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_init);
 
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_suspend);
 
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_resume);
 
 struct phy_driver genphy_10g_driver = {
        .phy_id         = 0xffffffff,
        .phy_id_mask    = 0xffffffff,
        .name           = "Generic 10G PHY",
-       .soft_reset     = gen10g_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .config_init    = gen10g_config_init,
        .features       = 0,
        .config_aneg    = gen10g_config_aneg,
index 4083f00..c7da4cb 100644 (file)
@@ -190,10 +190,10 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
 }
 
 /**
- * phy_resolve_aneg_linkmode - resolve the advertisments into phy settings
+ * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
  * @phydev: The phy_device struct
  *
- * Resolve our and the link partner advertisments into their corresponding
+ * Resolve our and the link partner advertisements into their corresponding
  * speed and duplex. If full duplex was negotiated, extract the pause mode
  * from the link partner mask.
  */
index b13eed2..d39ae77 100644 (file)
@@ -1382,7 +1382,7 @@ int genphy_setup_forced(struct phy_device *phydev)
                ctl |= BMCR_FULLDPLX;
 
        return phy_modify(phydev, MII_BMCR,
-                         BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN, ctl);
+                         ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl);
 }
 EXPORT_SYMBOL(genphy_setup_forced);
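
The mask inversion above matters because phy_modify() computes new = (old & ~mask) | set: the mask names the bits to clear. Passing BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN directly cleared only those three bits and left stale speed/duplex/aneg bits in place; ~(...) clears everything except the three bits being preserved. A standalone demonstration with invented register values:

#include <stdint.h>
#include <stdio.h>

#define BMCR_FULLDPLX  0x0100
#define BMCR_ISOLATE   0x0400
#define BMCR_PDOWN     0x0800
#define BMCR_ANENABLE  0x1000
#define BMCR_SPEED100  0x2000
#define BMCR_LOOPBACK  0x4000

/* Same contract as phy_modify(): mask = bits to clear, set = bits to set. */
static uint16_t modify(uint16_t old, uint16_t mask, uint16_t set)
{
        return (old & ~mask) | set;
}

int main(void)
{
        uint16_t old  = BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX;
        uint16_t keep = BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN;
        uint16_t ctl  = BMCR_SPEED100 | BMCR_FULLDPLX; /* forced 100/full */

        printf("buggy: 0x%04x\n", modify(old, keep, ctl));            /* 0x3100: ANENABLE leaks through */
        printf("fixed: 0x%04x\n", modify(old, (uint16_t)~keep, ctl)); /* 0x2100: only forced-mode bits */
        return 0;
}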
 
index 6ac8b29..51a011a 100644 (file)
@@ -364,7 +364,7 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat
 }
 
 /* Flow control is resolved according to our and the link partners
- * advertisments using the following drawn from the 802.3 specs:
+ * advertisements using the following drawn from the 802.3 specs:
  *  Local device  Link partner
  *  Pause AsymDir Pause AsymDir Result
  *    1     X       1     X     TX+RX
@@ -679,12 +679,11 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 
        mutex_lock(&phy->lock);
        mutex_lock(&pl->state_mutex);
-       pl->netdev->phydev = phy;
        pl->phydev = phy;
        linkmode_copy(pl->supported, supported);
        linkmode_copy(pl->link_config.advertising, config.advertising);
 
-       /* Restrict the phy advertisment according to the MAC support. */
+       /* Restrict the phy advertisement according to the MAC support. */
        ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
        phy->advertising = advertising;
        mutex_unlock(&pl->state_mutex);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
        if (phy) {
                mutex_lock(&phy->lock);
                mutex_lock(&pl->state_mutex);
-               pl->netdev->phydev = NULL;
                pl->phydev = NULL;
                mutex_unlock(&pl->state_mutex);
                mutex_unlock(&phy->lock);
@@ -889,7 +887,7 @@ void phylink_start(struct phylink *pl)
 
        /* Apply the link configuration to the MAC when starting. This allows
         * a fixed-link to start with the correct parameters, and also
-        * ensures that we set the appropriate advertisment for Serdes links.
+        * ensures that we set the appropriate advertisement for Serdes links.
         */
        phylink_resolve_flow(pl, &pl->link_config);
        phylink_mac_config(pl, &pl->link_config);
@@ -1076,7 +1074,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
 
        config = pl->link_config;
 
-       /* Mask out unsupported advertisments */
+       /* Mask out unsupported advertisements */
        linkmode_and(config.advertising, kset->link_modes.advertising,
                     pl->supported);
 
@@ -1121,7 +1119,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
        if (phylink_validate(pl, pl->supported, &config))
                return -EINVAL;
 
-       /* If autonegotiation is enabled, we must have an advertisment */
+       /* If autonegotiation is enabled, we must have an advertisement */
        if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
                return -EINVAL;
 
@@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream,
        bool changed;
        u8 port;
 
-       sfp_parse_support(pl->sfp_bus, id, support);
-       port = sfp_parse_port(pl->sfp_bus, id, support);
-       iface = sfp_parse_interface(pl->sfp_bus, id);
-
        ASSERT_RTNL();
 
-       switch (iface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-       case PHY_INTERFACE_MODE_2500BASEX:
-       case PHY_INTERFACE_MODE_10GKR:
-               break;
-       default:
-               return -EINVAL;
-       }
+       sfp_parse_support(pl->sfp_bus, id, support);
+       port = sfp_parse_port(pl->sfp_bus, id, support);
 
        memset(&config, 0, sizeof(config));
        linkmode_copy(config.advertising, support);
-       config.interface = iface;
+       config.interface = PHY_INTERFACE_MODE_NA;
        config.speed = SPEED_UNKNOWN;
        config.duplex = DUPLEX_UNKNOWN;
        config.pause = MLO_PAUSE_AN;
@@ -1610,6 +1597,22 @@ static int phylink_sfp_module_insert(void *upstream,
 
        /* Ignore errors if we're expecting a PHY to attach later */
        ret = phylink_validate(pl, support, &config);
+       if (ret) {
+               netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+                          __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+               return ret;
+       }
+
+       iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+       if (iface == PHY_INTERFACE_MODE_NA) {
+               netdev_err(pl->netdev,
+                          "selection of interface failed, advertisement %*pb\n",
+                          __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+               return -EINVAL;
+       }
+
+       config.interface = iface;
+       ret = phylink_validate(pl, support, &config);
        if (ret) {
                netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
                           phylink_an_mode_str(MLO_AN_INBAND),
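
The rework above replaces the hard-coded interface whitelist with a two-phase negotiation: validate the parsed support mask with no interface chosen, let the SFP layer pick an interface from whatever advertisement survives, then validate again with that choice. Condensed outline of the flow in the hunk (not literal code):

sfp_parse_support(pl->sfp_bus, id, support);
config.interface = PHY_INTERFACE_MODE_NA;
ret = phylink_validate(pl, support, &config);   /* pass 1: MAC capability */
if (ret)
        return ret;

iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
if (iface == PHY_INTERFACE_MODE_NA)
        return -EINVAL;                         /* no usable MAC-side mode */

config.interface = iface;
ret = phylink_validate(pl, support, &config);   /* pass 2: with chosen mode */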
index 8961209..3d4ff5d 100644 (file)
@@ -105,68 +105,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 }
 EXPORT_SYMBOL_GPL(sfp_parse_port);
 
-/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                   const struct sfp_eeprom_id *id)
-{
-       phy_interface_t iface;
-
-       /* Setting the serdes link mode is guesswork: there's no field in
-        * the EEPROM which indicates what mode should be used.
-        *
-        * If the module wants 64b66b, then it must be >= 10G.
-        *
-        * If it's a gigabit-only fiber module, it probably does not have
-        * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
-        * to SGMII mode (which is required to support non-gigabit speeds).
-        */
-       switch (id->base.encoding) {
-       case SFP_ENCODING_8472_64B66B:
-               iface = PHY_INTERFACE_MODE_10GKR;
-               break;
-
-       case SFP_ENCODING_8B10B:
-               if (!id->base.e1000_base_t &&
-                   !id->base.e100_base_lx &&
-                   !id->base.e100_base_fx)
-                       iface = PHY_INTERFACE_MODE_1000BASEX;
-               else
-                       iface = PHY_INTERFACE_MODE_SGMII;
-               break;
-
-       default:
-               if (id->base.e1000_base_cx) {
-                       iface = PHY_INTERFACE_MODE_1000BASEX;
-                       break;
-               }
-
-               iface = PHY_INTERFACE_MODE_NA;
-               dev_err(bus->sfp_dev,
-                       "SFP module encoding does not support 8b10b nor 64b66b\n");
-               break;
-       }
-
-       return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
 /**
  * sfp_parse_support() - Parse the eeprom id for supported link modes
  * @bus: a pointer to the &struct sfp_bus structure for the sfp module
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                       unsigned long *support)
 {
        unsigned int br_min, br_nom, br_max;
-
-       phylink_set(support, Autoneg);
-       phylink_set(support, Pause);
-       phylink_set(support, Asym_Pause);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
 
        /* Decode the bitrate information to MBd */
        br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 
        /* Set ethtool support from the compliance fields. */
        if (id->base.e10g_base_sr)
-               phylink_set(support, 10000baseSR_Full);
+               phylink_set(modes, 10000baseSR_Full);
        if (id->base.e10g_base_lr)
-               phylink_set(support, 10000baseLR_Full);
+               phylink_set(modes, 10000baseLR_Full);
        if (id->base.e10g_base_lrm)
-               phylink_set(support, 10000baseLRM_Full);
+               phylink_set(modes, 10000baseLRM_Full);
        if (id->base.e10g_base_er)
-               phylink_set(support, 10000baseER_Full);
+               phylink_set(modes, 10000baseER_Full);
        if (id->base.e1000_base_sx ||
            id->base.e1000_base_lx ||
            id->base.e1000_base_cx)
-               phylink_set(support, 1000baseX_Full);
+               phylink_set(modes, 1000baseX_Full);
        if (id->base.e1000_base_t) {
-               phylink_set(support, 1000baseT_Half);
-               phylink_set(support, 1000baseT_Full);
+               phylink_set(modes, 1000baseT_Half);
+               phylink_set(modes, 1000baseT_Full);
        }
 
        /* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
                /* This may look odd, but some manufacturers use 12000MBd */
                if (br_min <= 12000 && br_max >= 10300)
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
                if (br_min <= 3200 && br_max >= 3100)
-                       phylink_set(support, 2500baseX_Full);
+                       phylink_set(modes, 2500baseX_Full);
                if (br_min <= 1300 && br_max >= 1200)
-                       phylink_set(support, 1000baseX_Full);
+                       phylink_set(modes, 1000baseX_Full);
        }
        if (id->base.sfp_ct_passive) {
                if (id->base.passive.sff8431_app_e)
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
        }
        if (id->base.sfp_ct_active) {
                if (id->base.active.sff8431_app_e ||
                    id->base.active.sff8431_lim) {
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
                }
        }
 
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        case 0x00: /* Unspecified */
                break;
        case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
-               phylink_set(support, 100000baseSR4_Full);
-               phylink_set(support, 25000baseSR_Full);
+               phylink_set(modes, 100000baseSR4_Full);
+               phylink_set(modes, 25000baseSR_Full);
                break;
        case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
        case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
-               phylink_set(support, 100000baseLR4_ER4_Full);
+               phylink_set(modes, 100000baseLR4_ER4_Full);
                break;
        case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
        case 0x0c: /* 25Gbase-CR CA-S */
        case 0x0d: /* 25Gbase-CR CA-N */
-               phylink_set(support, 100000baseCR4_Full);
-               phylink_set(support, 25000baseCR_Full);
+               phylink_set(modes, 100000baseCR4_Full);
+               phylink_set(modes, 25000baseCR_Full);
                break;
        default:
                dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
            id->base.fc_speed_200 ||
            id->base.fc_speed_400) {
                if (id->base.br_nominal >= 31)
-                       phylink_set(support, 2500baseX_Full);
+                       phylink_set(modes, 2500baseX_Full);
                if (id->base.br_nominal >= 12)
-                       phylink_set(support, 1000baseX_Full);
+                       phylink_set(modes, 1000baseX_Full);
        }
+
+       /* If we haven't discovered any modes that this module supports, try
+        * the encoding and bitrate to determine supported modes. Some BiDi
+        * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+        * the differing wavelengths, so do not set any transceiver bits.
+        */
+       if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+               /* If the encoding and bit rate allows 1000baseX */
+               if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+                   br_min <= 1300 && br_max >= 1200)
+                       phylink_set(modes, 1000baseX_Full);
+       }
+
+       bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+       phylink_set(support, Autoneg);
+       phylink_set(support, Pause);
+       phylink_set(support, Asym_Pause);
 }
 EXPORT_SYMBOL_GPL(sfp_parse_support);
 
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                    const struct sfp_eeprom_id *id,
+                                    unsigned long *link_modes)
+{
+       if (phylink_test(link_modes, 10000baseCR_Full) ||
+           phylink_test(link_modes, 10000baseSR_Full) ||
+           phylink_test(link_modes, 10000baseLR_Full) ||
+           phylink_test(link_modes, 10000baseLRM_Full) ||
+           phylink_test(link_modes, 10000baseER_Full))
+               return PHY_INTERFACE_MODE_10GKR;
+
+       if (phylink_test(link_modes, 2500baseX_Full))
+               return PHY_INTERFACE_MODE_2500BASEX;
+
+       if (id->base.e1000_base_t ||
+           id->base.e100_base_lx ||
+           id->base.e100_base_fx)
+               return PHY_INTERFACE_MODE_SGMII;
+
+       if (phylink_test(link_modes, 1000baseX_Full))
+               return PHY_INTERFACE_MODE_1000BASEX;
+
+       dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+       return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
 static LIST_HEAD(sfp_buses);
 static DEFINE_MUTEX(sfp_mutex);
 
index 6c7d928..83bf495 100644 (file)
@@ -42,6 +42,7 @@ enum {
 
        SFP_MOD_EMPTY = 0,
        SFP_MOD_PROBE,
+       SFP_MOD_HPOWER,
        SFP_MOD_PRESENT,
        SFP_MOD_ERROR,
 
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
  * access the I2C EEPROM.  However, Avago modules require 300ms.
  */
 #define T_PROBE_INIT   msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
 #define T_PROBE_RETRY  msecs_to_jiffies(100)
 
 /* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
        struct sfp_bus *sfp_bus;
        struct phy_device *mod_phy;
        const struct sff_data *type;
+       u32 max_power_mW;
 
        unsigned int (*get_state)(struct sfp *);
        void (*set_state)(struct sfp *, unsigned int);
        int (*read)(struct sfp *, bool, u8, void *, size_t);
+       int (*write)(struct sfp *, bool, u8, void *, size_t);
 
        struct gpio_desc *gpio[GPIO_MAX];
 
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
        }
 }
 
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
-                        void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+                       size_t len)
 {
        struct i2c_msg msgs[2];
+       u8 bus_addr = a2 ? 0x51 : 0x50;
        int ret;
 
        msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
        msgs[1].len = len;
        msgs[1].buf = buf;
 
-       ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+       ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
        if (ret < 0)
                return ret;
 
        return ret == ARRAY_SIZE(msgs) ? len : 0;
 }
 
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
-                       size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+       size_t len)
 {
-       return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+       struct i2c_msg msgs[1];
+       u8 bus_addr = a2 ? 0x51 : 0x50;
+       int ret;
+
+       msgs[0].addr = bus_addr;
+       msgs[0].flags = 0;
+       msgs[0].len = 1 + len;
+       msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+       if (!msgs[0].buf)
+               return -ENOMEM;
+
+       msgs[0].buf[0] = dev_addr;
+       memcpy(&msgs[0].buf[1], buf, len);
+
+       ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+       kfree(msgs[0].buf);
+
+       if (ret < 0)
+               return ret;
+
+       return ret == ARRAY_SIZE(msgs) ? len : 0;
 }
 
 static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
 
        sfp->i2c = i2c;
        sfp->read = sfp_i2c_read;
+       sfp->write = sfp_i2c_write;
 
        i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
        if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
        return sfp->read(sfp, a2, addr, buf, len);
 }
 
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+       return sfp->write(sfp, a2, addr, buf, len);
+}
+
 static unsigned int sfp_check(void *buf, size_t len)
 {
        u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
                sfp_sm_probe_phy(sfp);
 }
 
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+       u32 power;
+       u8 val;
+       int err;
+
+       power = 1000;
+       if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+               power = 1500;
+       if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+               power = 2000;
+
+       if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+           (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+           SFP_DIAGMON_DDM) {
+               /* The module appears not to implement bus address 0xa2,
+                * or requires an address change sequence, so assume that
+                * the module powers up in the indicated power mode.
+                */
+               if (power > sfp->max_power_mW) {
+                       dev_err(sfp->dev,
+                               "Host does not support %u.%uW modules\n",
+                               power / 1000, (power / 100) % 10);
+                       return -EINVAL;
+               }
+               return 0;
+       }
+
+       if (power > sfp->max_power_mW) {
+               dev_warn(sfp->dev,
+                        "Host does not support %u.%uW modules, module left in power mode 1\n",
+                        power / 1000, (power / 100) % 10);
+               return 0;
+       }
+
+       if (power <= 1000)
+               return 0;
+
+       err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+       if (err != sizeof(val)) {
+               dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+               err = -EAGAIN;
+               goto err;
+       }
+
+       val |= BIT(0);
+
+       err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+       if (err != sizeof(val)) {
+               dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+               err = -EAGAIN;
+               goto err;
+       }
+
+       dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+                power / 1000, (power / 100) % 10);
+       return T_HPOWER_LEVEL;
+
+err:
+       return err;
+}
+
 static int sfp_sm_mod_probe(struct sfp *sfp)
 {
        /* SFP module inserted - read I2C data */
        struct sfp_eeprom_id id;
        u8 check;
-       int err;
+       int ret;
 
-       err = sfp_read(sfp, false, 0, &id, sizeof(id));
-       if (err < 0) {
-               dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+       ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+       if (ret < 0) {
+               dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
                return -EAGAIN;
        }
 
-       if (err != sizeof(id)) {
-               dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+       if (ret != sizeof(id)) {
+               dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
                return -EAGAIN;
        }
 
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
                dev_warn(sfp->dev,
                         "module address swap to access page 0xA2 is not supported.\n");
 
-       return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+       ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+       if (ret < 0)
+               return ret;
+
+       return sfp_sm_mod_hpower(sfp);
 }
 
 static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
                if (event == SFP_E_REMOVE) {
                        sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
                } else if (event == SFP_E_TIMEOUT) {
-                       int err = sfp_sm_mod_probe(sfp);
+                       int val = sfp_sm_mod_probe(sfp);
 
-                       if (err == 0)
+                       if (val == 0)
                                sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
-                       else if (err == -EAGAIN)
-                               sfp_sm_set_timer(sfp, T_PROBE_RETRY);
-                       else
+                       else if (val > 0)
+                               sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+                       else if (val != -EAGAIN)
                                sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+                       else
+                               sfp_sm_set_timer(sfp, T_PROBE_RETRY);
                }
                break;
 
+       case SFP_MOD_HPOWER:
+               if (event == SFP_E_TIMEOUT) {
+                       sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+                       break;
+               }
+               /* fallthrough */
        case SFP_MOD_PRESENT:
        case SFP_MOD_ERROR:
                if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
        if (!(sfp->gpio[GPIO_MODDEF0]))
                sfp->get_state = sff_gpio_get_state;
 
+       device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+                                &sfp->max_power_mW);
+       if (!sfp->max_power_mW)
+               sfp->max_power_mW = 1000;
+
+       dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+                sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
        sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
        if (!sfp->sfp_bus)
                return -ENOMEM;
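
SFF-8472 declares three power classes relevant here: 1.0 W by default, 1.5 W when SFP_OPTIONS_POWER_DECL is set, and 2.0 W when SFP_OPTIONS_HIGH_POWER_LEVEL is set; the driver tracks milliwatts and prints them with one decimal digit. The formatting arithmetic used by the messages above, standalone:

#include <stdio.h>

/* Print milliwatts in the X.YW form used by the driver's log messages. */
static void print_power(unsigned int mw)
{
        printf("%u.%uW\n", mw / 1000, (mw / 100) % 10);
}

int main(void)
{
        print_power(1000);      /* 1.0W: base class */
        print_power(1500);      /* 1.5W: power declaration bit */
        print_power(2000);      /* 2.0W: high power level bit */
        return 0;
}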
index fb2cef7..22f3bdd 100644 (file)
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
                                MDIO_PHYXS_LNSTAT_SYNC3 | \
                                MDIO_PHYXS_LNSTAT_ALIGN)
 
-static int teranetics_config_init(struct phy_device *phydev)
-{
-       phydev->supported = SUPPORTED_10000baseT_Full;
-       phydev->advertising = SUPPORTED_10000baseT_Full;
-
-       return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int teranetics_aneg_done(struct phy_device *phydev)
 {
-       int reg;
-
        /* Auto-negotiation state can only be checked when using the copper
         * port; if using the fiber port, just claim it's done.
         */
-       if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
-               reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-               return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-       }
+       if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+               return genphy_c45_aneg_done(phydev);
 
        return 1;
 }
 
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int teranetics_read_status(struct phy_device *phydev)
 {
        int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
        .phy_id         = PHY_ID_TN2020,
        .phy_id_mask    = 0xffffffff,
        .name           = "Teranetics TN2020",
-       .soft_reset     = teranetics_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .aneg_done      = teranetics_aneg_done,
-       .config_init    = teranetics_config_init,
-       .config_aneg    = teranetics_config_aneg,
+       .config_init    = gen10g_config_init,
+       .config_aneg    = gen10g_config_aneg,
        .read_status    = teranetics_read_status,
        .match_phy_device = teranetics_match_phy_device,
 },
index 255a5de..a393c1d 100644 (file)
@@ -971,6 +971,7 @@ static struct pernet_operations ppp_net_ops = {
        .exit = ppp_exit_net,
        .id   = &ppp_net_id,
        .size = sizeof(struct ppp_net),
+       .async = true,
 };
 
 static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
index 5aa59f4..c10e618 100644 (file)
@@ -714,7 +714,7 @@ err_put:
 }
 
 static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
-                 int *usockaddr_len, int peer)
+                 int peer)
 {
        int len = sizeof(struct sockaddr_pppox);
        struct sockaddr_pppox sp;
@@ -726,9 +726,7 @@ static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
 
        memcpy(uaddr, &sp, len);
 
-       *usockaddr_len = len;
-
-       return 0;
+       return len;
 }
 
 static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
@@ -1163,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = {
        .exit = pppoe_exit_net,
        .id   = &pppoe_net_id,
        .size = sizeof(struct pppoe_net),
+       .async = true,
 };
 
 static int __init pppoe_init(void)
index 6dde9a0..8249d46 100644 (file)
@@ -483,7 +483,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
 }
 
 static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
-       int *usockaddr_len, int peer)
+       int peer)
 {
        int len = sizeof(struct sockaddr_pppox);
        struct sockaddr_pppox sp;
@@ -496,9 +496,7 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
 
        memcpy(uaddr, &sp, len);
 
-       *usockaddr_len = len;
-
-       return 0;
+       return len;
 }
 
 static int pptp_release(struct socket *sock)
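
The pppoe and pptp getname() changes follow the tree-wide conversion of socket getname() methods from filling an int *usockaddr_len output parameter to returning the address length directly (negative on error). A hypothetical caller under the new convention:

/* Sketch only; error handling and locking elided. */
struct sockaddr_pppox sp;
int len;

len = sock->ops->getname(sock, (struct sockaddr *)&sp, 0 /* !peer */);
if (len < 0)
        return len;     /* errno-style failure */
/* the first len bytes of sp are now valid */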
index a468439..5dd781e 100644 (file)
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
 }
 #endif
 
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+                              struct netlink_ext_ack *extack)
 {
        struct netdev_lag_upper_info lag_upper_info;
        int err;
 
        lag_upper_info.tx_type = team->mode->lag_tx_type;
        err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
-                                          &lag_upper_info, NULL);
+                                          &lag_upper_info, extack);
        if (err)
                return err;
        port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
 static int team_dev_type_check_change(struct net_device *dev,
                                      struct net_device *port_dev);
 
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+                        struct netlink_ext_ack *extack)
 {
        struct net_device *dev = team->dev;
        struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
        int err;
 
        if (port_dev->flags & IFF_LOOPBACK) {
+               NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
                netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
                           portname);
                return -EINVAL;
        }
 
        if (team_port_exists(port_dev)) {
+               NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
                netdev_err(dev, "Device %s is already a port "
                                "of a team device\n", portname);
                return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 
        if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
            vlan_uses_dev(dev)) {
+               NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
                netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
                           portname);
                return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
                return err;
 
        if (port_dev->flags & IFF_UP) {
+               NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
                netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
                           portname);
                return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
                goto err_handler_register;
        }
 
-       err = team_upper_dev_link(team, port);
+       err = team_upper_dev_link(team, port, extack);
        if (err) {
                netdev_err(dev, "Device %s failed to set upper link\n",
                           portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
        int err;
 
        mutex_lock(&team->lock);
-       err = team_port_add(team, port_dev);
+       err = team_port_add(team, port_dev, extack);
        mutex_unlock(&team->lock);
 
        if (!err)
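
Threading extack through team_port_add() pairs each errno with a human-readable string that rtnetlink carries back to the requesting application, rather than leaving the reason only in the kernel log. The shape of the pattern, as a hedged sketch:

static int check_port_sketch(struct net_device *port_dev,
                             struct netlink_ext_ack *extack)
{
        if (port_dev->flags & IFF_LOOPBACK) {
                NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
                return -EINVAL; /* userspace sees errno plus the message */
        }
        return 0;
}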
index ca5e375..e0d6760 100644 (file)
@@ -166,6 +166,8 @@ struct tbnet_ring {
  * @connected_work: Worker that finalizes the ThunderboltIP connection
  *                 setup and enables DMA paths for high speed data
  *                 transfers
+ * @disconnect_work: Worker that handles tearing down the ThunderboltIP
+ *                  connection
  * @rx_hdr: Copy of the currently processed Rx frame. Used when a
  *         network packet consists of multiple Thunderbolt frames.
  *         In host byte order.
@@ -190,6 +192,7 @@ struct tbnet {
        int login_retries;
        struct delayed_work login_work;
        struct work_struct connected_work;
+       struct work_struct disconnect_work;
        struct thunderbolt_ip_frame_header rx_hdr;
        struct tbnet_ring rx_ring;
        atomic_t frame_id;
@@ -445,7 +448,7 @@ static int tbnet_handle_packet(const void *buf, size_t size, void *data)
        case TBIP_LOGOUT:
                ret = tbnet_logout_response(net, route, sequence, command_id);
                if (!ret)
-                       tbnet_tear_down(net, false);
+                       queue_work(system_long_wq, &net->disconnect_work);
                break;
 
        default:
@@ -659,6 +662,13 @@ static void tbnet_login_work(struct work_struct *work)
        }
 }
 
+static void tbnet_disconnect_work(struct work_struct *work)
+{
+       struct tbnet *net = container_of(work, typeof(*net), disconnect_work);
+
+       tbnet_tear_down(net, false);
+}
+
 static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
                              const struct thunderbolt_ip_frame_header *hdr)
 {
@@ -881,6 +891,7 @@ static int tbnet_stop(struct net_device *dev)
 
        napi_disable(&net->napi);
 
+       cancel_work_sync(&net->disconnect_work);
        tbnet_tear_down(net, true);
 
        tb_ring_free(net->rx_ring.ring);
@@ -1195,6 +1206,7 @@ static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id)
        net = netdev_priv(dev);
        INIT_DELAYED_WORK(&net->login_work, tbnet_login_work);
        INIT_WORK(&net->connected_work, tbnet_connected_work);
+       INIT_WORK(&net->disconnect_work, tbnet_disconnect_work);
        mutex_init(&net->connection_lock);
        atomic_set(&net->command_id, 0);
        atomic_set(&net->frame_id, 0);
@@ -1270,10 +1282,7 @@ static int __maybe_unused tbnet_suspend(struct device *dev)
        stop_login(net);
        if (netif_running(net->dev)) {
                netif_device_detach(net->dev);
-               tb_ring_stop(net->rx_ring.ring);
-               tb_ring_stop(net->tx_ring.ring);
-               tbnet_free_buffers(&net->rx_ring);
-               tbnet_free_buffers(&net->tx_ring);
+               tbnet_tear_down(net, true);
        }
 
        return 0;
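
Routing the LOGOUT-triggered teardown through system_long_wq keeps the heavy DMA-path shutdown out of the ThunderboltIP control-packet handler, and the cancel_work_sync() added to tbnet_stop() guarantees the worker has finished before the rings are freed. The life cycle, condensed from the hunks above:

INIT_WORK(&net->disconnect_work, tbnet_disconnect_work); /* at probe */
queue_work(system_long_wq, &net->disconnect_work);       /* LOGOUT handler */
cancel_work_sync(&net->disconnect_work);                 /* tbnet_stop(), before teardown */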
index 81e6cc9..d531954 100644 (file)
@@ -78,6 +78,7 @@
 #include <linux/mutex.h>
 
 #include <linux/uaccess.h>
+#include <linux/proc_fs.h>
 
 /* Uncomment to enable debugging */
 /* #define TUN_DEBUG 1 */
@@ -1489,27 +1490,23 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
        skb->truesize += skb->data_len;
 
        for (i = 1; i < it->nr_segs; i++) {
+               struct page_frag *pfrag = &current->task_frag;
                size_t fragsz = it->iov[i].iov_len;
-               unsigned long offset;
-               struct page *page;
-               void *data;
 
                if (fragsz == 0 || fragsz > PAGE_SIZE) {
                        err = -EINVAL;
                        goto free;
                }
 
-               local_bh_disable();
-               data = napi_alloc_frag(fragsz);
-               local_bh_enable();
-               if (!data) {
+               if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
                        err = -ENOMEM;
                        goto free;
                }
 
-               page = virt_to_head_page(data);
-               offset = data - page_address(page);
-               skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+               skb_fill_page_desc(skb, i - 1, pfrag->page,
+                                  pfrag->offset, fragsz);
+               page_ref_inc(pfrag->page);
+               pfrag->offset += fragsz;
        }
 
        return skb;
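
napi_alloc_frag() demands BH disabled, which is why the removed code bracketed it with local_bh_disable()/local_bh_enable(); this path runs in process context, so the per-task page_frag allocator fits naturally. The replacement pattern, annotated (a condensed restatement of the hunk):

struct page_frag *pfrag = &current->task_frag;

if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL))
        return -ENOMEM; /* simplified; the hunk jumps to its free label */
skb_fill_page_desc(skb, i - 1, pfrag->page, pfrag->offset, fragsz);
page_ref_inc(pfrag->page);      /* the skb takes its own page reference */
pfrag->offset += fragsz;        /* consume this chunk of the frag page */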
@@ -2290,11 +2287,67 @@ static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
        return -EINVAL;
 }
 
+static size_t tun_get_size(const struct net_device *dev)
+{
+       BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
+       BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
+
+       return nla_total_size(sizeof(uid_t)) + /* OWNER */
+              nla_total_size(sizeof(gid_t)) + /* GROUP */
+              nla_total_size(sizeof(u8)) + /* TYPE */
+              nla_total_size(sizeof(u8)) + /* PI */
+              nla_total_size(sizeof(u8)) + /* VNET_HDR */
+              nla_total_size(sizeof(u8)) + /* PERSIST */
+              nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
+              nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
+              nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
+              0;
+}
+
+static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+       struct tun_struct *tun = netdev_priv(dev);
+
+       if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
+               goto nla_put_failure;
+       if (uid_valid(tun->owner) &&
+           nla_put_u32(skb, IFLA_TUN_OWNER,
+                       from_kuid_munged(current_user_ns(), tun->owner)))
+               goto nla_put_failure;
+       if (gid_valid(tun->group) &&
+           nla_put_u32(skb, IFLA_TUN_GROUP,
+                       from_kgid_munged(current_user_ns(), tun->group)))
+               goto nla_put_failure;
+       if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
+               goto nla_put_failure;
+       if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
+               goto nla_put_failure;
+       if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
+               goto nla_put_failure;
+       if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
+                      !!(tun->flags & IFF_MULTI_QUEUE)))
+               goto nla_put_failure;
+       if (tun->flags & IFF_MULTI_QUEUE) {
+               if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
+                               tun->numdisabled))
+                       goto nla_put_failure;
+       }
+
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
+}
+
 static struct rtnl_link_ops tun_link_ops __read_mostly = {
        .kind           = DRV_NAME,
        .priv_size      = sizeof(struct tun_struct),
        .setup          = tun_setup,
        .validate       = tun_validate,
+       .get_size       = tun_get_size,
+       .fill_info      = tun_fill_info,
 };
 
 static void tun_sock_write_space(struct sock *sk)
@@ -2793,6 +2846,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        struct tun_struct *tun;
        void __user* argp = (void __user*)arg;
        struct ifreq ifr;
+       struct net *net;
        kuid_t owner;
        kgid_t group;
        int sndbuf;
@@ -2801,7 +2855,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        int le;
        int ret;
 
-       if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
+       if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
+           (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
                if (copy_from_user(&ifr, argp, ifreq_len))
                        return -EFAULT;
        } else {
@@ -2821,6 +2876,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        rtnl_lock();
 
        tun = tun_get(tfile);
+       net = sock_net(&tfile->sk);
        if (cmd == TUNSETIFF) {
                ret = -EEXIST;
                if (tun)
@@ -2828,7 +2884,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 
                ifr.ifr_name[IFNAMSIZ-1] = '\0';
 
-               ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
+               ret = tun_set_iff(net, file, &ifr);
 
                if (ret)
                        goto unlock;
@@ -2850,6 +2906,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                tfile->ifindex = ifindex;
                goto unlock;
        }
+       if (cmd == SIOCGSKNS) {
+               ret = -EPERM;
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+                       goto unlock;
+
+               ret = open_related_ns(&net->ns, get_net_ns);
+               goto unlock;
+       }
 
        ret = -EBADFD;
        if (!tun)
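
With the SIOCGSKNS branch above, a tun file descriptor now answers the same get-network-namespace ioctl that plain sockets do, returning a new fd that refers to the device's netns (the caller needs CAP_NET_ADMIN in that namespace). Hypothetical userspace usage:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

/* Returns an fd referring to the netns the tun device lives in,
 * or -1 with errno set (e.g. EPERM without CAP_NET_ADMIN). */
int tun_netns_fd(int tunfd)
{
        int nsfd = ioctl(tunfd, SIOCGSKNS);

        if (nsfd < 0)
                perror("ioctl(SIOCGSKNS)");
        return nsfd;
}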
index ce0b0b4..1ec523b 100644 (file)
@@ -114,14 +114,14 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr)
                return -ENOMEM;
 
        memcpy(usb_buf, init_msg_1, 12);
-       status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_1)
-               / sizeof(init_msg_1[0]), usb_buf, 24);
+       status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1),
+                                        usb_buf, 24);
        if (status != 0)
                return status;
 
        memcpy(usb_buf, init_msg_2, 12);
-       status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_2)
-               / sizeof(init_msg_2[0]), usb_buf, 28);
+       status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2),
+                                        usb_buf, 28);
        if (status != 0)
                return status;
 
index d0a1137..7a6a1fe 100644 (file)
@@ -954,10 +954,11 @@ static int smsc75xx_set_features(struct net_device *netdev,
        /* it's racing here! */
 
        ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
-       if (ret < 0)
+       if (ret < 0) {
                netdev_warn(dev->net, "Error writing RFE_CTL\n");
-
-       return ret;
+               return ret;
+       }
+       return 0;
 }
 
 static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm)
index 626c273..426dcf7 100644 (file)
@@ -443,12 +443,8 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
        sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
 
        err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
-       if (unlikely(err)) {
-               struct page *page = virt_to_head_page(xdp->data);
-
-               put_page(page);
-               return false;
-       }
+       if (unlikely(err))
+               return false; /* Caller handles free/refcnt */
 
        return true;
 }
@@ -456,8 +452,18 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
 static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 {
        struct virtnet_info *vi = netdev_priv(dev);
-       bool sent = __virtnet_xdp_xmit(vi, xdp);
+       struct receive_queue *rq = vi->rq;
+       struct bpf_prog *xdp_prog;
+       bool sent;
 
+       /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+        * indicates XDP resources have been successfully allocated.
+        */
+       xdp_prog = rcu_dereference(rq->xdp_prog);
+       if (!xdp_prog)
+               return -ENXIO;
+
+       sent = __virtnet_xdp_xmit(vi, xdp);
        if (!sent)
                return -ENOSPC;
        return 0;
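The new -ENXIO gate only allows ndo_xdp_xmit once an XDP program is attached, i.e. once the transmit resources exist. A hedged userspace analogue of the publish-then-use pattern (names are stand-ins, not the kernel API):

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

typedef int (*xmit_fn)(const void *frame);

static _Atomic(xmit_fn) prog;	/* stands in for rq->xdp_prog */

static int fake_xmit(const void *frame) { (void)frame; return 0; }

static int do_xmit(const void *frame)
{
	xmit_fn fn = atomic_load_explicit(&prog, memory_order_acquire);

	if (!fn)
		return -ENXIO;	/* nothing attached: resources not set up */
	return fn(frame);
}

int main(void)
{
	printf("%d\n", do_xmit("pkt"));		/* -ENXIO */
	atomic_store_explicit(&prog, fake_xmit, memory_order_release);
	printf("%d\n", do_xmit("pkt"));		/* 0 */
	return 0;
}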
@@ -498,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
        page_off += *len;
 
        while (--*num_buf) {
+               int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
                unsigned int buflen;
                void *buf;
                int off;
@@ -512,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
                /* guard against a misconfigured or uncooperative backend that
                 * is sending packets larger than the MTU.
                 */
-               if ((page_off + buflen) > PAGE_SIZE) {
+               if ((page_off + buflen + tailroom) > PAGE_SIZE) {
                        put_page(p);
                        goto err_buf;
                }
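The widened bounds check reserves tailroom for the skb_shared_info that XDP expects at the end of the page. Illustrative arithmetic only (sizes invented; the kernel uses SKB_DATA_ALIGN and the real struct size):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE	4096u
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static bool buf_fits(unsigned int page_off, unsigned int buflen,
		     unsigned int shinfo_size)
{
	unsigned int tailroom = ALIGN_UP(shinfo_size, 64);	/* SKB_DATA_ALIGN-like */

	return page_off + buflen + tailroom <= PAGE_SIZE;
}

int main(void)
{
	/* 3500 + 400 fits in a page, but not once 320B of tailroom is reserved */
	printf("%d\n", buf_fits(3500, 400, 320));	/* 0 */
	return 0;
}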
@@ -546,8 +553,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
        unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
                              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        struct page *page = virt_to_head_page(buf);
-       unsigned int delta = 0, err;
+       unsigned int delta = 0;
        struct page *xdp_page;
+       bool sent;
+       int err;
+
        len -= vi->hdr_len;
 
        rcu_read_lock();
@@ -558,7 +568,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                void *orig_data;
                u32 act;
 
-               if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+               if (unlikely(hdr->hdr.gso_type))
                        goto err_xdp;
 
                if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
@@ -596,16 +606,19 @@ static struct sk_buff *receive_small(struct net_device *dev,
                        delta = orig_data - xdp.data;
                        break;
                case XDP_TX:
-                       if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+                       sent = __virtnet_xdp_xmit(vi, &xdp);
+                       if (unlikely(!sent)) {
                                trace_xdp_exception(vi->dev, xdp_prog, act);
-                       else
-                               *xdp_xmit = true;
+                               goto err_xdp;
+                       }
+                       *xdp_xmit = true;
                        rcu_read_unlock();
                        goto xdp_xmit;
                case XDP_REDIRECT:
                        err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                       if (!err)
-                               *xdp_xmit = true;
+                       if (err)
+                               goto err_xdp;
+                       *xdp_xmit = true;
                        rcu_read_unlock();
                        goto xdp_xmit;
                default:
@@ -677,6 +690,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        struct bpf_prog *xdp_prog;
        unsigned int truesize;
        unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+       bool sent;
        int err;
 
        head_skb = NULL;
@@ -689,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                void *data;
                u32 act;
 
-               /* This happens when rx buffer size is underestimated */
+               /* This happens when the rx buffer size is underestimated
+                * or headroom is not enough because the buffer was
+                * refilled before XDP was set. This should only happen
+                * for the first several packets, so we don't care much
+                * about its performance.
+                */
                if (unlikely(num_buf > 1 ||
                             headroom < virtnet_get_headroom(vi))) {
                        /* linearize data for XDP */
@@ -724,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
                act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-               if (act != XDP_PASS)
-                       ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
                switch (act) {
                case XDP_PASS:
                        /* recalculate offset to account for any header
@@ -746,18 +762,28 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        }
                        break;
                case XDP_TX:
-                       if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+                       sent = __virtnet_xdp_xmit(vi, &xdp);
+                       if (unlikely(!sent)) {
                                trace_xdp_exception(vi->dev, xdp_prog, act);
-                       else
-                               *xdp_xmit = true;
+                               if (unlikely(xdp_page != page))
+                                       put_page(xdp_page);
+                               goto err_xdp;
+                       }
+                       *xdp_xmit = true;
                        if (unlikely(xdp_page != page))
                                goto err_xdp;
                        rcu_read_unlock();
                        goto xdp_xmit;
                case XDP_REDIRECT:
                        err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                       if (!err)
-                               *xdp_xmit = true;
+                       if (err) {
+                               if (unlikely(xdp_page != page))
+                                       put_page(xdp_page);
+                               goto err_xdp;
+                       }
+                       *xdp_xmit = true;
+                       if (unlikely(xdp_page != page))
+                               goto err_xdp;
                        rcu_read_unlock();
                        goto xdp_xmit;
                default:
@@ -1003,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-                                         struct ewma_pkt_len *avg_pkt_len)
+                                         struct ewma_pkt_len *avg_pkt_len,
+                                         unsigned int room)
 {
        const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        unsigned int len;
 
-       len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+       if (room)
+               return PAGE_SIZE - room;
+
+       len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
                                rq->min_buf_len, PAGE_SIZE - hdr_len);
+
        return ALIGN(len, L1_CACHE_BYTES);
 }
 
@@ -1018,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
        struct page_frag *alloc_frag = &rq->alloc_frag;
        unsigned int headroom = virtnet_get_headroom(vi);
+       unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+       unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
        char *buf;
        void *ctx;
        int err;
        unsigned int len, hole;
 
-       len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-       if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+       /* Extra tailroom is needed to satisfy XDP's assumption. This
+        * means rx frag coalescing won't work, but considering we've
+        * disabled GSO for XDP, it won't be a big issue.
+        */
+       len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+       if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
                return -ENOMEM;
 
        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
        buf += headroom; /* advance address leaving hole at front of pkt */
        get_page(alloc_frag->page);
-       alloc_frag->offset += len + headroom;
+       alloc_frag->offset += len + room;
        hole = alloc_frag->size - alloc_frag->offset;
-       if (hole < len + headroom) {
+       if (hole < len + room) {
                /* To avoid internal fragmentation, if there is very likely not
                 * enough space for another buffer, add the remaining space to
                 * the current buffer.
@@ -2566,12 +2603,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
        struct virtnet_info *vi = netdev_priv(queue->dev);
        unsigned int queue_index = get_netdev_rx_queue_index(queue);
+       unsigned int headroom = virtnet_get_headroom(vi);
+       unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
        struct ewma_pkt_len *avg;
 
        BUG_ON(queue_index >= vi->max_queue_pairs);
        avg = &vi->rq[queue_index].mrg_avg_pkt_len;
        return sprintf(buf, "%u\n",
-                      get_mergeable_buf_len(&vi->rq[queue_index], avg));
+                      get_mergeable_buf_len(&vi->rq[queue_index], avg,
+                                      SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
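With XDP enabled, each page now loses "room" (headroom plus aligned tailroom) and get_mergeable_buf_len() collapses to PAGE_SIZE - room, as the sysfs hunk above also reflects. A standalone recomputation of both cases, with invented constants:

#include <stdio.h>

#define PAGE_SIZE	4096u
#define L1_CACHE_BYTES	64u
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static unsigned int buf_len(unsigned int avg, unsigned int min_len,
			    unsigned int hdr_len, unsigned int room)
{
	unsigned int len = avg;

	if (room)
		return PAGE_SIZE - room;	/* XDP: one buffer per page */

	if (len < min_len)
		len = min_len;
	if (len > PAGE_SIZE - hdr_len)
		len = PAGE_SIZE - hdr_len;
	return ALIGN_UP(hdr_len + len, L1_CACHE_BYTES);
}

int main(void)
{
	unsigned int room = ALIGN_UP(256 + 320, 64);	/* headroom + shinfo, say */

	printf("no-xdp=%u xdp=%u\n",
	       buf_len(1500, 128, 12, 0), buf_len(1500, 128, 12, room));
	return 0;	/* no-xdp=1536 xdp=3520 */
}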
index 139c61c..c6be49d 100644 (file)
@@ -736,7 +736,6 @@ static int vrf_rtable_create(struct net_device *dev)
                return -ENOMEM;
 
        rth->dst.output = vrf_output;
-       rth->rt_table_id = vrf->tb_id;
 
        rcu_assign_pointer(vrf->rth, rth);
 
@@ -942,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
                                             const struct net_device *dev,
                                             struct flowi6 *fl6,
                                             int ifindex,
+                                            const struct sk_buff *skb,
                                             int flags)
 {
        struct net_vrf *vrf = netdev_priv(dev);
@@ -960,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
        if (!table)
                return NULL;
 
-       return ip6_pol_route(net, table, ifindex, fl6, flags);
+       return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
 }
 
 static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
@@ -978,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
        struct net *net = dev_net(vrf_dev);
        struct rt6_info *rt6;
 
-       rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+       rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
                                   RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
        if (unlikely(!rt6))
                return;
@@ -1111,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
        if (!ipv6_addr_any(&fl6->saddr))
                flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-       rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+       rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
        if (rt)
                dst = &rt->dst;
 
@@ -1146,6 +1146,7 @@ static inline size_t vrf_fib_rule_nl_size(void)
        sz  = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
        sz += nla_total_size(sizeof(u8));       /* FRA_L3MDEV */
        sz += nla_total_size(sizeof(u32));      /* FRA_PRIORITY */
+       sz += nla_total_size(sizeof(u8));       /* FRA_PROTOCOL */
 
        return sz;
 }
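vrf_fib_rule_nl_size() grows by one u8 attribute for FRA_PROTOCOL; each netlink attribute costs a 4-byte header plus its 4-byte-aligned payload. The same sum in isolation:

#include <stdio.h>

#define NLA_HDRLEN		4
#define NLA_ALIGN(len)		(((len) + 3) & ~3)
#define nla_total_size(payload)	(NLA_HDRLEN + NLA_ALIGN(payload))

int main(void)
{
	unsigned int sz = nla_total_size(1)	/* FRA_L3MDEV */
			+ nla_total_size(4)	/* FRA_PRIORITY */
			+ nla_total_size(1);	/* FRA_PROTOCOL */

	printf("%u\n", sz);	/* 24 bytes of attribute space */
	return 0;
}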
@@ -1176,6 +1177,9 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
        frh->family = family;
        frh->action = FR_ACT_TO_TBL;
 
+       if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
+               goto nla_put_failure;
+
        if (nla_put_u8(skb, FRA_L3MDEV, 1))
                goto nla_put_failure;
 
@@ -1431,6 +1435,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
        .init = vrf_netns_init,
        .id   = &vrf_net_id,
        .size = sizeof(bool),
+       .async = true,
 };
 
 static int __init vrf_init_module(void)
index fab7a4d..aa5f034 100644 (file)
@@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = {
        .exit_batch = vxlan_exit_batch_net,
        .id   = &vxlan_net_id,
        .size = sizeof(struct vxlan_net),
+       .async = true,
 };
 
 static int __init vxlan_init_module(void)
index 768f63f..b799a53 100644 (file)
@@ -1599,7 +1599,8 @@ static void wil_probe_client_handle(struct wil6210_priv *wil,
         */
        bool alive = (sta->status == wil_sta_connected);
 
-       cfg80211_probe_status(ndev, sta->addr, req->cookie, alive, GFP_KERNEL);
+       cfg80211_probe_status(ndev, sta->addr, req->cookie, alive,
+                             0, false, GFP_KERNEL);
 }
 
 static struct list_head *next_probe_client(struct wil6210_priv *wil)
index 1cf22e6..7b6c364 100644 (file)
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
 
 static unsigned int hwsim_net_id;
 
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
 
 struct hwsim_net {
        int netgroup;
@@ -267,11 +267,13 @@ static inline int hwsim_net_get_netgroup(struct net *net)
        return hwsim_net->netgroup;
 }
 
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
 {
        struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
 
-       hwsim_net->netgroup = hwsim_netgroup++;
+       hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+                                            0, 0, GFP_KERNEL);
+       return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
 }
 
 static inline u32 hwsim_net_get_wmediumd(struct net *net)
@@ -493,6 +495,7 @@ static LIST_HEAD(hwsim_radios);
 static struct workqueue_struct *hwsim_wq;
 static struct rhashtable hwsim_radios_rht;
 static int hwsim_radio_idx;
+static int hwsim_radios_generation = 1;
 
 static struct platform_driver mac80211_hwsim_driver = {
        .driver = {
@@ -637,6 +640,7 @@ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = {
        [HWSIM_ATTR_RADIO_NAME] = { .type = NLA_STRING },
        [HWSIM_ATTR_NO_VIF] = { .type = NLA_FLAG },
        [HWSIM_ATTR_FREQ] = { .type = NLA_U32 },
+       [HWSIM_ATTR_PERM_ADDR] = { .type = NLA_UNSPEC, .len = ETH_ALEN },
 };
 
 static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
@@ -2408,6 +2412,7 @@ struct hwsim_new_radio_params {
        bool destroy_on_close;
        const char *hwname;
        bool no_vif;
+       const u8 *perm_addr;
 };
 
 static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb,
@@ -2572,15 +2577,25 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
        skb_queue_head_init(&data->pending);
 
        SET_IEEE80211_DEV(hw, data->dev);
-       eth_zero_addr(addr);
-       addr[0] = 0x02;
-       addr[3] = idx >> 8;
-       addr[4] = idx;
-       memcpy(data->addresses[0].addr, addr, ETH_ALEN);
-       memcpy(data->addresses[1].addr, addr, ETH_ALEN);
-       data->addresses[1].addr[0] |= 0x40;
-       hw->wiphy->n_addresses = 2;
-       hw->wiphy->addresses = data->addresses;
+       if (!param->perm_addr) {
+               eth_zero_addr(addr);
+               addr[0] = 0x02;
+               addr[3] = idx >> 8;
+               addr[4] = idx;
+               memcpy(data->addresses[0].addr, addr, ETH_ALEN);
+               /* second address: same as the first with bit 0x40 set */
+               memcpy(data->addresses[1].addr, addr, ETH_ALEN);
+               data->addresses[1].addr[0] |= 0x40;
+               hw->wiphy->n_addresses = 2;
+               hw->wiphy->addresses = data->addresses;
+               /* possible address clash is checked at hash table insertion */
+       } else {
+               memcpy(data->addresses[0].addr, param->perm_addr, ETH_ALEN);
+               /* compatibility with automatically generated mac addr */
+               memcpy(data->addresses[1].addr, param->perm_addr, ETH_ALEN);
+               hw->wiphy->n_addresses = 2;
+               hw->wiphy->addresses = data->addresses;
+       }
 
        data->channels = param->channels;
        data->use_chanctx = param->use_chanctx;
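For generated radios the base MAC is locally administered and derived from the radio index; the second address is a copy of the first with bit 0x40 set, which only works if the bit is set after the copy (as fixed above). A standalone sketch of the scheme:

#include <stdio.h>

int main(void)
{
	unsigned char a0[6] = { 0x02, 0, 0, 0, 0, 0 };
	unsigned char a1[6];
	int idx = 0x0102, i;

	a0[3] = idx >> 8;
	a0[4] = idx & 0xff;

	for (i = 0; i < 6; i++)
		a1[i] = a0[i];
	a1[0] |= 0x40;	/* set after the copy, or the copy undoes it */

	printf("%02x:..:%02x:%02x vs %02x:..\n", a0[0], a0[3], a0[4], a1[0]);
	return 0;	/* 02:..:01:02 vs 42:.. */
}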
@@ -2785,13 +2800,17 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
        err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
                                     hwsim_rht_params);
        if (err < 0) {
-               pr_debug("mac80211_hwsim: radio index %d already present\n",
-                        idx);
+               if (info) {
+                       GENL_SET_ERR_MSG(info, "perm addr already present");
+                       NL_SET_BAD_ATTR(info->extack,
+                                       info->attrs[HWSIM_ATTR_PERM_ADDR]);
+               }
                spin_unlock_bh(&hwsim_radio_lock);
                goto failed_final_insert;
        }
 
        list_add_tail(&data->list, &hwsim_radios);
+       hwsim_radios_generation++;
        spin_unlock_bh(&hwsim_radio_lock);
 
        if (idx > 0)
@@ -3210,6 +3229,19 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
                param.regd = hwsim_world_regdom_custom[idx];
        }
 
+       if (info->attrs[HWSIM_ATTR_PERM_ADDR]) {
+               if (!is_valid_ether_addr(
+                               nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]))) {
+                       GENL_SET_ERR_MSG(info, "MAC is not a valid source addr");
+                       NL_SET_BAD_ATTR(info->extack,
+                                       info->attrs[HWSIM_ATTR_PERM_ADDR]);
+                       return -EINVAL;
+               }
+
+               param.perm_addr = nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]);
+       }
+
        ret = mac80211_hwsim_new_radio(info, &param);
        kfree(hwname);
        return ret;
@@ -3249,6 +3281,7 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
                list_del(&data->list);
                rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
                                       hwsim_rht_params);
+               hwsim_radios_generation++;
                spin_unlock_bh(&hwsim_radio_lock);
                mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy),
                                         info);
@@ -3305,17 +3338,19 @@ out_err:
 static int hwsim_dump_radio_nl(struct sk_buff *skb,
                               struct netlink_callback *cb)
 {
-       int idx = cb->args[0];
+       int last_idx = cb->args[0];
        struct mac80211_hwsim_data *data = NULL;
-       int res;
+       int res = 0;
+       void *hdr;
 
        spin_lock_bh(&hwsim_radio_lock);
+       cb->seq = hwsim_radios_generation;
 
-       if (idx == hwsim_radio_idx)
+       if (last_idx >= hwsim_radio_idx - 1)
                goto done;
 
        list_for_each_entry(data, &hwsim_radios, list) {
-               if (data->idx < idx)
+               if (data->idx <= last_idx)
                        continue;
 
                if (!net_eq(wiphy_net(data->hw->wiphy), sock_net(skb->sk)))
@@ -3328,14 +3363,25 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb,
                if (res < 0)
                        break;
 
-               idx = data->idx + 1;
+               last_idx = data->idx;
        }
 
-       cb->args[0] = idx;
+       cb->args[0] = last_idx;
+
+       /* list changed, but no new element sent, set interrupted flag */
+       if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
+               hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+                                 cb->nlh->nlmsg_seq, &hwsim_genl_family,
+                                 NLM_F_MULTI, HWSIM_CMD_GET_RADIO);
+               if (hdr) {
+                       genl_dump_check_consistent(cb, hdr);
+                       genlmsg_end(skb, hdr);
+               } else {
+                       res = -EMSGSIZE;
+               }
+       }
 
 done:
        spin_unlock_bh(&hwsim_radio_lock);
-       return skb->len;
+       return res ?: skb->len;
 }
 
 /* Generic Netlink operations array */
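The generation counter lets an interrupted dump be flagged with NLM_F_DUMP_INTR via genl_dump_check_consistent(). A hedged sketch of the consumer side (assumed usage, not from the patch):

#include <stdbool.h>
#include <linux/netlink.h>

/* Scan one recv()'d batch; on true, the caller should restart the dump. */
static bool dump_interrupted(struct nlmsghdr *nlh, unsigned int len)
{
	for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
		if (nlh->nlmsg_flags & NLM_F_DUMP_INTR)
			return true;
	}
	return false;
}

int main(void)
{
	struct nlmsghdr h = { .nlmsg_len = sizeof(h),
			      .nlmsg_flags = NLM_F_DUMP_INTR };

	return dump_interrupted(&h, sizeof(h)) ? 0 : 1;	/* exits 0 */
}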
@@ -3393,6 +3439,7 @@ static void destroy_radio(struct work_struct *work)
        struct mac80211_hwsim_data *data =
                container_of(work, struct mac80211_hwsim_data, destroy_work);
 
+       hwsim_radios_generation++;
        mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL);
 }
 
@@ -3462,9 +3509,7 @@ failure:
 
 static __net_init int hwsim_init_net(struct net *net)
 {
-       hwsim_net_set_netgroup(net);
-
-       return 0;
+       return hwsim_net_set_netgroup(net);
 }
 
 static void __net_exit hwsim_exit_net(struct net *net)
@@ -3487,6 +3532,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
                queue_work(hwsim_wq, &data->destroy_work);
        }
        spin_unlock_bh(&hwsim_radio_lock);
+
+       ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
 }
 
 static struct pernet_operations hwsim_net_ops = {
@@ -3494,6 +3541,7 @@ static struct pernet_operations hwsim_net_ops = {
        .exit = hwsim_exit_net,
        .id   = &hwsim_net_id,
        .size = sizeof(struct hwsim_net),
+       .async = true,
 };
 
 static void hwsim_exit_netlink(void)
@@ -3516,7 +3564,7 @@ static int __init init_mac80211_hwsim(void)
 
        spin_lock_init(&hwsim_radio_lock);
 
-       hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0);
+       hwsim_wq = alloc_workqueue("hwsim_wq", 0, 0);
        if (!hwsim_wq)
                return -ENOMEM;
        rhashtable_init(&hwsim_radios_rht, &hwsim_rht_params);
index a96a79c..0fe3199 100644 (file)
@@ -68,7 +68,12 @@ enum hwsim_tx_control_flags {
  *     %HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
  * @HWSIM_CMD_NEW_RADIO: create a new radio with the given parameters,
  *     returns the radio ID (>= 0) or negative on errors, if successful
- *     then multicast the result
+ *     then multicast the result, uses optional parameters:
+ *     %HWSIM_ATTR_REG_STRICT_REG, %HWSIM_ATTR_SUPPORT_P2P_DEVICE,
+ *     %HWSIM_ATTR_DESTROY_RADIO_ON_CLOSE, %HWSIM_ATTR_CHANNELS,
+ *     %HWSIM_ATTR_NO_VIF, %HWSIM_ATTR_RADIO_NAME, %HWSIM_ATTR_USE_CHANCTX,
+ *     %HWSIM_ATTR_REG_HINT_ALPHA2, %HWSIM_ATTR_REG_CUSTOM_REG,
+ *     %HWSIM_ATTR_PERM_ADDR
  * @HWSIM_CMD_DEL_RADIO: destroy a radio, reply is multicasted
  * @HWSIM_CMD_GET_RADIO: fetch information about existing radios, uses:
  *     %HWSIM_ATTR_RADIO_ID
@@ -126,6 +131,7 @@ enum {
  * @HWSIM_ATTR_FREQ: Frequency at which packet is transmitted or received.
  * @HWSIM_ATTR_TX_INFO_FLAGS: additional flags for corresponding
  *     rates of %HWSIM_ATTR_TX_INFO
+ * @HWSIM_ATTR_PERM_ADDR: permanent MAC address of the new radio
  * @__HWSIM_ATTR_MAX: enum limit
  */
 
@@ -153,6 +159,7 @@ enum {
        HWSIM_ATTR_FREQ,
        HWSIM_ATTR_PAD,
        HWSIM_ATTR_TX_INFO_FLAGS,
+       HWSIM_ATTR_PERM_ADDR,
        __HWSIM_ATTR_MAX,
 };
 #define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1)
index b1cf7c6..ef58870 100644 (file)
@@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
        BUG();
 }
 
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
 {
        struct xenvif_pkt_state pkt;
 
index f431c32..0fe7ea3 100644 (file)
@@ -120,8 +120,12 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
        int ret;
 
        ret = nvme_reset_ctrl(ctrl);
-       if (!ret)
+       if (!ret) {
                flush_work(&ctrl->reset_work);
+               if (ctrl->state != NVME_CTRL_LIVE)
+                       ret = -ENETRESET;
+       }
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
@@ -265,7 +269,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
        switch (new_state) {
        case NVME_CTRL_ADMIN_ONLY:
                switch (old_state) {
-               case NVME_CTRL_RECONNECTING:
+               case NVME_CTRL_CONNECTING:
                        changed = true;
                        /* FALLTHRU */
                default:
@@ -276,7 +280,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                switch (old_state) {
                case NVME_CTRL_NEW:
                case NVME_CTRL_RESETTING:
-               case NVME_CTRL_RECONNECTING:
+               case NVME_CTRL_CONNECTING:
                        changed = true;
                        /* FALLTHRU */
                default:
@@ -294,9 +298,9 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                        break;
                }
                break;
-       case NVME_CTRL_RECONNECTING:
+       case NVME_CTRL_CONNECTING:
                switch (old_state) {
-               case NVME_CTRL_LIVE:
+               case NVME_CTRL_NEW:
                case NVME_CTRL_RESETTING:
                        changed = true;
                        /* FALLTHRU */
@@ -309,7 +313,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                case NVME_CTRL_LIVE:
                case NVME_CTRL_ADMIN_ONLY:
                case NVME_CTRL_RESETTING:
-               case NVME_CTRL_RECONNECTING:
+               case NVME_CTRL_CONNECTING:
                        changed = true;
                        /* FALLTHRU */
                default:
@@ -518,9 +522,11 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
                u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
                u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
 
-               range[n].cattr = cpu_to_le32(0);
-               range[n].nlb = cpu_to_le32(nlb);
-               range[n].slba = cpu_to_le64(slba);
+               if (n < segments) {
+                       range[n].cattr = cpu_to_le32(0);
+                       range[n].nlb = cpu_to_le32(nlb);
+                       range[n].slba = cpu_to_le64(slba);
+               }
                n++;
        }
 
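The guarded store above keeps counting ranges past the end of the array so a segment-count mismatch remains detectable, while never writing out of bounds. The pattern in isolation:

#include <stdio.h>

#define SEGMENTS 4

int main(void)
{
	int range[SEGMENTS];
	int n = 0, i;

	for (i = 0; i < 6; i++) {	/* more items arrive than expected */
		if (n < SEGMENTS)
			range[n] = i;	/* bounded write */
		n++;			/* unbounded count */
	}

	printf("stored up to %d, counted %d\n", SEGMENTS, n);
	if (n != SEGMENTS)
		printf("mismatch: fail the request\n");	/* cf. NVME_SC_INTERNAL */
	return range[0];	/* 0 */
}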
@@ -794,13 +800,9 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 
 static int nvme_keep_alive(struct nvme_ctrl *ctrl)
 {
-       struct nvme_command c;
        struct request *rq;
 
-       memset(&c, 0, sizeof(c));
-       c.common.opcode = nvme_admin_keep_alive;
-
-       rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
+       rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
                        NVME_QID_ANY);
        if (IS_ERR(rq))
                return PTR_ERR(rq);
@@ -832,6 +834,8 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
                return;
 
        INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+       memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
+       ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
        schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
 EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
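Caching ka_cmd in struct nvme_ctrl matters because nvme_alloc_request() retains a pointer to the command until the request completes; a stack copy would be gone by then. A toy illustration of the lifetime rule (invented types, not the NVMe API):

struct cmd {
	int opcode;
};

static const struct cmd *pending;	/* the consumer holds the pointer */

static void submit_async(const struct cmd *c)
{
	pending = c;	/* completion will read *pending later */
}

static void broken(void)
{
	struct cmd c = { 0x18 };	/* dies when this frame returns */

	submit_async(&c);		/* BUG: pending dangles afterwards */
}

static struct cmd ka_cmd = { 0x18 };	/* fix: lives as long as the ctrl */

static void fixed(void)
{
	submit_async(&ka_cmd);
}

int main(void)
{
	fixed();	/* broken() left uncalled on purpose */
	(void)broken;
	return pending != &ka_cmd;	/* 0 */
}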
@@ -1117,14 +1121,19 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 static void nvme_update_formats(struct nvme_ctrl *ctrl)
 {
-       struct nvme_ns *ns;
+       struct nvme_ns *ns, *next;
+       LIST_HEAD(rm_list);
 
        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
-               if (ns->disk && nvme_revalidate_disk(ns->disk))
-                       nvme_ns_remove(ns);
+               if (ns->disk && nvme_revalidate_disk(ns->disk))
+                       list_move_tail(&ns->list, &rm_list);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
+
+       list_for_each_entry_safe(ns, next, &rm_list, list)
+               nvme_ns_remove(ns);
 }
 
 static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -2687,7 +2696,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
                [NVME_CTRL_LIVE]        = "live",
                [NVME_CTRL_ADMIN_ONLY]  = "only-admin",
                [NVME_CTRL_RESETTING]   = "resetting",
-               [NVME_CTRL_RECONNECTING]= "reconnecting",
+               [NVME_CTRL_CONNECTING]  = "connecting",
                [NVME_CTRL_DELETING]    = "deleting",
                [NVME_CTRL_DEAD]        = "dead",
        };
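After the rename, CONNECTING covers both the initial connect and reconnects, so it is entered from NEW or RESETTING and no longer from LIVE. The transition rule restated as a compact check, with enum names invented for the sketch:

#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { ST_NEW, ST_LIVE, ST_ADMIN_ONLY, ST_RESETTING,
		  ST_CONNECTING, ST_DELETING, ST_DEAD };

static bool can_enter_connecting(enum ctrl_state old)
{
	switch (old) {
	case ST_NEW:
	case ST_RESETTING:
		return true;
	default:
		return false;
	}
}

int main(void)
{
	printf("new=%d live=%d\n",
	       can_enter_connecting(ST_NEW),
	       can_enter_connecting(ST_LIVE));	/* new=1 live=0 */
	return 0;
}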
index 25b19f7..a3145d9 100644 (file)
@@ -171,13 +171,14 @@ static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
            cmd->common.opcode != nvme_fabrics_command ||
            cmd->fabrics.fctype != nvme_fabrics_type_connect) {
                /*
-                * Reconnecting state means transport disruption, which can take
-                * a long time and even might fail permanently, fail fast to
-                * give upper layers a chance to failover.
+                * Connecting state means transport disruption or initial
+                * establishment, which can take a long time and even might
+                * fail permanently, fail fast to give upper layers a chance
+                * to failover.
                 * Deleting state means that the ctrl will never accept commands
                 * again, fail it permanently.
                 */
-               if (ctrl->state == NVME_CTRL_RECONNECTING ||
+               if (ctrl->state == NVME_CTRL_CONNECTING ||
                    ctrl->state == NVME_CTRL_DELETING) {
                        nvme_req(rq)->status = NVME_SC_ABORT_REQ;
                        return BLK_STS_IOERR;
index b856d7c..7f51f84 100644 (file)
@@ -55,9 +55,7 @@ struct nvme_fc_queue {
 
 enum nvme_fcop_flags {
        FCOP_FLAGS_TERMIO       = (1 << 0),
-       FCOP_FLAGS_RELEASED     = (1 << 1),
-       FCOP_FLAGS_COMPLETE     = (1 << 2),
-       FCOP_FLAGS_AEN          = (1 << 3),
+       FCOP_FLAGS_AEN          = (1 << 1),
 };
 
 struct nvmefc_ls_req_op {
@@ -532,7 +530,7 @@ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
 {
        switch (ctrl->ctrl.state) {
        case NVME_CTRL_NEW:
-       case NVME_CTRL_RECONNECTING:
+       case NVME_CTRL_CONNECTING:
                /*
                 * As all reconnects were suppressed, schedule a
                 * connect.
@@ -777,7 +775,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
                }
                break;
 
-       case NVME_CTRL_RECONNECTING:
+       case NVME_CTRL_CONNECTING:
                /*
                 * The association has already been terminated and the
                 * controller is attempting reconnects.  No need to do anything
@@ -1470,7 +1468,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 
 /* *********************** NVME Ctrl Routines **************************** */
 
-static void __nvme_fc_final_op_cleanup(struct request *rq);
 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
@@ -1512,13 +1509,19 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
 static int
 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
 {
-       int state;
+       unsigned long flags;
+       int opstate;
+
+       spin_lock_irqsave(&ctrl->lock, flags);
+       opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+       if (opstate != FCPOP_STATE_ACTIVE)
+               atomic_set(&op->state, opstate);
+       else if (ctrl->flags & FCCTRL_TERMIO)
+               ctrl->iocnt++;
+       spin_unlock_irqrestore(&ctrl->lock, flags);
 
-       state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
-       if (state != FCPOP_STATE_ACTIVE) {
-               atomic_set(&op->state, state);
+       if (opstate != FCPOP_STATE_ACTIVE)
                return -ECANCELED;
-       }
 
        ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
                                        &ctrl->rport->remoteport,
@@ -1532,60 +1535,26 @@ static void
 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 {
        struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
-       unsigned long flags;
-       int i, ret;
-
-       for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
-               if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
-                       continue;
-
-               spin_lock_irqsave(&ctrl->lock, flags);
-               if (ctrl->flags & FCCTRL_TERMIO) {
-                       ctrl->iocnt++;
-                       aen_op->flags |= FCOP_FLAGS_TERMIO;
-               }
-               spin_unlock_irqrestore(&ctrl->lock, flags);
-
-               ret = __nvme_fc_abort_op(ctrl, aen_op);
-               if (ret) {
-                       /*
-                        * if __nvme_fc_abort_op failed the io wasn't
-                        * active. Thus this call path is running in
-                        * parallel to the io complete. Treat as non-error.
-                        */
+       int i;
 
-                       /* back out the flags/counters */
-                       spin_lock_irqsave(&ctrl->lock, flags);
-                       if (ctrl->flags & FCCTRL_TERMIO)
-                               ctrl->iocnt--;
-                       aen_op->flags &= ~FCOP_FLAGS_TERMIO;
-                       spin_unlock_irqrestore(&ctrl->lock, flags);
-                       return;
-               }
-       }
+       for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
+               __nvme_fc_abort_op(ctrl, aen_op);
 }
 
-static inline int
+static inline void
 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
-               struct nvme_fc_fcp_op *op)
+               struct nvme_fc_fcp_op *op, int opstate)
 {
        unsigned long flags;
-       bool complete_rq = false;
 
-       spin_lock_irqsave(&ctrl->lock, flags);
-       if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+       if (opstate == FCPOP_STATE_ABORTED) {
+               spin_lock_irqsave(&ctrl->lock, flags);
                if (ctrl->flags & FCCTRL_TERMIO) {
                        if (!--ctrl->iocnt)
                                wake_up(&ctrl->ioabort_wait);
                }
+               spin_unlock_irqrestore(&ctrl->lock, flags);
        }
-       if (op->flags & FCOP_FLAGS_RELEASED)
-               complete_rq = true;
-       else
-               op->flags |= FCOP_FLAGS_COMPLETE;
-       spin_unlock_irqrestore(&ctrl->lock, flags);
-
-       return complete_rq;
 }
 
 static void
@@ -1601,6 +1570,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
        __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
        union nvme_result result;
        bool terminate_assoc = true;
+       int opstate;
 
        /*
         * WARNING:
@@ -1639,11 +1609,12 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
         * association to be terminated.
         */
 
+       opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+
        fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
                                sizeof(op->rsp_iu), DMA_FROM_DEVICE);
 
-       if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
-                       op->flags & FCOP_FLAGS_TERMIO)
+       if (opstate == FCPOP_STATE_ABORTED)
                status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
        else if (freq->status)
                status = cpu_to_le16(NVME_SC_INTERNAL << 1);
@@ -1708,7 +1679,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 done:
        if (op->flags & FCOP_FLAGS_AEN) {
                nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
-               __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+               __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
                atomic_set(&op->state, FCPOP_STATE_IDLE);
                op->flags = FCOP_FLAGS_AEN;     /* clear other flags */
                nvme_fc_ctrl_put(ctrl);
@@ -1722,13 +1693,11 @@ done:
        if (status &&
            (blk_queue_dying(rq->q) ||
             ctrl->ctrl.state == NVME_CTRL_NEW ||
-            ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+            ctrl->ctrl.state == NVME_CTRL_CONNECTING))
                status |= cpu_to_le16(NVME_SC_DNR << 1);
 
-       if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
-               __nvme_fc_final_op_cleanup(rq);
-       else
-               nvme_end_request(rq, status, result);
+       __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+       nvme_end_request(rq, status, result);
 
 check_error:
        if (terminate_assoc)
@@ -2415,46 +2384,16 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
 }
 
 static void
-__nvme_fc_final_op_cleanup(struct request *rq)
+nvme_fc_complete_rq(struct request *rq)
 {
        struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
        struct nvme_fc_ctrl *ctrl = op->ctrl;
 
        atomic_set(&op->state, FCPOP_STATE_IDLE);
-       op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
-                       FCOP_FLAGS_COMPLETE);
 
        nvme_fc_unmap_data(ctrl, rq, op);
        nvme_complete_rq(rq);
        nvme_fc_ctrl_put(ctrl);
-
-}
-
-static void
-nvme_fc_complete_rq(struct request *rq)
-{
-       struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-       struct nvme_fc_ctrl *ctrl = op->ctrl;
-       unsigned long flags;
-       bool completed = false;
-
-       /*
-        * the core layer, on controller resets after calling
-        * nvme_shutdown_ctrl(), calls complete_rq without our
-        * calling blk_mq_complete_request(), thus there may still
-        * be live i/o outstanding with the LLDD. Means transport has
-        * to track complete calls vs fcpio_done calls to know what
-        * path to take on completes and dones.
-        */
-       spin_lock_irqsave(&ctrl->lock, flags);
-       if (op->flags & FCOP_FLAGS_COMPLETE)
-               completed = true;
-       else
-               op->flags |= FCOP_FLAGS_RELEASED;
-       spin_unlock_irqrestore(&ctrl->lock, flags);
-
-       if (completed)
-               __nvme_fc_final_op_cleanup(rq);
 }
 
 /*
@@ -2476,35 +2415,11 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
        struct nvme_ctrl *nctrl = data;
        struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
        struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
-       unsigned long flags;
-       int status;
 
        if (!blk_mq_request_started(req))
                return;
 
-       spin_lock_irqsave(&ctrl->lock, flags);
-       if (ctrl->flags & FCCTRL_TERMIO) {
-               ctrl->iocnt++;
-               op->flags |= FCOP_FLAGS_TERMIO;
-       }
-       spin_unlock_irqrestore(&ctrl->lock, flags);
-
-       status = __nvme_fc_abort_op(ctrl, op);
-       if (status) {
-               /*
-                * if __nvme_fc_abort_op failed the io wasn't
-                * active. Thus this call path is running in
-                * parallel to the io complete. Treat as non-error.
-                */
-
-               /* back out the flags/counters */
-               spin_lock_irqsave(&ctrl->lock, flags);
-               if (ctrl->flags & FCCTRL_TERMIO)
-                       ctrl->iocnt--;
-               op->flags &= ~FCOP_FLAGS_TERMIO;
-               spin_unlock_irqrestore(&ctrl->lock, flags);
-               return;
-       }
+       __nvme_fc_abort_op(ctrl, op);
 }
 
 
@@ -2943,7 +2858,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
        unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
        bool recon = true;
 
-       if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+       if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
                return;
 
        if (portptr->port_state == FC_OBJSTATE_ONLINE)
@@ -2991,10 +2906,10 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
        /* will block while waiting for io to terminate */
        nvme_fc_delete_association(ctrl);
 
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
                dev_err(ctrl->ctrl.device,
                        "NVME-FC{%d}: error_recovery: Couldn't change state "
-                       "to RECONNECTING\n", ctrl->cnum);
+                       "to CONNECTING\n", ctrl->cnum);
                return;
        }
 
@@ -3195,7 +3110,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         * transport errors (frame drop, LS failure) inherently must kill
         * the association. The transport is coded so that any command used
         * to create the association (prior to a LIVE state transition
-        * while NEW or RECONNECTING) will fail if it completes in error or
+        * while NEW or CONNECTING) will fail if it completes in error or
         * times out.
         *
         * As such: as the connect request was most likely due to a
index 8e4550f..0521e47 100644 (file)
@@ -123,7 +123,7 @@ enum nvme_ctrl_state {
        NVME_CTRL_LIVE,
        NVME_CTRL_ADMIN_ONLY,    /* Only admin queue live */
        NVME_CTRL_RESETTING,
-       NVME_CTRL_RECONNECTING,
+       NVME_CTRL_CONNECTING,
        NVME_CTRL_DELETING,
        NVME_CTRL_DEAD,
 };
@@ -183,6 +183,7 @@ struct nvme_ctrl {
        struct work_struct scan_work;
        struct work_struct async_event_work;
        struct delayed_work ka_work;
+       struct nvme_command ka_cmd;
        struct work_struct fw_act_work;
 
        /* Power saving configuration */
index 6fe7af0..73036d2 100644 (file)
@@ -1141,7 +1141,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
        /* If there is a reset/reinit ongoing, we shouldn't reset again. */
        switch (dev->ctrl.state) {
        case NVME_CTRL_RESETTING:
-       case NVME_CTRL_RECONNECTING:
+       case NVME_CTRL_CONNECTING:
                return false;
        default:
                break;
@@ -1215,13 +1215,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         * cancellation error. All outstanding requests are completed on
         * shutdown, so we return BLK_EH_HANDLED.
         */
-       if (dev->ctrl.state == NVME_CTRL_RESETTING) {
+       switch (dev->ctrl.state) {
+       case NVME_CTRL_CONNECTING:
+       case NVME_CTRL_RESETTING:
                dev_warn(dev->ctrl.device,
                         "I/O %d QID %d timeout, disable controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
                nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                return BLK_EH_HANDLED;
+       default:
+               break;
        }
 
        /*
@@ -1364,18 +1368,14 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
 static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
                                int qid, int depth)
 {
-       if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
-               unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
-                                                     dev->ctrl.page_size);
-               nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
-               nvmeq->sq_cmds_io = dev->cmb + offset;
-       } else {
-               nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
-                                       &nvmeq->sq_dma_addr, GFP_KERNEL);
-               if (!nvmeq->sq_cmds)
-                       return -ENOMEM;
-       }
+       /* CMB SQEs will be mapped before creation */
+       if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
+               return 0;
 
+       nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+                                           &nvmeq->sq_dma_addr, GFP_KERNEL);
+       if (!nvmeq->sq_cmds)
+               return -ENOMEM;
        return 0;
 }
 
@@ -1449,6 +1449,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
        struct nvme_dev *dev = nvmeq->dev;
        int result;
 
+       if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+               unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
+                                                     dev->ctrl.page_size);
+               nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
+               nvmeq->sq_cmds_io = dev->cmb + offset;
+       }
+
        nvmeq->cq_vector = qid - 1;
        result = adapter_alloc_cq(dev, qid, nvmeq);
        if (result < 0)
@@ -2288,12 +2295,12 @@ static void nvme_reset_work(struct work_struct *work)
                nvme_dev_disable(dev, false);
 
        /*
-        * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+        * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
         * initializing procedure here.
         */
-       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
                dev_warn(dev->ctrl.device,
-                       "failed to mark controller RECONNECTING\n");
+                       "failed to mark controller CONNECTING\n");
                goto out;
        }
 
index 2bc059f..3a51ed5 100644 (file)
@@ -887,7 +887,7 @@ free_ctrl:
 static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 {
        /* If we are resetting/deleting then do nothing */
-       if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+       if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
                WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
                        ctrl->ctrl.state == NVME_CTRL_LIVE);
                return;
@@ -973,7 +973,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
        nvme_start_queues(&ctrl->ctrl);
 
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
                /* state change failure should never happen */
                WARN_ON_ONCE(1);
                return;
@@ -1756,7 +1756,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
        nvme_stop_ctrl(&ctrl->ctrl);
        nvme_rdma_shutdown_ctrl(ctrl, false);
 
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
                /* state change failure should never happen */
                WARN_ON_ONCE(1);
                return;
@@ -1784,11 +1784,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
        return;
 
 out_fail:
-       dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_rdma_shutdown_ctrl(ctrl, true);
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
+       ++ctrl->ctrl.nr_reconnects;
+       nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1942,6 +1939,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        if (!ctrl->queues)
                goto out_uninit_ctrl;
 
+       changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
+       WARN_ON_ONCE(!changed);
+
        ret = nvme_rdma_configure_admin_queue(ctrl, true);
        if (ret)
                goto out_kfree_queues;
index 0a4372a..28bbdff 100644 (file)
@@ -105,10 +105,13 @@ static void nvmet_execute_flush(struct nvmet_req *req)
 static u16 nvmet_discard_range(struct nvmet_ns *ns,
                struct nvme_dsm_range *range, struct bio **bio)
 {
-       if (__blkdev_issue_discard(ns->bdev,
+       int ret;
+
+       ret = __blkdev_issue_discard(ns->bdev,
                        le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-                       GFP_KERNEL, 0, bio))
+                       GFP_KERNEL, 0, bio);
+       if (ret && ret != -EOPNOTSUPP)
                return NVME_SC_INTERNAL | NVME_SC_DNR;
        return 0;
 }
index 36ed84e..f46828e 100644 (file)
@@ -977,11 +977,11 @@ static int of_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
        return 0;
 }
 
-static void *
+static const void *
 of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
                                const struct device *dev)
 {
-       return (void *)of_device_get_match_data(dev);
+       return of_device_get_match_data(dev);
 }
 
 const struct fwnode_operations of_fwnode_ops = {
index 2d87bc1..0c09107 100644 (file)
@@ -55,7 +55,7 @@ int dev_pm_opp_init_cpufreq_table(struct device *dev,
        if (max_opps <= 0)
                return max_opps ? max_opps : -ENODATA;
 
-       freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_ATOMIC);
+       freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL);
        if (!freq_table)
                return -ENOMEM;
 
index fc73401..8b14bd3 100644 (file)
@@ -3419,22 +3419,29 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE,
 
 static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
 {
-       pci_set_vpd_size(dev, 8192);
-}
-
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd);
+       int chip = (dev->device & 0xf000) >> 12;
+       int func = (dev->device & 0x0f00) >>  8;
+       int prod = (dev->device & 0x00ff) >>  0;
+
+       /*
+        * If this is a T3-based adapter, there's a 1KB VPD area at offset
+        * 0xc00 which contains the preferred VPD values.  If this is a T4 or
+        * later based adapter, the special VPD is at offset 0x400 for the
+        * Physical Functions (the SR-IOV Virtual Functions have no VPD
+        * Capabilities).  The PCI VPD Access core routines will normally
+        * compute the size of the VPD by parsing the VPD Data Structure at
+        * offset 0x000.  This will result in silent failures when attempting
+        * to access these other VPD areas, which are beyond those computed
+        * limits.
+        */
+       if (chip == 0x0 && prod >= 0x20)
+               pci_set_vpd_size(dev, 8192);
+       else if (chip >= 0x4 && func < 0x8)
+               pci_set_vpd_size(dev, 2048);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+                       quirk_chelsio_extend_vpd);
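The quirk now decodes the nibbles of the 16-bit device ID instead of enumerating IDs. A standalone decode with an example ID (the ID itself is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned short device = 0x5401;		/* example Chelsio device ID */
	int chip = (device & 0xf000) >> 12;	/* adapter generation (T3/T4/T5...) */
	int func = (device & 0x0f00) >> 8;	/* PCI function */
	int prod = (device & 0x00ff) >> 0;	/* product within the family */

	printf("chip=T%d func=%d prod=%#x\n", chip, func, prod);
	return 0;	/* chip=T5 func=4 prod=0x1 */
}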
 
 #ifdef CONFIG_ACPI
 /*
index 7bc5eee..0c2ed11 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/sched/clock.h>
 #include <linux/spinlock.h>
@@ -26,6 +25,9 @@
 
 #include <asm/irq_regs.h>
 
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
+static DEFINE_PER_CPU(int, cpu_irq);
+
 static int
 armpmu_map_cache_event(const unsigned (*cache_map)
                                      [PERF_COUNT_HW_CACHE_MAX]
@@ -320,17 +322,9 @@ validate_group(struct perf_event *event)
        return 0;
 }
 
-static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu)
-{
-       struct platform_device *pdev = armpmu->plat_device;
-
-       return pdev ? dev_get_platdata(&pdev->dev) : NULL;
-}
-
 static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 {
        struct arm_pmu *armpmu;
-       struct arm_pmu_platdata *plat;
        int ret;
        u64 start_clock, finish_clock;
 
@@ -341,14 +335,11 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
         * dereference.
         */
        armpmu = *(void **)dev;
-
-       plat = armpmu_get_platdata(armpmu);
+       if (WARN_ON_ONCE(!armpmu))
+               return IRQ_NONE;
 
        start_clock = sched_clock();
-       if (plat && plat->handle_irq)
-               ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
-       else
-               ret = armpmu->handle_irq(irq, armpmu);
+       ret = armpmu->handle_irq(irq, armpmu);
        finish_clock = sched_clock();
 
        perf_sample_event_took(finish_clock - start_clock);
@@ -531,54 +522,41 @@ int perf_num_counters(void)
 }
 EXPORT_SYMBOL_GPL(perf_num_counters);
 
-void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
+static int armpmu_count_irq_users(const int irq)
 {
-       struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
-       int irq = per_cpu(hw_events->irq, cpu);
+       int cpu, count = 0;
 
-       if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
-               return;
-
-       if (irq_is_percpu_devid(irq)) {
-               free_percpu_irq(irq, &hw_events->percpu_pmu);
-               cpumask_clear(&armpmu->active_irqs);
-               return;
+       for_each_possible_cpu(cpu) {
+               if (per_cpu(cpu_irq, cpu) == irq)
+                       count++;
        }
 
-       free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+       return count;
 }
 
-void armpmu_free_irqs(struct arm_pmu *armpmu)
+void armpmu_free_irq(int irq, int cpu)
 {
-       int cpu;
+       if (per_cpu(cpu_irq, cpu) == 0)
+               return;
+       if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
+               return;
+
+       if (!irq_is_percpu_devid(irq))
+               free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu));
+       else if (armpmu_count_irq_users(irq) == 1)
+               free_percpu_irq(irq, &cpu_armpmu);
 
-       for_each_cpu(cpu, &armpmu->supported_cpus)
-               armpmu_free_irq(armpmu, cpu);
+       per_cpu(cpu_irq, cpu) = 0;
 }
 
-int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
+int armpmu_request_irq(int irq, int cpu)
 {
        int err = 0;
-       struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
        const irq_handler_t handler = armpmu_dispatch_irq;
-       int irq = per_cpu(hw_events->irq, cpu);
        if (!irq)
                return 0;
 
-       if (irq_is_percpu_devid(irq) && cpumask_empty(&armpmu->active_irqs)) {
-               err = request_percpu_irq(irq, handler, "arm-pmu",
-                                        &hw_events->percpu_pmu);
-       } else if (irq_is_percpu_devid(irq)) {
-               int other_cpu = cpumask_first(&armpmu->active_irqs);
-               int other_irq = per_cpu(hw_events->irq, other_cpu);
-
-               if (irq != other_irq) {
-                       pr_warn("mismatched PPIs detected.\n");
-                       err = -EINVAL;
-                       goto err_out;
-               }
-       } else {
-               struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu);
+       if (!irq_is_percpu_devid(irq)) {
                unsigned long irq_flags;
 
                err = irq_force_affinity(irq, cpumask_of(cpu));
@@ -589,22 +567,22 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
                        goto err_out;
                }
 
-               if (platdata && platdata->irq_flags) {
-                       irq_flags = platdata->irq_flags;
-               } else {
-                       irq_flags = IRQF_PERCPU |
-                                   IRQF_NOBALANCING |
-                                   IRQF_NO_THREAD;
-               }
+               irq_flags = IRQF_PERCPU |
+                           IRQF_NOBALANCING |
+                           IRQF_NO_THREAD;
 
+               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                err = request_irq(irq, handler, irq_flags, "arm-pmu",
-                                 per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+                                 per_cpu_ptr(&cpu_armpmu, cpu));
+       } else if (armpmu_count_irq_users(irq) == 0) {
+               err = request_percpu_irq(irq, handler, "arm-pmu",
+                                        &cpu_armpmu);
        }
 
        if (err)
                goto err_out;
 
-       cpumask_set_cpu(cpu, &armpmu->active_irqs);
+       per_cpu(cpu_irq, cpu) = irq;
        return 0;
 
 err_out:
@@ -612,19 +590,6 @@ err_out:
        return err;
 }
 
-int armpmu_request_irqs(struct arm_pmu *armpmu)
-{
-       int cpu, err;
-
-       for_each_cpu(cpu, &armpmu->supported_cpus) {
-               err = armpmu_request_irq(armpmu, cpu);
-               if (err)
-                       break;
-       }
-
-       return err;
-}
-
 static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
 {
        struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
@@ -647,12 +612,14 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
        if (pmu->reset)
                pmu->reset(pmu);
 
+       per_cpu(cpu_armpmu, cpu) = pmu;
+
        irq = armpmu_get_cpu_irq(pmu, cpu);
        if (irq) {
-               if (irq_is_percpu_devid(irq)) {
+               if (irq_is_percpu_devid(irq))
                        enable_percpu_irq(irq, IRQ_TYPE_NONE);
-                       return 0;
-               }
+               else
+                       enable_irq(irq);
        }
 
        return 0;
@@ -667,8 +634,14 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
                return 0;
 
        irq = armpmu_get_cpu_irq(pmu, cpu);
-       if (irq && irq_is_percpu_devid(irq))
-               disable_percpu_irq(irq);
+       if (irq) {
+               if (irq_is_percpu_devid(irq))
+                       disable_percpu_irq(irq);
+               else
+                       disable_irq(irq);
+       }
+
+       per_cpu(cpu_armpmu, cpu) = NULL;
 
        return 0;
 }
@@ -800,18 +773,18 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
                                            &cpu_pmu->node);
 }
 
-struct arm_pmu *armpmu_alloc(void)
+static struct arm_pmu *__armpmu_alloc(gfp_t flags)
 {
        struct arm_pmu *pmu;
        int cpu;
 
-       pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+       pmu = kzalloc(sizeof(*pmu), flags);
        if (!pmu) {
                pr_info("failed to allocate PMU device!\n");
                goto out;
        }
 
-       pmu->hw_events = alloc_percpu(struct pmu_hw_events);
+       pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
        if (!pmu->hw_events) {
                pr_info("failed to allocate per-cpu PMU data.\n");
                goto out_free_pmu;
@@ -857,6 +830,17 @@ out:
        return NULL;
 }
 
+struct arm_pmu *armpmu_alloc(void)
+{
+       return __armpmu_alloc(GFP_KERNEL);
+}
+
+struct arm_pmu *armpmu_alloc_atomic(void)
+{
+       return __armpmu_alloc(GFP_ATOMIC);
+}
+
+
 void armpmu_free(struct arm_pmu *pmu)
 {
        free_percpu(pmu->hw_events);
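The arm_pmu changes above replace the per-PMU active_irqs cpumask with per-CPU bookkeeping (cpu_irq and cpu_armpmu), so a shared percpu_devid interrupt is requested only for its first user and freed only with its last. A minimal user-space sketch of that reference-counting idea, with plain arrays standing in for per-CPU variables (NR_CPUS, note_irq and forget_irq are illustrative names, not kernel API):

#include <stdio.h>

#define NR_CPUS 4

static int cpu_irq[NR_CPUS];          /* stands in for per_cpu(cpu_irq, cpu) */

static int count_irq_users(int irq)   /* mirrors armpmu_count_irq_users() */
{
        int cpu, count = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (cpu_irq[cpu] == irq)
                        count++;
        return count;
}

static void note_irq(int irq, int cpu)
{
        if (count_irq_users(irq) == 0)
                printf("IRQ %d: first user, request it\n", irq);
        cpu_irq[cpu] = irq;
}

static void forget_irq(int irq, int cpu)
{
        if (count_irq_users(irq) == 1)
                printf("IRQ %d: last user, free it\n", irq);
        cpu_irq[cpu] = 0;
}

int main(void)
{
        note_irq(23, 0);    /* requested here */
        note_irq(23, 1);    /* already requested, only recorded */
        forget_irq(23, 1);  /* still in use by CPU 0 */
        forget_irq(23, 0);  /* freed here */
        return 0;
}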
index 705f1a3..0f19751 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/acpi.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
 #include <linux/percpu.h>
 #include <linux/perf/arm_pmu.h>
 
@@ -87,7 +89,13 @@ static int arm_pmu_acpi_parse_irqs(void)
                        pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu);
                }
 
+               /*
+                * Log and request the IRQ so the core arm_pmu code can manage
+                * it. We'll have to sanity-check IRQs later when we associate
+                * them with their PMUs.
+                */
                per_cpu(pmu_irqs, cpu) = irq;
+               armpmu_request_irq(irq, cpu);
        }
 
        return 0;
@@ -127,7 +135,7 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
                return pmu;
        }
 
-       pmu = armpmu_alloc();
+       pmu = armpmu_alloc_atomic();
        if (!pmu) {
                pr_warn("Unable to allocate PMU for CPU%d\n",
                        smp_processor_id());
@@ -139,6 +147,35 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
        return pmu;
 }
 
+/*
+ * Check whether the new IRQ is compatible with those already associated with
+ * the PMU (e.g. we don't have mismatched PPIs).
+ */
+static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
+{
+       struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+       int cpu;
+
+       if (!irq)
+               return true;
+
+       for_each_cpu(cpu, &pmu->supported_cpus) {
+               int other_irq = per_cpu(hw_events->irq, cpu);
+               if (!other_irq)
+                       continue;
+
+               if (irq == other_irq)
+                       continue;
+               if (!irq_is_percpu_devid(irq) && !irq_is_percpu_devid(other_irq))
+                       continue;
+
+               pr_warn("mismatched PPIs detected\n");
+               return false;
+       }
+
+       return true;
+}
+
 /*
  * This must run before the common arm_pmu hotplug logic, so that we can
  * associate a CPU and its interrupt before the common code tries to manage the
@@ -164,19 +201,14 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
        if (!pmu)
                return -ENOMEM;
 
-       cpumask_set_cpu(cpu, &pmu->supported_cpus);
-
        per_cpu(probed_pmus, cpu) = pmu;
 
-       /*
-        * Log and request the IRQ so the core arm_pmu code can manage it.  In
-        * some situations (e.g. mismatched PPIs), we may fail to request the
-        * IRQ. However, it may be too late for us to do anything about it.
-        * The common ARM PMU code will log a warning in this case.
-        */
-       hw_events = pmu->hw_events;
-       per_cpu(hw_events->irq, cpu) = irq;
-       armpmu_request_irq(pmu, cpu);
+       if (pmu_irq_matches(pmu, irq)) {
+               hw_events = pmu->hw_events;
+               per_cpu(hw_events->irq, cpu) = irq;
+       }
+
+       cpumask_set_cpu(cpu, &pmu->supported_cpus);
 
        /*
         * Ideally, we'd probe the PMU here when we find the first matching
@@ -247,11 +279,6 @@ static int arm_pmu_acpi_init(void)
        if (acpi_disabled)
                return 0;
 
-       /*
-        * We can't request IRQs yet, since we don't know the cookie value
-        * until we know which CPUs share the same logical PMU. We'll handle
-        * that in arm_pmu_acpi_cpu_starting().
-        */
        ret = arm_pmu_acpi_parse_irqs();
        if (ret)
                return ret;
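The pmu_irq_matches() helper added above accepts a new IRQ for a PMU in exactly two cases; as a reading aid (a restatement of the code, not extra logic):

/*
 * irq == other_irq                        -> compatible (the same PPI is
 *                                            shared by all CPUs of the PMU)
 * neither irq nor other_irq percpu_devid  -> compatible (per-CPU SPIs may
 *                                            legitimately differ)
 * any other combination                   -> mismatched PPIs, reject
 */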
index 46501cc..7729eda 100644 (file)
@@ -127,13 +127,6 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
                        pdev->dev.of_node);
        }
 
-       /*
-        * Some platforms have all PMU IRQs OR'd into a single IRQ, with a
-        * special platdata function that attempts to demux them.
-        */
-       if (dev_get_platdata(&pdev->dev))
-               cpumask_setall(&pmu->supported_cpus);
-
        for (i = 0; i < num_irqs; i++) {
                int cpu, irq;
 
@@ -164,6 +157,36 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
        return 0;
 }
 
+static int armpmu_request_irqs(struct arm_pmu *armpmu)
+{
+       struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+       int cpu, err = 0;
+
+       for_each_cpu(cpu, &armpmu->supported_cpus) {
+               int irq = per_cpu(hw_events->irq, cpu);
+               if (!irq)
+                       continue;
+
+               err = armpmu_request_irq(irq, cpu);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+static void armpmu_free_irqs(struct arm_pmu *armpmu)
+{
+       int cpu;
+       struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+
+       for_each_cpu(cpu, &armpmu->supported_cpus) {
+               int irq = per_cpu(hw_events->irq, cpu);
+
+               armpmu_free_irq(irq, cpu);
+       }
+}
+
 int arm_pmu_device_probe(struct platform_device *pdev,
                         const struct of_device_id *of_table,
                         const struct pmu_probe_info *probe_table)
index 2a68f59..c52c672 100644 (file)
@@ -126,24 +126,6 @@ static const struct dmi_system_id dell_device_table[] __initconst = {
                        DMI_MATCH(DMI_CHASSIS_TYPE, "32"), /*Detachable*/
                },
        },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-                       DMI_MATCH(DMI_CHASSIS_TYPE, "30"), /*Tablet*/
-               },
-       },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-                       DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /*Convertible*/
-               },
-       },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-                       DMI_MATCH(DMI_CHASSIS_TYPE, "32"), /*Detachable*/
-               },
-       },
        {
                .ident = "Dell Computer Corporation",
                .matches = {
@@ -1279,7 +1261,7 @@ static int kbd_get_state(struct kbd_state *state)
        struct calling_interface_buffer buffer;
        int ret;
 
-       dell_fill_request(&buffer, 0, 0, 0, 0);
+       dell_fill_request(&buffer, 0x1, 0, 0, 0);
        ret = dell_send_request(&buffer,
                                CLASS_KBD_BACKLIGHT, SELECT_KBD_BACKLIGHT);
        if (ret)
index 5b6f18b..535199c 100644 (file)
@@ -113,7 +113,7 @@ MODULE_PARM_DESC(no_bt_rfkill, "No rfkill for bluetooth.");
 /*
  * ACPI Helpers
  */
-#define IDEAPAD_EC_TIMEOUT (100) /* in ms */
+#define IDEAPAD_EC_TIMEOUT (200) /* in ms */
 
 static int read_method_int(acpi_handle handle, const char *method, int *val)
 {
index daa68ac..c0c8945 100644 (file)
@@ -933,7 +933,7 @@ static int wmi_dev_probe(struct device *dev)
                        goto probe_failure;
                }
 
-               buf = kmalloc(strlen(wdriver->driver.name) + 4, GFP_KERNEL);
+               buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL);
                if (!buf) {
                        ret = -ENOMEM;
                        goto probe_string_failure;
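The size fix above is about the terminating NUL: assuming the buffer is then filled with a four-character prefix such as "wmi/" plus the driver name (the formatting call itself is outside this hunk), strlen(name) + 4 leaves no room for the trailing '\0'. The arithmetic in isolation, with an invented name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
        const char *prefix = "wmi/";      /* 4 characters */
        const char *name = "example-drv"; /* stands in for driver.name */
        /* strlen(prefix) + strlen(name) + 1 byte for the '\0' */
        size_t need = strlen(prefix) + strlen(name) + 1;
        char *buf = malloc(need);

        if (!buf)
                return 1;
        sprintf(buf, "%s%s", prefix, name);
        printf("%s needs %zu bytes\n", buf, need);
        free(buf);
        return 0;
}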
index ba2e085..8f5c1d7 100644 (file)
@@ -1297,6 +1297,9 @@ static int virtio_ccw_cio_notify(struct ccw_device *cdev, int event)
                vcdev->device_lost = true;
                rc = NOTIFY_DONE;
                break;
+       case CIO_OPER:
+               rc = NOTIFY_OK;
+               break;
        default:
                rc = NOTIFY_DONE;
                break;
@@ -1309,6 +1312,27 @@ static struct ccw_device_id virtio_ids[] = {
        {},
 };
 
+#ifdef CONFIG_PM_SLEEP
+static int virtio_ccw_freeze(struct ccw_device *cdev)
+{
+       struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+
+       return virtio_device_freeze(&vcdev->vdev);
+}
+
+static int virtio_ccw_restore(struct ccw_device *cdev)
+{
+       struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+       int ret;
+
+       ret = virtio_ccw_set_transport_rev(vcdev);
+       if (ret)
+               return ret;
+
+       return virtio_device_restore(&vcdev->vdev);
+}
+#endif
+
 static struct ccw_driver virtio_ccw_driver = {
        .driver = {
                .owner = THIS_MODULE,
@@ -1321,6 +1345,11 @@ static struct ccw_driver virtio_ccw_driver = {
        .set_online = virtio_ccw_online,
        .notify = virtio_ccw_cio_notify,
        .int_class = IRQIO_VIR,
+#ifdef CONFIG_PM_SLEEP
+       .freeze = virtio_ccw_freeze,
+       .thaw = virtio_ccw_restore,
+       .restore = virtio_ccw_restore,
+#endif
 };
 
 static int __init pure_hex(char **cp, unsigned int *val, int min_digit,
index fcfd28d..de1b3fc 100644 (file)
@@ -185,7 +185,6 @@ ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \
 CFLAGS_ncr53c8xx.o     := $(ncr53c8xx-flags-y) $(ncr53c8xx-flags-m)
 zalon7xx-objs  := zalon.o ncr53c8xx.o
 NCR_Q720_mod-objs      := NCR_Q720.o ncr53c8xx.o
-oktagon_esp_mod-objs   := oktagon_esp.o oktagon_io.o
 
 # Files generated that shall be removed upon make clean
 clean-files := 53c700_d.h 53c700_u.h
index b3b931a..2664ea0 100644 (file)
@@ -1693,8 +1693,10 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
         *      Map in the registers from the adapter.
         */
        aac->base_size = AAC_MIN_FOOTPRINT_SIZE;
-       if ((*aac_drivers[index].init)(aac))
+       if ((*aac_drivers[index].init)(aac)) {
+               error = -ENODEV;
                goto out_unmap;
+       }
 
        if (aac->sync_mode) {
                if (aac_sync_mode)
diff --git a/drivers/scsi/aic7xxx/aiclib.c b/drivers/scsi/aic7xxx/aiclib.c
deleted file mode 100644 (file)
index 828ae3d..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Implementation of Utility functions for all SCSI device types.
- *
- * Copyright (c) 1997, 1998, 1999 Justin T. Gibbs.
- * Copyright (c) 1997, 1998 Kenneth D. Merry.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions, and the following disclaimer,
- *    without modification, immediately at the beginning of the file.
- * 2. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/cam/scsi/scsi_all.c,v 1.38 2002/09/23 04:56:35 mjacob Exp $
- * $Id$
- */
-
-#include "aiclib.h"
-
index 8e2f767..5a645b8 100644 (file)
@@ -1889,6 +1889,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req,
                /* we will not receive ABTS response for this IO */
                BNX2FC_IO_DBG(io_req, "Timer context finished processing "
                           "this scsi cmd\n");
+               return;
        }
 
        /* Cancel the timeout_work, as we received IO completion */
index be5ee2d..7dbbbb8 100644 (file)
@@ -114,7 +114,7 @@ static enum csio_ln_ev fwevt_to_lnevt[] = {
 static struct csio_lnode *
 csio_ln_lookup_by_portid(struct csio_hw *hw, uint8_t portid)
 {
-       struct csio_lnode *ln = hw->rln;
+       struct csio_lnode *ln;
        struct list_head *tmp;
 
        /* Match siblings lnode with portid */
index 022e421..4b44325 100644 (file)
@@ -876,6 +876,11 @@ static void alua_rtpg_work(struct work_struct *work)
 
 /**
  * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
+ * @pg: ALUA port group associated with @sdev.
+ * @sdev: SCSI device for which to submit an RTPG.
+ * @qdata: Information about the callback to invoke after the RTPG.
+ * @force: Whether or not to submit an RTPG if a work item that will submit an
+ *         RTPG has already been scheduled.
  *
  * Returns true if and only if alua_rtpg_work() will be called asynchronously.
  * That function is responsible for calling @qdata->fn().
index 9a0696f..b81a53c 100644 (file)
@@ -367,7 +367,7 @@ enum ibmvfc_fcp_rsp_info_codes {
 };
 
 struct ibmvfc_fcp_rsp_info {
-       __be16 reserved;
+       u8 reserved[3];
        u8 rsp_code;
        u8 reserved2[4];
 }__attribute__((packed, aligned (2)));
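The layout change matters because the FCP_RSP_INFO descriptor carries three reserved bytes before RSP_CODE, so the code byte must land at offset 3; the old __be16 reserved field put it at offset 2. A compile-time check of the corrected layout, sketched with a local struct name (C11):

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

struct fcp_rsp_info_sketch {
        uint8_t reserved[3];
        uint8_t rsp_code;
        uint8_t reserved2[4];
} __attribute__((packed, aligned(2)));

/* RSP_CODE is the fourth byte of the descriptor. */
static_assert(offsetof(struct fcp_rsp_info_sketch, rsp_code) == 3,
              "rsp_code must sit at byte offset 3");

int main(void)
{
        return 0;
}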
index 6198559..0ad00db 100644 (file)
@@ -732,7 +732,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
        struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
        struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
        struct sockaddr_in6 addr;
-       int rc, len;
+       int rc;
 
        switch(param) {
        case ISCSI_PARAM_CONN_PORT:
@@ -745,12 +745,12 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
                }
                if (param == ISCSI_PARAM_LOCAL_PORT)
                        rc = kernel_getsockname(tcp_sw_conn->sock,
-                                               (struct sockaddr *)&addr, &len);
+                                               (struct sockaddr *)&addr);
                else
                        rc = kernel_getpeername(tcp_sw_conn->sock,
-                                               (struct sockaddr *)&addr, &len);
+                                               (struct sockaddr *)&addr);
                spin_unlock_bh(&conn->session->frwd_lock);
-               if (rc)
+               if (rc < 0)
                        return rc;
 
                return iscsi_conn_get_addr_param((struct sockaddr_storage *)
@@ -771,7 +771,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
        struct iscsi_tcp_conn *tcp_conn;
        struct iscsi_sw_tcp_conn *tcp_sw_conn;
        struct sockaddr_in6 addr;
-       int rc, len;
+       int rc;
 
        switch (param) {
        case ISCSI_HOST_PARAM_IPADDRESS:
@@ -793,9 +793,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
                }
 
                rc = kernel_getsockname(tcp_sw_conn->sock,
-                                       (struct sockaddr *)&addr, &len);
+                                       (struct sockaddr *)&addr);
                spin_unlock_bh(&session->frwd_lock);
-               if (rc)
+               if (rc < 0)
                        return rc;
 
                return iscsi_conn_get_addr_param((struct sockaddr_storage *)
index 13d6e4e..59a87ca 100644 (file)
@@ -2410,8 +2410,11 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc)
                                continue;
                        }
 
-                       for_each_cpu(cpu, mask)
+                       for_each_cpu_and(cpu, mask, cpu_online_mask) {
+                               if (cpu >= ioc->cpu_msix_table_sz)
+                                       break;
                                ioc->cpu_msix_table[cpu] = reply_q->msix_index;
+                       }
                }
                return;
        }
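The loop above now intersects the affinity mask with cpu_online_mask and bounds the CPU number against cpu_msix_table_sz, so the index can no longer run past the allocated table. A user-space sketch of bounded masked iteration (bitmasks and sizes invented for illustration):

#include <stdio.h>

#define NR_CPUS 8

int main(void)
{
        unsigned int mask = 0xB5;   /* stands in for the IRQ affinity mask */
        unsigned int online = 0x3F; /* stands in for cpu_online_mask */
        int table_sz = 4;           /* stands in for cpu_msix_table_sz */
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (!((mask & online) & (1u << cpu)))
                        continue;   /* not in both masks */
                if (cpu >= table_sz)
                        break;      /* the bound the patch adds */
                printf("table[%d] = msix index\n", cpu);
        }
        return 0;
}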
index 029e2e6..f57a94b 100644 (file)
@@ -1724,7 +1724,6 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf)
 {
        struct qedi_ctx *qedi = data;
        struct nvm_iscsi_initiator *initiator;
-       char *str = buf;
        int rc = 1;
        u32 ipv6_en, dhcp_en, ip_len;
        struct nvm_iscsi_block *block;
@@ -1758,32 +1757,32 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf)
 
        switch (type) {
        case ISCSI_BOOT_ETH_IP_ADDR:
-               rc = snprintf(str, ip_len, fmt, ip);
+               rc = snprintf(buf, ip_len, fmt, ip);
                break;
        case ISCSI_BOOT_ETH_SUBNET_MASK:
-               rc = snprintf(str, ip_len, fmt, sub);
+               rc = snprintf(buf, ip_len, fmt, sub);
                break;
        case ISCSI_BOOT_ETH_GATEWAY:
-               rc = snprintf(str, ip_len, fmt, gw);
+               rc = snprintf(buf, ip_len, fmt, gw);
                break;
        case ISCSI_BOOT_ETH_FLAGS:
-               rc = snprintf(str, 3, "%hhd\n",
+               rc = snprintf(buf, 3, "%hhd\n",
                              SYSFS_FLAG_FW_SEL_BOOT);
                break;
        case ISCSI_BOOT_ETH_INDEX:
-               rc = snprintf(str, 3, "0\n");
+               rc = snprintf(buf, 3, "0\n");
                break;
        case ISCSI_BOOT_ETH_MAC:
-               rc = sysfs_format_mac(str, qedi->mac, ETH_ALEN);
+               rc = sysfs_format_mac(buf, qedi->mac, ETH_ALEN);
                break;
        case ISCSI_BOOT_ETH_VLAN:
-               rc = snprintf(str, 12, "%d\n",
+               rc = snprintf(buf, 12, "%d\n",
                              GET_FIELD2(initiator->generic_cont0,
                                         NVM_ISCSI_CFG_INITIATOR_VLAN));
                break;
        case ISCSI_BOOT_ETH_ORIGIN:
                if (dhcp_en)
-                       rc = snprintf(str, 3, "3\n");
+                       rc = snprintf(buf, 3, "3\n");
                break;
        default:
                rc = 0;
@@ -1819,7 +1818,6 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf)
 {
        struct qedi_ctx *qedi = data;
        struct nvm_iscsi_initiator *initiator;
-       char *str = buf;
        int rc;
        struct nvm_iscsi_block *block;
 
@@ -1831,8 +1829,8 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf)
 
        switch (type) {
        case ISCSI_BOOT_INI_INITIATOR_NAME:
-               rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
-                             initiator->initiator_name.byte);
+               rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN,
+                            initiator->initiator_name.byte);
                break;
        default:
                rc = 0;
@@ -1860,7 +1858,6 @@ static ssize_t
 qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
                        char *buf, enum qedi_nvm_tgts idx)
 {
-       char *str = buf;
        int rc = 1;
        u32 ctrl_flags, ipv6_en, chap_en, mchap_en, ip_len;
        struct nvm_iscsi_block *block;
@@ -1899,48 +1896,48 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
 
        switch (type) {
        case ISCSI_BOOT_TGT_NAME:
-               rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
-                             block->target[idx].target_name.byte);
+               rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN,
+                            block->target[idx].target_name.byte);
                break;
        case ISCSI_BOOT_TGT_IP_ADDR:
                if (ipv6_en)
-                       rc = snprintf(str, ip_len, "%pI6\n",
+                       rc = snprintf(buf, ip_len, "%pI6\n",
                                      block->target[idx].ipv6_addr.byte);
                else
-                       rc = snprintf(str, ip_len, "%pI4\n",
+                       rc = snprintf(buf, ip_len, "%pI4\n",
                                      block->target[idx].ipv4_addr.byte);
                break;
        case ISCSI_BOOT_TGT_PORT:
-               rc = snprintf(str, 12, "%d\n",
+               rc = snprintf(buf, 12, "%d\n",
                              GET_FIELD2(block->target[idx].generic_cont0,
                                         NVM_ISCSI_CFG_TARGET_TCP_PORT));
                break;
        case ISCSI_BOOT_TGT_LUN:
-               rc = snprintf(str, 22, "%.*d\n",
+               rc = snprintf(buf, 22, "%.*d\n",
                              block->target[idx].lun.value[1],
                              block->target[idx].lun.value[0]);
                break;
        case ISCSI_BOOT_TGT_CHAP_NAME:
-               rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
-                             chap_name);
+               rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+                            chap_name);
                break;
        case ISCSI_BOOT_TGT_CHAP_SECRET:
-               rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
-                             chap_secret);
+               rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN,
+                            chap_secret);
                break;
        case ISCSI_BOOT_TGT_REV_CHAP_NAME:
-               rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
-                             mchap_name);
+               rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+                            mchap_name);
                break;
        case ISCSI_BOOT_TGT_REV_CHAP_SECRET:
-               rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
-                             mchap_secret);
+               rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN,
+                            mchap_secret);
                break;
        case ISCSI_BOOT_TGT_FLAGS:
-               rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT);
+               rc = snprintf(buf, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT);
                break;
        case ISCSI_BOOT_TGT_NIC_ASSOC:
-               rc = snprintf(str, 3, "0\n");
+               rc = snprintf(buf, 3, "0\n");
                break;
        default:
                rc = 0;
index aececf6..2dea112 100644 (file)
@@ -59,8 +59,6 @@ qla2x00_sp_timeout(struct timer_list *t)
        req->outstanding_cmds[sp->handle] = NULL;
        iocb = &sp->u.iocb_cmd;
        iocb->timeout(sp);
-       if (sp->type != SRB_ELS_DCMD)
-               sp->free(sp);
        spin_unlock_irqrestore(&vha->hw->hardware_lock, flags);
 }
 
@@ -102,7 +100,6 @@ qla2x00_async_iocb_timeout(void *data)
        srb_t *sp = data;
        fc_port_t *fcport = sp->fcport;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
-       struct event_arg ea;
 
        if (fcport) {
                ql_dbg(ql_dbg_disc, fcport->vha, 0x2071,
@@ -117,25 +114,13 @@ qla2x00_async_iocb_timeout(void *data)
 
        switch (sp->type) {
        case SRB_LOGIN_CMD:
-               if (!fcport)
-                       break;
                /* Retry as needed. */
                lio->u.logio.data[0] = MBS_COMMAND_ERROR;
                lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ?
                        QLA_LOGIO_LOGIN_RETRIED : 0;
-               memset(&ea, 0, sizeof(ea));
-               ea.event = FCME_PLOGI_DONE;
-               ea.fcport = sp->fcport;
-               ea.data[0] = lio->u.logio.data[0];
-               ea.data[1] = lio->u.logio.data[1];
-               ea.sp = sp;
-               qla24xx_handle_plogi_done_event(fcport->vha, &ea);
+               sp->done(sp, QLA_FUNCTION_TIMEOUT);
                break;
        case SRB_LOGOUT_CMD:
-               if (!fcport)
-                       break;
-               qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT);
-               break;
        case SRB_CT_PTHRU_CMD:
        case SRB_MB_IOCB:
        case SRB_NACK_PLOGI:
@@ -235,12 +220,10 @@ static void
 qla2x00_async_logout_sp_done(void *ptr, int res)
 {
        srb_t *sp = ptr;
-       struct srb_iocb *lio = &sp->u.iocb_cmd;
 
        sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
-       if (!test_bit(UNLOADING, &sp->vha->dpc_flags))
-               qla2x00_post_async_logout_done_work(sp->vha, sp->fcport,
-                   lio->u.logio.data);
+       sp->fcport->login_gen++;
+       qlt_logo_completion_handler(sp->fcport, res);
        sp->free(sp);
 }
 
index 1b62e94..8d00d55 100644 (file)
@@ -3275,12 +3275,11 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb)
        memset(abt_iocb, 0, sizeof(struct abort_entry_24xx));
        abt_iocb->entry_type = ABORT_IOCB_TYPE;
        abt_iocb->entry_count = 1;
-       abt_iocb->handle =
-            cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no,
-                aio->u.abt.cmd_hndl));
+       abt_iocb->handle = cpu_to_le32(MAKE_HANDLE(req->id, sp->handle));
        abt_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id);
        abt_iocb->handle_to_abort =
-           cpu_to_le32(MAKE_HANDLE(req->id, aio->u.abt.cmd_hndl));
+           cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no,
+                                   aio->u.abt.cmd_hndl));
        abt_iocb->port_id[0] = sp->fcport->d_id.b.al_pa;
        abt_iocb->port_id[1] = sp->fcport->d_id.b.area;
        abt_iocb->port_id[2] = sp->fcport->d_id.b.domain;
index 14109d8..89f93eb 100644 (file)
@@ -272,7 +272,8 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
        struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
 
        /* Read all mbox registers? */
-       mboxes = (1 << ha->mbx_count) - 1;
+       WARN_ON_ONCE(ha->mbx_count > 32);
+       mboxes = (1ULL << ha->mbx_count) - 1;
        if (!ha->mcp)
                ql_dbg(ql_dbg_async, vha, 0x5001, "MBX pointer ERROR.\n");
        else
@@ -2880,7 +2881,8 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
        struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
 
        /* Read all mbox registers? */
-       mboxes = (1 << ha->mbx_count) - 1;
+       WARN_ON_ONCE(ha->mbx_count > 32);
+       mboxes = (1ULL << ha->mbx_count) - 1;
        if (!ha->mcp)
                ql_dbg(ql_dbg_async, vha, 0x504e, "MBX pointer ERROR.\n");
        else
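Both hunks above guard the same hazard: with mbx_count allowed to reach 32, (1 << ha->mbx_count) shifts a 32-bit int by its full width, which is undefined behaviour in C; shifting 1ULL is well defined here, and the WARN_ON_ONCE records the <= 32 assumption. A minimal demonstration of the safe form:

#include <stdio.h>
#include <stdint.h>

/* Build a mask of the low `count` bits without undefined behaviour:
 * do the shift in 64 bits, then truncate.
 */
static uint32_t low_bits(unsigned int count)
{
        return (uint32_t)((1ULL << count) - 1);  /* valid for count <= 32 */
}

int main(void)
{
        printf("0x%08x\n", low_bits(8));   /* 0x000000ff */
        printf("0x%08x\n", low_bits(32));  /* 0xffffffff */
        return 0;
}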
index 12ee6e0..afcb556 100644 (file)
@@ -3625,6 +3625,8 @@ qla2x00_remove_one(struct pci_dev *pdev)
        }
        qla2x00_wait_for_hba_ready(base_vha);
 
+       qla2x00_wait_for_sess_deletion(base_vha);
+
        /*
         * if UNLOAD flag is already set, then continue unload,
         * where it was set first.
index fc89af8..896b2d8 100644 (file)
@@ -4871,8 +4871,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
                                    sess);
                                qlt_send_term_imm_notif(vha, iocb, 1);
                                res = 0;
-                               spin_lock_irqsave(&tgt->ha->tgt.sess_lock,
-                                   flags);
                                break;
                        }
 
index fc23371..817f312 100644 (file)
 #define DEV_DB_NON_PERSISTENT  0
 #define DEV_DB_PERSISTENT      1
 
+#define QL4_ISP_REG_DISCONNECT 0xffffffffU
+
 #define COPY_ISID(dst_isid, src_isid) {                        \
        int i, j;                                       \
        for (i = 0, j = ISID_SIZE - 1; i < ISID_SIZE;)  \
index 82e889b..fc2c97d 100644 (file)
@@ -262,6 +262,24 @@ static struct iscsi_transport qla4xxx_iscsi_transport = {
 
 static struct scsi_transport_template *qla4xxx_scsi_transport;
 
+static int qla4xxx_isp_check_reg(struct scsi_qla_host *ha)
+{
+       u32 reg_val = 0;
+       int rval = QLA_SUCCESS;
+
+       if (is_qla8022(ha))
+               reg_val = readl(&ha->qla4_82xx_reg->host_status);
+       else if (is_qla8032(ha) || is_qla8042(ha))
+               reg_val = qla4_8xxx_rd_direct(ha, QLA8XXX_PEG_ALIVE_COUNTER);
+       else
+               reg_val = readw(&ha->reg->ctrl_status);
+
+       if (reg_val == QL4_ISP_REG_DISCONNECT)
+               rval = QLA_ERROR;
+
+       return rval;
+}
+
 static int qla4xxx_send_ping(struct Scsi_Host *shost, uint32_t iface_num,
                             uint32_t iface_type, uint32_t payload_size,
                             uint32_t pid, struct sockaddr *dst_addr)
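qla4xxx_isp_check_reg() above leans on a PCI property: reads from a device that has been surprise-removed, or whose link is dead, complete as all ones, so an all-ones status register is a cheap "adapter gone" probe before error handling touches the hardware. The idea in isolation, with the register value passed in:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define REG_DISCONNECT 0xffffffffU

/* An all-ones read means the PCI device is effectively gone. */
static bool adapter_present(uint32_t status)
{
        return status != REG_DISCONNECT;
}

int main(void)
{
        printf("0x12345678 -> %s\n",
               adapter_present(0x12345678u) ? "present" : "gone");
        printf("0xffffffff -> %s\n",
               adapter_present(0xffffffffu) ? "present" : "gone");
        return 0;
}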
@@ -9186,10 +9204,17 @@ static int qla4xxx_eh_abort(struct scsi_cmnd *cmd)
        struct srb *srb = NULL;
        int ret = SUCCESS;
        int wait = 0;
+       int rval;
 
        ql4_printk(KERN_INFO, ha, "scsi%ld:%d:%llu: Abort command issued cmd=%p, cdb=0x%x\n",
                   ha->host_no, id, lun, cmd, cmd->cmnd[0]);
 
+       rval = qla4xxx_isp_check_reg(ha);
+       if (rval != QLA_SUCCESS) {
+               ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+               return FAILED;
+       }
+
        spin_lock_irqsave(&ha->hardware_lock, flags);
        srb = (struct srb *) CMD_SP(cmd);
        if (!srb) {
@@ -9241,6 +9266,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
        struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
        struct ddb_entry *ddb_entry = cmd->device->hostdata;
        int ret = FAILED, stat;
+       int rval;
 
        if (!ddb_entry)
                return ret;
@@ -9260,6 +9286,12 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
                      cmd, jiffies, cmd->request->timeout / HZ,
                      ha->dpc_flags, cmd->result, cmd->allowed));
 
+       rval = qla4xxx_isp_check_reg(ha);
+       if (rval != QLA_SUCCESS) {
+               ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+               return FAILED;
+       }
+
        /* FIXME: wait for hba to go online */
        stat = qla4xxx_reset_lun(ha, ddb_entry, cmd->device->lun);
        if (stat != QLA_SUCCESS) {
@@ -9303,6 +9335,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
        struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
        struct ddb_entry *ddb_entry = cmd->device->hostdata;
        int stat, ret;
+       int rval;
 
        if (!ddb_entry)
                return FAILED;
@@ -9320,6 +9353,12 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
                      ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
                      ha->dpc_flags, cmd->result, cmd->allowed));
 
+       rval = qla4xxx_isp_check_reg(ha);
+       if (rval != QLA_SUCCESS) {
+               ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+               return FAILED;
+       }
+
        stat = qla4xxx_reset_target(ha, ddb_entry);
        if (stat != QLA_SUCCESS) {
                starget_printk(KERN_INFO, scsi_target(cmd->device),
@@ -9374,9 +9413,16 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd)
 {
        int return_status = FAILED;
        struct scsi_qla_host *ha;
+       int rval;
 
        ha = to_qla_host(cmd->device->host);
 
+       rval = qla4xxx_isp_check_reg(ha);
+       if (rval != QLA_SUCCESS) {
+               ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+               return FAILED;
+       }
+
        if ((is_qla8032(ha) || is_qla8042(ha)) && ql4xdontresethba)
                qla4_83xx_set_idc_dontreset(ha);
 
index 40fc7a5..6be5ab3 100644 (file)
@@ -1657,7 +1657,7 @@ static struct scsi_host_template scsi_driver = {
        .eh_timed_out =         storvsc_eh_timed_out,
        .slave_alloc =          storvsc_device_alloc,
        .slave_configure =      storvsc_device_configure,
-       .cmd_per_lun =          255,
+       .cmd_per_lun =          2048,
        .this_id =              -1,
        .use_clustering =       ENABLE_CLUSTERING,
        /* Make sure we dont get a sg segment crosses a page boundary */
index ca360da..378af30 100644 (file)
@@ -536,7 +536,7 @@ sym_getsync(struct sym_hcb *np, u_char dt, u_char sfac, u_char *divp, u_char *fa
         *  Look for the greatest clock divisor that allows an 
         *  input speed faster than the period.
         */
-       while (div-- > 0)
+       while (--div > 0)
                if (kpc >= (div_10M[div] << 2)) break;
 
        /*
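The one-character change above fixes an iterator underflow: the post-decrement form can leave div at -1 when the loop exits without a break, and div is used as an index into div_10M[] afterwards; the pre-decrement form bottoms out at 0. The difference in isolation:

#include <stdio.h>

int main(void)
{
        int div;

        div = 3;
        while (div-- > 0)
                ;               /* in the driver, the body indexes with 2, 1, 0 */
        printf("post-decrement exits with div = %d\n", div);  /* -1 */

        div = 3;
        while (--div > 0)
                ;               /* body runs with 2, 1 */
        printf("pre-decrement exits with div = %d\n", div);   /* 0 */
        return 0;
}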
index a355d98..c7da2c1 100644 (file)
@@ -4352,6 +4352,8 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
        /* REPORT SUPPORTED OPERATION CODES is not supported */
        sdev->no_report_opcodes = 1;
 
+       /* WRITE_SAME command is not supported */
+       sdev->no_write_same = 1;
 
        ufshcd_set_queue_depth(sdev);
 
index 877611d..3219822 100644 (file)
@@ -586,7 +586,6 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
                                      struct sockaddr_qrtr *sq)
 {
        struct socket *sock;
-       int sl = sizeof(*sq);
        int ret;
 
        ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
@@ -594,7 +593,7 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
        if (ret < 0)
                return ERR_PTR(ret);
 
-       ret = kernel_getsockname(sock, (struct sockaddr *)sq, &sl);
+       ret = kernel_getsockname(sock, (struct sockaddr *)sq);
        if (ret < 0) {
                sock_release(sock);
                return ERR_PTR(ret);
index bbdc53b..6dbba5a 100644 (file)
@@ -702,30 +702,32 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd,
        size_t pgstart, pgend;
        int ret = -EINVAL;
 
+       mutex_lock(&ashmem_mutex);
+
        if (unlikely(!asma->file))
-               return -EINVAL;
+               goto out_unlock;
 
-       if (unlikely(copy_from_user(&pin, p, sizeof(pin))))
-               return -EFAULT;
+       if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) {
+               ret = -EFAULT;
+               goto out_unlock;
+       }
 
        /* per custom, you can pass zero for len to mean "everything onward" */
        if (!pin.len)
                pin.len = PAGE_ALIGN(asma->size) - pin.offset;
 
        if (unlikely((pin.offset | pin.len) & ~PAGE_MASK))
-               return -EINVAL;
+               goto out_unlock;
 
        if (unlikely(((__u32)-1) - pin.offset < pin.len))
-               return -EINVAL;
+               goto out_unlock;
 
        if (unlikely(PAGE_ALIGN(asma->size) < pin.offset + pin.len))
-               return -EINVAL;
+               goto out_unlock;
 
        pgstart = pin.offset / PAGE_SIZE;
        pgend = pgstart + (pin.len / PAGE_SIZE) - 1;
 
-       mutex_lock(&ashmem_mutex);
-
        switch (cmd) {
        case ASHMEM_PIN:
                ret = ashmem_pin(asma, pgstart, pgend);
@@ -738,6 +740,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd,
                break;
        }
 
+out_unlock:
        mutex_unlock(&ashmem_mutex);
 
        return ret;
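The ashmem fix takes ashmem_mutex before the asma->file check and the argument validation, closing the window in which another thread could change that state between check and use, and it funnels every failure path through a single unlock site. A user-space sketch of the same lock-then-validate shape (pthread mutex and invented names standing in for the kernel objects):

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int file_attached;                /* stands in for asma->file */

static int pin_range(long offset, long len)
{
        int ret = -1;                    /* -EINVAL in the kernel code */

        pthread_mutex_lock(&lock);
        if (!file_attached)              /* validated under the lock */
                goto out_unlock;
        if (offset < 0 || len <= 0)      /* arguments checked under it too */
                goto out_unlock;
        ret = 0;                         /* the real pin work goes here */
out_unlock:
        pthread_mutex_unlock(&lock);     /* single unlock site */
        return ret;
}

int main(void)
{
        file_attached = 1;
        printf("pin: %d\n", pin_range(0, 4096));
        return 0;
}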
index 94e0692..49718c9 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/cma.h>
 #include <linux/scatterlist.h>
+#include <linux/highmem.h>
 
 #include "ion.h"
 
@@ -42,6 +43,22 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
        if (!pages)
                return -ENOMEM;
 
+       if (PageHighMem(pages)) {
+               unsigned long nr_clear_pages = nr_pages;
+               struct page *page = pages;
+
+               while (nr_clear_pages > 0) {
+                       void *vaddr = kmap_atomic(page);
+
+                       memset(vaddr, 0, PAGE_SIZE);
+                       kunmap_atomic(vaddr);
+                       page++;
+                       nr_clear_pages--;
+               }
+       } else {
+               memset(page_address(pages), 0, size);
+       }
+
        table = kmalloc(sizeof(*table), GFP_KERNEL);
        if (!table)
                goto err;
index 1f91000..b35ef7e 100644 (file)
@@ -7,7 +7,7 @@
 
 config FSL_MC_BUS
        bool "QorIQ DPAA2 fsl-mc bus driver"
-       depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86 || PPC)))
+       depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86_LOCAL_APIC || PPC)))
        select GENERIC_MSI_IRQ_DOMAIN
        help
          Driver to enable the bus infrastructure for the QorIQ DPAA2
index 5064d5d..fc2013a 100644 (file)
@@ -73,6 +73,8 @@ static int __init its_fsl_mc_msi_init(void)
 
        for (np = of_find_matching_node(NULL, its_device_id); np;
             np = of_find_matching_node(np, its_device_id)) {
+               if (!of_device_is_available(np))
+                       continue;
                if (!of_property_read_bool(np, "msi-controller"))
                        continue;
 
index f015955..425e8b8 100644 (file)
 #define AD7192_GPOCON_P1DAT    BIT(1) /* P1 state */
 #define AD7192_GPOCON_P0DAT    BIT(0) /* P0 state */
 
+#define AD7192_EXT_FREQ_MHZ_MIN        2457600
+#define AD7192_EXT_FREQ_MHZ_MAX        5120000
 #define AD7192_INT_FREQ_MHZ    4915200
 
 /* NOTE:
@@ -218,6 +220,12 @@ static int ad7192_calibrate_all(struct ad7192_state *st)
                                ARRAY_SIZE(ad7192_calib_arr));
 }
 
+static inline bool ad7192_valid_external_frequency(u32 freq)
+{
+       return (freq >= AD7192_EXT_FREQ_MHZ_MIN &&
+               freq <= AD7192_EXT_FREQ_MHZ_MAX);
+}
+
 static int ad7192_setup(struct ad7192_state *st,
                        const struct ad7192_platform_data *pdata)
 {
@@ -243,17 +251,20 @@ static int ad7192_setup(struct ad7192_state *st,
                         id);
 
        switch (pdata->clock_source_sel) {
-       case AD7192_CLK_EXT_MCLK1_2:
-       case AD7192_CLK_EXT_MCLK2:
-               st->mclk = AD7192_INT_FREQ_MHZ;
-               break;
        case AD7192_CLK_INT:
        case AD7192_CLK_INT_CO:
-               if (pdata->ext_clk_hz)
-                       st->mclk = pdata->ext_clk_hz;
-               else
-                       st->mclk = AD7192_INT_FREQ_MHZ;
+               st->mclk = AD7192_INT_FREQ_MHZ;
                break;
+       case AD7192_CLK_EXT_MCLK1_2:
+       case AD7192_CLK_EXT_MCLK2:
+               if (ad7192_valid_external_frequency(pdata->ext_clk_hz)) {
+                       st->mclk = pdata->ext_clk_hz;
+                       break;
+               }
+               dev_err(&st->sd.spi->dev, "Invalid frequency setting %u\n",
+                       pdata->ext_clk_hz);
+               ret = -EINVAL;
+               goto out;
        default:
                ret = -EINVAL;
                goto out;
index 2b28fb9..3bcf494 100644 (file)
@@ -648,8 +648,6 @@ static int ad5933_register_ring_funcs_and_init(struct iio_dev *indio_dev)
        /* Ring buffer functions - here trigger setup related */
        indio_dev->setup_ops = &ad5933_ring_setup_ops;
 
-       indio_dev->modes |= INDIO_BUFFER_HARDWARE;
-
        return 0;
 }
 
@@ -762,7 +760,7 @@ static int ad5933_probe(struct i2c_client *client,
        indio_dev->dev.parent = &client->dev;
        indio_dev->info = &ad5933_info;
        indio_dev->name = id->name;
-       indio_dev->modes = INDIO_DIRECT_MODE;
+       indio_dev->modes = (INDIO_BUFFER_SOFTWARE | INDIO_DIRECT_MODE);
        indio_dev->channels = ad5933_channels;
        indio_dev->num_channels = ARRAY_SIZE(ad5933_channels);
 
index d21a9d1..5703dd1 100644 (file)
@@ -1577,7 +1577,7 @@ out:
 
 
 static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
-                       int *uaddr_len, int peer)
+                       int peer)
 {
        struct ipx_address *addr;
        struct sockaddr_ipx sipx;
@@ -1585,8 +1585,6 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
        struct ipx_sock *ipxs = ipx_sk(sk);
        int rc;
 
-       *uaddr_len = sizeof(struct sockaddr_ipx);
-
        lock_sock(sk);
        if (peer) {
                rc = -ENOTCONN;
@@ -1620,7 +1618,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
        sipx.sipx_zero   = 0;
        memcpy(uaddr, &sipx, sizeof(sipx));
 
-       rc = 0;
+       rc = sizeof(struct sockaddr_ipx);
 out:
        release_sock(sk);
        return rc;
index 2f1e9ab..c13553a 100644 (file)
@@ -697,7 +697,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
  *
  */
 static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
-                       int *uaddr_len, int peer)
+                       int peer)
 {
        struct sockaddr_irda saddr;
        struct sock *sk = sock->sk;
@@ -720,11 +720,9 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
        pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
        pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
 
-       /* uaddr_len come to us uninitialised */
-       *uaddr_len = sizeof (struct sockaddr_irda);
-       memcpy(uaddr, &saddr, *uaddr_len);
+       memcpy(uaddr, &saddr, sizeof (struct sockaddr_irda));
 
-       return 0;
+       return sizeof (struct sockaddr_irda);
 }
 
 /*
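The ipx and irda hunks above, like the lnet and iscsi-target ones further down, adapt callers to the changed socket getname() convention: instead of filling an int * length argument and returning 0, these functions now return the address length on success and a negative errno on failure, so callers test rc < 0 and use the positive return value as the length. The convention in miniature (types and names invented for the sketch):

#include <stdio.h>
#include <string.h>

struct sockaddr_sketch {
        unsigned short family;
        char data[14];
};

/* New-style getname: the address length is the return value. */
static int sketch_getname(struct sockaddr_sketch *addr)
{
        memset(addr, 0, sizeof(*addr));
        addr->family = 1;
        return (int)sizeof(*addr);
}

int main(void)
{
        struct sockaddr_sketch addr;
        int rc = sketch_getname(&addr);

        if (rc < 0)                      /* errors are negative */
                return 1;
        printf("address length = %d\n", rc);
        return 0;
}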
index ce93806..1bee667 100644 (file)
@@ -448,14 +448,13 @@ int
 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
 {
        struct sockaddr_in sin;
-       int len = sizeof(sin);
        int rc;
 
        if (remote)
-               rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
+               rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
        else
-               rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
-       if (rc) {
+               rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
+       if (rc < 0) {
                CERROR("Error %d getting sock %s IP/port\n",
                       rc, remote ? "peer" : "local");
                return rc;
index 64c5a57..9950178 100644 (file)
@@ -1020,7 +1020,7 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
        struct socket *new_sock, *sock = np->np_socket;
        struct sockaddr_in sock_in;
        struct sockaddr_in6 sock_in6;
-       int rc, err;
+       int rc;
 
        rc = kernel_accept(sock, &new_sock, 0);
        if (rc < 0)
@@ -1033,8 +1033,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
                memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
 
                rc = conn->sock->ops->getname(conn->sock,
-                               (struct sockaddr *)&sock_in6, &err, 1);
-               if (!rc) {
+                               (struct sockaddr *)&sock_in6, 1);
+               if (rc >= 0) {
                        if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
                                memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6));
                        } else {
@@ -1047,8 +1047,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
                }
 
                rc = conn->sock->ops->getname(conn->sock,
-                               (struct sockaddr *)&sock_in6, &err, 0);
-               if (!rc) {
+                               (struct sockaddr *)&sock_in6, 0);
+               if (rc >= 0) {
                        if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
                                memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6));
                        } else {
@@ -1063,13 +1063,13 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
                memset(&sock_in, 0, sizeof(struct sockaddr_in));
 
                rc = conn->sock->ops->getname(conn->sock,
-                               (struct sockaddr *)&sock_in, &err, 1);
-               if (!rc)
+                               (struct sockaddr *)&sock_in, 1);
+               if (rc >= 0)
                        memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
 
                rc = conn->sock->ops->getname(conn->sock,
-                               (struct sockaddr *)&sock_in, &err, 0);
-               if (!rc)
+                               (struct sockaddr *)&sock_in, 0);
+               if (rc >= 0)
                        memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
        }
 
index f699aba..148f3ee 100644 (file)
@@ -19,6 +19,12 @@ config USB_EHCI_BIG_ENDIAN_MMIO
 config USB_EHCI_BIG_ENDIAN_DESC
        bool
 
+config USB_UHCI_BIG_ENDIAN_MMIO
+       bool
+
+config USB_UHCI_BIG_ENDIAN_DESC
+       bool
+
 menuconfig USB_SUPPORT
        bool "USB support"
        depends on HAS_IOMEM
index 06b3b54..7b366a6 100644 (file)
@@ -174,6 +174,7 @@ static int acm_wb_alloc(struct acm *acm)
                wb = &acm->wb[wbn];
                if (!wb->use) {
                        wb->use = 1;
+                       wb->len = 0;
                        return wbn;
                }
                wbn = (wbn + 1) % ACM_NW;
@@ -805,16 +806,18 @@ static int acm_tty_write(struct tty_struct *tty,
 static void acm_tty_flush_chars(struct tty_struct *tty)
 {
        struct acm *acm = tty->driver_data;
-       struct acm_wb *cur = acm->putbuffer;
+       struct acm_wb *cur;
        int err;
        unsigned long flags;
 
+       spin_lock_irqsave(&acm->write_lock, flags);
+
+       cur = acm->putbuffer;
        if (!cur) /* nothing to do */
-               return;
+               goto out;
 
        acm->putbuffer = NULL;
        err = usb_autopm_get_interface_async(acm->control);
-       spin_lock_irqsave(&acm->write_lock, flags);
        if (err < 0) {
                cur->use = 0;
                acm->putbuffer = cur;
index 4024926..f4a5484 100644 (file)
@@ -226,6 +226,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        { USB_DEVICE(0x1a0a, 0x0200), .driver_info =
                        USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+       /* Corsair K70 RGB */
+       { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
+
        /* Corsair Strafe RGB */
        { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT },
 
index e4c3ce0..5bcad1d 100644 (file)
@@ -1917,7 +1917,9 @@ static void dwc2_hsotg_program_zlp(struct dwc2_hsotg *hsotg,
                /* No specific buffer needed for ep0 ZLP */
                dma_addr_t dma = hs_ep->desc_list_dma;
 
-               dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep);
+               if (!index)
+                       dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep);
+
                dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, dma, 0);
        } else {
                dwc2_writel(DXEPTSIZ_MC(1) | DXEPTSIZ_PKTCNT(1) |
@@ -2974,9 +2976,13 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx,
        if (ints & DXEPINT_STSPHSERCVD) {
                dev_dbg(hsotg->dev, "%s: StsPhseRcvd\n", __func__);
 
-               /* Move to STATUS IN for DDMA */
-               if (using_desc_dma(hsotg))
-                       dwc2_hsotg_ep0_zlp(hsotg, true);
+               /* Safety check EP0 state when STSPHSERCVD asserted */
+               if (hsotg->ep0_state == DWC2_EP0_DATA_OUT) {
+                       /* Move to STATUS IN for DDMA */
+                       if (using_desc_dma(hsotg))
+                               dwc2_hsotg_ep0_zlp(hsotg, true);
+               }
+
        }
 
        if (ints & DXEPINT_BACK2BACKSETUP)
@@ -3375,12 +3381,6 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
        dwc2_writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) |
               DXEPCTL_USBACTEP, hsotg->regs + DIEPCTL0);
 
-       dwc2_hsotg_enqueue_setup(hsotg);
-
-       dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
-               dwc2_readl(hsotg->regs + DIEPCTL0),
-               dwc2_readl(hsotg->regs + DOEPCTL0));
-
        /* clear global NAKs */
        val = DCTL_CGOUTNAK | DCTL_CGNPINNAK;
        if (!is_usb_reset)
@@ -3391,6 +3391,12 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
        mdelay(3);
 
        hsotg->lx_state = DWC2_L0;
+
+       dwc2_hsotg_enqueue_setup(hsotg);
+
+       dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
+               dwc2_readl(hsotg->regs + DIEPCTL0),
+               dwc2_readl(hsotg->regs + DOEPCTL0));
 }
 
 static void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg)
index ade2ab0..f1d838a 100644 (file)
@@ -100,6 +100,8 @@ static void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
        reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG));
        reg |= DWC3_GCTL_PRTCAPDIR(mode);
        dwc3_writel(dwc->regs, DWC3_GCTL, reg);
+
+       dwc->current_dr_role = mode;
 }
 
 static void __dwc3_set_mode(struct work_struct *work)
@@ -133,8 +135,6 @@ static void __dwc3_set_mode(struct work_struct *work)
 
        dwc3_set_prtcap(dwc, dwc->desired_dr_role);
 
-       dwc->current_dr_role = dwc->desired_dr_role;
-
        spin_unlock_irqrestore(&dwc->lock, flags);
 
        switch (dwc->desired_dr_role) {
@@ -219,7 +219,7 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
         * XHCI driver will reset the host block. If dwc3 was configured for
         * host-only mode, then we can return early.
         */
-       if (dwc->dr_mode == USB_DR_MODE_HOST)
+       if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
                return 0;
 
        reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -234,6 +234,9 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
                udelay(1);
        } while (--retries);
 
+       phy_exit(dwc->usb3_generic_phy);
+       phy_exit(dwc->usb2_generic_phy);
+
        return -ETIMEDOUT;
 }
 
@@ -483,6 +486,22 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc)
        parms->hwparams8 = dwc3_readl(dwc->regs, DWC3_GHWPARAMS8);
 }
 
+static int dwc3_core_ulpi_init(struct dwc3 *dwc)
+{
+       int intf;
+       int ret = 0;
+
+       intf = DWC3_GHWPARAMS3_HSPHY_IFC(dwc->hwparams.hwparams3);
+
+       if (intf == DWC3_GHWPARAMS3_HSPHY_IFC_ULPI ||
+           (intf == DWC3_GHWPARAMS3_HSPHY_IFC_UTMI_ULPI &&
+            dwc->hsphy_interface &&
+            !strncmp(dwc->hsphy_interface, "ulpi", 4)))
+               ret = dwc3_ulpi_init(dwc);
+
+       return ret;
+}
+
 /**
  * dwc3_phy_setup - Configure USB PHY Interface of DWC3 Core
  * @dwc: Pointer to our controller context structure
@@ -494,7 +513,6 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc)
 static int dwc3_phy_setup(struct dwc3 *dwc)
 {
        u32 reg;
-       int ret;
 
        reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0));
 
@@ -565,9 +583,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
                }
                /* FALLTHROUGH */
        case DWC3_GHWPARAMS3_HSPHY_IFC_ULPI:
-               ret = dwc3_ulpi_init(dwc);
-               if (ret)
-                       return ret;
                /* FALLTHROUGH */
        default:
                break;
@@ -724,6 +739,7 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc)
 }
 
 static int dwc3_core_get_phy(struct dwc3 *dwc);
+static int dwc3_core_ulpi_init(struct dwc3 *dwc);
 
 /**
  * dwc3_core_init - Low-level initialization of DWC3 Core
@@ -755,17 +771,27 @@ static int dwc3_core_init(struct dwc3 *dwc)
                        dwc->maximum_speed = USB_SPEED_HIGH;
        }
 
-       ret = dwc3_core_get_phy(dwc);
+       ret = dwc3_phy_setup(dwc);
        if (ret)
                goto err0;
 
-       ret = dwc3_core_soft_reset(dwc);
-       if (ret)
-               goto err0;
+       if (!dwc->ulpi_ready) {
+               ret = dwc3_core_ulpi_init(dwc);
+               if (ret)
+                       goto err0;
+               dwc->ulpi_ready = true;
+       }
 
-       ret = dwc3_phy_setup(dwc);
+       if (!dwc->phys_ready) {
+               ret = dwc3_core_get_phy(dwc);
+               if (ret)
+                       goto err0a;
+               dwc->phys_ready = true;
+       }
+
+       ret = dwc3_core_soft_reset(dwc);
        if (ret)
-               goto err0;
+               goto err0a;
 
        dwc3_core_setup_global_control(dwc);
        dwc3_core_num_eps(dwc);
@@ -838,6 +864,9 @@ err1:
        phy_exit(dwc->usb2_generic_phy);
        phy_exit(dwc->usb3_generic_phy);
 
+err0a:
+       dwc3_ulpi_exit(dwc);
+
 err0:
        return ret;
 }
@@ -916,7 +945,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc)
 
        switch (dwc->dr_mode) {
        case USB_DR_MODE_PERIPHERAL:
-               dwc->current_dr_role = DWC3_GCTL_PRTCAP_DEVICE;
                dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE);
 
                if (dwc->usb2_phy)
@@ -932,7 +960,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc)
                }
                break;
        case USB_DR_MODE_HOST:
-               dwc->current_dr_role = DWC3_GCTL_PRTCAP_HOST;
                dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST);
 
                if (dwc->usb2_phy)
@@ -1234,7 +1261,6 @@ err4:
 
 err3:
        dwc3_free_event_buffers(dwc);
-       dwc3_ulpi_exit(dwc);
 
 err2:
        pm_runtime_allow(&pdev->dev);
@@ -1284,7 +1310,7 @@ static int dwc3_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int dwc3_suspend_common(struct dwc3 *dwc)
+static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
 {
        unsigned long   flags;
 
@@ -1296,6 +1322,10 @@ static int dwc3_suspend_common(struct dwc3 *dwc)
                dwc3_core_exit(dwc);
                break;
        case DWC3_GCTL_PRTCAP_HOST:
+               /* do nothing during host runtime_suspend */
+               if (!PMSG_IS_AUTO(msg))
+                       dwc3_core_exit(dwc);
+               break;
        default:
                /* do nothing */
                break;
@@ -1304,7 +1334,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc)
        return 0;
 }
 
-static int dwc3_resume_common(struct dwc3 *dwc)
+static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
 {
        unsigned long   flags;
        int             ret;
@@ -1320,6 +1350,13 @@ static int dwc3_resume_common(struct dwc3 *dwc)
                spin_unlock_irqrestore(&dwc->lock, flags);
                break;
        case DWC3_GCTL_PRTCAP_HOST:
+               /* nothing to do on host runtime_resume */
+               if (!PMSG_IS_AUTO(msg)) {
+                       ret = dwc3_core_init(dwc);
+                       if (ret)
+                               return ret;
+               }
+               break;
        default:
                /* do nothing */
                break;
@@ -1331,12 +1368,11 @@ static int dwc3_resume_common(struct dwc3 *dwc)
 static int dwc3_runtime_checks(struct dwc3 *dwc)
 {
        switch (dwc->current_dr_role) {
-       case USB_DR_MODE_PERIPHERAL:
-       case USB_DR_MODE_OTG:
+       case DWC3_GCTL_PRTCAP_DEVICE:
                if (dwc->connected)
                        return -EBUSY;
                break;
-       case USB_DR_MODE_HOST:
+       case DWC3_GCTL_PRTCAP_HOST:
        default:
                /* do nothing */
                break;
@@ -1353,7 +1389,7 @@ static int dwc3_runtime_suspend(struct device *dev)
        if (dwc3_runtime_checks(dwc))
                return -EBUSY;
 
-       ret = dwc3_suspend_common(dwc);
+       ret = dwc3_suspend_common(dwc, PMSG_AUTO_SUSPEND);
        if (ret)
                return ret;
 
@@ -1369,7 +1405,7 @@ static int dwc3_runtime_resume(struct device *dev)
 
        device_init_wakeup(dev, false);
 
-       ret = dwc3_resume_common(dwc);
+       ret = dwc3_resume_common(dwc, PMSG_AUTO_RESUME);
        if (ret)
                return ret;
 
@@ -1416,7 +1452,7 @@ static int dwc3_suspend(struct device *dev)
        struct dwc3     *dwc = dev_get_drvdata(dev);
        int             ret;
 
-       ret = dwc3_suspend_common(dwc);
+       ret = dwc3_suspend_common(dwc, PMSG_SUSPEND);
        if (ret)
                return ret;
 
@@ -1432,7 +1468,7 @@ static int dwc3_resume(struct device *dev)
 
        pinctrl_pm_select_default_state(dev);
 
-       ret = dwc3_resume_common(dwc);
+       ret = dwc3_resume_common(dwc, PMSG_RESUME);
        if (ret)
                return ret;
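
Note: threading pm_message_t through lets one suspend/resume pair serve both runtime PM and system sleep: PMSG_IS_AUTO(msg) is true only for the PMSG_AUTO_* messages, which is how host mode now skips the full core teardown during runtime PM but still powers down across system suspend. A condensed sketch of the host-mode branch (names as in the hunks):

	static int dwc3_host_pm(struct dwc3 *dwc, pm_message_t msg, bool suspend)
	{
		/* runtime PM (PMSG_AUTO_*) is a no-op while acting as host */
		if (PMSG_IS_AUTO(msg))
			return 0;

		if (suspend) {
			dwc3_core_exit(dwc);	/* system sleep: full teardown */
			return 0;
		}
		return dwc3_core_init(dwc);	/* system resume: full re-init */
	}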
 
index 03c7aaa..860d2bc 100644 (file)
 #define DWC3_GDBGFIFOSPACE_TYPE(n)     (((n) << 5) & 0x1e0)
 #define DWC3_GDBGFIFOSPACE_SPACE_AVAILABLE(n) (((n) >> 16) & 0xffff)
 
-#define DWC3_TXFIFOQ           1
-#define DWC3_RXFIFOQ           3
-#define DWC3_TXREQQ            5
-#define DWC3_RXREQQ            7
-#define DWC3_RXINFOQ           9
-#define DWC3_DESCFETCHQ                13
-#define DWC3_EVENTQ            15
+#define DWC3_TXFIFOQ           0
+#define DWC3_RXFIFOQ           1
+#define DWC3_TXREQQ            2
+#define DWC3_RXREQQ            3
+#define DWC3_RXINFOQ           4
+#define DWC3_PSTATQ            5
+#define DWC3_DESCFETCHQ                6
+#define DWC3_EVENTQ            7
+#define DWC3_AUXEVENTQ         8
 
 /* Global RX Threshold Configuration Register */
 #define DWC3_GRXTHRCFG_MAXRXBURSTSIZE(n) (((n) & 0x1f) << 19)
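
Note: the renumbered constants above are field values for the GDBGFIFOSPACE debug register, selected via DWC3_GDBGFIFOSPACE_TYPE(). As a hedged illustration only (it assumes the driver's usual dwc3_readl()/dwc3_writel() accessors and a DWC3_GDBGFIFOSPACE_NUM() endpoint-select macro from the same header), querying free TX-FIFO space could look like:

	static u32 dwc3_txfifo_space(struct dwc3 *dwc, u8 epnum)
	{
		u32 reg = DWC3_GDBGFIFOSPACE_NUM(epnum) |
			  DWC3_GDBGFIFOSPACE_TYPE(DWC3_TXFIFOQ);

		dwc3_writel(dwc->regs, DWC3_GDBGFIFOSPACE, reg);

		/* the upper 16 bits of the readback report the free space */
		return DWC3_GDBGFIFOSPACE_SPACE_AVAILABLE(
				dwc3_readl(dwc->regs, DWC3_GDBGFIFOSPACE));
	}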
@@ -795,7 +797,9 @@ struct dwc3_scratchpad_array {
  * @usb3_phy: pointer to USB3 PHY
  * @usb2_generic_phy: pointer to USB2 PHY
  * @usb3_generic_phy: pointer to USB3 PHY
+ * @phys_ready: flag to indicate that PHYs are ready
  * @ulpi: pointer to ulpi interface
+ * @ulpi_ready: flag to indicate that ULPI is initialized
  * @u2sel: parameter from Set SEL request.
  * @u2pel: parameter from Set SEL request.
  * @u1sel: parameter from Set SEL request.
@@ -893,7 +897,10 @@ struct dwc3 {
        struct phy              *usb2_generic_phy;
        struct phy              *usb3_generic_phy;
 
+       bool                    phys_ready;
+
        struct ulpi             *ulpi;
+       bool                    ulpi_ready;
 
        void __iomem            *regs;
        size_t                  regs_size;
index 7ae0eef..e54c362 100644 (file)
@@ -143,6 +143,7 @@ static int dwc3_of_simple_remove(struct platform_device *pdev)
                clk_disable_unprepare(simple->clks[i]);
                clk_put(simple->clks[i]);
        }
+       simple->num_clocks = 0;
 
        reset_control_assert(simple->resets);
        reset_control_put(simple->resets);
index a4719e8..ed8b865 100644 (file)
@@ -582,9 +582,25 @@ static int dwc3_omap_resume(struct device *dev)
        return 0;
 }
 
+static void dwc3_omap_complete(struct device *dev)
+{
+       struct dwc3_omap        *omap = dev_get_drvdata(dev);
+
+       if (extcon_get_state(omap->edev, EXTCON_USB))
+               dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID);
+       else
+               dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF);
+
+       if (extcon_get_state(omap->edev, EXTCON_USB_HOST))
+               dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND);
+       else
+               dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT);
+}
+
 static const struct dev_pm_ops dwc3_omap_dev_pm_ops = {
 
        SET_SYSTEM_SLEEP_PM_OPS(dwc3_omap_suspend, dwc3_omap_resume)
+       .complete = dwc3_omap_complete,
 };
 
 #define DEV_PM_OPS     (&dwc3_omap_dev_pm_ops)
index 9c2e4a1..18be31d 100644 (file)
@@ -854,7 +854,12 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc,
                trb++;
                trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
                trace_dwc3_complete_trb(ep0, trb);
-               ep0->trb_enqueue = 0;
+
+               if (r->direction)
+                       dwc->eps[1]->trb_enqueue = 0;
+               else
+                       dwc->eps[0]->trb_enqueue = 0;
+
                dwc->ep0_bounced = false;
        }
 
index 616ef49..2bda4eb 100644 (file)
@@ -2745,6 +2745,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
                break;
        }
 
+       dwc->eps[1]->endpoint.maxpacket = dwc->gadget.ep0->maxpacket;
+
        /* Enable USB2 LPM Capability */
 
        if ((dwc->revision > DWC3_REVISION_194A) &&
index 8f2cf3b..c2592d8 100644 (file)
@@ -1855,44 +1855,20 @@ static int ffs_func_eps_enable(struct ffs_function *func)
 
        spin_lock_irqsave(&func->ffs->eps_lock, flags);
        while(count--) {
-               struct usb_endpoint_descriptor *ds;
-               struct usb_ss_ep_comp_descriptor *comp_desc = NULL;
-               int needs_comp_desc = false;
-               int desc_idx;
-
-               if (ffs->gadget->speed == USB_SPEED_SUPER) {
-                       desc_idx = 2;
-                       needs_comp_desc = true;
-               } else if (ffs->gadget->speed == USB_SPEED_HIGH)
-                       desc_idx = 1;
-               else
-                       desc_idx = 0;
-
-               /* fall-back to lower speed if desc missing for current speed */
-               do {
-                       ds = ep->descs[desc_idx];
-               } while (!ds && --desc_idx >= 0);
-
-               if (!ds) {
-                       ret = -EINVAL;
-                       break;
-               }
-
                ep->ep->driver_data = ep;
-               ep->ep->desc = ds;
 
-               if (needs_comp_desc) {
-                       comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
-                                       USB_DT_ENDPOINT_SIZE);
-                       ep->ep->maxburst = comp_desc->bMaxBurst + 1;
-                       ep->ep->comp_desc = comp_desc;
+               ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
+               if (ret) {
+                       pr_err("%s: config_ep_by_speed(%s) returned %d\n",
+                                       __func__, ep->ep->name, ret);
+                       break;
                }
 
                ret = usb_ep_enable(ep->ep);
                if (likely(!ret)) {
                        epfile->ep = ep;
-                       epfile->in = usb_endpoint_dir_in(ds);
-                       epfile->isoc = usb_endpoint_xfer_isoc(ds);
+                       epfile->in = usb_endpoint_dir_in(ep->ep->desc);
+                       epfile->isoc = usb_endpoint_xfer_isoc(ep->ep->desc);
                } else {
                        break;
                }
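
Note: the open-coded descriptor selection (with its manual lower-speed fall-back) is replaced by the composite framework's config_ep_by_speed(), which performs the same fall-back, programs ep->desc and, for SuperSpeed, ep->comp_desc and ep->maxburst. The contract the new code relies on, in outline:

	/* picks the descriptor matching gadget->speed (falling back to a
	 * lower-speed set) and fills ep->desc/comp_desc/maxburst, or fails
	 * if no descriptor exists for the connected speed */
	ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
	if (ret)
		return ret;

	ret = usb_ep_enable(ep->ep);	/* uses the descriptor chosen above */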
@@ -2979,10 +2955,8 @@ static int _ffs_func_bind(struct usb_configuration *c,
        struct ffs_data *ffs = func->ffs;
 
        const int full = !!func->ffs->fs_descs_count;
-       const int high = gadget_is_dualspeed(func->gadget) &&
-               func->ffs->hs_descs_count;
-       const int super = gadget_is_superspeed(func->gadget) &&
-               func->ffs->ss_descs_count;
+       const int high = !!func->ffs->hs_descs_count;
+       const int super = !!func->ffs->ss_descs_count;
 
        int fs_len, hs_len, ss_len, ret, i;
        struct ffs_ep *eps_ptr;
index 11fe788..d2dc1f0 100644 (file)
@@ -524,6 +524,8 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
                dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
                return ret;
        }
+       iad_desc.bFirstInterface = ret;
+
        std_ac_if_desc.bInterfaceNumber = ret;
        uac2->ac_intf = ret;
        uac2->ac_alt = 0;
index 1e95670..0875d38 100644 (file)
@@ -274,7 +274,6 @@ config USB_SNP_UDC_PLAT
        tristate "Synopsys USB 2.0 Device controller"
        depends on USB_GADGET && OF && HAS_DMA
        depends on EXTCON || EXTCON=n
-       select USB_GADGET_DUALSPEED
        select USB_SNP_CORE
        default ARCH_BCM_IPROC
        help
index 1e940f0..6dbc489 100644 (file)
@@ -77,6 +77,7 @@ static int bdc_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
        if (ret) {
                dev_err(&pci->dev,
                        "couldn't add resources to bdc device\n");
+               platform_device_put(bdc);
                return ret;
        }
 
index 859d5b1..1f8b19d 100644 (file)
@@ -180,8 +180,8 @@ EXPORT_SYMBOL_GPL(usb_ep_alloc_request);
 void usb_ep_free_request(struct usb_ep *ep,
                                       struct usb_request *req)
 {
-       ep->ops->free_request(ep, req);
        trace_usb_ep_free_request(ep, req, 0);
+       ep->ops->free_request(ep, req);
 }
 EXPORT_SYMBOL_GPL(usb_ep_free_request);
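
Note: the two-line swap above is an ordering fix rather than a cleanup: the tracepoint dereferences the request, so it must fire before ->free_request() returns the memory. The general shape, with a hypothetical trace_obj_free() tracepoint:

	void obj_free(struct obj *o)
	{
		trace_obj_free(o);	/* touch the object while it is still valid */
		kfree(o);		/* only then hand it back to the allocator */
	}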
 
index e5b4ee9..56b517a 100644 (file)
@@ -1305,7 +1305,7 @@ static void udc_reset_ep_queue(struct fsl_udc *udc, u8 pipe)
 {
        struct fsl_ep *ep = get_ep_by_pipe(udc, pipe);
 
-       if (ep->name)
+       if (ep->ep.name)
                nuke(ep, -ESHUTDOWN);
 }
 
@@ -1693,7 +1693,7 @@ static void dtd_complete_irq(struct fsl_udc *udc)
                curr_ep = get_ep_by_pipe(udc, i);
 
                /* If the ep is configured */
-               if (curr_ep->name == NULL) {
+               if (!curr_ep->ep.name) {
                        WARNING("Invalid EP?");
                        continue;
                }
index 6e87af2..409cde4 100644 (file)
@@ -2410,7 +2410,7 @@ static int renesas_usb3_remove(struct platform_device *pdev)
        __renesas_usb3_ep_free_request(usb3->ep0_req);
        if (usb3->phy)
                phy_put(usb3->phy);
-       pm_runtime_disable(usb3_to_dev(usb3));
+       pm_runtime_disable(&pdev->dev);
 
        return 0;
 }
index 6150bed..4fcfb30 100644 (file)
@@ -633,14 +633,6 @@ config USB_UHCI_ASPEED
        bool
        default y if ARCH_ASPEED
 
-config USB_UHCI_BIG_ENDIAN_MMIO
-       bool
-       default y if SPARC_LEON
-
-config USB_UHCI_BIG_ENDIAN_DESC
-       bool
-       default y if SPARC_LEON
-
 config USB_FHCI_HCD
        tristate "Freescale QE USB Host Controller support"
        depends on OF_GPIO && QE_GPIO && QUICC_ENGINE
index facafdf..d7641cb 100644 (file)
@@ -774,12 +774,12 @@ static struct urb *request_single_step_set_feature_urb(
        atomic_inc(&urb->use_count);
        atomic_inc(&urb->dev->urbnum);
        urb->setup_dma = dma_map_single(
-                       hcd->self.controller,
+                       hcd->self.sysdev,
                        urb->setup_packet,
                        sizeof(struct usb_ctrlrequest),
                        DMA_TO_DEVICE);
        urb->transfer_dma = dma_map_single(
-                       hcd->self.controller,
+                       hcd->self.sysdev,
                        urb->transfer_buffer,
                        urb->transfer_buffer_length,
                        DMA_FROM_DEVICE);
index 8815832..3276304 100644 (file)
@@ -1188,10 +1188,10 @@ static int submit_single_step_set_feature(
         * 15 secs after the setup
         */
        if (is_setup) {
-               /* SETUP pid */
+               /* SETUP pid, and interrupt after SETUP completion */
                qtd_fill(ehci, qtd, urb->setup_dma,
                                sizeof(struct usb_ctrlrequest),
-                               token | (2 /* "setup" */ << 8), 8);
+                               QTD_IOC | token | (2 /* "setup" */ << 8), 8);
 
                submit_async(ehci, urb, &qtd_list, GFP_ATOMIC);
                return 0; /* Return now; we shall come back after 15 seconds */
@@ -1228,12 +1228,8 @@ static int submit_single_step_set_feature(
        qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma);
        list_add_tail(&qtd->qtd_list, head);
 
-       /* dont fill any data in such packets */
-       qtd_fill(ehci, qtd, 0, 0, token, 0);
-
-       /* by default, enable interrupt on urb completion */
-       if (likely(!(urb->transfer_flags & URB_NO_INTERRUPT)))
-               qtd->hw_token |= cpu_to_hc32(ehci, QTD_IOC);
+       /* Interrupt after STATUS completion */
+       qtd_fill(ehci, qtd, 0, 0, token | QTD_IOC, 0);
 
        submit_async(ehci, urb, &qtd_list, GFP_KERNEL);
 
index ee96763..84f88fa 100644 (file)
@@ -74,6 +74,7 @@ static const char     hcd_name [] = "ohci_hcd";
 
 #define        STATECHANGE_DELAY       msecs_to_jiffies(300)
 #define        IO_WATCHDOG_DELAY       msecs_to_jiffies(275)
+#define        IO_WATCHDOG_OFF         0xffffff00
 
 #include "ohci.h"
 #include "pci-quirks.h"
@@ -231,7 +232,7 @@ static int ohci_urb_enqueue (
                }
 
                /* Start up the I/O watchdog timer, if it's not running */
-               if (!timer_pending(&ohci->io_watchdog) &&
+               if (ohci->prev_frame_no == IO_WATCHDOG_OFF &&
                                list_empty(&ohci->eds_in_use) &&
                                !(ohci->flags & OHCI_QUIRK_QEMU)) {
                        ohci->prev_frame_no = ohci_frame_no(ohci);
@@ -501,6 +502,7 @@ static int ohci_init (struct ohci_hcd *ohci)
                return 0;
 
        timer_setup(&ohci->io_watchdog, io_watchdog_func, 0);
+       ohci->prev_frame_no = IO_WATCHDOG_OFF;
 
        ohci->hcca = dma_alloc_coherent (hcd->self.controller,
                        sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL);
@@ -730,7 +732,7 @@ static void io_watchdog_func(struct timer_list *t)
        u32             head;
        struct ed       *ed;
        struct td       *td, *td_start, *td_next;
-       unsigned        frame_no;
+       unsigned        frame_no, prev_frame_no = IO_WATCHDOG_OFF;
        unsigned long   flags;
 
        spin_lock_irqsave(&ohci->lock, flags);
@@ -835,7 +837,7 @@ static void io_watchdog_func(struct timer_list *t)
                        }
                }
                if (!list_empty(&ohci->eds_in_use)) {
-                       ohci->prev_frame_no = frame_no;
+                       prev_frame_no = frame_no;
                        ohci->prev_wdh_cnt = ohci->wdh_cnt;
                        ohci->prev_donehead = ohci_readl(ohci,
                                        &ohci->regs->donehead);
@@ -845,6 +847,7 @@ static void io_watchdog_func(struct timer_list *t)
        }
 
  done:
+       ohci->prev_frame_no = prev_frame_no;
        spin_unlock_irqrestore(&ohci->lock, flags);
 }
 
@@ -973,6 +976,7 @@ static void ohci_stop (struct usb_hcd *hcd)
        if (quirk_nec(ohci))
                flush_work(&ohci->nec_work);
        del_timer_sync(&ohci->io_watchdog);
+       ohci->prev_frame_no = IO_WATCHDOG_OFF;
 
        ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable);
        ohci_usb_reset(ohci);
index fb7aaa3..634f3c7 100644 (file)
@@ -311,8 +311,10 @@ static int ohci_bus_suspend (struct usb_hcd *hcd)
                rc = ohci_rh_suspend (ohci, 0);
        spin_unlock_irq (&ohci->lock);
 
-       if (rc == 0)
+       if (rc == 0) {
                del_timer_sync(&ohci->io_watchdog);
+               ohci->prev_frame_no = IO_WATCHDOG_OFF;
+       }
        return rc;
 }
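
Note: the OHCI changes stop consulting timer_pending() (which races with the handler itself) and instead keep the watchdog state in prev_frame_no, with IO_WATCHDOG_OFF as a "not running" sentinel that no real frame number can equal. The handler ends with a single store that either re-arms or parks the watchdog, in outline (names from the hunks):

	unsigned prev_frame_no = IO_WATCHDOG_OFF;	/* assume we go idle */

	if (!list_empty(&ohci->eds_in_use)) {
		prev_frame_no = frame_no;		/* stay armed */
		mod_timer(&ohci->io_watchdog,
			  jiffies + IO_WATCHDOG_DELAY);
	}
	ohci->prev_frame_no = prev_frame_no;	/* single write, under ohci->lock */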
 
index b2ec8c3..4ccb85a 100644 (file)
@@ -1019,6 +1019,8 @@ skip_ed:
                 * have modified this list.  normally it's just prepending
                 * entries (which we'd ignore), but paranoia won't hurt.
                 */
+               *last = ed->ed_next;
+               ed->ed_next = NULL;
                modified = 0;
 
                /* unlink urbs as requested, but rescan the list after
@@ -1077,21 +1079,22 @@ rescan_this:
                        goto rescan_this;
 
                /*
-                * If no TDs are queued, take ED off the ed_rm_list.
+                * If no TDs are queued, ED is now idle.
                 * Otherwise, if the HC is running, reschedule.
-                * If not, leave it on the list for further dequeues.
+                * If the HC isn't running, add ED back to the
+                * start of the list for later processing.
                 */
                if (list_empty(&ed->td_list)) {
-                       *last = ed->ed_next;
-                       ed->ed_next = NULL;
                        ed->state = ED_IDLE;
                        list_del(&ed->in_use_list);
                } else if (ohci->rh_state == OHCI_RH_RUNNING) {
-                       *last = ed->ed_next;
-                       ed->ed_next = NULL;
                        ed_schedule(ohci, ed);
                } else {
-                       last = &ed->ed_next;
+                       ed->ed_next = ohci->ed_rm_list;
+                       ohci->ed_rm_list = ed;
+                       /* Don't loop on the same ED */
+                       if (last == &ohci->ed_rm_list)
+                               last = &ed->ed_next;
                }
 
                if (modified)
index 1615367..67ad4bb 100644 (file)
 #define        AX_INDXC                0x30
 #define        AX_DATAC                0x34
 
+#define PT_ADDR_INDX           0xE8
+#define PT_READ_INDX           0xE4
+#define PT_SIG_1_ADDR          0xA520
+#define PT_SIG_2_ADDR          0xA521
+#define PT_SIG_3_ADDR          0xA522
+#define PT_SIG_4_ADDR          0xA523
+#define PT_SIG_1_DATA          0x78
+#define PT_SIG_2_DATA          0x56
+#define PT_SIG_3_DATA          0x34
+#define PT_SIG_4_DATA          0x12
+#define PT4_P1_REG             0xB521
+#define PT4_P2_REG             0xB522
+#define PT2_P1_REG             0xD520
+#define PT2_P2_REG             0xD521
+#define PT1_P1_REG             0xD522
+#define PT1_P2_REG             0xD523
+
 #define        NB_PCIE_INDX_ADDR       0xe0
 #define        NB_PCIE_INDX_DATA       0xe4
 #define        PCIE_P_CNTL             0x10040
@@ -512,6 +529,98 @@ void usb_amd_dev_put(void)
 }
 EXPORT_SYMBOL_GPL(usb_amd_dev_put);
 
+/*
+ * Check if port is disabled in BIOS on AMD Promontory host.
+ * BIOS-disabled ports may wake on connect/disconnect and need a
+ * driver workaround to keep them disabled.
+ * Returns true if port is marked disabled.
+ */
+bool usb_amd_pt_check_port(struct device *device, int port)
+{
+       unsigned char value, port_shift;
+       struct pci_dev *pdev;
+       u16 reg;
+
+       pdev = to_pci_dev(device);
+       pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_1_ADDR);
+
+       pci_read_config_byte(pdev, PT_READ_INDX, &value);
+       if (value != PT_SIG_1_DATA)
+               return false;
+
+       pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_2_ADDR);
+
+       pci_read_config_byte(pdev, PT_READ_INDX, &value);
+       if (value != PT_SIG_2_DATA)
+               return false;
+
+       pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_3_ADDR);
+
+       pci_read_config_byte(pdev, PT_READ_INDX, &value);
+       if (value != PT_SIG_3_DATA)
+               return false;
+
+       pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_4_ADDR);
+
+       pci_read_config_byte(pdev, PT_READ_INDX, &value);
+       if (value != PT_SIG_4_DATA)
+               return false;
+
+       /* Check the disabled-port setting; if the bit is set, the port is enabled */
+       switch (pdev->device) {
+       case 0x43b9:
+       case 0x43ba:
+       /*
+        * device is AMD_PROMONTORYA_4(0x43b9) or PROMONTORYA_3(0x43ba)
+        * PT4_P1_REG bits[7..1] represent USB2.0 ports 6 to 0
+        * PT4_P2_REG bits[6..0] represent ports 13 to 7
+        */
+               if (port > 6) {
+                       reg = PT4_P2_REG;
+                       port_shift = port - 7;
+               } else {
+                       reg = PT4_P1_REG;
+                       port_shift = port + 1;
+               }
+               break;
+       case 0x43bb:
+       /*
+        * device is AMD_PROMONTORYA_2(0x43bb)
+        * PT2_P1_REG bits[7..5] represent USB2.0 ports 2 to 0
+        * PT2_P2_REG bits[5..0] represent ports 9 to 3
+        */
+               if (port > 2) {
+                       reg = PT2_P2_REG;
+                       port_shift = port - 3;
+               } else {
+                       reg = PT2_P1_REG;
+                       port_shift = port + 5;
+               }
+               break;
+       case 0x43bc:
+       /*
+        * device is AMD_PROMONTORYA_1(0x43bc)
+        * PT1_P1_REG[7..4] represent USB2.0 ports 3 to 0
+        * PT1_P2_REG[5..0] represent ports 9 to 4
+        */
+               if (port > 3) {
+                       reg = PT1_P2_REG;
+                       port_shift = port - 4;
+               } else {
+                       reg = PT1_P1_REG;
+                       port_shift = port + 4;
+               }
+               break;
+       default:
+               return false;
+       }
+       pci_write_config_word(pdev, PT_ADDR_INDX, reg);
+       pci_read_config_byte(pdev, PT_READ_INDX, &value);
+
+       return !(value & BIT(port_shift));
+}
+EXPORT_SYMBOL_GPL(usb_amd_pt_check_port);
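
Note: the four signature reads above make sure the indexed config space really is a Promontory part before the per-chip port-disable registers are trusted. On the consumer side this pairs with the xhci_bus_suspend() hunk further down, roughly:

	/* in the USB2 per-port loop of bus suspend: a BIOS-disabled port
	 * must not be armed for wakeup, or it bounces on (dis)connect */
	if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) &&
	    usb_amd_pt_check_port(hcd->self.controller, port_index))
		t2 &= ~PORT_WAKE_BITS;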
+
 /*
  * Make sure the controller is completely inactive, unable to
  * generate interrupts or do DMA.
index b68dcb5..4ca0d9b 100644 (file)
@@ -17,6 +17,7 @@ void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev);
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
 bool usb_xhci_needs_pci_reset(struct pci_dev *pdev);
+bool usb_amd_pt_check_port(struct device *device, int port);
 #else
 struct pci_dev;
 static inline void usb_amd_quirk_pll_disable(void) {}
@@ -25,6 +26,10 @@ static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {}
 static inline void usb_amd_dev_put(void) {}
 static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {}
 static inline void sb800_prefetch(struct device *dev, int on) {}
+static inline bool usb_amd_pt_check_port(struct device *device, int port)
+{
+       return false;
+}
 #endif  /* CONFIG_USB_PCI */
 
 #endif  /*  __LINUX_USB_PCI_QUIRKS_H  */
index e26e685..5851052 100644 (file)
@@ -211,7 +211,7 @@ static void xhci_ring_dump_segment(struct seq_file *s,
 static int xhci_ring_trb_show(struct seq_file *s, void *unused)
 {
        int                     i;
-       struct xhci_ring        *ring = s->private;
+       struct xhci_ring        *ring = *(struct xhci_ring **)s->private;
        struct xhci_segment     *seg = ring->first_seg;
 
        for (i = 0; i < ring->num_segs; i++) {
@@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci,
 
        snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
        epriv->root = xhci_debugfs_create_ring_dir(xhci,
-                                                  &dev->eps[ep_index].new_ring,
+                                                  &dev->eps[ep_index].ring,
                                                   epriv->name,
                                                   spriv->root);
        spriv->eps[ep_index] = epriv;
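
Note: both debugfs fixes add one level of indirection for the same reason: the ring behind an endpoint can be reallocated (and new_ring freed) after the file is created, so the file must store the address of the ring pointer and dereference it at read time. A reduced sketch:

	/* created with debugfs_create_file(name, 0444, parent,
	 *                                  &dev->eps[i].ring, &ring_fops);
	 * note the '&': ->private is a struct xhci_ring **, not the ring */
	static int ring_show(struct seq_file *s, void *unused)
	{
		struct xhci_ring *ring = *(struct xhci_ring **)s->private;

		seq_printf(s, "ring %p: %u segments\n", ring, ring->num_segs);
		return 0;
	}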
index 46d5e08..72ebbc9 100644 (file)
@@ -1224,17 +1224,17 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                temp = readl(port_array[wIndex]);
                                break;
                        }
-
-                       /* Software should not attempt to set
-                        * port link state above '3' (U3) and the port
-                        * must be enabled.
-                        */
-                       if ((temp & PORT_PE) == 0 ||
-                               (link_state > USB_SS_PORT_LS_U3)) {
-                               xhci_warn(xhci, "Cannot set link state.\n");
+                       /* Port must be enabled */
+                       if (!(temp & PORT_PE)) {
+                               retval = -ENODEV;
+                               break;
+                       }
+                       /* Can't set port link state above '3' (U3) */
+                       if (link_state > USB_SS_PORT_LS_U3) {
+                               xhci_warn(xhci, "Cannot set port %d link state %d\n",
+                                        wIndex, link_state);
                                goto error;
                        }
-
                        if (link_state == USB_SS_PORT_LS_U3) {
                                slot_id = xhci_find_slot_id_by_port(hcd, xhci,
                                                wIndex + 1);
@@ -1522,6 +1522,13 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
                                t2 |= PORT_WKOC_E | PORT_WKCONN_E;
                                t2 &= ~PORT_WKDISC_E;
                        }
+
+                       if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) &&
+                           (hcd->speed < HCD_USB3)) {
+                               if (usb_amd_pt_check_port(hcd->self.controller,
+                                                         port_index))
+                                       t2 &= ~PORT_WAKE_BITS;
+                       }
                } else
                        t2 &= ~PORT_WAKE_BITS;
 
index 6c79037..5262fa5 100644 (file)
 #define PCI_DEVICE_ID_INTEL_APL_XHCI                   0x5aa8
 #define PCI_DEVICE_ID_INTEL_DNV_XHCI                   0x19d0
 
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_4                        0x43b9
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_3                        0x43ba
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_2                        0x43bb
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_1                        0x43bc
 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI               0x1142
 
 static const char hcd_name[] = "xhci_hcd";
@@ -125,6 +129,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_AMD)
                xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 
+       if ((pdev->vendor == PCI_VENDOR_ID_AMD) &&
+               ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) ||
+               (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) ||
+               (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) ||
+               (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1)))
+               xhci->quirks |= XHCI_U2_DISABLE_WAKE;
+
        if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
                xhci->quirks |= XHCI_LPM_SUPPORT;
                xhci->quirks |= XHCI_INTEL_HOST;
index 1eeb339..25d4b74 100644 (file)
@@ -646,8 +646,6 @@ static void xhci_stop(struct usb_hcd *hcd)
                return;
        }
 
-       xhci_debugfs_exit(xhci);
-
        xhci_dbc_exit(xhci);
 
        spin_lock_irq(&xhci->lock);
@@ -680,6 +678,7 @@ static void xhci_stop(struct usb_hcd *hcd)
 
        xhci_dbg_trace(xhci, trace_xhci_dbg_init, "cleaning up memory");
        xhci_mem_cleanup(xhci);
+       xhci_debugfs_exit(xhci);
        xhci_dbg_trace(xhci, trace_xhci_dbg_init,
                        "xhci_stop completed - status = %x",
                        readl(&xhci->op_regs->status));
@@ -1014,6 +1013,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 
                xhci_dbg(xhci, "cleaning up memory\n");
                xhci_mem_cleanup(xhci);
+               xhci_debugfs_exit(xhci);
                xhci_dbg(xhci, "xhci_stop completed - status = %x\n",
                            readl(&xhci->op_regs->status));
 
@@ -3544,12 +3544,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
                virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING;
                del_timer_sync(&virt_dev->eps[i].stop_cmd_timer);
        }
-
+       xhci_debugfs_remove_slot(xhci, udev->slot_id);
        ret = xhci_disable_slot(xhci, udev->slot_id);
-       if (ret) {
-               xhci_debugfs_remove_slot(xhci, udev->slot_id);
+       if (ret)
                xhci_free_virt_device(xhci, udev->slot_id);
-       }
 }
 
 int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
index 96099a2..e4d7d3d 100644 (file)
@@ -1822,7 +1822,7 @@ struct xhci_hcd {
 /* For controller with a broken Port Disable implementation */
 #define XHCI_BROKEN_PORT_PED   (1 << 25)
 #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
-/* Reserved. It was XHCI_U2_DISABLE_WAKE */
+#define XHCI_U2_DISABLE_WAKE   (1 << 27)
 #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL        (1 << 28)
 #define XHCI_HW_LPM_DISABLE    (1 << 29)
 
index 63b9e85..236a60f 100644 (file)
@@ -42,6 +42,9 @@
 #define USB_DEVICE_ID_LD_MICROCASSYTIME                0x1033  /* USB Product ID of Micro-CASSY Time (reserved) */
 #define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035  /* USB Product ID of Micro-CASSY Temperature */
 #define USB_DEVICE_ID_LD_MICROCASSYPH          0x1038  /* USB Product ID of Micro-CASSY pH */
+#define USB_DEVICE_ID_LD_POWERANALYSERCASSY    0x1040  /* USB Product ID of Power Analyser CASSY */
+#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY      0x1042  /* USB Product ID of Converter Controller CASSY */
+#define USB_DEVICE_ID_LD_MACHINETESTCASSY      0x1043  /* USB Product ID of Machine Test CASSY */
 #define USB_DEVICE_ID_LD_JWM           0x1080  /* USB Product ID of Joule and Wattmeter */
 #define USB_DEVICE_ID_LD_DMMP          0x1081  /* USB Product ID of Digital Multimeter P (reserved) */
 #define USB_DEVICE_ID_LD_UMIP          0x1090  /* USB Product ID of UMI P */
@@ -84,6 +87,9 @@ static const struct usb_device_id ld_usb_table[] = {
        { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
        { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
        { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
+       { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
+       { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
+       { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
        { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
        { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
        { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
index 968bf1e..eef4ad5 100644 (file)
@@ -2708,7 +2708,8 @@ static int musb_resume(struct device *dev)
        if ((devctl & mask) != (musb->context.devctl & mask))
                musb->port1_status = 0;
 
-       musb_start(musb);
+       musb_enable_interrupts(musb);
+       musb_platform_enable(musb);
 
        spin_lock_irqsave(&musb->lock, flags);
        error = musb_run_resume_work(musb);
index 394b4ac..45ed32c 100644 (file)
@@ -391,13 +391,7 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb,
                }
        }
 
-       /*
-        * The pipe must be broken if current urb->status is set, so don't
-        * start next urb.
-        * TODO: to minimize the risk of regression, only check urb->status
-        * for RX, until we have a test case to understand the behavior of TX.
-        */
-       if ((!status || !is_in) && qh && qh->is_ready) {
+       if (qh != NULL && qh->is_ready) {
                musb_dbg(musb, "... next ep%d %cX urb %p",
                    hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh));
                musb_start_urb(musb, is_in, qh);
index da031c4..fbec863 100644 (file)
@@ -602,6 +602,9 @@ static enum usb_charger_type mxs_phy_charger_detect(struct usb_phy *phy)
        void __iomem *base = phy->io_priv;
        enum usb_charger_type chgr_type = UNKNOWN_TYPE;
 
+       if (!regmap)
+               return UNKNOWN_TYPE;
+
        if (mxs_charger_data_contact_detect(mxs_phy))
                return chgr_type;
 
index 5925d11..39fa2fc 100644 (file)
@@ -982,6 +982,10 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt,
        if ((uintptr_t)pkt->buf & (USBHS_USB_DMAC_XFER_SIZE - 1))
                goto usbhsf_pio_prepare_pop;
 
+       /* return at this time if the pipe is running */
+       if (usbhs_pipe_is_running(pipe))
+               return 0;
+
        usbhs_pipe_config_change_bfre(pipe, 1);
 
        ret = usbhsf_fifo_select(pipe, fifo, 0);
@@ -1172,6 +1176,7 @@ static int usbhsf_dma_pop_done_with_usb_dmac(struct usbhs_pkt *pkt,
        usbhsf_fifo_clear(pipe, fifo);
        pkt->actual = usbhs_dma_calc_received_size(pkt, chan, rcv_len);
 
+       usbhs_pipe_running(pipe, 0);
        usbhsf_dma_stop(pipe, fifo);
        usbhsf_dma_unmap(pkt);
        usbhsf_fifo_unselect(pipe, pipe->fifo);
index 5db8ed5..2d8d915 100644 (file)
@@ -241,6 +241,7 @@ static void option_instat_callback(struct urb *urb);
 #define QUECTEL_PRODUCT_EC21                   0x0121
 #define QUECTEL_PRODUCT_EC25                   0x0125
 #define QUECTEL_PRODUCT_BG96                   0x0296
+#define QUECTEL_PRODUCT_EP06                   0x0306
 
 #define CMOTECH_VENDOR_ID                      0x16d8
 #define CMOTECH_PRODUCT_6001                   0x6001
@@ -689,6 +690,10 @@ static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
        .reserved = BIT(1) | BIT(4),
 };
 
+static const struct option_blacklist_info quectel_ep06_blacklist = {
+       .reserved = BIT(4) | BIT(5),
+};
+
 static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1203,6 +1208,8 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+       { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06),
+         .driver_info = (kernel_ulong_t)&quectel_ep06_blacklist },
        { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
        { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
        { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
index 49e5524..dd8ef36 100644 (file)
@@ -73,6 +73,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
                        goto err;
 
                sdev->ud.tcp_socket = socket;
+               sdev->ud.sockfd = sockfd;
 
                spin_unlock_irq(&sdev->ud.lock);
 
@@ -172,6 +173,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
        if (ud->tcp_socket) {
                sockfd_put(ud->tcp_socket);
                ud->tcp_socket = NULL;
+               ud->sockfd = -1;
        }
 
        /* 3. free used data */
@@ -266,6 +268,7 @@ static struct stub_device *stub_device_alloc(struct usb_device *udev)
        sdev->ud.status         = SDEV_ST_AVAILABLE;
        spin_lock_init(&sdev->ud.lock);
        sdev->ud.tcp_socket     = NULL;
+       sdev->ud.sockfd         = -1;
 
        INIT_LIST_HEAD(&sdev->priv_init);
        INIT_LIST_HEAD(&sdev->priv_tx);
index c3e1008..20e3d46 100644 (file)
@@ -984,6 +984,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
        if (vdev->ud.tcp_socket) {
                sockfd_put(vdev->ud.tcp_socket);
                vdev->ud.tcp_socket = NULL;
+               vdev->ud.sockfd = -1;
        }
        pr_info("release socket\n");
 
@@ -1030,6 +1031,7 @@ static void vhci_device_reset(struct usbip_device *ud)
        if (ud->tcp_socket) {
                sockfd_put(ud->tcp_socket);
                ud->tcp_socket = NULL;
+               ud->sockfd = -1;
        }
        ud->status = VDEV_ST_NULL;
 
index 610cba2..b5fb56b 100644 (file)
@@ -1038,7 +1038,7 @@ static struct socket *get_raw_socket(int fd)
                struct sockaddr_ll sa;
                char  buf[MAX_ADDR_LEN];
        } uaddr;
-       int uaddr_len = sizeof uaddr, r;
+       int r;
        struct socket *sock = sockfd_lookup(fd, &r);
 
        if (!sock)
@@ -1050,9 +1050,8 @@ static struct socket *get_raw_socket(int fd)
                goto err;
        }
 
-       r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
-                              &uaddr_len, 0);
-       if (r)
+       r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
+       if (r < 0)
                goto err;
 
        if (uaddr.sa.sll_family != AF_PACKET) {
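
Note: this hunk adapts to the tree-wide change of the getname() socket op, which now returns the address length (or a negative errno) directly instead of returning 0 and writing the length through an int pointer; callers follow this shape:

	int len;

	len = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
	if (len < 0)
		return len;	/* negative errno */
	/* on success, the first len bytes of uaddr are valid */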
index 6082f65..67773e8 100644 (file)
@@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info)
        int timeout = 1000;
 
        /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */
-       if (cpu_data(0).x86_mask == 1) {
+       if (cpu_data(0).x86_stepping == 1) {
                pll_table = gx_pll_table_14MHz;
                pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz);
        } else {
index 753d9cb..aedbee3 100644 (file)
@@ -60,6 +60,7 @@ struct sock_mapping {
        bool active_socket;
        struct list_head list;
        struct socket *sock;
+       atomic_t refcount;
        union {
                struct {
                        int irq;
@@ -93,6 +94,32 @@ struct sock_mapping {
        };
 };
 
+static inline struct sock_mapping *pvcalls_enter_sock(struct socket *sock)
+{
+       struct sock_mapping *map;
+
+       if (!pvcalls_front_dev ||
+               dev_get_drvdata(&pvcalls_front_dev->dev) == NULL)
+               return ERR_PTR(-ENOTCONN);
+
+       map = (struct sock_mapping *)sock->sk->sk_send_head;
+       if (map == NULL)
+               return ERR_PTR(-ENOTSOCK);
+
+       pvcalls_enter();
+       atomic_inc(&map->refcount);
+       return map;
+}
+
+static inline void pvcalls_exit_sock(struct socket *sock)
+{
+       struct sock_mapping *map;
+
+       map = (struct sock_mapping *)sock->sk->sk_send_head;
+       atomic_dec(&map->refcount);
+       pvcalls_exit();
+}
+
 static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
 {
        *req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
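
Note: pvcalls_enter_sock()/pvcalls_exit_sock() fold the repeated open-coded checks below into one prologue/epilogue and, crucially, pin the mapping with a per-socket refcount, so release can wait out the users of a single socket instead of every pvcall in flight. Every entry point now follows this shape:

	struct sock_mapping *map;

	map = pvcalls_enter_sock(sock);	/* validate, then refcount++ */
	if (IS_ERR(map))
		return PTR_ERR(map);	/* -ENOTCONN or -ENOTSOCK */

	/* ... operate on map ... */

	pvcalls_exit_sock(sock);	/* refcount--, drop pvcalls_enter() */
	return ret;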
@@ -369,31 +396,23 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
        if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
                return -EOPNOTSUPP;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
-               return -ENOTCONN;
-       }
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
 
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *)sock->sk->sk_send_head;
-       if (!map) {
-               pvcalls_exit();
-               return -ENOTSOCK;
-       }
-
        spin_lock(&bedata->socket_lock);
        ret = get_request(bedata, &req_id);
        if (ret < 0) {
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return ret;
        }
        ret = create_active(map, &evtchn);
        if (ret < 0) {
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return ret;
        }
 
@@ -423,7 +442,7 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
        smp_rmb();
        ret = bedata->rsp[req_id].ret;
        bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
-       pvcalls_exit();
+       pvcalls_exit_sock(sock);
        return ret;
 }
 
@@ -488,23 +507,15 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
        if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
                return -EOPNOTSUPP;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
-               return -ENOTCONN;
-       }
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *) sock->sk->sk_send_head;
-       if (!map) {
-               pvcalls_exit();
-               return -ENOTSOCK;
-       }
-
        mutex_lock(&map->active.out_mutex);
        if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
                mutex_unlock(&map->active.out_mutex);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return -EAGAIN;
        }
        if (len > INT_MAX)
@@ -526,7 +537,7 @@ again:
                tot_sent = sent;
 
        mutex_unlock(&map->active.out_mutex);
-       pvcalls_exit();
+       pvcalls_exit_sock(sock);
        return tot_sent;
 }
 
@@ -591,19 +602,11 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
        if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
                return -EOPNOTSUPP;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
-               return -ENOTCONN;
-       }
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *) sock->sk->sk_send_head;
-       if (!map) {
-               pvcalls_exit();
-               return -ENOTSOCK;
-       }
-
        mutex_lock(&map->active.in_mutex);
        if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
                len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
@@ -623,7 +626,7 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                ret = 0;
 
        mutex_unlock(&map->active.in_mutex);
-       pvcalls_exit();
+       pvcalls_exit_sock(sock);
        return ret;
 }
 
@@ -637,24 +640,16 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
        if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
                return -EOPNOTSUPP;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
-               return -ENOTCONN;
-       }
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *) sock->sk->sk_send_head;
-       if (map == NULL) {
-               pvcalls_exit();
-               return -ENOTSOCK;
-       }
-
        spin_lock(&bedata->socket_lock);
        ret = get_request(bedata, &req_id);
        if (ret < 0) {
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return ret;
        }
        req = RING_GET_REQUEST(&bedata->ring, req_id);
@@ -684,7 +679,7 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
        bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
 
        map->passive.status = PVCALLS_STATUS_BIND;
-       pvcalls_exit();
+       pvcalls_exit_sock(sock);
        return 0;
 }
 
@@ -695,21 +690,13 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
        struct xen_pvcalls_request *req;
        int notify, req_id, ret;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
-               return -ENOTCONN;
-       }
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *) sock->sk->sk_send_head;
-       if (!map) {
-               pvcalls_exit();
-               return -ENOTSOCK;
-       }
-
        if (map->passive.status != PVCALLS_STATUS_BIND) {
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return -EOPNOTSUPP;
        }
 
@@ -717,7 +704,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
        ret = get_request(bedata, &req_id);
        if (ret < 0) {
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return ret;
        }
        req = RING_GET_REQUEST(&bedata->ring, req_id);
@@ -741,7 +728,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
        bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
 
        map->passive.status = PVCALLS_STATUS_LISTEN;
-       pvcalls_exit();
+       pvcalls_exit_sock(sock);
        return ret;
 }
 
@@ -753,21 +740,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
        struct xen_pvcalls_request *req;
        int notify, req_id, ret, evtchn, nonblock;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
-               return -ENOTCONN;
-       }
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *) sock->sk->sk_send_head;
-       if (!map) {
-               pvcalls_exit();
-               return -ENOTSOCK;
-       }
-
        if (map->passive.status != PVCALLS_STATUS_LISTEN) {
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return -EINVAL;
        }
 
@@ -785,13 +764,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
                        goto received;
                }
                if (nonblock) {
-                       pvcalls_exit();
+                       pvcalls_exit_sock(sock);
                        return -EAGAIN;
                }
                if (wait_event_interruptible(map->passive.inflight_accept_req,
                        !test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                                          (void *)&map->passive.flags))) {
-                       pvcalls_exit();
+                       pvcalls_exit_sock(sock);
                        return -EINTR;
                }
        }
@@ -802,7 +781,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return ret;
        }
        map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
@@ -810,7 +789,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return -ENOMEM;
        }
        ret = create_active(map2, &evtchn);
@@ -819,7 +798,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return ret;
        }
        list_add_tail(&map2->list, &bedata->socket_mappings);
@@ -841,13 +820,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
        /* We could check if we have received a response before returning. */
        if (nonblock) {
                WRITE_ONCE(map->passive.inflight_req_id, req_id);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return -EAGAIN;
        }
 
        if (wait_event_interruptible(bedata->inflight_req,
                READ_ONCE(bedata->rsp[req_id].req_id) == req_id)) {
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return -EINTR;
        }
        /* read req_id, then the content */
@@ -862,7 +841,7 @@ received:
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
                pvcalls_front_free_map(bedata, map2);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return -ENOMEM;
        }
        newsock->sk->sk_send_head = (void *)map2;
@@ -874,7 +853,7 @@ received:
        clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
        wake_up(&map->passive.inflight_accept_req);
 
-       pvcalls_exit();
+       pvcalls_exit_sock(sock);
        return ret;
 }
 
@@ -965,23 +944,16 @@ __poll_t pvcalls_front_poll(struct file *file, struct socket *sock,
        struct sock_mapping *map;
        __poll_t ret;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map))
                return EPOLLNVAL;
-       }
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *) sock->sk->sk_send_head;
-       if (!map) {
-               pvcalls_exit();
-               return EPOLLNVAL;
-       }
        if (map->active_socket)
                ret = pvcalls_front_poll_active(file, bedata, map, wait);
        else
                ret = pvcalls_front_poll_passive(file, bedata, map, wait);
-       pvcalls_exit();
+       pvcalls_exit_sock(sock);
        return ret;
 }
 
@@ -995,25 +967,20 @@ int pvcalls_front_release(struct socket *sock)
        if (sock->sk == NULL)
                return 0;
 
-       pvcalls_enter();
-       if (!pvcalls_front_dev) {
-               pvcalls_exit();
-               return -EIO;
+       map = pvcalls_enter_sock(sock);
+       if (IS_ERR(map)) {
+               if (PTR_ERR(map) == -ENOTCONN)
+                       return -EIO;
+               else
+                       return 0;
        }
-
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
 
-       map = (struct sock_mapping *) sock->sk->sk_send_head;
-       if (map == NULL) {
-               pvcalls_exit();
-               return 0;
-       }
-
        spin_lock(&bedata->socket_lock);
        ret = get_request(bedata, &req_id);
        if (ret < 0) {
                spin_unlock(&bedata->socket_lock);
-               pvcalls_exit();
+               pvcalls_exit_sock(sock);
                return ret;
        }
        sock->sk->sk_send_head = NULL;
@@ -1043,14 +1010,20 @@ int pvcalls_front_release(struct socket *sock)
                /*
                 * We need to make sure that sendmsg/recvmsg on this socket have
                 * not started before we've cleared sk_send_head here. The
-                * easiest (though not optimal) way to guarantee this is to see
-                * that no pvcall (other than us) is in progress.
+                * easiest way to guarantee this is to see that no pvcall
+                * (other than us) is in progress on this socket.
                 */
-               while (atomic_read(&pvcalls_refcount) > 1)
+               while (atomic_read(&map->refcount) > 1)
                        cpu_relax();
 
                pvcalls_front_free_map(bedata, map);
        } else {
+               wake_up(&bedata->inflight_req);
+               wake_up(&map->passive.inflight_accept_req);
+
+               while (atomic_read(&map->refcount) > 1)
+                       cpu_relax();
+
                spin_lock(&bedata->socket_lock);
                list_del(&map->list);
                spin_unlock(&bedata->socket_lock);
index bf13d1e..04e7b3b 100644 (file)
@@ -284,6 +284,10 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset,
        int pool = tmem_frontswap_poolid;
        int ret;
 
+       /* THP isn't supported */
+       if (PageTransHuge(page))
+               return -1;
+
        if (pool < 0)
                return -1;
        if (ind64 != ind)
index 149c5e7..0929811 100644 (file)
@@ -76,6 +76,7 @@ struct xb_req_data {
        struct list_head list;
        wait_queue_head_t wq;
        struct xsd_sockmsg msg;
+       uint32_t caller_req_id;
        enum xsd_sockmsg_type type;
        char *body;
        const struct kvec *vec;
index 5b081a0..d239fc3 100644 (file)
@@ -309,6 +309,7 @@ static int process_msg(void)
                        goto out;
 
                if (req->state == xb_req_state_wait_reply) {
+                       req->msg.req_id = req->caller_req_id;
                        req->msg.type = state.msg.type;
                        req->msg.len = state.msg.len;
                        req->body = state.body;
index 3e59590..3f3b293 100644 (file)
@@ -227,6 +227,8 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
        req->state = xb_req_state_queued;
        init_waitqueue_head(&req->wq);
 
+       /* Save the caller req_id and restore it later in the reply */
+       req->caller_req_id = req->msg.req_id;
        req->msg.req_id = xs_request_enter(req);
 
        mutex_lock(&xb_write_mutex);
@@ -310,6 +312,7 @@ static void *xs_talkv(struct xenbus_transaction t,
        req->num_vecs = num_vecs;
        req->cb = xs_wake_up;
 
+       msg.req_id = 0;
        msg.tx_id = t.id;
        msg.type = type;
        msg.len = 0;
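
Note: xenstored echoes the request id back in its reply, and the driver matches replies using its own transport id from xs_request_enter(); saving the caller's id at send time and restoring it in process_msg() keeps that substitution invisible to callers. Zeroing msg.req_id in xs_talkv() simply gives the saved field a defined value. The round trip in outline:

	/* send: remember the caller's id, substitute the transport id */
	req->caller_req_id = req->msg.req_id;
	req->msg.req_id = xs_request_enter(req);

	/* reply (process_msg): restore before completing the request */
	req->msg.req_id = req->caller_req_id;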
index e4054e5..f94b2d8 100644 (file)
@@ -1264,7 +1264,16 @@ again:
        while (node) {
                ref = rb_entry(node, struct prelim_ref, rbnode);
                node = rb_next(&ref->rbnode);
-               WARN_ON(ref->count < 0);
+               /*
+                * ref->count < 0 can happen here if there are delayed
+                * refs with a node->action of BTRFS_DROP_DELAYED_REF.
+                * prelim_ref_insert() relies on this when merging
+                * identical refs to keep the overall count correct.
+                * prelim_ref_insert() will merge only those refs
+                * which compare identically.  Any refs having
+                * e.g. different offsets would not be merged,
+                * and would retain their original ref->count < 0.
+                */
                if (roots && ref->count && ref->root_id && ref->parent == 0) {
                        if (sc && sc->root_objectid &&
                            ref->root_id != sc->root_objectid) {
index a1a40cf..7ab5e01 100644 (file)
@@ -821,7 +821,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        spin_unlock(&delayed_refs->lock);
 
        if (qrecord_inserted)
-               return btrfs_qgroup_trace_extent_post(fs_info, record);
+               btrfs_qgroup_trace_extent_post(fs_info, record);
+
        return 0;
 
 free_head_ref:
index 05751a6..c1618ab 100644 (file)
@@ -2147,6 +2147,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
                        u64 bytes;
                        struct request_queue *req_q;
 
+                       if (!stripe->dev->bdev) {
+                               ASSERT(btrfs_test_opt(fs_info, DEGRADED));
+                               continue;
+                       }
                        req_q = bdev_get_queue(stripe->dev->bdev);
                        if (!blk_queue_discard(req_q))
                                continue;
index 53ca025..a79299a 100644 (file)
@@ -1335,8 +1335,11 @@ next_slot:
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               if (cow_start != (u64)-1)
+                                       cur_offset = cow_start;
                                goto error;
+                       }
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
@@ -3385,6 +3388,11 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
                ret = btrfs_orphan_reserve_metadata(trans, inode);
                ASSERT(!ret);
                if (ret) {
+                       /*
+                        * The decrement doesn't need the spin lock, as
+                        * ->orphan_block_rsv is released only once
+                        * ->orphan_inodes reaches zero.
+                        */
                        atomic_dec(&root->orphan_inodes);
                        clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
                                  &inode->runtime_flags);
@@ -3399,12 +3407,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
        if (insert >= 1) {
                ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
                if (ret) {
-                       atomic_dec(&root->orphan_inodes);
                        if (reserve) {
                                clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
                                          &inode->runtime_flags);
                                btrfs_orphan_release_metadata(inode);
                        }
+                       /*
+                        * btrfs_orphan_commit_root may race with us and set
+                        * ->orphan_block_rsv to zero; to avoid that, decrease
+                        * ->orphan_inodes after everything is done.
+                        */
+                       atomic_dec(&root->orphan_inodes);
                        if (ret != -EEXIST) {
                                clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                                          &inode->runtime_flags);
@@ -3436,28 +3449,26 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
 {
        struct btrfs_root *root = inode->root;
        int delete_item = 0;
-       int release_rsv = 0;
        int ret = 0;
 
-       spin_lock(&root->orphan_lock);
        if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                               &inode->runtime_flags))
                delete_item = 1;
 
+       if (delete_item && trans)
+               ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
+
        if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
                               &inode->runtime_flags))
-               release_rsv = 1;
-       spin_unlock(&root->orphan_lock);
+               btrfs_orphan_release_metadata(inode);
 
-       if (delete_item) {
+       /*
+        * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
+        * to zero; to avoid that, decrease ->orphan_inodes after everything
+        * is done.
+        */
+       if (delete_item)
                atomic_dec(&root->orphan_inodes);
-               if (trans)
-                       ret = btrfs_del_orphan_item(trans, root,
-                                                   btrfs_ino(inode));
-       }
-
-       if (release_rsv)
-               btrfs_orphan_release_metadata(inode);
 
        return ret;
 }
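
The common thread in both hunks is ordering: ->orphan_inodes doubles as a liveness check for the shared ->orphan_block_rsv, so the decrement has to come after the last touch of the reservation. A hedged sketch of the general pattern in plain C11 atomics (the types and names here are illustrative, not the btrfs ones):

    #include <stdatomic.h>

    struct shared {
            atomic_int users;       /* plays the role of ->orphan_inodes */
            void *rsv;              /* plays the role of ->orphan_block_rsv */
    };

    /* Finish all per-user cleanup that touches s->rsv first; only then
     * drop the count.  Decrementing early would let a concurrent commit
     * observe users == 0 and free s->rsv underneath us. */
    static void put_user_state(struct shared *s)
    {
            /* ... release this user's share of s->rsv ... */
            atomic_fetch_sub_explicit(&s->users, 1, memory_order_release);
    }
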
@@ -5281,7 +5292,7 @@ void btrfs_evict_inode(struct inode *inode)
        trace_btrfs_inode_evict(inode);
 
        if (!root) {
-               kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+               clear_inode(inode);
                return;
        }
 
index 9e61dd6..aa259d6 100644 (file)
@@ -1442,8 +1442,13 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
        int ret;
 
        ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
-       if (ret < 0)
-               return ret;
+       if (ret < 0) {
+               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+               btrfs_warn(fs_info,
+"error accounting new delayed refs extent (err code: %d), quota inconsistent",
+                       ret);
+               return 0;
+       }
 
        /*
         * Here we don't need to get the lock of
index afadaad..4fd19b4 100644 (file)
@@ -29,6 +29,7 @@
 #include "hash.h"
 #include "compression.h"
 #include "qgroup.h"
+#include "inode-map.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
  *
@@ -2472,6 +2473,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                        clean_tree_block(fs_info, next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
+                               } else {
+                                       if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+                                               clear_extent_buffer_dirty(next);
                                }
 
                                WARN_ON(root_owner !=
@@ -2552,6 +2556,9 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                        clean_tree_block(fs_info, next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
+                               } else {
+                                       if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+                                               clear_extent_buffer_dirty(next);
                                }
 
                                WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
@@ -2630,6 +2637,9 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                                clean_tree_block(fs_info, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
+                       } else {
+                               if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+                                       clear_extent_buffer_dirty(next);
                        }
 
                        WARN_ON(log->root_key.objectid !=
@@ -3018,13 +3028,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 
        while (1) {
                ret = find_first_extent_bit(&log->dirty_log_pages,
-                               0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
+                               0, &start, &end,
+                               EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
                                NULL);
                if (ret)
                        break;
 
                clear_extent_bits(&log->dirty_log_pages, start, end,
-                                 EXTENT_DIRTY | EXTENT_NEW);
+                                 EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
        }
 
        /*
@@ -5677,6 +5688,23 @@ again:
                                                      path);
                }
 
+               if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
+                       struct btrfs_root *root = wc.replay_dest;
+
+                       btrfs_release_path(path);
+
+                       /*
+                        * We have just replayed everything, and the highest
+                        * objectid of the fs roots has probably changed in case
+                        * some inode_items got replayed.
+                        *
+                        * root->objectid_mutex is not acquired as log replay
+                        * could only happen during mount.
+                        */
+                       ret = btrfs_find_highest_objectid(root,
+                                                 &root->highest_objectid);
+               }
+
                key.offset = found_key.offset - 1;
                wc.replay_dest->log_root = NULL;
                free_extent_buffer(log->node);
index b5036bd..2ceb924 100644 (file)
@@ -645,6 +645,7 @@ static void btrfs_free_stale_devices(const char *path,
                                btrfs_sysfs_remove_fsid(fs_devs);
                                list_del(&fs_devs->list);
                                free_fs_devices(fs_devs);
+                               break;
                        } else {
                                fs_devs->num_devices--;
                                list_del(&dev->dev_list);
index cff79ea..5243989 100644 (file)
@@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk)
 {
        struct connection *con;
        struct sockaddr_storage saddr;
-       int buflen;
        void (*orig_report)(struct sock *) = NULL;
 
        read_lock_bh(&sk->sk_callback_lock);
@@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk)
 
        orig_report = listen_sock.sk_error_report;
        if (con->sock == NULL ||
-           kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
+           kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
                printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
                                   "sending to node %d, port %d, "
                                   "sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con)
 
        /* Get the connected socket's peer */
        memset(&peeraddr, 0, sizeof(peeraddr));
-       if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
-                                 &len, 2)) {
+       len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
+       if (len < 0) {
                result = -ECONNABORTED;
                goto accept_err;
        }
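
This hunk (and the ocfs2 one further down) tracks a kernel socket API change: kernel_getpeername() and the proto_ops ->getname() methods dropped their int *len output parameter and now return the address length directly, or a negative errno. A sketch of the two conventions, assuming sock is a connected struct socket:

    struct sockaddr_storage addr;
    int err, len;

    /* old convention: 0 on success, length written through the pointer */
    err = kernel_getpeername(sock, (struct sockaddr *)&addr, &len);

    /* new convention: the return value is the length, negative on error */
    len = kernel_getpeername(sock, (struct sockaddr *)&addr);
    if (len < 0)
            return len;
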
index 5f22e74..8e56842 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <linux/efi.h>
+#include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
@@ -74,6 +75,11 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
        ssize_t size = 0;
        int err;
 
+       while (!__ratelimit(&file->f_cred->user->ratelimit)) {
+               if (!msleep_interruptible(50))
+                       return -EINTR;
+       }
+
        err = efivar_entry_size(var, &datasize);
 
        /*
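
__ratelimit() returns nonzero while the caller is inside its rate budget, so the loop above keeps sleeping until the per-user budget admits the read. A minimal sketch of the primitive by itself (the interval and burst values are made up):

    #include <linux/ratelimit.h>

    /* allow at most 3 events per 5 seconds, then start refusing */
    static DEFINE_RATELIMIT_STATE(my_rs, 5 * HZ, 3);

    static bool my_event_allowed(void)
    {
            /* nonzero: within budget; zero: rate-limited */
            return __ratelimit(&my_rs);
    }
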
index 8686379..86d6a44 100644 (file)
@@ -716,7 +716,7 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
        __be64 *ptr;
        sector_t lblock;
        sector_t lend;
-       int ret;
+       int ret = 0;
        int eob;
        unsigned int len;
        struct buffer_head *bh;
@@ -728,12 +728,14 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
                goto out;
        }
 
-       if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) {
-               gfs2_stuffed_iomap(inode, iomap);
-               if (pos >= iomap->length)
-                       return -ENOENT;
-               ret = 0;
-               goto out;
+       if (gfs2_is_stuffed(ip)) {
+               if (flags & IOMAP_REPORT) {
+                       gfs2_stuffed_iomap(inode, iomap);
+                       if (pos >= iomap->length)
+                               ret = -ENOENT;
+                       goto out;
+               }
+               BUG_ON(!(flags & IOMAP_WRITE));
        }
 
        lblock = pos >> inode->i_blkbits;
@@ -744,7 +746,7 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
        iomap->type = IOMAP_HOLE;
        iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
        iomap->flags = IOMAP_F_MERGED;
-       bmap_lock(ip, 0);
+       bmap_lock(ip, flags & IOMAP_WRITE);
 
        /*
         * Directory data blocks have a struct gfs2_meta_header header, so the
@@ -787,27 +789,28 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
                iomap->flags |= IOMAP_F_BOUNDARY;
        iomap->length = (u64)len << inode->i_blkbits;
 
-       ret = 0;
-
 out_release:
        release_metapath(&mp);
-       bmap_unlock(ip, 0);
+       bmap_unlock(ip, flags & IOMAP_WRITE);
 out:
        trace_gfs2_iomap_end(ip, iomap, ret);
        return ret;
 
 do_alloc:
-       if (!(flags & IOMAP_WRITE)) {
-               if (pos >= i_size_read(inode)) {
+       if (flags & IOMAP_WRITE) {
+               ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+       } else if (flags & IOMAP_REPORT) {
+               loff_t size = i_size_read(inode);
+               if (pos >= size)
                        ret = -ENOENT;
-                       goto out_release;
-               }
-               ret = 0;
-               iomap->length = hole_size(inode, lblock, &mp);
-               goto out_release;
+               else if (height <= ip->i_height)
+                       iomap->length = hole_size(inode, lblock, &mp);
+               else
+                       iomap->length = size - pos;
+       } else {
+               if (height <= ip->i_height)
+                       iomap->length = hole_size(inode, lblock, &mp);
        }
-
-       ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
        goto out_release;
 }
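
Taken together, the reworked tail of gfs2_iomap_begin() now branches on the caller type; roughly (a simplified paraphrase of the hunks above, not a drop-in replacement):

    if (flags & IOMAP_WRITE) {
            /* writers get blocks allocated */
            ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
    } else if (flags & IOMAP_REPORT) {
            /* fiemap-style callers: -ENOENT past EOF, otherwise report
             * the hole, clamped to EOF when the metadata tree is shallow */
            if (pos >= i_size_read(inode))
                    ret = -ENOENT;
            else if (height <= ip->i_height)
                    iomap->length = hole_size(inode, lblock, &mp);
            else
                    iomap->length = i_size_read(inode) - pos;
    } else {
            /* plain reads just see the hole */
            if (height <= ip->i_height)
                    iomap->length = hole_size(inode, lblock, &mp);
    }
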
 
index 9c36d61..2dee4e0 100644 (file)
@@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = {
        .exit = lockd_exit_net,
        .id = &lockd_net_id,
        .size = sizeof(struct lockd_net),
+       .async = true,
 };
 
 
index 7d89354..6c3083c 100644 (file)
@@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = {
        .exit = nfs_net_exit,
        .id   = &nfs_net_id,
        .size = sizeof(struct nfs_net),
+       .async = true,
 };
 
 /*
index 5be08f0..8c743a4 100644 (file)
@@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = {
        .exit = grace_exit_net,
        .id   = &grace_net_id,
        .size = sizeof(struct list_head),
+       .async = true,
 };
 
 static int __init
index 36b0772..60702d6 100644 (file)
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns,
 
        return fd;
 }
+EXPORT_SYMBOL_GPL(open_related_ns);
 
 static long ns_ioctl(struct file *filp, unsigned int ioctl,
                        unsigned long arg)
index eac5140..e507618 100644 (file)
@@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void)
 
 static int o2net_accept_one(struct socket *sock, int *more)
 {
-       int ret, slen;
+       int ret;
        struct sockaddr_in sin;
        struct socket *new_sock = NULL;
        struct o2nm_node *node = NULL;
@@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
                goto out;
        }
 
-       slen = sizeof(sin);
-       ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
-                                      &slen, 1);
+       ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
        if (ret < 0)
                goto out;
 
index e8a93bc..d1e8276 100644 (file)
@@ -510,6 +510,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
                        /* we have to zero-fill user buffer even if no read */
                        if (copy_to_user(buffer, buf, tsz))
                                return -EFAULT;
+               } else if (m->type == KCORE_USER) {
+                       /* User page is handled prior to normal kernel page: */
+                       if (copy_to_user(buffer, (char *)start, tsz))
+                               return -EFAULT;
                } else {
                        if (kern_addr_valid(start)) {
                                /*
index 68c06ae..da6f873 100644 (file)
@@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net)
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
        .init = proc_net_ns_init,
        .exit = proc_net_ns_exit,
+       .async = true,
 };
 
 int __init proc_net_init(void)
index 9990957..76bf9cc 100644 (file)
@@ -118,13 +118,22 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
                err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
 #endif
 #ifdef BUS_MCEERR_AO
-               /* 
+               /*
+                * Other callers might not initialize the si_lsb field,
+                * so check explicitly for the right codes here.
+                */
+               if (kinfo->si_signo == SIGBUS &&
+                    kinfo->si_code == BUS_MCEERR_AO)
+                       err |= __put_user((short) kinfo->si_addr_lsb,
+                                         &uinfo->ssi_addr_lsb);
+#endif
+#ifdef BUS_MCEERR_AR
+               /*
                 * Other callers might not initialize the si_lsb field,
                 * so check explicitly for the right codes here.
                 */
                if (kinfo->si_signo == SIGBUS &&
-                   (kinfo->si_code == BUS_MCEERR_AR ||
-                    kinfo->si_code == BUS_MCEERR_AO))
+                   kinfo->si_code == BUS_MCEERR_AR)
                        err |= __put_user((short) kinfo->si_addr_lsb,
                                          &uinfo->ssi_addr_lsb);
 #endif
index bc39757..67ab280 100644 (file)
@@ -7,7 +7,8 @@
  * @nr: Bit to set
  * @addr: Address to count from
  *
- * This operation is atomic and provides acquire barrier semantics.
+ * This operation is atomic and provides acquire barrier semantics if
+ * the returned value is 0.
  * It can be used to implement bit locks.
  */
 #define test_and_set_bit_lock(nr, addr)        test_and_set_bit(nr, addr)
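
The sharpened wording matters for lock loops built on this macro: only the attempt that returns 0 acquires, so failed attempts carry no ordering guarantee. A hedged usage sketch (the bit index and word are hypothetical):

    #define MY_LOCK_BIT     0
    static unsigned long my_word;

    /* spin until we flip the bit from 0 to 1; the successful attempt
     * (return value 0) is the one that provides acquire semantics */
    while (test_and_set_bit_lock(MY_LOCK_BIT, &my_word))
            cpu_relax();

    /* ... critical section ... */

    clear_bit_unlock(MY_LOCK_BIT, &my_word);  /* pairs with the acquire */
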
index 963b755..a7613e1 100644 (file)
@@ -52,6 +52,7 @@ struct bug_entry {
 #ifndef HAVE_ARCH_BUG
 #define BUG() do { \
        printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
+       barrier_before_unreachable(); \
        panic("BUG!"); \
 } while (0)
 #endif
index 1c27526..cf13842 100644 (file)
@@ -134,6 +134,15 @@ struct drm_crtc_commit {
         * &drm_pending_vblank_event pointer to clean up private events.
         */
        struct drm_pending_vblank_event *event;
+
+       /**
+        * @abort_completion:
+        *
+        * A flag that's set after drm_atomic_helper_setup_commit() takes a
+        * second reference for the completion of &drm_crtc_state.event. It's
+        * used by the free code to remove the second reference if commit fails.
+        */
+       bool abort_completion;
 };
 
 struct __drm_planes_state {
index 76e237b..6914633 100644 (file)
@@ -77,5 +77,6 @@ void drm_kms_helper_hotplug_event(struct drm_device *dev);
 
 void drm_kms_helper_poll_disable(struct drm_device *dev);
 void drm_kms_helper_poll_enable(struct drm_device *dev);
+bool drm_kms_helper_is_poll_worker(void);
 
 #endif
index 172744a..7b16564 100644 (file)
 #define        DP83867_RGMIIDCTL_3_75_NS       0xe
 #define        DP83867_RGMIIDCTL_4_00_NS       0xf
 
+/* IO_MUX_CFG - Clock output selection */
+#define DP83867_CLK_O_SEL_CHN_A_RCLK           0x0
+#define DP83867_CLK_O_SEL_CHN_B_RCLK           0x1
+#define DP83867_CLK_O_SEL_CHN_C_RCLK           0x2
+#define DP83867_CLK_O_SEL_CHN_D_RCLK           0x3
+#define DP83867_CLK_O_SEL_CHN_A_RCLK_DIV5      0x4
+#define DP83867_CLK_O_SEL_CHN_B_RCLK_DIV5      0x5
+#define DP83867_CLK_O_SEL_CHN_C_RCLK_DIV5      0x6
+#define DP83867_CLK_O_SEL_CHN_D_RCLK_DIV5      0x7
+#define DP83867_CLK_O_SEL_CHN_A_TCLK           0x8
+#define DP83867_CLK_O_SEL_CHN_B_TCLK           0x9
+#define DP83867_CLK_O_SEL_CHN_C_TCLK           0xA
+#define DP83867_CLK_O_SEL_CHN_D_TCLK           0xB
+#define DP83867_CLK_O_SEL_REF_CLK              0xC
 #endif
index 64e1074..968173e 100644 (file)
@@ -587,7 +587,7 @@ extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
 const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
                                               const struct device *dev);
 
-void *acpi_get_match_data(const struct device *dev);
+const void *acpi_device_get_match_data(const struct device *dev);
 extern bool acpi_driver_match_device(struct device *dev,
                                     const struct device_driver *drv);
 int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
@@ -766,7 +766,7 @@ static inline const struct acpi_device_id *acpi_match_device(
        return NULL;
 }
 
-static inline void *acpi_get_match_data(const struct device *dev)
+static inline const void *acpi_device_get_match_data(const struct device *dev)
 {
        return NULL;
 }
index 4d356e1..4037392 100644 (file)
@@ -113,10 +113,12 @@ extern void aarp_proto_init(void);
 /* Inter module exports */
 
 /* Give a device find its atif control structure */
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
 static inline struct atalk_iface *atalk_find_dev(struct net_device *dev)
 {
        return dev->atalk_ptr;
 }
+#endif
 
 extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev);
 extern struct net_device *atrtr_get_dev(struct atalk_addr *sa);
index 3ce6134..b0a7f31 100644 (file)
@@ -136,15 +136,21 @@ enum virtchnl_ops {
        VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
        VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
        VIRTCHNL_OP_REQUEST_QUEUES = 29,
+       VIRTCHNL_OP_ENABLE_CHANNELS = 30,
+       VIRTCHNL_OP_DISABLE_CHANNELS = 31,
+       VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
+       VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
 };
 
-/* This macro is used to generate a compilation error if a structure
+/* These macros are used to generate compilation errors if a structure/union
  * is not exactly the correct length. It gives a divide by zero error if the
- * structure is not of the correct size, otherwise it creates an enum that is
- * never used.
+ * structure/union is not of the correct size, otherwise it creates an enum
+ * that is never used.
  */
 #define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \
        { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \
+       { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
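
To make the trick concrete, a hedged illustration (struct demo is hypothetical): the macro expands to an enum whose initializer divides by zero, and therefore fails to compile, whenever the size assertion is violated.

    struct demo {
            u32 a;
            u32 b;
    };

    /* compiles: sizeof(struct demo) == 8, so the divisor is 1 */
    VIRTCHNL_CHECK_STRUCT_LEN(8, demo);

    /* would not compile: the divisor becomes 0 and (12)/0 is rejected
     * as a constant expression:
     * VIRTCHNL_CHECK_STRUCT_LEN(12, demo);
     */
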
 
 /* Virtual channel message descriptor. This overlays the admin queue
  * descriptor. All other data is passed in external buffers.
@@ -244,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_ENCAP              0X00100000
 #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM         0X00200000
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM      0X00400000
+#define VIRTCHNL_VF_OFFLOAD_ADQ                        0X00800000
 
 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
                               VIRTCHNL_VF_OFFLOAD_VLAN | \
@@ -496,6 +503,81 @@ struct virtchnl_rss_hena {
 
 VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
 
+/* VIRTCHNL_OP_ENABLE_CHANNELS
+ * VIRTCHNL_OP_DISABLE_CHANNELS
+ * The VF sends these messages to enable or disable channels based on
+ * the user-specified queue count and queue offset for each traffic class.
+ * This struct encompasses all the information that the PF needs from
+ * the VF to create a channel.
+ */
+struct virtchnl_channel_info {
+       u16 count; /* number of queues in a channel */
+       u16 offset; /* queues in a channel start from 'offset' */
+       u32 pad;
+       u64 max_tx_rate;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info);
+
+struct virtchnl_tc_info {
+       u32     num_tc;
+       u32     pad;
+       struct  virtchnl_channel_info list[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
+
+/* VIRTCHNL_ADD_CLOUD_FILTER
+ * VIRTCHNL_DEL_CLOUD_FILTER
+ * The VF sends these messages to add or delete a cloud filter based on the
+ * user-specified match and action filters. These structures encompass
+ * all the information that the PF needs from the VF to add/delete a
+ * cloud filter.
+ */
+
+struct virtchnl_l4_spec {
+       u8      src_mac[ETH_ALEN];
+       u8      dst_mac[ETH_ALEN];
+       __be16  vlan_id;
+       __be16  pad; /* reserved for future use */
+       __be32  src_ip[4];
+       __be32  dst_ip[4];
+       __be16  src_port;
+       __be16  dst_port;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec);
+
+union virtchnl_flow_spec {
+       struct  virtchnl_l4_spec tcp_spec;
+       u8      buffer[128]; /* reserved for future use */
+};
+
+VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec);
+
+enum virtchnl_action {
+       /* action types */
+       VIRTCHNL_ACTION_DROP = 0,
+       VIRTCHNL_ACTION_TC_REDIRECT,
+};
+
+enum virtchnl_flow_type {
+       /* flow types */
+       VIRTCHNL_TCP_V4_FLOW = 0,
+       VIRTCHNL_TCP_V6_FLOW,
+};
+
+struct virtchnl_filter {
+       union   virtchnl_flow_spec data;
+       union   virtchnl_flow_spec mask;
+       enum    virtchnl_flow_type flow_type;
+       enum    virtchnl_action action;
+       u32     action_meta;
+       __u8    field_flags;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
+
 /* VIRTCHNL_OP_EVENT
  * PF sends this message to inform the VF driver of events that may affect it.
  * No direct response is expected from the VF, though it may generate other
@@ -711,6 +793,25 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
        case VIRTCHNL_OP_REQUEST_QUEUES:
                valid_len = sizeof(struct virtchnl_vf_res_request);
                break;
+       case VIRTCHNL_OP_ENABLE_CHANNELS:
+               valid_len = sizeof(struct virtchnl_tc_info);
+               if (msglen >= valid_len) {
+                       struct virtchnl_tc_info *vti =
+                               (struct virtchnl_tc_info *)msg;
+                       valid_len += vti->num_tc *
+                               sizeof(struct virtchnl_channel_info);
+                       if (vti->num_tc == 0)
+                               err_msg_format = true;
+               }
+               break;
+       case VIRTCHNL_OP_DISABLE_CHANNELS:
+               break;
+       case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+               valid_len = sizeof(struct virtchnl_filter);
+               break;
+       case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+               valid_len = sizeof(struct virtchnl_filter);
+               break;
        /* These are always errors coming from the VF. */
        case VIRTCHNL_OP_EVENT:
        case VIRTCHNL_OP_UNKNOWN:
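
Worked through with the sizes asserted earlier: a VIRTCHNL_OP_ENABLE_CHANNELS message with num_tc = 2 yields valid_len = sizeof(struct virtchnl_tc_info) + 2 * sizeof(struct virtchnl_channel_info) = 24 + 32 = 56 bytes. Note that the list[1] element embedded in virtchnl_tc_info is counted by both terms, so as written the check effectively demands one spare element's worth of space beyond the flexible array contents; that reading follows from the arithmetic shown here, not from any stated contract.
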
index 4f3df80..ed63f3b 100644 (file)
@@ -49,7 +49,7 @@ struct blk_stat_callback;
 #define BLKDEV_MIN_RQ  4
 #define BLKDEV_MAX_RQ  128     /* Default maximum */
 
-/* Must be consisitent with blk_mq_poll_stats_bkt() */
+/* Must be consistent with blk_mq_poll_stats_bkt() */
 #define BLK_MQ_POLL_STATS_BKTS 16
 
 /*
index a7f16e0..8a45666 100644 (file)
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)                                     \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled && sk) {                                        \
+       if (cgroup_bpf_enabled) {                                              \
                __ret = __cgroup_bpf_run_filter_sk(sk,                         \
                                                 BPF_CGROUP_INET_SOCK_CREATE); \
        }                                                                      \
index 631354a..901c1cc 100644 (file)
 
 #if GCC_VERSION >= 40100
 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-
-#define __nostackprotector     __attribute__((__optimize__("no-stack-protector")))
 #endif
 
 #if GCC_VERSION >= 40300
 #endif /* __CHECKER__ */
 #endif /* GCC_VERSION >= 40300 */
 
+#if GCC_VERSION >= 40400
+#define __optimize(level)      __attribute__((__optimize__(level)))
+#define __nostackprotector     __optimize("no-stack-protector")
+#endif /* GCC_VERSION >= 40400 */
+
 #if GCC_VERSION >= 40500
 
 #ifndef __CHECKER__
 #endif
 #endif
 
+/*
+ * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
+ * confuse the stack allocation in gcc, leading to overly large stack
+ * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
+ *
+ * Adding an empty inline assembly before it works around the problem
+ */
+#define barrier_before_unreachable() asm volatile("")
+
 /*
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
  * unreleased.  Really, we need to have autoconf for the kernel.
  */
 #define unreachable() \
-       do { annotate_unreachable(); __builtin_unreachable(); } while (0)
+       do {                                    \
+               annotate_unreachable();         \
+               barrier_before_unreachable();   \
+               __builtin_unreachable();        \
+       } while (0)
 
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone      __attribute__((__noclone__, __optimize__("no-tracer")))
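
A hedged sketch of the pattern these macros serve, mirroring the generic BUG() change earlier in this series (my_fatal() is hypothetical):

    /* Without the empty asm emitted by barrier_before_unreachable(),
     * GCC versions affected by PR82365 can allocate oversized stack
     * frames around noreturn calls like this one. */
    static void __noreturn my_fatal(const char *msg)
    {
            printk("fatal: %s\n", msg);
            barrier_before_unreachable();
            panic("my_fatal");
    }
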
index c2cc57a..ab4711c 100644 (file)
@@ -86,6 +86,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 # define barrier_data(ptr) barrier()
 #endif
 
+/* workaround for GCC PR82365 if needed */
+#ifndef barrier_before_unreachable
+# define barrier_before_unreachable() do { } while (0)
+#endif
+
 /* Unreachable code */
 #ifdef CONFIG_STACK_VALIDATION
 /*
@@ -277,6 +282,10 @@ unsigned long read_word_at_a_time(const void *addr)
 
 #endif /* __ASSEMBLY__ */
 
+#ifndef __optimize
+# define __optimize(level)
+#endif
+
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
index 871f9e2..0b3fc22 100644 (file)
@@ -225,7 +225,7 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev,
 }
 #endif
 
-#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
 void cpuidle_poll_state_init(struct cpuidle_driver *drv);
 #else
 static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {}
index d4a2a7d..bf53d89 100644 (file)
@@ -170,6 +170,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
        for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)            \
        for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_wrap(cpu, mask, start)    \
+       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
 #define for_each_cpu_and(cpu, mask, and)       \
        for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
 #else
index 34fe846..eb9eab4 100644 (file)
@@ -578,7 +578,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 
 /*
  * This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please
- * don't use this is new code.
+ * don't use this in new code.
  */
 #ifndef arch_dma_supported
 #define arch_dma_supported(dev, mask)  (1)
index 276932d..fdb691b 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/set_memory.h>
 #include <linux/kallsyms.h>
 
-#include <net/xdp.h>
 #include <net/sch_generic.h>
 
 #include <uapi/linux/filter.h>
@@ -30,6 +29,7 @@ struct sk_buff;
 struct sock;
 struct seccomp_data;
 struct bpf_prog_aux;
+struct xdp_rxq_info;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
index 4fa1a48..4fe8f28 100644 (file)
@@ -73,8 +73,8 @@ struct fwnode_operations {
        struct fwnode_handle *(*get)(struct fwnode_handle *fwnode);
        void (*put)(struct fwnode_handle *fwnode);
        bool (*device_is_available)(const struct fwnode_handle *fwnode);
-       void *(*device_get_match_data)(const struct fwnode_handle *fwnode,
-                                      const struct device *dev);
+       const void *(*device_get_match_data)(const struct fwnode_handle *fwnode,
+                                            const struct device *dev);
        bool (*property_present)(const struct fwnode_handle *fwnode,
                                 const char *propname);
        int (*property_read_int_array)(const struct fwnode_handle *fwnode,
index ee6657a..8fe7e43 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
  * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2111,7 +2112,7 @@ enum ieee80211_key_len {
 #define FILS_ERP_MAX_REALM_LEN         253
 #define FILS_ERP_MAX_RRK_LEN           64
 
-#define PMK_MAX_LEN                    48
+#define PMK_MAX_LEN                    64
 
 /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
 enum ieee80211_pub_actioncode {
@@ -2501,6 +2502,17 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr)
                return (u8 *)hdr + 24;
 }
 
+/**
+ * ieee80211_get_tid - get qos TID
+ * @hdr: the frame
+ */
+static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
+{
+       u8 *qc = ieee80211_get_qos_ctl(hdr);
+
+       return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
+}
+
 /**
  * ieee80211_get_SA - get pointer to SA
  * @hdr: the frame
index fec5076..dcde947 100644 (file)
@@ -4,6 +4,12 @@
 
 #include <generated/autoconf.h>
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#else
+#define __LITTLE_ENDIAN 1234
+#endif
+
 #define __ARG_PLACEHOLDER_1 0,
 #define __take_second_arg(__ignored, val, ...) val
 
@@ -64,4 +70,7 @@
  */
 #define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
 
+/* Make sure we always have all types and struct attributes defined. */
+#include <linux/compiler_types.h>
+
 #endif /* __LINUX_KCONFIG_H */
index 7ff25a8..80db19d 100644 (file)
@@ -10,6 +10,7 @@ enum kcore_type {
        KCORE_VMALLOC,
        KCORE_RAM,
        KCORE_VMEMMAP,
+       KCORE_USER,
        KCORE_OTHER,
 };
 
index 8820468..c46016b 100644 (file)
@@ -523,9 +523,11 @@ static inline void __mod_memcg_state(struct mem_cgroup *memcg,
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                   int idx, int val)
 {
-       preempt_disable();
+       unsigned long flags;
+
+       local_irq_save(flags);
        __mod_memcg_state(memcg, idx, val);
-       preempt_enable();
+       local_irq_restore(flags);
 }
 
 /**
@@ -606,9 +608,11 @@ static inline void __mod_lruvec_state(struct lruvec *lruvec,
 static inline void mod_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx, int val)
 {
-       preempt_disable();
+       unsigned long flags;
+
+       local_irq_save(flags);
        __mod_lruvec_state(lruvec, idx, val);
-       preempt_enable();
+       local_irq_restore(flags);
 }
 
 static inline void __mod_lruvec_page_state(struct page *page,
@@ -630,9 +634,11 @@ static inline void __mod_lruvec_page_state(struct page *page,
 static inline void mod_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx, int val)
 {
-       preempt_disable();
+       unsigned long flags;
+
+       local_irq_save(flags);
        __mod_lruvec_page_state(page, idx, val);
-       preempt_enable();
+       local_irq_restore(flags);
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
@@ -659,9 +665,11 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
 static inline void count_memcg_events(struct mem_cgroup *memcg,
                                      int idx, unsigned long count)
 {
-       preempt_disable();
+       unsigned long flags;
+
+       local_irq_save(flags);
        __count_memcg_events(memcg, idx, count);
-       preempt_enable();
+       local_irq_restore(flags);
 }
 
 /* idx can be of type enum memcg_event_item or vm_event_item */
index 48c181a..445ad19 100644 (file)
@@ -60,6 +60,7 @@ struct mlx5_core_cq {
        } tasklet_ctx;
        int                     reset_notify_added;
        struct list_head        reset_notify;
+       struct mlx5_eq          *eq;
 };
 
 
@@ -171,8 +172,17 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
        mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
 }
 
-int mlx5_init_cq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
+static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
+{
+       refcount_inc(&cq->refcount);
+}
+
+static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
+{
+       if (refcount_dec_and_test(&cq->refcount))
+               complete(&cq->free);
+}
+
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                        u32 *in, int inlen);
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
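
The hold/put pair is the usual refcount-plus-completion teardown idiom: every event-path user brackets its access with hold/put, and the destroy path drops the table's own reference and then blocks until the final put completes &cq->free. A hedged paraphrase (not the actual mlx5 destroy code):

    /* event path: keep the CQ alive while delivering */
    mlx5_cq_hold(cq);
    /* ... handle the completion event ... */
    mlx5_cq_put(cq);

    /* destroy path, paraphrased: drop our reference, then wait for
     * whichever put ends up being the last one */
    mlx5_cq_put(cq);
    wait_for_completion(&cq->free);
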
index 6ed79a8..4814cad 100644 (file)
@@ -345,13 +345,6 @@ struct mlx5_buf_list {
        dma_addr_t              map;
 };
 
-struct mlx5_buf {
-       struct mlx5_buf_list    direct;
-       int                     npages;
-       int                     size;
-       u8                      page_shift;
-};
-
 struct mlx5_frag_buf {
        struct mlx5_buf_list    *frags;
        int                     npages;
@@ -359,6 +352,15 @@ struct mlx5_frag_buf {
        u8                      page_shift;
 };
 
+struct mlx5_frag_buf_ctrl {
+       struct mlx5_frag_buf    frag_buf;
+       u32                     sz_m1;
+       u32                     frag_sz_m1;
+       u8                      log_sz;
+       u8                      log_stride;
+       u8                      log_frag_strides;
+};
+
 struct mlx5_eq_tasklet {
        struct list_head list;
        struct list_head process_list;
@@ -375,11 +377,18 @@ struct mlx5_eq_pagefault {
        mempool_t               *pool;
 };
 
+struct mlx5_cq_table {
+       /* protect radix tree */
+       spinlock_t              lock;
+       struct radix_tree_root  tree;
+};
+
 struct mlx5_eq {
        struct mlx5_core_dev   *dev;
+       struct mlx5_cq_table    cq_table;
        __be32 __iomem         *doorbell;
        u32                     cons_index;
-       struct mlx5_buf         buf;
+       struct mlx5_frag_buf    buf;
        int                     size;
        unsigned int            irqn;
        u8                      eqn;
@@ -526,13 +535,6 @@ struct mlx5_core_health {
        struct delayed_work             recover_work;
 };
 
-struct mlx5_cq_table {
-       /* protect radix tree
-        */
-       spinlock_t              lock;
-       struct radix_tree_root  tree;
-};
-
 struct mlx5_qp_table {
        /* protect radix tree
         */
@@ -654,10 +656,6 @@ struct mlx5_priv {
        struct dentry          *cmdif_debugfs;
        /* end: qp staff */
 
-       /* start: cq staff */
-       struct mlx5_cq_table    cq_table;
-       /* end: cq staff */
-
        /* start: mkey staff */
        struct mlx5_mkey_table  mkey_table;
        /* end: mkey staff */
@@ -936,9 +934,9 @@ struct mlx5_hca_vport_context {
        bool                    grh_required;
 };
 
-static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset)
 {
-               return buf->direct.buf + offset;
+               return buf->frags->buf + offset;
 }
 
 #define STRUCT_FIELD(header, field) \
@@ -977,6 +975,25 @@ static inline u32 mlx5_base_mkey(const u32 key)
        return key & 0xffffff00u;
 }
 
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+                                             void *cqc)
+{
+       fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
+       fbc->log_sz     = MLX5_GET(cqc, cqc, log_cq_size);
+       fbc->sz_m1      = (1 << fbc->log_sz) - 1;
+       fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
+       fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1;
+}
+
+static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
+                                         u32 ix)
+{
+       unsigned int frag = (ix >> fbc->log_frag_strides);
+
+       return fbc->frag_buf.frags[frag].buf +
+               ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
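
Worked numbers for the fragment math above: assuming the 64-byte CQE size encodes cqe_sz as 0, log_stride = 6; with log_cq_size = 10 and PAGE_SHIFT = 12, that gives sz_m1 = 1023, log_frag_strides = 6 and frag_sz_m1 = 63. mlx5_frag_buf_get_wqe() then maps ix = 130 to frag = 130 >> 6 = 2 with byte offset (130 & 63) << 6 = 128, i.e. the third 64-byte entry of the third page-sized fragment.
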
@@ -1002,9 +1019,10 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
-                       struct mlx5_buf *buf, int node);
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+                       struct mlx5_frag_buf *buf, int node);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+                  int size, struct mlx5_frag_buf *buf);
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
 int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
                             struct mlx5_frag_buf *buf, int node);
 void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
@@ -1049,22 +1067,12 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
 void mlx5_unregister_debugfs(void);
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-                      int nent, u64 mask, const char *name,
-                      enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1076,14 +1084,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
                         int size_in, void *data_out, int size_out,
                         u16 reg_num, int arg, int write);
 
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-                      u32 *out, int outlen);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
 int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
                       int node);
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
        return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
 }
 
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+       (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+        (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+        mlx5_core_is_pf(mdev))
+
 static inline int mlx5_get_gid_table_len(u16 param)
 {
        if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644 (file)
index 0000000..d3c9db4
--- /dev/null
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+       SRIOV_NONE,
+       SRIOV_LEGACY,
+       SRIOV_OFFLOADS
+};
+
+enum {
+       REP_ETH,
+       REP_IB,
+       NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+       int                    (*load)(struct mlx5_core_dev *dev,
+                                      struct mlx5_eswitch_rep *rep);
+       void                   (*unload)(struct mlx5_eswitch_rep *rep);
+       void                   *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+       void                    *priv;
+       bool                   valid;
+};
+
+struct mlx5_eswitch_rep {
+       struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+       u16                    vport;
+       u8                     hw_id[ETH_ALEN];
+       u16                    vlan;
+       u32                    vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+                                    int vport_index,
+                                    struct mlx5_eswitch_rep_if *rep_if,
+                                    u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+                                      int vport_index,
+                                      u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+                                int vport,
+                                u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+                                               int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+                                   int vport, u32 sqn);
+#endif
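
A hedged sketch of how a protocol driver might consume this interface (the callbacks, their bodies and the vport index are all hypothetical):

    static int my_rep_load(struct mlx5_core_dev *dev,
                           struct mlx5_eswitch_rep *rep)
    {
            /* allocate and publish the per-representor device here */
            return 0;
    }

    static void my_rep_unload(struct mlx5_eswitch_rep *rep)
    {
            /* tear down whatever my_rep_load() created */
    }

    static void my_register_rep(struct mlx5_eswitch *esw)
    {
            struct mlx5_eswitch_rep_if rep_if = {
                    .load   = my_rep_load,
                    .unload = my_rep_unload,
                    .priv   = NULL,
            };

            /* register as the ethernet-type representor for vport 1 */
            mlx5_eswitch_register_vport_rep(esw, 1, &rep_if, REP_ETH);
    }
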
index c30b32e..10191c2 100644 (file)
@@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page)
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
 #endif
index 5396521..7ed82e4 100644 (file)
@@ -4,11 +4,10 @@
 
 #include <linux/in.h>
 #include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
 #include <net/fib_rules.h>
 #include <net/fib_notifier.h>
 #include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IP_MROUTE
 static inline int ip_mroute_opt(int opt)
@@ -56,18 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
 }
 #endif
 
-struct vif_device {
-       struct net_device       *dev;                   /* Device we are using */
-       struct netdev_phys_item_id dev_parent_id;       /* Device parent ID    */
-       unsigned long   bytes_in,bytes_out;
-       unsigned long   pkt_in,pkt_out;         /* Statistics                   */
-       unsigned long   rate_limit;             /* Traffic shaping (NI)         */
-       unsigned char   threshold;              /* TTL threshold                */
-       unsigned short  flags;                  /* Control flags                */
-       __be32          local,remote;           /* Addresses(remote for tunnels)*/
-       int             link;                   /* Physical interface index     */
-};
-
 struct vif_entry_notifier_info {
        struct fib_notifier_info info;
        struct net_device *dev;
@@ -78,34 +65,6 @@ struct vif_entry_notifier_info {
 
 #define VIFF_STATIC 0x8000
 
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
-       struct list_head        list;
-       possible_net_t          net;
-       u32                     id;
-       struct sock __rcu       *mroute_sk;
-       struct timer_list       ipmr_expire_timer;
-       struct list_head        mfc_unres_queue;
-       struct vif_device       vif_table[MAXVIFS];
-       struct rhltable         mfc_hash;
-       struct list_head        mfc_cache_list;
-       int                     maxvif;
-       atomic_t                cache_resolve_queue_len;
-       bool                    mroute_do_assert;
-       bool                    mroute_do_pim;
-       int                     mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
-       MFC_STATIC = BIT(0),
-       MFC_OFFLOAD = BIT(1),
-};
-
 struct mfc_cache_cmp_arg {
        __be32 mfc_mcastgrp;
        __be32 mfc_origin;
@@ -113,28 +72,13 @@ struct mfc_cache_cmp_arg {
 
 /**
  * struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
  * @mfc_mcastgrp: destination multicast group address
  * @mfc_origin: source address
  * @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
  */
 struct mfc_cache {
-       struct rhlist_head mnode;
+       struct mr_mfc _c;
        union {
                struct {
                        __be32 mfc_mcastgrp;
@@ -142,28 +86,6 @@ struct mfc_cache {
                };
                struct mfc_cache_cmp_arg cmparg;
        };
-       vifi_t mfc_parent;
-       int mfc_flags;
-
-       union {
-               struct {
-                       unsigned long expires;
-                       struct sk_buff_head unresolved;
-               } unres;
-               struct {
-                       unsigned long last_assert;
-                       int minvif;
-                       int maxvif;
-                       unsigned long bytes;
-                       unsigned long pkt;
-                       unsigned long wrong_if;
-                       unsigned long lastuse;
-                       unsigned char ttls[MAXVIFS];
-                       refcount_t refcount;
-               } res;
-       } mfc_un;
-       struct list_head list;
-       struct rcu_head rcu;
 };
 
 struct mfc_entry_notifier_info {
@@ -187,12 +109,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
 
 static inline void ipmr_cache_put(struct mfc_cache *c)
 {
-       if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+       if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
                ipmr_cache_free(c);
 }
 static inline void ipmr_cache_hold(struct mfc_cache *c)
 {
-       refcount_inc(&c->mfc_un.res.refcount);
+       refcount_inc(&c->_c.mfc_un.res.refcount);
 }
 
 #endif
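
The conversions above lean on mr_mfc being the first member of the protocol caches, so the generic and protocol-specific views convert with a plain cast; a hedged sketch of the idiom (to_mfc_cache() is hypothetical):

    /* valid only because _c is the first member of struct mfc_cache;
     * container_of(base, struct mfc_cache, _c) would be the more
     * defensive spelling of the same conversion */
    static struct mfc_cache *to_mfc_cache(struct mr_mfc *base)
    {
            return (struct mfc_cache *)base;
    }
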
index 3014c52..1ac38e6 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/skbuff.h>      /* for struct sk_buff_head */
 #include <net/net_namespace.h>
 #include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
@@ -62,57 +63,24 @@ static inline void ip6_mr_cleanup(void)
 }
 #endif
 
-struct mif_device {
-       struct net_device       *dev;                   /* Device we are using */
-       unsigned long   bytes_in,bytes_out;
-       unsigned long   pkt_in,pkt_out;         /* Statistics                   */
-       unsigned long   rate_limit;             /* Traffic shaping (NI)         */
-       unsigned char   threshold;              /* TTL threshold                */
-       unsigned short  flags;                  /* Control flags                */
-       int             link;                   /* Physical interface index     */
-};
-
 #define VIFF_STATIC 0x8000
 
-struct mfc6_cache {
-       struct list_head list;
-       struct in6_addr mf6c_mcastgrp;                  /* Group the entry belongs to   */
-       struct in6_addr mf6c_origin;                    /* Source of packet             */
-       mifi_t mf6c_parent;                     /* Source interface             */
-       int mfc_flags;                          /* Flags on line                */
+struct mfc6_cache_cmp_arg {
+       struct in6_addr mf6c_mcastgrp;
+       struct in6_addr mf6c_origin;
+};
 
+struct mfc6_cache {
+       struct mr_mfc _c;
        union {
                struct {
-                       unsigned long expires;
-                       struct sk_buff_head unresolved; /* Unresolved buffers           */
-               } unres;
-               struct {
-                       unsigned long last_assert;
-                       int minvif;
-                       int maxvif;
-                       unsigned long bytes;
-                       unsigned long pkt;
-                       unsigned long wrong_if;
-                       unsigned long lastuse;
-                       unsigned char ttls[MAXMIFS];    /* TTL thresholds               */
-               } res;
-       } mfc_un;
+                       struct in6_addr mf6c_mcastgrp;
+                       struct in6_addr mf6c_origin;
+               };
+               struct mfc6_cache_cmp_arg cmparg;
+       };
 };
 
-#define MFC_STATIC             1
-#define MFC_NOTIFY             2
-
-#define MFC6_LINES             64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
-                         (__force u32)(a)->s6_addr32[1] ^ \
-                         (__force u32)(a)->s6_addr32[2] ^ \
-                         (__force u32)(a)->s6_addr32[3] ^ \
-                         (__force u32)(g)->s6_addr32[0] ^ \
-                         (__force u32)(g)->s6_addr32[1] ^ \
-                         (__force u32)(g)->s6_addr32[2] ^ \
-                         (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
 #define MFC_ASSERT_THRESH (3*HZ)               /* Maximal freq. of asserts */
 
 struct rtmsg;
@@ -120,12 +88,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
                           struct rtmsg *rtm, u32 portid);
 
 #ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-       return NULL;
+       return false;
 }
 static inline int ip6mr_sk_done(struct sock *sk)
 {
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644 (file)
index 0000000..c2560cb
--- /dev/null
@@ -0,0 +1,346 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egressing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+       struct net_device *dev;
+       unsigned long bytes_in, bytes_out;
+       unsigned long pkt_in, pkt_out;
+       unsigned long rate_limit;
+       unsigned char threshold;
+       unsigned short flags;
+       int link;
+
+       /* Currently only used by ipmr */
+       struct netdev_phys_item_id dev_parent_id;
+       __be32 local, remote;
+};
+
+#ifndef MAXVIFS
+/* This one is nasty; the value is defined in uapi using different symbols
+ * for mroute and mroute6, but both map to the same value of 32.
+ */
+#define MAXVIFS        32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+       MFC_STATIC = BIT(0),
+       MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+       struct rhlist_head mnode;
+       unsigned short mfc_parent;
+       int mfc_flags;
+
+       union {
+               struct {
+                       unsigned long expires;
+                       struct sk_buff_head unresolved;
+               } unres;
+               struct {
+                       unsigned long last_assert;
+                       int minvif;
+                       int maxvif;
+                       unsigned long bytes;
+                       unsigned long pkt;
+                       unsigned long wrong_if;
+                       unsigned long lastuse;
+                       unsigned char ttls[MAXVIFS];
+                       refcount_t refcount;
+               } res;
+       } mfc_un;
+       struct list_head list;
+       struct rcu_head rcu;
+};
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+       const struct rhashtable_params *rht_params;
+       void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resolved routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+       struct list_head        list;
+       possible_net_t          net;
+       struct mr_table_ops     ops;
+       u32                     id;
+       struct sock __rcu       *mroute_sk;
+       struct timer_list       ipmr_expire_timer;
+       struct list_head        mfc_unres_queue;
+       struct vif_device       vif_table[MAXVIFS];
+       struct rhltable         mfc_hash;
+       struct list_head        mfc_cache_list;
+       int                     maxvif;
+       atomic_t                cache_resolve_queue_len;
+       bool                    mroute_do_assert;
+       bool                    mroute_do_pim;
+       int                     mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+                    struct net_device *dev,
+                    unsigned long rate_limit,
+                    unsigned char threshold,
+                    unsigned short flags,
+                    unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid the need for
+ * explicit casts they are declared to return void *.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+                        void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                  struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                    struct mr_table *(*iter)(struct net *net,
+                                             struct mr_table *mrt),
+                    int (*fill)(struct mr_table *mrt,
+                                struct sk_buff *skb,
+                                u32 portid, u32 seq, struct mr_mfc *c,
+                                int cmd, int flags),
+                    spinlock_t *lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+                                  struct net_device *dev,
+                                  unsigned long rate_limit,
+                                  unsigned char threshold,
+                                  unsigned short flags,
+                                  unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net))
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+                                      void *hasharg, int parent)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+                                          int vifi)
+{
+       return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+                                            int vifi, void *hasharg)
+{
+       return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                                struct mr_mfc *c, struct rtmsg *rtm)
+{
+       return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                struct mr_table *(*iter)(struct net *net,
+                                         struct mr_table *mrt),
+                int (*fill)(struct mr_table *mrt,
+                            struct sk_buff *skb,
+                            u32 portid, u32 seq, struct mr_mfc *c,
+                            int cmd, int flags),
+                spinlock_t *lock)
+{
+       return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+       return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+       struct seq_net_private p;
+       struct mr_table *mrt;
+       int ct;
+};
+
+struct mr_mfc_iter {
+       struct seq_net_private p;
+       struct mr_table *mrt;
+       struct list_head *cache;
+
+       /* Lock protecting the mr_table's unresolved queue */
+       spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+                                    seq->private, *pos - 1)
+                   : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid the need for
+ * explicit casts they are declared to return void *.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+                    struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                     loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+                                    struct mr_table *mrt, spinlock_t *lock)
+{
+       struct mr_mfc_iter *it = seq->private;
+
+       it->mrt = mrt;
+       it->cache = NULL;
+       it->lock = lock;
+
+       return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+                                    seq->private, *pos - 1)
+                   : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+       struct mr_mfc_iter *it = seq->private;
+       struct mr_table *mrt = it->mrt;
+
+       if (it->cache == &mrt->mfc_unres_queue)
+               spin_unlock_bh(it->lock);
+       else if (it->cache == &mrt->mfc_cache_list)
+               rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+                                  loff_t pos)
+{
+       return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+                                   void *v, loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+                                  struct mr_mfc_iter *it, loff_t pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                                   loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+                                    struct mr_table *mrt, spinlock_t *lock)
+{
+       return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
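
To illustrate the common API exported by the new mroute_base.h, here is a
hedged lookup sketch; the compare-argument structure and field names are
invented stand-ins for what ipmr/ip6mr actually define via @rht_params:

/* Sketch only: each protocol supplies its own rhashtable compare
 * argument (ipmr keys on source/group). Names below are hypothetical.
 */
struct my_cmparg {
        __be32 origin;          /* source address */
        __be32 group;           /* multicast group */
};

static struct mr_mfc *my_route_lookup(struct mr_table *mrt,
                                      __be32 origin, __be32 group)
{
        struct my_cmparg arg = { .origin = origin, .group = group };

        /* mr_mfc_find() returns void * to spare callers a cast and
         * expands to mr_mfc_find_parent(mrt, &arg, -1).
         */
        return mr_mfc_find(mrt, &arg);
}
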
index 91216b1..000d1aa 100644 (file)
@@ -146,7 +146,7 @@ struct proto_ops {
                                      struct socket *newsock, int flags, bool kern);
        int             (*getname)   (struct socket *sock,
                                      struct sockaddr *addr,
-                                     int *sockaddr_len, int peer);
+                                     int peer);
        __poll_t        (*poll)      (struct file *file, struct socket *sock,
                                      struct poll_table_struct *wait);
        int             (*ioctl)     (struct socket *sock, unsigned int cmd,
@@ -294,10 +294,8 @@ int kernel_listen(struct socket *sock, int backlog);
 int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
                   int flags);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-                      int *addrlen);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-                      int *addrlen);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
 int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
                      int *optlen);
 int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
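
With the int *addrlen out-parameter gone, kernel_getsockname() and
kernel_getpeername() are expected to report the address length through the
return value (negative on error). A hedged caller sketch under that
assumption:

/* Sketch: fetch the local port of a kernel socket with the two-argument
 * signature, assuming the return value is the sockaddr length on success.
 */
static int local_port(struct socket *sock)
{
        struct sockaddr_storage addr;
        int len;

        len = kernel_getsockname(sock, (struct sockaddr *)&addr);
        if (len < 0)
                return len;     /* e.g. -ENOTCONN */

        if (addr.ss_family == AF_INET)
                return ntohs(((struct sockaddr_in *)&addr)->sin_port);
        return -EAFNOSUPPORT;
}
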
index 5eef6c8..dbe6344 100644 (file)
@@ -1798,11 +1798,17 @@ struct net_device {
 #if IS_ENABLED(CONFIG_TIPC)
        struct tipc_bearer __rcu *tipc_ptr;
 #endif
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
        void                    *atalk_ptr;
+#endif
        struct in_device __rcu  *ip_ptr;
+#if IS_ENABLED(CONFIG_DECNET)
        struct dn_dev __rcu     *dn_ptr;
+#endif
        struct inet6_dev __rcu  *ip6_ptr;
+#if IS_ENABLED(CONFIG_AX25)
        void                    *ax25_ptr;
+#endif
        struct wireless_dev     *ieee80211_ptr;
        struct wpan_dev         *ieee802154_ptr;
 #if IS_ENABLED(CONFIG_MPLS_ROUTING)
index b99bced..fbc98e2 100644 (file)
 static inline unsigned long array_index_mask_nospec(unsigned long index,
                                                    unsigned long size)
 {
-       /*
-        * Warn developers about inappropriate array_index_nospec() usage.
-        *
-        * Even if the CPU speculates past the WARN_ONCE branch, the
-        * sign bit of @index is taken into account when generating the
-        * mask.
-        *
-        * This warning is compiled out when the compiler can infer that
-        * @index and @size are less than LONG_MAX.
-        */
-       if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
-                       "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
-               return 0;
-
        /*
         * Always calculate and emit the mask even if the compiler
         * thinks the mask is not needed. The compiler does not take
@@ -43,6 +29,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 }
 #endif
 
+/*
+ * Warn developers about inappropriate array_index_nospec() usage.
+ *
+ * Even if the CPU speculates past the WARN_ONCE branch, the
+ * sign bit of @index is taken into account when generating the
+ * mask.
+ *
+ * This warning is compiled out when the compiler can infer that
+ * @index and @size are less than LONG_MAX.
+ */
+#define array_index_mask_nospec_check(index, size)                             \
+({                                                                             \
+       if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,                      \
+           "array_index_nospec() limited to range of [0, LONG_MAX]\n"))        \
+               _mask = 0;                                                      \
+       else                                                                    \
+               _mask = array_index_mask_nospec(index, size);                   \
+       _mask;                                                                  \
+})
+
 /*
  * array_index_nospec - sanitize an array index after a bounds check
  *
@@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 ({                                                                     \
        typeof(index) _i = (index);                                     \
        typeof(size) _s = (size);                                       \
-       unsigned long _mask = array_index_mask_nospec(_i, _s);          \
+       unsigned long _mask = array_index_mask_nospec_check(_i, _s);    \
                                                                        \
        BUILD_BUG_ON(sizeof(_i) > sizeof(long));                        \
        BUILD_BUG_ON(sizeof(_s) > sizeof(long));                        \
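
The documented use of array_index_nospec() is unchanged by moving the
WARN_ONCE into array_index_mask_nospec_check(): sanitize the index after
the bounds check and before the dereference. A minimal sketch:

/* Sketch: clamp a user-controlled index so the CPU cannot speculate
 * past the bounds check into an out-of-range load.
 */
static int get_entry(const int *table, unsigned long nr_entries,
                     unsigned long idx, int *out)
{
        if (idx >= nr_entries)
                return -EINVAL;

        /* Yields idx when idx < nr_entries and 0 otherwise, even under
         * speculative execution.
         */
        idx = array_index_nospec(idx, nr_entries);
        *out = table[idx];
        return 0;
}
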
index af0f44e..40036a5 100644 (file)
 
 #include <linux/interrupt.h>
 #include <linux/perf_event.h>
+#include <linux/platform_device.h>
 #include <linux/sysfs.h>
 #include <asm/cputype.h>
 
-/*
- * struct arm_pmu_platdata - ARM PMU platform data
- *
- * @handle_irq: an optional handler which will be called from the
- *     interrupt and passed the address of the low level handler,
- *     and can be used to implement any platform specific handling
- *     before or after calling it.
- *
- * @irq_flags: if non-zero, these flags will be passed to request_irq
- *             when requesting interrupts for this PMU device.
- */
-struct arm_pmu_platdata {
-       irqreturn_t (*handle_irq)(int irq, void *dev,
-                                 irq_handler_t pmu_handler);
-       unsigned long irq_flags;
-};
-
 #ifdef CONFIG_ARM_PMU
 
 /*
@@ -92,7 +76,6 @@ enum armpmu_attr_groups {
 
 struct arm_pmu {
        struct pmu      pmu;
-       cpumask_t       active_irqs;
        cpumask_t       supported_cpus;
        char            *name;
        irqreturn_t     (*handle_irq)(int irq_num, void *dev);
@@ -174,12 +157,11 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
 
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
+struct arm_pmu *armpmu_alloc_atomic(void);
 void armpmu_free(struct arm_pmu *pmu);
 int armpmu_register(struct arm_pmu *pmu);
-int armpmu_request_irqs(struct arm_pmu *armpmu);
-void armpmu_free_irqs(struct arm_pmu *armpmu);
-int armpmu_request_irq(struct arm_pmu *armpmu, int cpu);
-void armpmu_free_irq(struct arm_pmu *armpmu, int cpu);
+int armpmu_request_irq(int irq, int cpu);
+void armpmu_free_irq(int irq, int cpu);
 
 #define ARMV8_PMU_PDEV_NAME "armv8-pmu"
 
index 5a0c3e5..6e38c69 100644 (file)
@@ -994,6 +994,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
 
+/* The gen10g_* functions are the old Clause 45 stubs */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
 static inline int phy_read_status(struct phy_device *phydev)
 {
        if (!phydev->drv)
index 769d372..2eea4b3 100644 (file)
@@ -283,7 +283,7 @@ bool device_dma_supported(struct device *dev);
 
 enum dev_dma_attr device_get_dma_attr(struct device *dev);
 
-void *device_get_match_data(struct device *dev);
+const void *device_get_match_data(struct device *dev);
 
 int device_get_phy_mode(struct device *dev);
 
index a079656..0592420 100644 (file)
@@ -75,5 +75,9 @@ void __init ptp_classifier_init(void);
 static inline void ptp_classifier_init(void)
 {
 }
+static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
+{
+       return PTP_CLASS_NONE;
+}
 #endif
 #endif /* _PTP_CLASSIFY_H_ */
index b884b77..6894976 100644 (file)
@@ -296,13 +296,14 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
 {
        void *ptr;
 
+       /* The READ_ONCE in __ptr_ring_peek guarantees that anyone
+        * accessing data through the pointer is up to date. Pairs
+        * with smp_wmb in __ptr_ring_produce.
+        */
        ptr = __ptr_ring_peek(r);
        if (ptr)
                __ptr_ring_discard_one(r);
 
-       /* Make sure anyone accessing data through the pointer is up to date. */
-       /* Pairs with smp_wmb in __ptr_ring_produce. */
-       smp_read_barrier_depends();
        return ptr;
 }
 
@@ -469,7 +470,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
  */
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
-       if (size * sizeof(void *) > KMALLOC_MAX_SIZE)
+       if (size > KMALLOC_MAX_SIZE / sizeof(void *))
                return NULL;
        return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
 }
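
The __ptr_ring_init_queue_alloc() fix above rewrites the bound as a
division so the size check cannot wrap. The same pattern in isolation:

/* Sketch: test "n * sizeof(void *) > limit" without the multiply. With
 * the multiplication form a huge n can overflow and falsely pass; the
 * division form is wrap-free.
 */
static inline bool ptr_array_too_big(unsigned int n, size_t limit)
{
        return n > limit / sizeof(void *);
}
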
index 1fdcde9..3573b4b 100644 (file)
@@ -35,7 +35,7 @@ extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
+extern struct rw_semaphore net_sem;
 
 #ifdef CONFIG_PROVE_LOCKING
 extern bool lockdep_rtnl_is_held(void);
index 1149533..9806184 100644 (file)
@@ -36,7 +36,18 @@ static inline void mmgrab(struct mm_struct *mm)
        atomic_inc(&mm->mm_count);
 }
 
-extern void mmdrop(struct mm_struct *mm);
+extern void __mmdrop(struct mm_struct *mm);
+
+static inline void mmdrop(struct mm_struct *mm)
+{
+       /*
+        * The implicit full barrier implied by atomic_dec_and_test() is
+        * required by the membarrier system call before returning to
+        * user-space, after storing to rq->curr.
+        */
+       if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+               __mmdrop(mm);
+}
 
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
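
Making mmdrop() inline exposes the atomic_dec_and_test() barrier at every
call site; the usage contract is unchanged. A hedged pairing sketch:

/* Sketch: pin an mm_struct's identity (mm_count) across a window that
 * may outlive the current user. Note mm_count does not pin the address
 * space itself; that is mm_users/mmget().
 */
static void inspect_mm(struct mm_struct *mm)
{
        mmgrab(mm);
        /* ... safe to dereference mm here ... */
        mmdrop(mm);
}
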
index 0dcf4e4..96fe289 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/uidgid.h>
 #include <linux/atomic.h>
+#include <linux/ratelimit.h>
 
 struct key;
 
@@ -41,6 +42,9 @@ struct user_struct {
     defined(CONFIG_NET)
        atomic_long_t locked_vm;
 #endif
+
+       /* Miscellaneous per-user rate limit */
+       struct ratelimit_state ratelimit;
 };
 
 extern int uids_sysfs_init(void);
index dc368b8..11c86fb 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Distributed under the terms of the GNU GPL, version 2
  *
- * Please see kernel/semaphore.c for documentation of these functions
+ * Please see kernel/locking/semaphore.c for documentation of these functions
  */
 #ifndef __LINUX_SEMAPHORE_H
 #define __LINUX_SEMAPHORE_H
index e724d5a..ebce9e2 100644 (file)
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
 #if IS_ENABLED(CONFIG_SFP)
 int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                   unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                   const struct sfp_eeprom_id *id);
 void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                       unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                    const struct sfp_eeprom_id *id,
+                                    unsigned long *link_modes);
 
 int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
 int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
        return PORT_OTHER;
 }
 
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                               const struct sfp_eeprom_id *id)
-{
-       return PHY_INTERFACE_MODE_NA;
-}
-
 static inline void sfp_parse_support(struct sfp_bus *bus,
                                     const struct sfp_eeprom_id *id,
                                     unsigned long *support)
 {
 }
 
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                                  const struct sfp_eeprom_id *id,
+                                                  unsigned long *link_modes)
+{
+       return PHY_INTERFACE_MODE_NA;
+}
+
 static inline int sfp_get_module_info(struct sfp_bus *bus,
                                      struct ethtool_modinfo *modinfo)
 {
index 5ebc0f8..9bc1750 100644 (file)
@@ -466,6 +466,9 @@ struct ubuf_info {
 
 #define skb_uarg(SKB)  ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
 
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
+void mm_unaccount_pinned_pages(struct mmpin *mmp);
+
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
 struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
                                        struct ubuf_info *uarg);
@@ -3646,7 +3649,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
        return true;
 }
 
-/* For small packets <= CHECKSUM_BREAK peform checksum complete directly
+/* For small packets <= CHECKSUM_BREAK perform checksum complete directly
  * in checksum_init.
  */
 #define CHECKSUM_BREAK 76
index 9286a5a..1ce1f76 100644 (file)
@@ -353,4 +353,6 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen
                          unsigned int flags, struct timespec *timeout);
 extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
                          unsigned int vlen, unsigned int flags);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
index 7b6a59f..a1a3f4e 100644 (file)
@@ -337,8 +337,6 @@ extern void deactivate_file_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
 extern void swap_setup(void);
 
-extern void add_page_to_unevictable_list(struct page *page);
-
 extern void lru_cache_add_active_or_unevictable(struct page *page,
                                                struct vm_area_struct *vma);
 
index 4a54ef9..bc0cda1 100644 (file)
@@ -465,6 +465,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
 
 extern void workqueue_set_max_active(struct workqueue_struct *wq,
                                     int max_active);
+extern struct work_struct *current_work(void);
 extern bool current_is_workqueue_rescuer(void);
 extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
 extern unsigned int work_busy(struct work_struct *work);
index 27fb5c9..9cce0d8 100644 (file)
@@ -20,8 +20,6 @@ struct net_device *cs89x0_probe(int unit);
 struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
-struct net_device *mac8390_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
 struct net_device *cops_probe(int unit);
 struct net_device *ltpc_probe(void);
 
index 6ed9692..9c2f226 100644 (file)
@@ -87,12 +87,15 @@ struct tc_action_ops {
                       struct tcf_result *);
        int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
        void    (*cleanup)(struct tc_action *);
-       int     (*lookup)(struct net *, struct tc_action **, u32);
+       int     (*lookup)(struct net *net, struct tc_action **a, u32 index,
+                         struct netlink_ext_ack *extack);
        int     (*init)(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act, int ovr,
-                       int bind);
+                       int bind, struct netlink_ext_ack *extack);
        int     (*walk)(struct net *, struct sk_buff *,
-                       struct netlink_callback *, int, const struct tc_action_ops *);
+                       struct netlink_callback *, int,
+                       const struct tc_action_ops *,
+                       struct netlink_ext_ack *);
        void    (*stats_update)(struct tc_action *, u64, u32, u64);
        struct net_device *(*get_dev)(const struct tc_action *a);
 };
@@ -137,7 +140,8 @@ static inline void tc_action_net_exit(struct list_head *net_list,
 
 int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
                       struct netlink_callback *cb, int type,
-                      const struct tc_action_ops *ops);
+                      const struct tc_action_ops *ops,
+                      struct netlink_ext_ack *extack);
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
 bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
                    int bind);
@@ -162,10 +166,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
                    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
                    struct nlattr *est, char *name, int ovr, int bind,
-                   struct list_head *actions);
+                   struct list_head *actions, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind);
+                                   char *name, int ovr, int bind,
+                                   struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
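
The netlink_ext_ack plumbing added to tc_action_ops lets each op return a
precise error string to user space. A hedged sketch of a .lookup
implementation; my_act_net_id is an invented per-net id and the message
text is illustrative:

static int my_act_lookup(struct net *net, struct tc_action **a, u32 index,
                         struct netlink_ext_ack *extack)
{
        struct tc_action_net *tn = net_generic(net, my_act_net_id);
        int found = tcf_idr_search(tn, a, index);

        if (!found)
                NL_SET_ERR_MSG(extack, "action with given index not found");
        return found;
}
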
index 76fb39c..c91bc87 100644 (file)
@@ -318,10 +318,12 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *);
 extern ax25_dev *ax25_dev_list;
 extern spinlock_t ax25_dev_lock;
 
+#if IS_ENABLED(CONFIG_AX25)
 static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
 {
        return dev->ax25_ptr;
 }
+#endif
 
 ax25_dev *ax25_addr_ax25dev(ax25_address *);
 void ax25_dev_device_up(struct net_device *);
index 81174f9..fc40843 100644 (file)
@@ -1147,6 +1147,7 @@ struct cfg80211_tid_stats {
  * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
  * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
  *     (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
+ * @ack_signal: signal strength (in dBm) of the last ACK frame.
  */
 struct station_info {
        u64 filled;
@@ -1191,6 +1192,7 @@ struct station_info {
        u64 rx_duration;
        u8 rx_beacon_signal_avg;
        struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
+       s8 ack_signal;
 };
 
 #if IS_ENABLED(CONFIG_CFG80211)
@@ -1905,11 +1907,16 @@ struct cfg80211_auth_request {
  * @ASSOC_REQ_DISABLE_HT:  Disable HT (802.11n)
  * @ASSOC_REQ_DISABLE_VHT:  Disable VHT
  * @ASSOC_REQ_USE_RRM: Declare RRM capability in this association
+ * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external
+ *     authentication capability. Drivers can offload authentication to
+ *     userspace if this flag is set. Only applicable for cfg80211_connect()
+ *     request (connect callback).
  */
 enum cfg80211_assoc_req_flags {
-       ASSOC_REQ_DISABLE_HT            = BIT(0),
-       ASSOC_REQ_DISABLE_VHT           = BIT(1),
-       ASSOC_REQ_USE_RRM               = BIT(2),
+       ASSOC_REQ_DISABLE_HT                    = BIT(0),
+       ASSOC_REQ_DISABLE_VHT                   = BIT(1),
+       ASSOC_REQ_USE_RRM                       = BIT(2),
+       CONNECT_REQ_EXTERNAL_AUTH_SUPPORT       = BIT(3),
 };
 
 /**
@@ -2600,6 +2607,33 @@ struct cfg80211_pmk_conf {
        const u8 *pmk_r0_name;
 };
 
+/**
+ * struct cfg80211_external_auth_params - Trigger External authentication.
+ *
+ * Commonly used across the external auth request and event interfaces.
+ *
+ * @action: action type / trigger for external authentication. Only significant
+ *     for the authentication request event interface (driver to user space).
+ * @bssid: BSSID of the peer with which the authentication has
+ *     to happen. Used by both the authentication request event and
+ *     authentication response command interface.
+ * @ssid: SSID of the AP.  Used by both the authentication request event and
+ *     authentication response command interface.
+ * @key_mgmt_suite: AKM suite of the respective authentication. Used by the
+ *     authentication request event interface.
+ * @status: status code, %WLAN_STATUS_SUCCESS for successful authentication,
+ *     use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you
+ *     the real status code for failures. Used only for the authentication
+ *     response command interface (user space to driver).
+ */
+struct cfg80211_external_auth_params {
+       enum nl80211_external_auth_action action;
+       u8 bssid[ETH_ALEN] __aligned(2);
+       struct cfg80211_ssid ssid;
+       unsigned int key_mgmt_suite;
+       u16 status;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -2923,6 +2957,9 @@ struct cfg80211_pmk_conf {
  *     (invoked with the wireless_dev mutex held)
  * @del_pmk: delete the previously configured PMK for the given authenticator.
  *     (invoked with the wireless_dev mutex held)
+ *
+ * @external_auth: indicates result of offloaded authentication processing from
+ *     user space
  */
 struct cfg80211_ops {
        int     (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3216,6 +3253,8 @@ struct cfg80211_ops {
                           const struct cfg80211_pmk_conf *conf);
        int     (*del_pmk)(struct wiphy *wiphy, struct net_device *dev,
                           const u8 *aa);
+       int     (*external_auth)(struct wiphy *wiphy, struct net_device *dev,
+                                struct cfg80211_external_auth_params *params);
 };
 
 /*
@@ -3516,6 +3555,35 @@ enum wiphy_vendor_command_flags {
        WIPHY_VENDOR_CMD_NEED_RUNNING = BIT(2),
 };
 
+/**
+ * enum wiphy_opmode_flag - Station's ht/vht operation mode information flags
+ *
+ * @STA_OPMODE_MAX_BW_CHANGED: Max Bandwidth changed
+ * @STA_OPMODE_SMPS_MODE_CHANGED: SMPS mode changed
+ * @STA_OPMODE_N_SS_CHANGED: max N_SS (number of spatial streams) changed
+ *
+ */
+enum wiphy_opmode_flag {
+       STA_OPMODE_MAX_BW_CHANGED       = BIT(0),
+       STA_OPMODE_SMPS_MODE_CHANGED    = BIT(1),
+       STA_OPMODE_N_SS_CHANGED         = BIT(2),
+};
+
+/**
+ * struct sta_opmode_info - Station's ht/vht operation mode information
+ * @changed: contains value from &enum wiphy_opmode_flag
+ * @smps_mode: New SMPS mode of a station
+ * @bw: new max bandwidth value of a station
+ * @rx_nss: new rx_nss value of a station
+ */
+struct sta_opmode_info {
+       u32 changed;
+       u8 smps_mode;
+       u8 bw;
+       u8 rx_nss;
+};
+
 /**
  * struct wiphy_vendor_command - vendor command definition
  * @info: vendor command identifying information, as used in nl80211
@@ -4342,10 +4410,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
  *     of it being pushed into the SKB
  * @addr: the device MAC address
  * @iftype: the virtual interface type
+ * @data_offset: offset of payload after the 802.11 header
  * Return: 0 on success. Non-zero on error.
  */
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
-                                 const u8 *addr, enum nl80211_iftype iftype);
+                                 const u8 *addr, enum nl80211_iftype iftype,
+                                 u8 data_offset);
 
 /**
  * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -4357,7 +4427,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
                                         enum nl80211_iftype iftype)
 {
-       return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+       return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
 }
 
 /**
@@ -5684,6 +5754,20 @@ void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp);
 void cfg80211_radar_event(struct wiphy *wiphy,
                          struct cfg80211_chan_def *chandef, gfp_t gfp);
 
+/**
+ * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event
+ * @dev: network device
+ * @mac: MAC address of a station which opmode got modified
+ * @sta_opmode: station's current opmode value
+ * @gfp: context flags
+ *
+ * Drivers should call this function when a station's operating mode is
+ * modified via an action frame.
+ */
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+                                      struct sta_opmode_info *sta_opmode,
+                                      gfp_t gfp);
+
 /**
  * cfg80211_cac_event - Channel availability check (CAC) event
  * @netdev: network device
@@ -5758,10 +5842,13 @@ bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
  * @addr: the address of the peer
  * @cookie: the cookie filled in @probe_client previously
  * @acked: indicates whether probe was acked or not
+ * @ack_signal: signal strength (in dBm) of the ACK frame.
+ * @is_valid_ack_signal: indicates whether ack_signal is valid.
  * @gfp: allocation flags
  */
 void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
-                          u64 cookie, bool acked, gfp_t gfp);
+                          u64 cookie, bool acked, s32 ack_signal,
+                          bool is_valid_ack_signal, gfp_t gfp);
 
 /**
  * cfg80211_report_obss_beacon - report beacon from other APs
@@ -6202,6 +6289,17 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
 /* ethtool helper */
 void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
 
+/**
+ * cfg80211_external_auth_request - userspace request for authentication
+ * @netdev: network device
+ * @params: External authentication parameters
+ * @gfp: allocation flags
+ * Returns: 0 on success, < 0 on error
+ */
+int cfg80211_external_auth_request(struct net_device *netdev,
+                                  struct cfg80211_external_auth_params *params,
+                                  gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
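
Putting the external-auth additions together: a driver that cannot run an
SAE exchange in firmware can hand it to user space through
cfg80211_external_auth_request(). A hedged sketch; the wrapper and its
parameters are hypothetical driver state:

static int drv_start_external_auth(struct net_device *netdev,
                                   const u8 *bssid,
                                   const u8 *ssid, size_t ssid_len)
{
        struct cfg80211_external_auth_params params = {
                .action = NL80211_EXTERNAL_AUTH_START,
                .key_mgmt_suite = WLAN_AKM_SUITE_SAE,
        };

        memcpy(params.bssid, bssid, ETH_ALEN);
        memcpy(params.ssid.ssid, ssid, ssid_len);
        params.ssid.ssid_len = ssid_len;

        return cfg80211_external_auth_request(netdev, &params, GFP_KERNEL);
}
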
index 6545b03..8d1c3f2 100644 (file)
@@ -234,13 +234,9 @@ struct devlink_dpipe_headers {
 /**
  * struct devlink_resource_ops - resource ops
  * @occ_get: get the occupied size
- * @size_validate: validate the size of the resource before update, reload
- *                 is needed for changes to take place
  */
 struct devlink_resource_ops {
        u64 (*occ_get)(struct devlink *devlink);
-       int (*size_validate)(struct devlink *devlink, u64 size,
-                            struct netlink_ext_ack *extack);
 };
 
 /**
index 6cb602d..60fb4ec 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/of.h>
 #include <linux/ethtool.h>
+#include <linux/net_tstamp.h>
 #include <net/devlink.h>
 #include <net/switchdev.h>
 
@@ -101,6 +102,7 @@ struct dsa_platform_data {
 };
 
 struct packet_type;
+struct dsa_switch;
 
 struct dsa_device_ops {
        struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -357,7 +359,7 @@ struct dsa_switch_ops {
        void    (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
        void    (*get_ethtool_stats)(struct dsa_switch *ds,
                                     int port, uint64_t *data);
-       int     (*get_sset_count)(struct dsa_switch *ds);
+       int     (*get_sset_count)(struct dsa_switch *ds, int port);
 
        /*
         * ethtool Wake-on-LAN
@@ -367,6 +369,12 @@ struct dsa_switch_ops {
        int     (*set_wol)(struct dsa_switch *ds, int port,
                           struct ethtool_wolinfo *w);
 
+       /*
+        * ethtool timestamp info
+        */
+       int     (*get_ts_info)(struct dsa_switch *ds, int port,
+                              struct ethtool_ts_info *ts);
+
        /*
         * Suspend and resume
         */
@@ -469,6 +477,18 @@ struct dsa_switch_ops {
                                         int port, struct net_device *br);
        void    (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
                                          int port, struct net_device *br);
+
+       /*
+        * PTP functionality
+        */
+       int     (*port_hwtstamp_get)(struct dsa_switch *ds, int port,
+                                    struct ifreq *ifr);
+       int     (*port_hwtstamp_set)(struct dsa_switch *ds, int port,
+                                    struct ifreq *ifr);
+       bool    (*port_txtstamp)(struct dsa_switch *ds, int port,
+                                struct sk_buff *clone, unsigned int type);
+       bool    (*port_rxtstamp)(struct dsa_switch *ds, int port,
+                                struct sk_buff *skb, unsigned int type);
 };
 
 struct dsa_switch_driver {
index bb7f467..29ba069 100644 (file)
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
 };
 
 #endif /* !LINUX_NET_ETHOC_H */
-
index 648caf9..e5cfcfc 100644 (file)
@@ -26,7 +26,8 @@ struct fib_rule {
        u32                     table;
        u8                      action;
        u8                      l3mdev;
-       /* 2 bytes hole, try to use */
+       u8                      proto;
+       u8                      ip_proto;
        u32                     target;
        __be64                  tun_id;
        struct fib_rule __rcu   *ctarget;
@@ -39,11 +40,14 @@ struct fib_rule {
        char                    iifname[IFNAMSIZ];
        char                    oifname[IFNAMSIZ];
        struct fib_kuid_range   uid_range;
+       struct fib_rule_port_range      sport_range;
+       struct fib_rule_port_range      dport_range;
        struct rcu_head         rcu;
 };
 
 struct fib_lookup_arg {
        void                    *lookup_ptr;
+       const void              *lookup_data;
        void                    *result;
        struct fib_rule         *rule;
        u32                     table;
@@ -108,7 +112,12 @@ struct fib_rule_notifier_info {
        [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
        [FRA_GOTO]      = { .type = NLA_U32 }, \
        [FRA_L3MDEV]    = { .type = NLA_U8 }, \
-       [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }
+       [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
+       [FRA_PROTOCOL]  = { .type = NLA_U8 }, \
+       [FRA_IP_PROTO]  = { .type = NLA_U8 }, \
+       [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+       [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
@@ -142,6 +151,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
        return frh->table;
 }
 
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+       return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+                                        __be16 port)
+{
+       return ntohs(port) >= a->start &&
+               ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+       return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+               a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+                                              struct fib_rule_port_range *b)
+{
+       return a->start == b->start &&
+               a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+       return rule->ip_proto ||
+               fib_rule_port_range_set(&rule->sport_range) ||
+               fib_rule_port_range_set(&rule->dport_range);
+}
+
 struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
                                         struct net *);
 void fib_rules_unregister(struct fib_rules_ops *);
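
The port-range helpers above treat {0, 0} as "unset" and accept only
1..65534 with start <= end; fib_rule_port_inrange() compares in host byte
order. A small composition sketch:

/* Sketch: match a flow's destination port against a rule, mirroring how
 * the helpers are meant to compose.
 */
static bool rule_matches_dport(const struct fib_rule *rule, __be16 dport)
{
        const struct fib_rule_port_range *r = &rule->dport_range;

        if (!fib_rule_port_range_set(r))
                return true;    /* no range configured: match all */

        return fib_rule_port_inrange(r, dport);
}
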
index f1624fd..8ce2179 100644 (file)
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
        fl4->daddr = daddr;
        fl4->saddr = saddr;
 }
-                                     
+
 
 struct flowi6 {
        struct flowi_common     __fl_common;
@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family)
 
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
-static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
-{
-       struct flow_keys keys;
-
-       return __get_hash_from_flowi6(fl6, &keys);
-}
-
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
-
-static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
-{
-       struct flow_keys keys;
-
-       return __get_hash_from_flowi4(fl4, &keys);
-}
-
 #endif
index f90585d..797142e 100644 (file)
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                     bool *csum_err, __be16 proto, int nhs);
 
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
 static inline int gre_calc_hlen(__be16 o_flags)
 {
        int addend = 4;
index d91f9e7..960236f 100644 (file)
@@ -149,6 +149,8 @@ enum ieee80211_radiotap_ampdu_flags {
        IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008,
        IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010,
        IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020,
+       IEEE80211_RADIOTAP_AMPDU_EOF = 0x0040,
+       IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN = 0x0080,
 };
 
 /* for IEEE80211_RADIOTAP_VHT */
index 5a54c95..500f813 100644 (file)
@@ -32,7 +32,7 @@ int inet_shutdown(struct socket *sock, int how);
 int inet_listen(struct socket *sock, int backlog);
 void inet_sock_destruct(struct sock *sk);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet_getname(struct socket *sock, struct sockaddr *uaddr,
                 int peer);
 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int inet_ctl_sock_create(struct sock **sk, unsigned short family,
index c1a93ce..b68fea0 100644 (file)
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
        u16         net_header_len;
        u16         net_frag_header_len;
        u16         sockaddr_len;
-       int         (*setsockopt)(struct sock *sk, int level, int optname, 
+       int         (*setsockopt)(struct sock *sk, int level, int optname,
                                  char __user *optval, unsigned int optlen);
-       int         (*getsockopt)(struct sock *sk, int level, int optname, 
+       int         (*getsockopt)(struct sock *sk, int level, int optname,
                                  char __user *optval, int __user *optlen);
 #ifdef CONFIG_COMPAT
        int         (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
 
 /** inet_connection_sock - INET connection oriented sock
  *
- * @icsk_accept_queue:    FIFO of established children 
+ * @icsk_accept_queue:    FIFO of established children
  * @icsk_bind_hash:       Bind node
  * @icsk_timeout:         Timeout
  * @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
                unsigned long     timeout;       /* Currently scheduled timeout            */
                __u32             lrcvtime;      /* timestamp of last received data packet */
                __u16             last_seg_size; /* Size of last incoming segment          */
-               __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */ 
+               __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */
        } icsk_ack;
        struct {
                int               enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
 static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
-       
+
        if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
                icsk->icsk_pending = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
index 746abff..fe63ba9 100644 (file)
@@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 void ip4_datagram_release_cb(struct sock *sk);
 
 struct ip_reply_arg {
-       struct kvec iov[1];   
+       struct kvec iov[1];
        int         flags;
        __wsum      csum;
        int         csumoffset; /* u16 offset of csum in iov[0].iov_base */
-                               /* -1 if not needed */ 
+                               /* -1 if not needed */
        int         bound_dev_if;
        u8          tos;
        kuid_t      uid;
-}; 
+};
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
 
@@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net);
 /*
  *     Functions provided by ip_forward.c
  */
+
 int ip_forward(struct sk_buff *skb);
+
 /*
  *     Functions provided by ip_options.c
  */
+
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
                      __be32 daddr, struct rtable *rt, int is_frag);
 
index 34ec321..5e86fd9 100644 (file)
@@ -350,7 +350,8 @@ struct fib6_table {
 
 typedef struct rt6_info *(*pol_lookup_t)(struct net *,
                                         struct fib6_table *,
-                                        struct flowi6 *, int);
+                                        struct flowi6 *,
+                                        const struct sk_buff *, int);
 
 struct fib6_entry_notifier_info {
        struct fib_notifier_info info; /* must be first */
@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info {
 struct fib6_table *fib6_get_table(struct net *net, u32 id);
 struct fib6_table *fib6_new_table(struct net *net, u32 id);
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+                                  const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup);
 
 struct fib6_node *fib6_lookup(struct fib6_node *root,
@@ -415,6 +417,24 @@ void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
 int fib6_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi6 *fl6,
+                                                struct flow_keys *flkeys)
+{
+       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+       if (!net->ipv6.fib6_rules_require_fldissect)
+               return false;
+
+       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       fl6->fl6_sport = flkeys->ports.src;
+       fl6->fl6_dport = flkeys->ports.dst;
+       fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+       return true;
+}
 #else
 static inline int               fib6_rules_init(void)
 {
@@ -436,5 +456,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
 {
        return 0;
 }
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi6 *fl6,
+                                                struct flow_keys *flkeys)
+{
+       return false;
+}
 #endif
 #endif
index 27d23a6..ce2abc0 100644 (file)
@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
 void ip6_route_input(struct sk_buff *skb);
 struct dst_entry *ip6_route_input_lookup(struct net *net,
                                         struct net_device *dev,
-                                        struct flowi6 *fl6, int flags);
+                                        struct flowi6 *fl6,
+                                        const struct sk_buff *skb, int flags);
 
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flowi6 *fl6, int flags);
@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
-                                  int flags);
+                                  const struct sk_buff *skb, int flags);
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-                              int ifindex, struct flowi6 *fl6, int flags);
+                              int ifindex, struct flowi6 *fl6,
+                              const struct sk_buff *skb, int flags);
 
 void ip6_route_init_special_entries(void);
 int ip6_route_init(void);
@@ -126,8 +128,10 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 }
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
-                           const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+                           const struct in6_addr *saddr, int oif,
+                           const struct sk_buff *skb, int flags);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+                      const struct sk_buff *skb, struct flow_keys *hkeys);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
@@ -266,4 +270,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
               ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
               !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
 }
+
 #endif
index f805243..7c7522e 100644 (file)
@@ -157,7 +157,7 @@ struct fib_result_nl {
        unsigned char   nh_sel;
        unsigned char   type;
        unsigned char   scope;
-       int             err;      
+       int             err;
 };
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
        return 0;
 }
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi4 *fl4,
+                                                struct flow_keys *flkeys)
+{
+       return false;
+}
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
 int fib4_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib4_rules_seq_read(struct net *net);
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi4 *fl4,
+                                                struct flow_keys *flkeys)
+{
+       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+       if (!net->ipv4.fib_rules_require_fldissect)
+               return false;
+
+       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       fl4->fl4_sport = flkeys->ports.src;
+       fl4->fl4_dport = flkeys->ports.dst;
+       fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+       return true;
+}
+
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
@@ -370,8 +395,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-                      const struct sk_buff *skb);
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+                      const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
 void fib_select_multipath(struct fib_result *res, int hash);
 void fib_select_path(struct net *net, struct fib_result *res,
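
fib4_rules_early_flow_dissect() dissects the flow once, only when some rule
requires L4 keys, and the result can be reused for multipath hashing. A
hedged input-path sketch (lookup and error handling elided):

static void route_input_keys(struct net *net, struct sk_buff *skb,
                             struct flowi4 *fl4)
{
        struct flow_keys keys, *flkeys = NULL;

        if (fib4_rules_early_flow_dissect(net, skb, fl4, &keys))
                flkeys = &keys; /* sport/dport/proto also copied to fl4 */

        /* later, e.g.: fib_multipath_hash(net, fl4, skb, flkeys); */
}
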
index 1f16773..cbe5add 100644 (file)
@@ -254,6 +254,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
 
 #ifdef CONFIG_INET
 
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+                                      int proto,
+                                      __be32 daddr, __be32 saddr,
+                                      __be32 key, __u8 tos, int oif,
+                                      __u32 mark)
+{
+       memset(fl4, 0, sizeof(*fl4));
+       fl4->flowi4_oif = oif;
+       fl4->daddr = daddr;
+       fl4->saddr = saddr;
+       fl4->flowi4_tos = tos;
+       fl4->flowi4_proto = proto;
+       fl4->fl4_gre_key = key;
+       fl4->flowi4_mark = mark;
+}
+
 int ip_tunnel_init(struct net_device *dev);
 void ip_tunnel_uninit(struct net_device *dev);
 void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
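
ip_tunnel_init_flow() centralizes the flowi4 setup that tunnel drivers
previously open-coded. A hedged route-lookup sketch using it; the
parameters stand in for a driver's tunnel state:

static struct rtable *tunnel_route(struct net *net, __be32 daddr,
                                   __be32 saddr, __be32 key, __u8 tos,
                                   int oif, __u32 mark)
{
        struct flowi4 fl4;

        ip_tunnel_init_flow(&fl4, IPPROTO_GRE, daddr, saddr,
                            key, tos, oif, mark);
        return ip_route_output_key(net, &fl4);
}
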
index 8606c91..cabd3cd 100644 (file)
 
 #define IPV6_ADDR_ANY          0x0000U
 
-#define IPV6_ADDR_UNICAST              0x0001U 
-#define IPV6_ADDR_MULTICAST            0x0002U 
+#define IPV6_ADDR_UNICAST      0x0001U
+#define IPV6_ADDR_MULTICAST    0x0002U
 
 #define IPV6_ADDR_LOOPBACK     0x0010U
 #define IPV6_ADDR_LINKLOCAL    0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
 #endif
 }
 
-static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
                                    const struct in6_addr *addr,
                                    int plen)
 {
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
        addr[1] = wl;
 }
 
-static inline void ipv6_addr_set(struct in6_addr *addr, 
+static inline void ipv6_addr_set(struct in6_addr *addr,
                                     __be32 w1, __be32 w2,
                                     __be32 w3, __be32 w4)
 {
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
        }
 
        /*
-        *      we should *never* get to this point since that 
+        *      we should *never* get to this point since that
         *      would mean the addrs are equal
         *
         *      However, we do get to it 8) And exactly, when
@@ -1056,7 +1056,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
 
 int inet6_release(struct socket *sock);
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
                  int peer);
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
index d747ef9..33fd9ba 100644 (file)
@@ -127,6 +127,17 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
 int lwtunnel_xmit(struct sk_buff *skb);
 
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+       if (lwtunnel_output_redirect(dst->lwtstate)) {
+               dst->lwtstate->orig_output = dst->output;
+               dst->output = lwtunnel_output;
+       }
+       if (lwtunnel_input_redirect(dst->lwtstate)) {
+               dst->lwtstate->orig_input = dst->input;
+               dst->input = lwtunnel_input;
+       }
+}
 #else
 
 static inline void lwtstate_free(struct lwtunnel_state *lws)
@@ -158,6 +169,10 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
        return false;
 }
 
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+}
+
 static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
                                             unsigned int mtu)
 {
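
lwtunnel_set_redirect() folds the output/input redirect checks that route
assembly previously open-coded into one helper that compiles away without
CONFIG_LWTUNNEL. A hedged call-site sketch; the assignment placement is
illustrative only:

static void finish_dst(struct dst_entry *dst, struct lwtunnel_state *lws)
{
        dst->lwtstate = lws;
        lwtunnel_set_redirect(dst);
}
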
index 906e902..2fd59ed 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -934,6 +935,7 @@ struct ieee80211_tx_info {
                        u8 ampdu_len;
                        u8 antenna;
                        u16 tx_time;
+                       bool is_valid_ack_signal;
                        void *status_driver_data[19 / sizeof(void *)];
                } status;
                struct {
@@ -1098,6 +1100,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  *     the first subframe.
  * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
  *     be done in the hardware.
+ * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
+ *     frame
+ * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
  */
 enum mac80211_rx_flags {
        RX_FLAG_MMIC_ERROR              = BIT(0),
@@ -1124,6 +1129,8 @@ enum mac80211_rx_flags {
        RX_FLAG_MIC_STRIPPED            = BIT(21),
        RX_FLAG_ALLOW_SAME_PN           = BIT(22),
        RX_FLAG_ICV_STRIPPED            = BIT(23),
+       RX_FLAG_AMPDU_EOF_BIT           = BIT(24),
+       RX_FLAG_AMPDU_EOF_BIT_KNOWN     = BIT(25),
 };
 
 /**
@@ -2063,6 +2070,14 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
  *     TDLS links.
  *
+ * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
+ *     mgd_prepare_tx() callback to be called before transmission of a
+ *     deauthentication frame in case the association was completed but no
+ *     beacon was heard. This is required in multi-channel scenarios, where the
+ *     virtual interface might not be given air time for the transmission of
+ *     the frame, as it is not synced with the AP/P2P GO yet, and thus the
+ *     deauthentication frame might not be transmitted.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2106,6 +2121,7 @@ enum ieee80211_hw_flags {
        IEEE80211_HW_REPORTS_LOW_ACK,
        IEEE80211_HW_SUPPORTS_TX_FRAG,
        IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
+       IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
 
        /* keep last, obviously */
        NUM_IEEE80211_HW_FLAGS
@@ -3350,6 +3366,9 @@ enum ieee80211_reconfig_type {
  *     management frame prior to having successfully associated to allow the
  *     driver to give it channel time for the transmission, to get a response
  *     and to be able to synchronize with the GO.
+ *     For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
+ *     would also call this function before transmitting a deauthentication
+ *     frame in case that no beacon was heard from the AP/P2P GO.
  *     The callback will be called before each transmission and upon return
  *     mac80211 will transmit the frame right away.
  *     The callback is optional and can (should!) sleep.
@@ -4149,7 +4168,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid);
  * The TX headroom reserved by mac80211 for its own tx_status functions.
  * This is enough for the radiotap header.
  */
-#define IEEE80211_TX_STATUS_HEADROOM   14
+#define IEEE80211_TX_STATUS_HEADROOM   ALIGN(14, 4)
 
 /**
  * ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames
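A driver opts into the new deauth preparation behaviour by advertising the hardware flag; mac80211 then invokes the driver's mgd_prepare_tx() callback before the deauthentication frame. A sketch under assumed driver names:

	/* in the driver's hw setup path (hypothetical function): */
	static void demo_setup_hw_flags(struct ieee80211_hw *hw)
	{
		ieee80211_hw_set(hw, DEAUTH_NEED_MGD_TX_PREP);
	}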
index f306b2a..d441749 100644 (file)
@@ -59,8 +59,12 @@ struct net {
        atomic64_t              cookie_gen;
 
        struct list_head        list;           /* list of network namespaces */
-       struct list_head        cleanup_list;   /* namespaces on death row */
-       struct list_head        exit_list;      /* Use only net_mutex */
+       struct list_head        exit_list;      /* Linked to invoke pernet exit
+                                                * methods on a dead net (net_sem
+                                                * read locked), or to unregister
+                                                * pernet ops (net_sem write locked).
+                                                */
+       struct llist_node       cleanup_list;   /* namespaces on death row */
 
        struct user_namespace   *user_ns;       /* Owning user namespace */
        struct ucounts          *ucounts;
@@ -89,7 +93,7 @@ struct net {
        /* core fib_rules */
        struct list_head        rules_ops;
 
-       struct list_head        fib_notifier_ops;  /* protected by net_mutex */
+       struct list_head        fib_notifier_ops;  /* protected by net_sem */
 
        struct net_device       *loopback_dev;          /* The loopback */
        struct netns_core       core;
@@ -313,6 +317,12 @@ struct pernet_operations {
        void (*exit_batch)(struct list_head *net_exit_list);
        unsigned int *id;
        size_t size;
+       /*
+        * Indicates that the above methods may be executed in parallel
+        * with the methods of any other pernet_operations, i.e. they do
+        * not need net_sem to be write locked.
+        */
+       bool async;
 };
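Pernet ops that are safe to run concurrently simply set the new field; the audit hunk later in this merge does exactly this. A minimal sketch with hypothetical init/exit methods:

	static struct pernet_operations demo_net_ops = {
		.init  = demo_net_init,		/* hypothetical */
		.exit  = demo_net_exit,		/* hypothetical */
		.async = true,			/* no write-locked net_sem needed */
	};

	/* registered as usual: */
	register_pernet_subsys(&demo_net_ops);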
 
 /*
index 40e7bab..d991826 100644 (file)
@@ -26,7 +26,8 @@ enum netevent_notif_type {
        NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
        NETEVENT_REDIRECT,         /* arg is struct netevent_redirect ptr */
        NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
-       NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
+       NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+       NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
index 44668c2..3a970e4 100644 (file)
@@ -52,6 +52,7 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        struct fib_rules_ops    *rules_ops;
        bool                    fib_has_custom_rules;
+       unsigned int            fib_rules_require_fldissect;
        struct fib_table __rcu  *fib_main;
        struct fib_table __rcu  *fib_default;
 #endif
index 987cc45..5b51110 100644 (file)
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
        int ip6_rt_gc_elasticity;
        int ip6_rt_mtu_expires;
        int ip6_rt_min_advmss;
+       int multipath_hash_policy;
        int flowlabel_consistency;
        int auto_flowlabels;
        int icmpv6_time;
@@ -71,7 +72,8 @@ struct netns_ipv6 {
        unsigned int             ip6_rt_gc_expire;
        unsigned long            ip6_rt_last_gc;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-       bool                     fib6_has_custom_rules;
+       unsigned int            fib6_rules_require_fldissect;
+       bool                    fib6_has_custom_rules;
        struct rt6_info         *ip6_prohibit_entry;
        struct rt6_info         *ip6_blk_hole_entry;
        struct fib6_table       *fib6_local_tbl;
@@ -84,7 +86,7 @@ struct netns_ipv6 {
        struct sock             *mc_autojoin_sk;
 #ifdef CONFIG_IPV6_MROUTE
 #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
-       struct mr6_table        *mrt6;
+       struct mr_table         *mrt6;
 #else
        struct list_head        mr6_tables;
        struct fib_rules_ops    *mr6_rules_ops;
index 8740625..e828d31 100644 (file)
@@ -806,6 +806,7 @@ enum tc_prio_command {
        TC_PRIO_REPLACE,
        TC_PRIO_DESTROY,
        TC_PRIO_STATS,
+       TC_PRIO_GRAFT,
 };
 
 struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
        struct gnet_stats_queue *qstats;
 };
 
+struct tc_prio_qopt_offload_graft_params {
+       u8 band;
+       u32 child_handle;
+};
+
 struct tc_prio_qopt_offload {
        enum tc_prio_command command;
        u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
        union {
                struct tc_prio_qopt_offload_params replace_params;
                struct tc_qopt_offload_stats stats;
+               struct tc_prio_qopt_offload_graft_params graft_params;
        };
 };
+
 #endif
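A driver consuming the new graft command would switch on it in its qdisc offload handler; a hedged sketch with hypothetical driver helpers:

	static int demo_setup_tc_prio(struct net_device *dev,
				      struct tc_prio_qopt_offload *opt)
	{
		switch (opt->command) {
		case TC_PRIO_GRAFT:
			/* re-parent the offloaded child qdisc for this band */
			return demo_graft_child(dev, opt->graft_params.band,
						opt->graft_params.child_handle);
		default:
			return -EOPNOTSUPP;
		}
	}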
index ebc5a2e..f83cacc 100644 (file)
@@ -78,7 +78,7 @@ struct regulatory_request {
        int wiphy_idx;
        enum nl80211_reg_initiator initiator;
        enum nl80211_user_reg_hint_type user_reg_hint_type;
-       char alpha2[2];
+       char alpha2[3];
        enum nl80211_dfs_regions dfs_region;
        bool intersect;
        bool processed;
index 1eb9ce4..158833e 100644 (file)
@@ -65,8 +65,6 @@ struct rtable {
        /* Miscellaneous cached information */
        u32                     rt_pmtu;
 
-       u32                     rt_table_id;
-
        struct list_head        rt_uncached;
        struct uncached_list    *rt_uncached_list;
 };
index e2ab136..d4907b5 100644 (file)
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
        return false;
 }
 
-/* Reset all TX qdiscs greater then index of a device.  */
+/* Reset all TX qdiscs greater than index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
        struct Qdisc *qdisc;
index 169c92a..b962458 100644 (file)
@@ -417,6 +417,7 @@ struct sock {
        struct page_frag        sk_frag;
        netdev_features_t       sk_route_caps;
        netdev_features_t       sk_route_nocaps;
+       netdev_features_t       sk_route_forced_caps;
        int                     sk_gso_type;
        unsigned int            sk_gso_max_size;
        gfp_t                   sk_allocation;
@@ -1584,7 +1585,7 @@ int sock_no_bind(struct socket *, struct sockaddr *, int);
 int sock_no_connect(struct socket *, struct sockaddr *, int, int);
 int sock_no_socketpair(struct socket *, struct socket *);
 int sock_no_accept(struct socket *, struct socket *, int, bool);
-int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
+int sock_no_getname(struct socket *, struct sockaddr *, int);
 __poll_t sock_no_poll(struct file *, struct socket *,
                          struct poll_table_struct *);
 int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
@@ -1862,15 +1863,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
        sk->sk_route_caps &= ~flags;
 }
 
-static inline bool sk_check_csum_caps(struct sock *sk)
-{
-       return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
-              (sk->sk_family == PF_INET &&
-               (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
-              (sk->sk_family == PF_INET6 &&
-               (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
-}
-
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
                                           struct iov_iter *from, char *to,
                                           int copy, int offset)
index e3fc667..9c9b376 100644 (file)
@@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
                                              struct sk_buff *skb,
                                              const struct tcphdr *th);
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
-                          struct request_sock *req, bool fastopen);
+                          struct request_sock *req, bool fastopen,
+                          bool *lost_race);
 int tcp_child_process(struct sock *parent, struct sock *child,
                      struct sk_buff *skb);
 void tcp_enter_loss(struct sock *sk);
@@ -510,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
 #endif
 /* tcp_output.c */
 
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-                    int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
                               int nonagle);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -980,8 +979,8 @@ struct tcp_congestion_ops {
        u32  (*undo_cwnd)(struct sock *sk);
        /* hook for packet ack accounting (optional) */
        void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
-       /* suggest number of segments for each skb to transmit (optional) */
-       u32 (*tso_segs_goal)(struct sock *sk);
+       /* override sysctl_tcp_min_tso_segs */
+       u32 (*min_tso_segs)(struct sock *sk);
        /* returns the multiplier used in tcp_sndbuf_expand (optional) */
        u32 (*sndbuf_expand)(struct sock *sk);
        /* call when packets are delivered to update cwnd and pacing rate,
index 50e78a7..2875e16 100644 (file)
@@ -32,21 +32,21 @@ enum {
 
 #define TCP_STATE_MASK 0xF
 
-#define TCP_ACTION_FIN (1 << 7)
+#define TCP_ACTION_FIN (1 << TCP_CLOSE)
 
 enum {
-       TCPF_ESTABLISHED = (1 << 1),
-       TCPF_SYN_SENT    = (1 << 2),
-       TCPF_SYN_RECV    = (1 << 3),
-       TCPF_FIN_WAIT1   = (1 << 4),
-       TCPF_FIN_WAIT2   = (1 << 5),
-       TCPF_TIME_WAIT   = (1 << 6),
-       TCPF_CLOSE       = (1 << 7),
-       TCPF_CLOSE_WAIT  = (1 << 8),
-       TCPF_LAST_ACK    = (1 << 9),
-       TCPF_LISTEN      = (1 << 10),
-       TCPF_CLOSING     = (1 << 11),
-       TCPF_NEW_SYN_RECV = (1 << 12),
+       TCPF_ESTABLISHED = (1 << TCP_ESTABLISHED),
+       TCPF_SYN_SENT    = (1 << TCP_SYN_SENT),
+       TCPF_SYN_RECV    = (1 << TCP_SYN_RECV),
+       TCPF_FIN_WAIT1   = (1 << TCP_FIN_WAIT1),
+       TCPF_FIN_WAIT2   = (1 << TCP_FIN_WAIT2),
+       TCPF_TIME_WAIT   = (1 << TCP_TIME_WAIT),
+       TCPF_CLOSE       = (1 << TCP_CLOSE),
+       TCPF_CLOSE_WAIT  = (1 << TCP_CLOSE_WAIT),
+       TCPF_LAST_ACK    = (1 << TCP_LAST_ACK),
+       TCPF_LISTEN      = (1 << TCP_LISTEN),
+       TCPF_CLOSING     = (1 << TCP_CLOSING),
+       TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
 };
 
 #endif /* _LINUX_TCP_STATES_H */
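Expressing each TCPF_* flag as (1 << TCP_*) makes the derivation from the state enum explicit without changing any value; the usual mask test still reads:

	/* e.g. accept work only in these two states: */
	if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		do_work(sk);	/* hypothetical */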
index 81bdbf9..9185e45 100644 (file)
@@ -64,6 +64,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
                UDP_SKB_CB(skb)->cscov = cscov;
                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->ip_summed = CHECKSUM_NONE;
+               skb->csum_valid = 0;
         }
 
        return 0;
index 7d20776..aa027ba 100644 (file)
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
 static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
 static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{ 
-       return 1; 
-} 
+{
+       return 1;
+}
 static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
 {
        return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
 {
        if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
            (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
-            ipv6_addr_any((struct in6_addr *)saddr) || 
+            ipv6_addr_any((struct in6_addr *)saddr) ||
             ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
                return 1;
        return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
 static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
        return -ENOPROTOOPT;
-} 
+}
 
 static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 {
index c2d8116..2cdf8dc 100644 (file)
@@ -28,10 +28,6 @@ enum rdma_restrack_type {
         * @RDMA_RESTRACK_QP: Queue pair (QP)
         */
        RDMA_RESTRACK_QP,
-       /**
-        * @RDMA_RESTRACK_XRCD: XRC domain (XRCD)
-        */
-       RDMA_RESTRACK_XRCD,
        /**
         * @RDMA_RESTRACK_MAX: Last entry, used for array declarations
         */
index 6da4407..38287d9 100644 (file)
@@ -276,10 +276,7 @@ struct uverbs_object_tree_def {
  */
 
 struct uverbs_ptr_attr {
-       union {
-               u64             data;
-               void    __user *ptr;
-       };
+       u64             data;
        u16             len;
        /* Combination of bits from enum UVERBS_ATTR_F_XXXX */
        u16             flags;
@@ -351,38 +348,60 @@ static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr
 }
 
 static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
-                                size_t idx, const void *from)
+                                size_t idx, const void *from, size_t size)
 {
        const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
        u16 flags;
+       size_t min_size;
 
        if (IS_ERR(attr))
                return PTR_ERR(attr);
 
+       min_size = min_t(size_t, attr->ptr_attr.len, size);
+       if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+               return -EFAULT;
+
        flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT;
-       return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) &&
-               !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT;
+       if (put_user(flags, &attr->uattr->flags))
+               return -EFAULT;
+
+       return 0;
 }
 
-static inline int _uverbs_copy_from(void *to, size_t to_size,
+static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
+{
+       return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
+}
+
+static inline int _uverbs_copy_from(void *to,
                                    const struct uverbs_attr_bundle *attrs_bundle,
-                                   size_t idx)
+                                   size_t idx,
+                                   size_t size)
 {
        const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
 
        if (IS_ERR(attr))
                return PTR_ERR(attr);
 
-       if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data))
+       /*
+        * Validation ensures attr->ptr_attr.len >= size. If the caller is
+        * using UVERBS_ATTR_SPEC_F_MIN_SZ then it must call copy_from with
+        * the right size.
+        */
+       if (unlikely(size < attr->ptr_attr.len))
+               return -EINVAL;
+
+       if (uverbs_attr_ptr_is_inline(attr))
                memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len);
-       else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len))
+       else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+                               attr->ptr_attr.len))
                return -EFAULT;
 
        return 0;
 }
 
 #define uverbs_copy_from(to, attrs_bundle, idx)                                      \
-       _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx)
+       _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to))
 
 /* =================================================
  *      Definitions -> Specs infrastructure
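With ptr attrs carried as a plain u64 (inline payload or user pointer), a method handler copies data in and out through the helpers above; a sketch with hypothetical attribute ids and structs:

	struct demo_cmd cmd;	/* hypothetical payloads */
	struct demo_resp resp;
	int ret;

	ret = uverbs_copy_from(&cmd, attrs, DEMO_ATTR_CMD);
	if (ret)
		return ret;
	/* ... handle the command, fill resp ... */
	return uverbs_copy_to(attrs, DEMO_ATTR_RESP, &resp, sizeof(resp));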
index 4bb86d3..9a4fa0c 100644 (file)
@@ -31,7 +31,7 @@
 #define AC97_HEADPHONE         0x04    /* Headphone Volume (optional) */
 #define AC97_MASTER_MONO       0x06    /* Master Volume Mono (optional) */
 #define AC97_MASTER_TONE       0x08    /* Master Tone (Bass & Treble) (optional) */
-#define AC97_PC_BEEP           0x0a    /* PC Beep Volume (optinal) */
+#define AC97_PC_BEEP           0x0a    /* PC Beep Volume (optional) */
 #define AC97_PHONE             0x0c    /* Phone Volume (optional) */
 #define AC97_MIC               0x0e    /* MIC Volume */
 #define AC97_LINE              0x10    /* Line In Volume */
index b8adf05..7dd8f34 100644 (file)
@@ -368,7 +368,7 @@ TRACE_EVENT(xen_mmu_flush_tlb,
            TP_printk("%s", "")
        );
 
-TRACE_EVENT(xen_mmu_flush_tlb_single,
+TRACE_EVENT(xen_mmu_flush_tlb_one_user,
            TP_PROTO(unsigned long addr),
            TP_ARGS(addr),
            TP_STRUCT__entry(
index db6bdc3..2a66769 100644 (file)
@@ -800,6 +800,7 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
 #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
+#define BPF_F_SEQ_NUMBER               (1ULL << 3)
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
index 2b642bf..232df14 100644 (file)
@@ -23,7 +23,7 @@ struct fib_rule_hdr {
        __u8            tos;
 
        __u8            table;
-       __u8            res1;   /* reserved */
+       __u8            res1;   /* reserved */
        __u8            res2;   /* reserved */
        __u8            action;
 
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
        __u32           end;
 };
 
+struct fib_rule_port_range {
+       __u16           start;
+       __u16           end;
+};
+
 enum {
        FRA_UNSPEC,
        FRA_DST,        /* destination address */
@@ -58,6 +63,10 @@ enum {
        FRA_PAD,
        FRA_L3MDEV,     /* iif or oif is l3mdev goto its table */
        FRA_UID_RANGE,  /* UID range */
+       FRA_PROTOCOL,   /* Originator of the rule */
+       FRA_IP_PROTO,   /* ip proto */
+       FRA_SPORT_RANGE, /* sport */
+       FRA_DPORT_RANGE, /* dport */
        __FRA_MAX
 };
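The kernel side will want a sanity check on a decoded range; a sketch of the likely shape (the exact constraints are an assumption here):

	static bool demo_port_range_valid(const struct fib_rule_port_range *r)
	{
		/* assumed constraints: non-zero bounds, start not past end */
		return r->start != 0 && r->end != 0 && r->start <= r->end;
	}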
 
index f8cb576..2e4a6c1 100644 (file)
@@ -23,7 +23,6 @@
 #define _UAPI_LINUX_IF_ETHER_H
 
 #include <linux/types.h>
-#include <linux/libc-compat.h>
 
 /*
  *     IEEE 802.3 Ethernet magic constants.  The frame sizes omit the preamble
@@ -89,6 +88,7 @@
 #define ETH_P_AOE      0x88A2          /* ATA over Ethernet            */
 #define ETH_P_8021AD   0x88A8          /* 802.1ad Service VLAN         */
 #define ETH_P_802_EX1  0x88B5          /* 802.1 Local Experimental 1.  */
+#define ETH_P_PREAUTH  0x88C7          /* 802.11 Preauthentication */
 #define ETH_P_TIPC     0x88CA          /* TIPC                         */
 #define ETH_P_MACSEC   0x88E5          /* 802.1ae MACsec */
 #define ETH_P_8021AH   0x88E7          /* 802.1ah Backbone Service Tag */
  *     This is an Ethernet frame header.
  */
 
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR              1
+#endif
+
 #if __UAPI_DEF_ETHHDR
 struct ethhdr {
        unsigned char   h_dest[ETH_ALEN];       /* destination eth addr */
index 6d94477..11d0c0e 100644 (file)
@@ -941,4 +941,22 @@ enum {
        IFLA_EVENT_BONDING_OPTIONS,     /* change in bonding options */
 };
 
+/* tun section */
+
+enum {
+       IFLA_TUN_UNSPEC,
+       IFLA_TUN_OWNER,
+       IFLA_TUN_GROUP,
+       IFLA_TUN_TYPE,
+       IFLA_TUN_PI,
+       IFLA_TUN_VNET_HDR,
+       IFLA_TUN_PERSIST,
+       IFLA_TUN_MULTI_QUEUE,
+       IFLA_TUN_NUM_QUEUES,
+       IFLA_TUN_NUM_DISABLED_QUEUES,
+       __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
index fc29efa..8254c93 100644 (file)
 
 #endif /* __GLIBC__ */
 
-/* Definitions for if_ether.h */
-/* allow libcs like musl to deactivate this, glibc does not implement this. */
-#ifndef __UAPI_DEF_ETHHDR
-#define __UAPI_DEF_ETHHDR              1
-#endif
-
 #endif /* _UAPI_LIBC_COMPAT_H */
index c587a61..c13c843 100644 (file)
  *
  * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
  *
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ *     drivers that do not define separate commands for authentication and
+ *     association, but rely on user space for the authentication to happen.
+ *     This interface acts both as the event request (driver to user space)
+ *     to trigger the authentication and command response (userspace to
+ *     driver) to indicate the authentication status.
+ *
+ *     User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ *     trigger a connection. The host driver selects a BSS and further uses
+ *     this interface to offload only the authentication part to the user
+ *     space. Authentication frames are passed between the driver and user
+ *     space through the %NL80211_CMD_FRAME interface. The host driver
+ *     proceeds with the association once authentication succeeds. User
+ *     space indicates the authentication status through the
+ *     %NL80211_ATTR_STATUS_CODE attribute in the %NL80211_CMD_EXTERNAL_AUTH
+ *     command interface.
+ *
+ *     The host driver reports an authentication failure to user space
+ *     through the connect result, since user space initiated the
+ *     connection with the connect request.
+ *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notifies a station's
+ *     HT or VHT opmode change, using any of the &NL80211_ATTR_SMPS_MODE,
+ *     &NL80211_ATTR_CHANNEL_WIDTH, &NL80211_ATTR_NSS attributes along
+ *     with its address (specified in &NL80211_ATTR_MAC).
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1198,6 +1224,10 @@ enum nl80211_commands {
 
        NL80211_CMD_RELOAD_REGDB,
 
+       NL80211_CMD_EXTERNAL_AUTH,
+
+       NL80211_CMD_STA_OPMODE_CHANGED,
+
        /* add new commands above here */
 
        /* used to define NL80211_CMD_MAX below */
@@ -2153,6 +2183,19 @@ enum nl80211_commands {
  * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
  * @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
  *
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ *     authentication operation (u32 attribute with an
+ *     &enum nl80211_external_auth_action value). This is used with the
+ *     &NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ *     space supports external authentication. This attribute shall be used
+ *     only with %NL80211_CMD_CONNECT request. The driver may offload
+ *     authentication processing to user space if this capability is indicated
+ *     in NL80211_CMD_CONNECT requests from the user space.
+ *
+ * @NL80211_ATTR_NSS: Station's new/updated RX_NSS value, notified using this
+ *     u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2579,6 +2622,12 @@ enum nl80211_attrs {
        NL80211_ATTR_PMKR0_NAME,
        NL80211_ATTR_PORT_AUTHORIZED,
 
+       NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+       NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
+       NL80211_ATTR_NSS,
+       NL80211_ATTR_ACK_SIGNAL,
+
        /* add attributes here, update the policy in nl80211.c */
 
        __NL80211_ATTR_AFTER_LAST,
@@ -2899,6 +2948,7 @@ enum nl80211_sta_bss_param {
  * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
  *     received from the station (u64, usec)
  * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame (u8, dBm)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2937,6 +2987,7 @@ enum nl80211_sta_info {
        NL80211_STA_INFO_TID_STATS,
        NL80211_STA_INFO_RX_DURATION,
        NL80211_STA_INFO_PAD,
+       NL80211_STA_INFO_ACK_SIGNAL,
 
        /* keep last */
        __NL80211_STA_INFO_AFTER_LAST,
@@ -4945,6 +4996,9 @@ enum nl80211_feature_flags {
  *     probe request tx deferral and suppression
  * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
  *     value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4972,6 +5026,9 @@ enum nl80211_ext_feature_index {
        NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
        NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
        NL80211_EXT_FEATURE_MFP_OPTIONAL,
+       NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+       NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+       NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
 
        /* add new features before the definition below */
        NUM_NL80211_EXT_FEATURES,
@@ -5032,6 +5089,10 @@ enum nl80211_timeout_reason {
  * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
  * requests.
  *
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are mutually exclusive: only one
+ * of them may be used in a request.
+ *
  * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
  * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
  * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
@@ -5059,7 +5120,20 @@ enum nl80211_timeout_reason {
  *     and suppression (if it has received a broadcast Probe Response frame,
  *     Beacon frame or FILS Discovery frame from an AP that the STA considers
  *     a suitable candidate for (re-)association - suitable in terms of
- *     SSID and/or RSSI
+ *     SSID and/or RSSI.
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span is the total time taken to complete the
+ *     scan. This flag asks the driver to keep the scan's span/duration as
+ *     short as it can; how that is accomplished is driver specific. Scan
+ *     accuracy may suffer with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag asks the driver to keep the scan's
+ *     power consumption as low as it can, by driver-specific means. Scan
+ *     accuracy may suffer with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy refers to the completeness of
+ *     the scan results; this flag asks the driver to use the best possible
+ *     scan configuration to maximize them. Latency and power use may suffer
+ *     with this flag.
  */
 enum nl80211_scan_flags {
        NL80211_SCAN_FLAG_LOW_PRIORITY                          = 1<<0,
@@ -5070,6 +5144,9 @@ enum nl80211_scan_flags {
        NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP               = 1<<5,
        NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE            = 1<<6,
        NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION    = 1<<7,
+       NL80211_SCAN_FLAG_LOW_SPAN                              = 1<<8,
+       NL80211_SCAN_FLAG_LOW_POWER                             = 1<<9,
+       NL80211_SCAN_FLAG_HIGH_ACCURACY                         = 1<<10,
 };
 
 /**
@@ -5469,4 +5546,15 @@ enum nl80211_nan_match_attributes {
        NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
 };
 
+/**
+ * nl80211_external_auth_action - Action to perform with external
+ *     authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+       NL80211_EXTERNAL_AUTH_START,
+       NL80211_EXTERNAL_AUTH_ABORT,
+};
+
 #endif /* __LINUX_NL80211_H */
index 46c5066..7cafb26 100644 (file)
@@ -555,7 +555,8 @@ enum {
 #define        TCF_EM_VLAN             6
 #define        TCF_EM_CANID            7
 #define        TCF_EM_IPSET            8
-#define        TCF_EM_MAX              8
+#define        TCF_EM_IPT              9
+#define        TCF_EM_MAX              9
 
 enum {
        TCF_EM_PROG_TC
index e46d82b..d5a1b8a 100644 (file)
@@ -69,8 +69,8 @@ struct ptrace_peeksiginfo_args {
 #define PTRACE_SECCOMP_GET_METADATA    0x420d
 
 struct seccomp_metadata {
-       unsigned long filter_off;       /* Input: which filter */
-       unsigned int flags;             /* Output: filter's flags */
+       __u64 filter_off;       /* Input: which filter */
+       __u64 flags;            /* Output: filter's flags */
 };
 
 /* Read signals from a shared (process wide) queue */
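Switching the struct to __u64 fields keeps the ABI identical for 32- and 64-bit tracers. From userspace the request passes the struct size in the addr argument; a hedged sketch:

	struct seccomp_metadata md = { .filter_off = 0 };	/* first filter */

	if (ptrace(PTRACE_SECCOMP_GET_METADATA, pid,
		   sizeof(md), &md) == -1)
		perror("PTRACE_SECCOMP_GET_METADATA");
	/* md.flags now holds the filter's flags */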
index e71d449..a66b213 100644 (file)
 #define RDS_CMSG_MASKED_ATOMIC_FADD    8
 #define RDS_CMSG_MASKED_ATOMIC_CSWP    9
 #define RDS_CMSG_RXPATH_LATENCY                11
+#define        RDS_CMSG_ZCOPY_COOKIE           12
+#define        RDS_CMSG_ZCOPY_COMPLETION       13
 
 #define RDS_INFO_FIRST                 10000
 #define RDS_INFO_COUNTERS              10000
@@ -316,6 +318,12 @@ struct rds_rdma_notify {
 #define RDS_RDMA_DROPPED       3
 #define RDS_RDMA_OTHER_ERROR   4
 
+#define        RDS_MAX_ZCOOKIES        8
+struct rds_zcopy_cookies {
+       __u32 num;
+       __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
 /*
  * Common set of flags for all RDMA related structs
  */
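Assuming the zerocopy completions are delivered as a control message on recvmsg() (the delivery path is not shown in this excerpt), a receiver would walk the cmsgs roughly like this:

	struct cmsghdr *c;

	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
		if (c->cmsg_level == SOL_RDS &&
		    c->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
			struct rds_zcopy_cookies *z = (void *)CMSG_DATA(c);
			int i;

			for (i = 0; i < z->num && i < RDS_MAX_ZCOOKIES; i++)
				handle_cookie(z->cookies[i]);	/* hypothetical */
		}
	}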
diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h
new file mode 100644 (file)
index 0000000..49a6553
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_TC_EM_IPT_H
+#define __LINUX_TC_EM_IPT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+enum {
+       TCA_EM_IPT_UNSPEC,
+       TCA_EM_IPT_HOOK,
+       TCA_EM_IPT_MATCH_NAME,
+       TCA_EM_IPT_MATCH_REVISION,
+       TCA_EM_IPT_NFPROTO,
+       TCA_EM_IPT_MATCH_DATA,
+       __TCA_EM_IPT_MAX
+};
+
+#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
+
+#endif
index 03557b5..46de088 100644 (file)
@@ -65,7 +65,7 @@ struct ib_uverbs_attr {
        __u16 len;              /* only for pointers */
        __u16 flags;            /* combination of UVERBS_ATTR_F_XXXX */
        __u16 reserved;
-       __u64 data;             /* ptr to command, inline data or idr/fd */
+       __aligned_u64 data;     /* ptr to command, inline data or idr/fd */
 };
 
 struct ib_uverbs_ioctl_hdr {
@@ -73,7 +73,7 @@ struct ib_uverbs_ioctl_hdr {
        __u16 object_id;
        __u16 method_id;
        __u16 num_attrs;
-       __u64 reserved;
+       __aligned_u64 reserved;
        struct ib_uverbs_attr  attrs[0];
 };
 
index 227db99..5e49b61 100644 (file)
@@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = {
        .exit = audit_net_exit,
        .id = &audit_net_id,
        .size = sizeof(struct audit_net),
+       .async = true,
 };
 
 /* Initialize audit support at boot time. */
index b1f6648..14750e7 100644 (file)
@@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array)
 {
        int i;
 
-       for (i = 0; i < array->map.max_entries; i++)
+       for (i = 0; i < array->map.max_entries; i++) {
                free_percpu(array->pptrs[i]);
+               cond_resched();
+       }
 }
 
 static int bpf_array_alloc_percpu(struct bpf_array *array)
@@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
                        return -ENOMEM;
                }
                array->pptrs[i] = ptr;
+               cond_resched();
        }
 
        return 0;
@@ -73,11 +76,11 @@ static int array_map_alloc_check(union bpf_attr *attr)
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
-       int numa_node = bpf_map_attr_numa_node(attr);
+       int ret, numa_node = bpf_map_attr_numa_node(attr);
        u32 elem_size, index_mask, max_entries;
        bool unpriv = !capable(CAP_SYS_ADMIN);
+       u64 cost, array_size, mask64;
        struct bpf_array *array;
-       u64 array_size, mask64;
 
        elem_size = round_up(attr->value_size, 8);
 
@@ -109,8 +112,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
                array_size += (u64) max_entries * elem_size;
 
        /* make sure there is no u32 overflow later in round_up() */
-       if (array_size >= U32_MAX - PAGE_SIZE)
+       cost = array_size;
+       if (cost >= U32_MAX - PAGE_SIZE)
                return ERR_PTR(-ENOMEM);
+       if (percpu) {
+               cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
+               if (cost >= U32_MAX - PAGE_SIZE)
+                       return ERR_PTR(-ENOMEM);
+       }
+       cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+       ret = bpf_map_precharge_memlock(cost);
+       if (ret < 0)
+               return ERR_PTR(ret);
 
        /* allocate all map elements and zero-initialize them */
        array = bpf_map_area_alloc(array_size, numa_node);
@@ -121,20 +135,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
        /* copy mandatory map attributes */
        bpf_map_init_from_attr(&array->map, attr);
+       array->map.pages = cost;
        array->elem_size = elem_size;
 
-       if (!percpu)
-               goto out;
-
-       array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
-
-       if (array_size >= U32_MAX - PAGE_SIZE ||
-           bpf_array_alloc_percpu(array)) {
+       if (percpu && bpf_array_alloc_percpu(array)) {
                bpf_map_area_free(array);
                return ERR_PTR(-ENOMEM);
        }
-out:
-       array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
 
        return &array->map;
 }
index 29ca920..d315b39 100644 (file)
@@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
         * so always copy 'cnt' prog_ids to the user.
         * In a rare race the user will see zero prog_ids
         */
-       ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+       ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
        if (!ids)
                return -ENOMEM;
        rcu_read_lock();
index fbfdada..a4bb0b3 100644 (file)
@@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data)
 static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
                                                       int map_id)
 {
-       gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+       gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
        struct bpf_cpu_map_entry *rcpu;
        int numa, err;
 
index 7b469d1..b4b5b81 100644 (file)
@@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map)
        struct lpm_trie_node __rcu **slot;
        struct lpm_trie_node *node;
 
-       raw_spin_lock(&trie->lock);
+       /* Wait for outstanding programs to complete
+        * update/lookup/delete/get_next_key and free the trie.
+        */
+       synchronize_rcu();
 
        /* Always start at the root and walk down to a node that has no
         * children. Then free that node, nullify its reference in the parent
@@ -566,10 +569,9 @@ static void trie_free(struct bpf_map *map)
                slot = &trie->root;
 
                for (;;) {
-                       node = rcu_dereference_protected(*slot,
-                                       lockdep_is_held(&trie->lock));
+                       node = rcu_dereference_protected(*slot, 1);
                        if (!node)
-                               goto unlock;
+                               goto out;
 
                        if (rcu_access_pointer(node->child[0])) {
                                slot = &node->child[0];
@@ -587,8 +589,8 @@ static void trie_free(struct bpf_map *map)
                }
        }
 
-unlock:
-       raw_spin_unlock(&trie->lock);
+out:
+       kfree(trie);
 }
 
 static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
index 48c3341..a927e89 100644 (file)
@@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 {
        struct bpf_stab *stab;
-       int err = -EINVAL;
        u64 cost;
+       int err;
 
        if (!capable(CAP_NET_ADMIN))
                return ERR_PTR(-EPERM);
@@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 
        /* make sure page count doesn't overflow */
        cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+       err = -EINVAL;
        if (cost >= U32_MAX - PAGE_SIZE)
                goto free_stab;
 
index 5fb69a8..3c74b16 100644 (file)
@@ -508,10 +508,6 @@ err:
 static const int caller_saved[CALLER_SAVED_REGS] = {
        BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
-       BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
 
 static void __mark_reg_not_init(struct bpf_reg_state *reg);
 
index be8aa5b..e5d9d40 100644 (file)
@@ -592,7 +592,7 @@ static void check_mm(struct mm_struct *mm)
  * is dropped: either by a lazy thread or by
  * mmput. Free the page directory and the mm.
  */
-static void __mmdrop(struct mm_struct *mm)
+void __mmdrop(struct mm_struct *mm)
 {
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
@@ -603,18 +603,7 @@ static void __mmdrop(struct mm_struct *mm)
        put_user_ns(mm->user_ns);
        free_mm(mm);
 }
-
-void mmdrop(struct mm_struct *mm)
-{
-       /*
-        * The implicit full barrier implied by atomic_dec_and_test() is
-        * required by the membarrier system call before returning to
-        * user-space, after storing to rq->curr.
-        */
-       if (unlikely(atomic_dec_and_test(&mm->mm_count)))
-               __mmdrop(mm);
-}
-EXPORT_SYMBOL_GPL(mmdrop);
+EXPORT_SYMBOL_GPL(__mmdrop);
 
 static void mmdrop_async_fn(struct work_struct *work)
 {
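The counterpart (presumably in <linux/sched/mm.h>, outside this excerpt) turns mmdrop() into a static inline so the common no-op path avoids a function call, which is why only the slow path __mmdrop() is exported above:

	static inline void mmdrop(struct mm_struct *mm)
	{
		/* the full barrier of atomic_dec_and_test() is still what
		 * membarrier relies on after storing to rq->curr */
		if (unlikely(atomic_dec_and_test(&mm->mm_count)))
			__mmdrop(mm);
	}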
index e6a9c36..82b8b18 100644 (file)
@@ -1726,25 +1726,14 @@ static int irq_domain_debug_show(struct seq_file *m, void *p)
        irq_domain_debug_show_one(m, d, 0);
        return 0;
 }
-
-static int irq_domain_debug_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, irq_domain_debug_show, inode->i_private);
-}
-
-static const struct file_operations dfs_domain_ops = {
-       .open           = irq_domain_debug_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
 
 static void debugfs_add_domain_dir(struct irq_domain *d)
 {
        if (!d->name || !domain_dir || d->debugfs_file)
                return;
        d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
-                                             &dfs_domain_ops);
+                                             &irq_domain_debug_fops);
 }
 
 static void debugfs_remove_domain_dir(struct irq_domain *d)
@@ -1760,7 +1749,8 @@ void __init irq_domain_debugfs_init(struct dentry *root)
        if (!domain_dir)
                return;
 
-       debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+       debugfs_create_file("default", 0444, domain_dir, NULL,
+                           &irq_domain_debug_fops);
        mutex_lock(&irq_domain_mutex);
        list_for_each_entry(d, &irq_domain_list, link)
                debugfs_add_domain_dir(d);
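DEFINE_SHOW_ATTRIBUTE(irq_domain_debug) expects the irq_domain_debug_show() defined above and generates the open helper plus irq_domain_debug_fops, i.e. roughly the boilerplate this hunk deletes:

	/* shape of what the macro expands to (from seq_file.h): */
	static int irq_domain_debug_open(struct inode *inode, struct file *file)
	{
		return single_open(file, irq_domain_debug_show, inode->i_private);
	}
	/* ...plus a const struct file_operations irq_domain_debug_fops
	 * wiring .open/.read/.llseek/.release to the seq_file helpers. */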
index da2ccf1..102160f 100644 (file)
@@ -978,67 +978,90 @@ static int prepare_kprobe(struct kprobe *p)
 }
 
 /* Caller must lock kprobe_mutex */
-static void arm_kprobe_ftrace(struct kprobe *p)
+static int arm_kprobe_ftrace(struct kprobe *p)
 {
-       int ret;
+       int ret = 0;
 
        ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
                                   (unsigned long)p->addr, 0, 0);
-       WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
-       kprobe_ftrace_enabled++;
-       if (kprobe_ftrace_enabled == 1) {
+       if (ret) {
+               pr_debug("Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+               return ret;
+       }
+
+       if (kprobe_ftrace_enabled == 0) {
                ret = register_ftrace_function(&kprobe_ftrace_ops);
-               WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+               if (ret) {
+                       pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
+                       goto err_ftrace;
+               }
        }
+
+       kprobe_ftrace_enabled++;
+       return ret;
+
+err_ftrace:
+       /*
+        * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a
+        * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
+        * empty filter_hash which would undesirably trace all functions.
+        */
+       ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
+       return ret;
 }
 
 /* Caller must lock kprobe_mutex */
-static void disarm_kprobe_ftrace(struct kprobe *p)
+static int disarm_kprobe_ftrace(struct kprobe *p)
 {
-       int ret;
+       int ret = 0;
 
-       kprobe_ftrace_enabled--;
-       if (kprobe_ftrace_enabled == 0) {
+       if (kprobe_ftrace_enabled == 1) {
                ret = unregister_ftrace_function(&kprobe_ftrace_ops);
-               WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+               if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
+                       return ret;
        }
+
+       kprobe_ftrace_enabled--;
+
        ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
                           (unsigned long)p->addr, 1, 0);
        WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+       return ret;
 }
 #else  /* !CONFIG_KPROBES_ON_FTRACE */
 #define prepare_kprobe(p)      arch_prepare_kprobe(p)
-#define arm_kprobe_ftrace(p)   do {} while (0)
-#define disarm_kprobe_ftrace(p)        do {} while (0)
+#define arm_kprobe_ftrace(p)   (-ENODEV)
+#define disarm_kprobe_ftrace(p)        (-ENODEV)
 #endif
 
 /* Arm a kprobe with text_mutex */
-static void arm_kprobe(struct kprobe *kp)
+static int arm_kprobe(struct kprobe *kp)
 {
-       if (unlikely(kprobe_ftrace(kp))) {
-               arm_kprobe_ftrace(kp);
-               return;
-       }
+       if (unlikely(kprobe_ftrace(kp)))
+               return arm_kprobe_ftrace(kp);
+
        cpus_read_lock();
        mutex_lock(&text_mutex);
        __arm_kprobe(kp);
        mutex_unlock(&text_mutex);
        cpus_read_unlock();
+
+       return 0;
 }
 
 /* Disarm a kprobe with text_mutex */
-static void disarm_kprobe(struct kprobe *kp, bool reopt)
+static int disarm_kprobe(struct kprobe *kp, bool reopt)
 {
-       if (unlikely(kprobe_ftrace(kp))) {
-               disarm_kprobe_ftrace(kp);
-               return;
-       }
+       if (unlikely(kprobe_ftrace(kp)))
+               return disarm_kprobe_ftrace(kp);
 
        cpus_read_lock();
        mutex_lock(&text_mutex);
        __disarm_kprobe(kp, reopt);
        mutex_unlock(&text_mutex);
        cpus_read_unlock();
+
+       return 0;
 }
 
 /*
@@ -1362,9 +1385,15 @@ out:
 
        if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
                ap->flags &= ~KPROBE_FLAG_DISABLED;
-               if (!kprobes_all_disarmed)
+               if (!kprobes_all_disarmed) {
                        /* Arm the breakpoint again. */
-                       arm_kprobe(ap);
+                       ret = arm_kprobe(ap);
+                       if (ret) {
+                               ap->flags |= KPROBE_FLAG_DISABLED;
+                               list_del_rcu(&p->list);
+                               synchronize_sched();
+                       }
+               }
        }
        return ret;
 }
@@ -1573,8 +1602,14 @@ int register_kprobe(struct kprobe *p)
        hlist_add_head_rcu(&p->hlist,
                       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
-       if (!kprobes_all_disarmed && !kprobe_disabled(p))
-               arm_kprobe(p);
+       if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
+               ret = arm_kprobe(p);
+               if (ret) {
+                       hlist_del_rcu(&p->hlist);
+                       synchronize_sched();
+                       goto out;
+               }
+       }
 
        /* Try to optimize kprobe */
        try_to_optimize_kprobe(p);
@@ -1608,11 +1643,12 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
 static struct kprobe *__disable_kprobe(struct kprobe *p)
 {
        struct kprobe *orig_p;
+       int ret;
 
        /* Get an original kprobe for return */
        orig_p = __get_valid_kprobe(p);
        if (unlikely(orig_p == NULL))
-               return NULL;
+               return ERR_PTR(-EINVAL);
 
        if (!kprobe_disabled(p)) {
                /* Disable probe if it is a child probe */
@@ -1626,8 +1662,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
                         * should have already been disarmed, so
                         * skip the unneeded disarming process.
                         */
-                       if (!kprobes_all_disarmed)
-                               disarm_kprobe(orig_p, true);
+                       if (!kprobes_all_disarmed) {
+                               ret = disarm_kprobe(orig_p, true);
+                               if (ret) {
+                                       p->flags &= ~KPROBE_FLAG_DISABLED;
+                                       return ERR_PTR(ret);
+                               }
+                       }
                        orig_p->flags |= KPROBE_FLAG_DISABLED;
                }
        }
@@ -1644,8 +1685,8 @@ static int __unregister_kprobe_top(struct kprobe *p)
 
        /* Disable kprobe. This will disarm it if needed. */
        ap = __disable_kprobe(p);
-       if (ap == NULL)
-               return -EINVAL;
+       if (IS_ERR(ap))
+               return PTR_ERR(ap);
 
        if (ap == p)
                /*
@@ -2078,12 +2119,14 @@ static void kill_kprobe(struct kprobe *p)
 int disable_kprobe(struct kprobe *kp)
 {
        int ret = 0;
+       struct kprobe *p;
 
        mutex_lock(&kprobe_mutex);
 
        /* Disable this kprobe */
-       if (__disable_kprobe(kp) == NULL)
-               ret = -EINVAL;
+       p = __disable_kprobe(kp);
+       if (IS_ERR(p))
+               ret = PTR_ERR(p);
 
        mutex_unlock(&kprobe_mutex);
        return ret;
@@ -2116,7 +2159,9 @@ int enable_kprobe(struct kprobe *kp)
 
        if (!kprobes_all_disarmed && kprobe_disabled(p)) {
                p->flags &= ~KPROBE_FLAG_DISABLED;
-               arm_kprobe(p);
+               ret = arm_kprobe(p);
+               if (ret)
+                       p->flags |= KPROBE_FLAG_DISABLED;
        }
 out:
        mutex_unlock(&kprobe_mutex);
@@ -2407,11 +2452,12 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
        .release        = seq_release,
 };
 
-static void arm_all_kprobes(void)
+static int arm_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
-       unsigned int i;
+       unsigned int i, total = 0, errors = 0;
+       int err, ret = 0;
 
        mutex_lock(&kprobe_mutex);
 
@@ -2428,46 +2474,74 @@ static void arm_all_kprobes(void)
        /* Arming kprobes doesn't optimize kprobe itself */
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
-               hlist_for_each_entry_rcu(p, head, hlist)
-                       if (!kprobe_disabled(p))
-                               arm_kprobe(p);
+               /* Arm all kprobes on a best-effort basis */
+               hlist_for_each_entry_rcu(p, head, hlist) {
+                       if (!kprobe_disabled(p)) {
+                               err = arm_kprobe(p);
+                               if (err)  {
+                                       errors++;
+                                       ret = err;
+                               }
+                               total++;
+                       }
+               }
        }
 
-       printk(KERN_INFO "Kprobes globally enabled\n");
+       if (errors)
+               pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
+                       errors, total);
+       else
+               pr_info("Kprobes globally enabled\n");
 
 already_enabled:
        mutex_unlock(&kprobe_mutex);
-       return;
+       return ret;
 }
 
-static void disarm_all_kprobes(void)
+static int disarm_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
-       unsigned int i;
+       unsigned int i, total = 0, errors = 0;
+       int err, ret = 0;
 
        mutex_lock(&kprobe_mutex);
 
        /* If kprobes are already disarmed, just return */
        if (kprobes_all_disarmed) {
                mutex_unlock(&kprobe_mutex);
-               return;
+               return 0;
        }
 
        kprobes_all_disarmed = true;
-       printk(KERN_INFO "Kprobes globally disabled\n");
 
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
+               /* Disarm all kprobes on a best-effort basis */
                hlist_for_each_entry_rcu(p, head, hlist) {
-                       if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-                               disarm_kprobe(p, false);
+                       if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
+                               err = disarm_kprobe(p, false);
+                               if (err) {
+                                       errors++;
+                                       ret = err;
+                               }
+                               total++;
+                       }
                }
        }
+
+       if (errors)
+               pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
+                       errors, total);
+       else
+               pr_info("Kprobes globally disabled\n");
+
        mutex_unlock(&kprobe_mutex);
 
        /* Wait for disarming all kprobes by optimizer */
        wait_for_kprobe_optimizer();
+
+       return ret;
 }
 
 /*
@@ -2494,6 +2568,7 @@ static ssize_t write_enabled_file_bool(struct file *file,
 {
        char buf[32];
        size_t buf_size;
+       int ret = 0;
 
        buf_size = min(count, (sizeof(buf)-1));
        if (copy_from_user(buf, user_buf, buf_size))
@@ -2504,17 +2579,20 @@ static ssize_t write_enabled_file_bool(struct file *file,
        case 'y':
        case 'Y':
        case '1':
-               arm_all_kprobes();
+               ret = arm_all_kprobes();
                break;
        case 'n':
        case 'N':
        case '0':
-               disarm_all_kprobes();
+               ret = disarm_all_kprobes();
                break;
        default:
                return -EINVAL;
        }
 
+       if (ret)
+               return ret;
+
        return count;
 }
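Since arm/disarm failures now propagate, callers of the public kprobes API see them at registration or enable time; a short usage sketch with a hypothetical handler:

	static int demo_pre(struct kprobe *p, struct pt_regs *regs)
	{
		return 0;	/* let the probed instruction run */
	}

	static struct kprobe kp = {
		.symbol_name = "do_sys_open",	/* example target */
		.pre_handler = demo_pre,
	};

	int ret = register_kprobe(&kp);
	if (ret)	/* now also reports arming failures, not just bad args */
		pr_err("register_kprobe failed: %d\n", ret);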
 
index 38ece03..d880296 100644 (file)
@@ -379,6 +379,14 @@ queue:
        tail = encode_tail(smp_processor_id(), idx);
 
        node += idx;
+
+       /*
+        * Ensure that we increment the head node->count before initialising
+        * the actual node. If the compiler is kind enough to reorder these
+        * stores, then an IRQ could overwrite our assignments.
+        */
+       barrier();
+
        node->locked = 0;
        node->next = NULL;
        pv_init_node(node);
@@ -408,14 +416,15 @@ queue:
         */
        if (old & _Q_TAIL_MASK) {
                prev = decode_tail(old);
+
                /*
-                * The above xchg_tail() is also a load of @lock which
-                * generates, through decode_tail(), a pointer.  The address
-                * dependency matches the RELEASE of xchg_tail() such that
-                * the subsequent access to @prev happens after.
+                * We must ensure that the stores to @node are observed before
+                * the write to prev->next. The address dependency from
+                * xchg_tail is not sufficient to ensure this because the read
+                * component of xchg_tail is unordered with respect to the
+                * initialisation of @node.
                 */
-
-               WRITE_ONCE(prev->next, node);
+               smp_store_release(&prev->next, node);
 
                pv_wait_node(node, prev);
                arch_mcs_spin_lock_contended(&node->locked);
index c302940..c955b10 100644 (file)
@@ -163,7 +163,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan)
 {
        struct rchan_buf *buf;
 
-       if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
+       if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
                return NULL;
 
        buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
index bf724c1..e7c535e 100644 (file)
@@ -2601,19 +2601,31 @@ static inline void finish_task(struct task_struct *prev)
 #endif
 }
 
-static inline void finish_lock_switch(struct rq *rq)
+static inline void
+prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
 {
+       /*
+        * Since the runqueue lock will be released by the next task
+        * (which is an invalid locking operation, but in the case of
+        * the scheduler it's an obvious special case), we do an early
+        * lockdep release here:
+        */
+       rq_unpin_lock(rq, rf);
+       spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 #ifdef CONFIG_DEBUG_SPINLOCK
        /* this is a valid case when another task releases the spinlock */
-       rq->lock.owner = current;
+       rq->lock.owner = next;
 #endif
+}
+
+static inline void finish_lock_switch(struct rq *rq)
+{
        /*
         * If we are tracking spinlock dependencies then we have to
         * fix up the runqueue lock - which gets 'carried over' from
         * prev into current:
         */
        spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
        raw_spin_unlock_irq(&rq->lock);
 }
 
@@ -2844,14 +2856,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 
        rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
 
-       /*
-        * Since the runqueue lock will be released by the next
-        * task (which is an invalid locking op but in the case
-        * of the scheduler it's an obvious special-case), so we
-        * do an early lockdep release here:
-        */
-       rq_unpin_lock(rq, rf);
-       spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+       prepare_lock_switch(rq, next, rf);
 
        /* Here we just switch the register state and the stack. */
        switch_to(prev, next, prev);
index dd062a1..7936f54 100644 (file)
@@ -19,8 +19,6 @@
 
 #include "sched.h"
 
-#define SUGOV_KTHREAD_PRIORITY 50
-
 struct sugov_tunables {
        struct gov_attr_set attr_set;
        unsigned int rate_limit_us;
index 9bb0e0c..9df0978 100644 (file)
@@ -1153,6 +1153,7 @@ static void update_curr_dl(struct rq *rq)
        struct sched_dl_entity *dl_se = &curr->dl;
        u64 delta_exec, scaled_delta_exec;
        int cpu = cpu_of(rq);
+       u64 now;
 
        if (!dl_task(curr) || !on_dl_rq(dl_se))
                return;
@@ -1165,7 +1166,8 @@ static void update_curr_dl(struct rq *rq)
         * natural solution, but the full ramifications of this
         * approach need further study.
         */
-       delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+       now = rq_clock_task(rq);
+       delta_exec = now - curr->se.exec_start;
        if (unlikely((s64)delta_exec <= 0)) {
                if (unlikely(dl_se->dl_yielded))
                        goto throttle;
@@ -1178,7 +1180,7 @@ static void update_curr_dl(struct rq *rq)
        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);
 
-       curr->se.exec_start = rq_clock_task(rq);
+       curr->se.exec_start = now;
        cgroup_account_cputime(curr, delta_exec);
 
        sched_rt_avg_update(rq, delta_exec);
index 663b235..aad4945 100644 (file)
@@ -950,12 +950,13 @@ static void update_curr_rt(struct rq *rq)
 {
        struct task_struct *curr = rq->curr;
        struct sched_rt_entity *rt_se = &curr->rt;
-       u64 now = rq_clock_task(rq);
        u64 delta_exec;
+       u64 now;
 
        if (curr->sched_class != &rt_sched_class)
                return;
 
+       now = rq_clock_task(rq);
        delta_exec = now - curr->se.exec_start;
        if (unlikely((s64)delta_exec <= 0))
                return;
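
Both update_curr_dl() and update_curr_rt() now sample the clock once and reuse the snapshot for the delta and the new exec_start. A distilled sketch of why that matters (read_clock() and struct entity are stand-ins):

	typedef unsigned long long u64;

	struct entity {
		u64 exec_start;
		u64 sum_exec_runtime;
	};

	u64 read_clock(void);	/* stand-in for rq_clock_task() */

	static void update_curr(struct entity *se)
	{
		u64 now = read_clock();	/* one snapshot...                */
		u64 delta = now - se->exec_start;

		if ((long long)delta <= 0)
			return;

		se->sum_exec_runtime += delta;
		se->exec_start = now;	/* ...reused here, so no runtime
					 * is lost or double-counted
					 * between two clock reads       */
	}
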
index 940fa40..dc77548 100644 (file)
@@ -1076,14 +1076,16 @@ long seccomp_get_metadata(struct task_struct *task,
 
        size = min_t(unsigned long, size, sizeof(kmd));
 
-       if (copy_from_user(&kmd, data, size))
+       if (size < sizeof(kmd.filter_off))
+               return -EINVAL;
+
+       if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
                return -EFAULT;
 
        filter = get_nth_filter(task, kmd.filter_off);
        if (IS_ERR(filter))
                return PTR_ERR(filter);
 
-       memset(&kmd, 0, sizeof(kmd));
        if (filter->log)
                kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
 
index fc2838a..c0a9e31 100644 (file)
@@ -872,6 +872,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
                return -EINVAL;
        if (copy_from_user(&query, uquery, sizeof(query)))
                return -EFAULT;
+       if (query.ids_len > BPF_TRACE_MAX_PROGS)
+               return -E2BIG;
 
        mutex_lock(&bpf_event_mutex);
        ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
index 9a20acc..36288d8 100644 (file)
@@ -101,6 +101,7 @@ struct user_struct root_user = {
        .sigpending     = ATOMIC_INIT(0),
        .locked_shm     = 0,
        .uid            = GLOBAL_ROOT_UID,
+       .ratelimit      = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
 };
 
 /*
@@ -191,6 +192,8 @@ struct user_struct *alloc_uid(kuid_t uid)
 
                new->uid = uid;
                atomic_set(&new->__count, 1);
+               ratelimit_state_init(&new->ratelimit, HZ, 100);
+               ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
 
                /*
                 * Before adding this, check whether we raced
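
The per-user ratelimit_state added above follows the stock initialise-then-test pattern. A minimal kernel-style sketch of how such a state is typically consumed (the message is illustrative):

	#include <linux/ratelimit.h>
	#include <linux/printk.h>

	/* allow up to 100 events per second */
	static DEFINE_RATELIMIT_STATE(example_rs, HZ, 100);

	static void noisy_path(void)
	{
		/* __ratelimit() returns true while the caller is within budget */
		if (__ratelimit(&example_rs))
			pr_warn("example: event occurred\n");
	}

RATELIMIT_MSG_ON_RELEASE, as set on the per-user state, defers the "callbacks suppressed" summary until the state is released instead of printing it whenever a new window opens.
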
index 017044c..bb9a519 100644 (file)
@@ -4179,6 +4179,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 }
 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
 
+/**
+ * current_work - retrieve %current task's work struct
+ *
+ * Determine if %current task is a workqueue worker and what it's working on.
+ * Useful for finding out the context in which the %current task is running.
+ *
+ * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
+ */
+struct work_struct *current_work(void)
+{
+       struct worker *worker = current_wq_worker();
+
+       return worker ? worker->current_work : NULL;
+}
+EXPORT_SYMBOL(current_work);
+
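
One plausible use of the new helper (a hedged sketch, not taken from this series): letting teardown code avoid flushing the work item it is currently executing inside, which would deadlock.

	#include <linux/workqueue.h>

	static struct work_struct cleanup_work;	/* illustrative */

	static void teardown(void)
	{
		/* only flush when not running inside cleanup_work itself */
		if (current_work() != &cleanup_work)
			flush_work(&cleanup_work);
	}
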
 /**
  * current_is_workqueue_rescuer - is %current workqueue rescuer?
  *
index 6088408..64155e3 100644 (file)
@@ -1642,6 +1642,7 @@ config DMA_API_DEBUG
 
 menuconfig RUNTIME_TESTING_MENU
        bool "Runtime Testing"
+       def_bool y
 
 if RUNTIME_TESTING_MENU
 
index 40b1f92..c9e8e21 100644 (file)
@@ -84,6 +84,10 @@ again:
        return page_address(page);
 }
 
+/*
+ * NOTE: this function must never look at the dma_addr argument, because we want
+ * to be able to use it as a helper for iommu implementations as well.
+ */
 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
                dma_addr_t dma_addr, unsigned long attrs)
 {
@@ -152,5 +156,6 @@ const struct dma_map_ops dma_direct_ops = {
        .map_sg                 = dma_direct_map_sg,
        .dma_supported          = dma_direct_supported,
        .mapping_error          = dma_direct_mapping_error,
+       .is_phys                = 1,
 };
 EXPORT_SYMBOL(dma_direct_ops);
index c98d77f..99ec5bc 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -431,7 +431,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
                        bitmap = this_cpu_xchg(ida_bitmap, NULL);
                        if (!bitmap)
                                return -EAGAIN;
-                       memset(bitmap, 0, sizeof(*bitmap));
                        bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
                        rcu_assign_pointer(*slot, bitmap);
                }
@@ -464,7 +463,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
                        bitmap = this_cpu_xchg(ida_bitmap, NULL);
                        if (!bitmap)
                                return -EAGAIN;
-                       memset(bitmap, 0, sizeof(*bitmap));
                        __set_bit(bit, bitmap->bitmap);
                        radix_tree_iter_replace(root, &iter, slot, bitmap);
                }
index 9fe6ec8..9539d7a 100644 (file)
@@ -650,6 +650,7 @@ found:
 static struct pernet_operations uevent_net_ops = {
        .init   = uevent_net_init,
        .exit   = uevent_net_exit,
+       .async  = true,
 };
 
 static int __init kobject_uevent_init(void)
index 0a7ae32..8e00138 100644 (file)
@@ -2125,7 +2125,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
                preempt_enable();
 
        if (!this_cpu_read(ida_bitmap)) {
-               struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+               struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
                if (!bitmap)
                        return 0;
                if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
index 77ee6ce..d7a708f 100644 (file)
@@ -1849,7 +1849,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 {
        const int default_width = 2 * sizeof(void *);
 
-       if (!ptr && *fmt != 'K') {
+       if (!ptr && *fmt != 'K' && *fmt != 'x') {
                /*
                 * Print (null) with the same width as a pointer so it makes
                 * tabular output look nice.
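
With the extra check, a NULL handed to %px now prints as a zero address instead of "(null)", keeping raw-pointer output uniform and machine-parseable. Expected behaviour, hedged, on a 64-bit build:

	pr_info("%px\n", NULL);	/* now prints 0000000000000000 */
	pr_info("%p\n", NULL);	/* still prints (null)         */
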
index 4b80cce..8291b75 100644 (file)
@@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags)
                return 0;
        }
 
-       arch_unmap_kpfn(pfn);
-
        orig_head = hpage = compound_head(p);
        num_poisoned_pages_inc();
 
index dd8de96..5fcfc24 100644 (file)
@@ -80,7 +80,7 @@
 
 #include "internal.h"
 
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
index 7939820..74e5a65 100644 (file)
@@ -64,6 +64,12 @@ void clear_page_mlock(struct page *page)
        mod_zone_page_state(page_zone(page), NR_MLOCK,
                            -hpage_nr_pages(page));
        count_vm_event(UNEVICTABLE_PGCLEARED);
+       /*
+        * The previous TestClearPageMlocked() corresponds to the smp_mb()
+        * in __pagevec_lru_add_fn().
+        *
+        * See __pagevec_lru_add_fn for more explanation.
+        */
        if (!isolate_lru_page(page)) {
                putback_lru_page(page);
        } else {
index 81e18ce..cb41672 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
 #include <linux/pfn.h>
+#include <xen/xen.h>
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
@@ -347,6 +348,9 @@ static inline bool update_defer_init(pg_data_t *pgdat,
        /* Always populate low zones for address-constrained allocations */
        if (zone_end < pgdat_end_pfn(pgdat))
                return true;
+       /* Xen PV domains need page structures early */
+       if (xen_pv_domain())
+               return true;
        (*nr_initialised)++;
        if ((*nr_initialised > pgdat->static_init_pgcnt) &&
            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
index 567a7b9..0f17330 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -445,30 +445,6 @@ void lru_cache_add(struct page *page)
        __lru_cache_add(page);
 }
 
-/**
- * add_page_to_unevictable_list - add a page to the unevictable list
- * @page:  the page to be added to the unevictable list
- *
- * Add page directly to its zone's unevictable list.  To avoid races with
- * tasks that might be making the page evictable, through eg. munlock,
- * munmap or exit, while it's not on the lru, we want to add the page
- * while it's locked or otherwise "invisible" to other tasks.  This is
- * difficult to do when using the pagevec cache, so bypass that.
- */
-void add_page_to_unevictable_list(struct page *page)
-{
-       struct pglist_data *pgdat = page_pgdat(page);
-       struct lruvec *lruvec;
-
-       spin_lock_irq(&pgdat->lru_lock);
-       lruvec = mem_cgroup_page_lruvec(page, pgdat);
-       ClearPageActive(page);
-       SetPageUnevictable(page);
-       SetPageLRU(page);
-       add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
-       spin_unlock_irq(&pgdat->lru_lock);
-}
-
 /**
  * lru_cache_add_active_or_unevictable
  * @page:  the page to be added to LRU
@@ -484,13 +460,9 @@ void lru_cache_add_active_or_unevictable(struct page *page,
 {
        VM_BUG_ON_PAGE(PageLRU(page), page);
 
-       if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+       if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
                SetPageActive(page);
-               lru_cache_add(page);
-               return;
-       }
-
-       if (!TestSetPageMlocked(page)) {
+       else if (!TestSetPageMlocked(page)) {
                /*
                 * We use the irq-unsafe __mod_zone_page_stat because this
                 * counter is not modified from interrupt context, and the pte
@@ -500,7 +472,7 @@ void lru_cache_add_active_or_unevictable(struct page *page,
                                    hpage_nr_pages(page));
                count_vm_event(UNEVICTABLE_PGMLOCKED);
        }
-       add_page_to_unevictable_list(page);
+       lru_cache_add(page);
 }
 
 /*
@@ -886,15 +858,55 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
 static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
                                 void *arg)
 {
-       int file = page_is_file_cache(page);
-       int active = PageActive(page);
-       enum lru_list lru = page_lru(page);
+       enum lru_list lru;
+       int was_unevictable = TestClearPageUnevictable(page);
 
        VM_BUG_ON_PAGE(PageLRU(page), page);
 
        SetPageLRU(page);
+       /*
+        * Page becomes evictable in two ways:
+        * 1) Within LRU lock [munlock_vma_pages() and __munlock_pagevec()].
+        * 2) Before acquiring LRU lock to put the page to correct LRU and then
+        *   a) do PageLRU check with lock [check_move_unevictable_pages]
+        *   b) do PageLRU check before lock [clear_page_mlock]
+        *
+        * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
+        * following strict ordering:
+        *
+        * #0: __pagevec_lru_add_fn             #1: clear_page_mlock
+        *
+        * SetPageLRU()                         TestClearPageMlocked()
+        * smp_mb() // explicit ordering        // above provides strict
+        *                                      // ordering
+        * PageMlocked()                        PageLRU()
+        *
+        *
+        * If '#1' does not observe the setting of PG_lru by '#0' and fails
+        * isolation, the explicit barrier makes sure that the page_evictable()
+        * check puts the page on the correct LRU. Without smp_mb(), SetPageLRU
+        * can be reordered after the PageMlocked check, making '#1' fail the
+        * isolation of a page whose Mlocked bit was just cleared ('#0' is also
+        * looking at the same page), and the evictable page ends up stranded
+        * on an unevictable LRU.
+        */
+       smp_mb();
+
+       if (page_evictable(page)) {
+               lru = page_lru(page);
+               update_page_reclaim_stat(lruvec, page_is_file_cache(page),
+                                        PageActive(page));
+               if (was_unevictable)
+                       count_vm_event(UNEVICTABLE_PGRESCUED);
+       } else {
+               lru = LRU_UNEVICTABLE;
+               ClearPageActive(page);
+               SetPageUnevictable(page);
+               if (!was_unevictable)
+                       count_vm_event(UNEVICTABLE_PGCULLED);
+       }
+
        add_page_to_lru_list(page, lruvec, lru);
-       update_page_reclaim_stat(lruvec, file, active);
        trace_mm_lru_insertion(page, lru);
 }
 
@@ -913,7 +925,7 @@ EXPORT_SYMBOL(__pagevec_lru_add);
  * @pvec:      Where the resulting entries are placed
  * @mapping:   The address_space to search
  * @start:     The starting entry index
- * @nr_pages:  The maximum number of pages
+ * @nr_entries:	The maximum number of entries
  * @indices:   The cache indices corresponding to the entries in @pvec
  *
  * pagevec_lookup_entries() will search for and return a group of up
index 6739420..ebff729 100644 (file)
@@ -1943,11 +1943,15 @@ void *vmalloc_exec(unsigned long size)
 }
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
-#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
 #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
-#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
 #else
-#define GFP_VMALLOC32 GFP_KERNEL
+/*
+ * 64b systems should always have either DMA or DMA32 zones. For the others,
+ * GFP_DMA32 should do the right thing and use the normal zone.
+ */
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
 #endif
 
 /**
index 4447496..bee5349 100644 (file)
@@ -769,64 +769,7 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  */
 void putback_lru_page(struct page *page)
 {
-       bool is_unevictable;
-       int was_unevictable = PageUnevictable(page);
-
-       VM_BUG_ON_PAGE(PageLRU(page), page);
-
-redo:
-       ClearPageUnevictable(page);
-
-       if (page_evictable(page)) {
-               /*
-                * For evictable pages, we can use the cache.
-                * In event of a race, worst case is we end up with an
-                * unevictable page on [in]active list.
-                * We know how to handle that.
-                */
-               is_unevictable = false;
-               lru_cache_add(page);
-       } else {
-               /*
-                * Put unevictable pages directly on zone's unevictable
-                * list.
-                */
-               is_unevictable = true;
-               add_page_to_unevictable_list(page);
-               /*
-                * When racing with an mlock or AS_UNEVICTABLE clearing
-                * (page is unlocked) make sure that if the other thread
-                * does not observe our setting of PG_lru and fails
-                * isolation/check_move_unevictable_pages,
-                * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
-                * the page back to the evictable list.
-                *
-                * The other side is TestClearPageMlocked() or shmem_lock().
-                */
-               smp_mb();
-       }
-
-       /*
-        * page's status can change while we move it among lru. If an evictable
-        * page is on unevictable list, it never be freed. To avoid that,
-        * check after we added it to the list, again.
-        */
-       if (is_unevictable && page_evictable(page)) {
-               if (!isolate_lru_page(page)) {
-                       put_page(page);
-                       goto redo;
-               }
-               /* This means someone else dropped this page from LRU
-                * So, it will be freed or putback to LRU again. There is
-                * nothing to do here.
-                */
-       }
-
-       if (was_unevictable && !is_unevictable)
-               count_vm_event(UNEVICTABLE_PGRESCUED);
-       else if (!was_unevictable && is_unevictable)
-               count_vm_event(UNEVICTABLE_PGCULLED);
-
+       lru_cache_add(page);
        put_page(page);         /* drop ref from isolate */
 }
 
index f8cb83e..01a771e 100644 (file)
@@ -360,7 +360,7 @@ u64 zpool_get_total_size(struct zpool *zpool)
 
 /**
  * zpool_evictable() - Test if zpool is potentially evictable
- * @pool       The zpool to test
+ * @zpool:     The zpool to test
  *
  * Zpool is only potentially evictable when it's created with struct
  * zpool_ops.evict and its driver implements struct zpool_driver.shrink.
index c004aa4..61a5c41 100644 (file)
@@ -1007,6 +1007,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
        u8 *src, *dst;
        struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
 
+       /* THP isn't supported */
+       if (PageTransHuge(page)) {
+               ret = -EINVAL;
+               goto reject;
+       }
+
        if (!zswap_enabled || !tree) {
                ret = -ENODEV;
                goto reject;
index bad01b1..bd0ed39 100644 (file)
@@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
        .exit = vlan_exit_net,
        .id   = &vlan_net_id,
        .size = sizeof(struct vlan_net),
+       .async = true,
 };
 
 static int __init vlan_proto_init(void)
index f3a4efc..3aa5a93 100644 (file)
@@ -160,7 +160,8 @@ static void req_done(struct virtqueue *vq)
                spin_unlock_irqrestore(&chan->lock, flags);
                /* Wakeup if anyone waiting for VirtIO ring space. */
                wake_up(chan->vc_wq);
-               p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
+               if (len)
+                       p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
        }
 }
 
index 03a9fc0..9b6bc5a 100644 (file)
@@ -1238,7 +1238,7 @@ out:
  * fields into the sockaddr.
  */
 static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
-                        int *uaddr_len, int peer)
+                        int peer)
 {
        struct sockaddr_at sat;
        struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
                if (atalk_autobind(sk) < 0)
                        goto out;
 
-       *uaddr_len = sizeof(struct sockaddr_at);
        memset(&sat, 0, sizeof(sat));
 
        if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
                sat.sat_port        = at->src_port;
        }
 
-       err = 0;
        sat.sat_family = AF_APPLETALK;
        memcpy(uaddr, &sat, sizeof(sat));
+       err = sizeof(struct sockaddr_at);
 
 out:
        release_sock(sk);
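
The AppleTalk hunk above is the first of many in this series converting ->getname() from an int *uaddr_len out-parameter to returning the address length directly, with errors staying negative. A hedged template of the new convention (everything named "example" is hypothetical):

	#include <linux/types.h>
	#include <linux/socket.h>
	#include <linux/errno.h>

	struct sockaddr_example {
		unsigned short family;
		unsigned char  data[14];
	};

	bool example_connected(struct socket *sock);
	void example_fill(struct sockaddr_example *addr, int peer);

	static int example_getname(struct socket *sock, struct sockaddr *uaddr,
				   int peer)
	{
		struct sockaddr_example *addr = (struct sockaddr_example *)uaddr;

		if (!example_connected(sock))
			return -ENOTCONN;	/* failures stay negative */

		example_fill(addr, peer);
		return sizeof(*addr);		/* success: the address length */
	}
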
index e1140b3..2cb10af 100644 (file)
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
 }
 
 static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
-                      int *sockaddr_len, int peer)
+                      int peer)
 {
        struct sockaddr_atmpvc *addr;
        struct atm_vcc *vcc = ATM_SD(sock);
 
        if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
                return -ENOTCONN;
-       *sockaddr_len = sizeof(struct sockaddr_atmpvc);
        addr = (struct sockaddr_atmpvc *)sockaddr;
        memset(addr, 0, sizeof(*addr));
        addr->sap_family = AF_ATMPVC;
        addr->sap_addr.itf = vcc->dev->number;
        addr->sap_addr.vpi = vcc->vpi;
        addr->sap_addr.vci = vcc->vci;
-       return 0;
+       return sizeof(struct sockaddr_atmpvc);
 }
 
 static const struct proto_ops pvc_proto_ops = {
index c458adc..2f91b76 100644 (file)
@@ -419,15 +419,14 @@ out:
 }
 
 static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
-                      int *sockaddr_len, int peer)
+                      int peer)
 {
        struct sockaddr_atmsvc *addr;
 
-       *sockaddr_len = sizeof(struct sockaddr_atmsvc);
        addr = (struct sockaddr_atmsvc *) sockaddr;
        memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
               sizeof(struct sockaddr_atmsvc));
-       return 0;
+       return sizeof(struct sockaddr_atmsvc);
 }
 
 int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
index 47fdd39..c8319ed 100644 (file)
@@ -1388,7 +1388,7 @@ out:
 }
 
 static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
-       int *uaddr_len, int peer)
+       int peer)
 {
        struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
        struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
                        fsa->fsa_digipeater[0] = null_ax25_address;
                }
        }
-       *uaddr_len = sizeof (struct full_sockaddr_ax25);
+       err = sizeof (struct full_sockaddr_ax25);
 
 out:
        release_sock(sk);
index 3394e67..66c0781 100644 (file)
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
                /* Slave connection state and connectable mode bit 38
                 * and scannable bit 21.
                 */
-               if (connectable && (!(hdev->le_states[4] & 0x01) ||
-                                   !(hdev->le_states[2] & 0x40)))
+               if (connectable && (!(hdev->le_states[4] & 0x40) ||
+                                   !(hdev->le_states[2] & 0x20)))
                        return false;
        }
 
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
                /* Master connection state and connectable mode bit 35 and
                 * scannable 19.
                 */
-               if (connectable && (!(hdev->le_states[4] & 0x10) ||
+               if (connectable && (!(hdev->le_states[4] & 0x08) ||
                                    !(hdev->le_states[2] & 0x08)))
                        return false;
        }
index 923e9a2..1506e16 100644 (file)
@@ -1340,7 +1340,7 @@ done:
 }
 
 static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
-                           int *addr_len, int peer)
+                           int peer)
 {
        struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
        struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
                goto done;
        }
 
-       *addr_len = sizeof(*haddr);
        haddr->hci_family = AF_BLUETOOTH;
        haddr->hci_dev    = hdev->id;
        haddr->hci_channel= hci_pi(sk)->channel;
+       err = sizeof(*haddr);
 
 done:
        release_sock(sk);
index 67a8642..686bdc6 100644 (file)
@@ -358,7 +358,7 @@ done:
 }
 
 static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
-                             int *len, int peer)
+                             int peer)
 {
        struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
        struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
 
        memset(la, 0, sizeof(struct sockaddr_l2));
        addr->sa_family = AF_BLUETOOTH;
-       *len = sizeof(struct sockaddr_l2);
 
        la->l2_psm = chan->psm;
 
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
                la->l2_bdaddr_type = chan->src_type;
        }
 
-       return 0;
+       return sizeof(struct sockaddr_l2);
 }
 
 static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
index 1aaccf6..93a3b21 100644 (file)
@@ -533,7 +533,7 @@ done:
        return err;
 }
 
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
 {
        struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
        struct sock *sk = sock->sk;
@@ -552,8 +552,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
        else
                bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
 
-       *len = sizeof(struct sockaddr_rc);
-       return 0;
+       return sizeof(struct sockaddr_rc);
 }
 
 static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
index 08df576..413b8ee 100644 (file)
@@ -680,7 +680,7 @@ done:
 }
 
 static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
-                           int *len, int peer)
+                           int peer)
 {
        struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
        struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
        BT_DBG("sock %p, sk %p", sock, sk);
 
        addr->sa_family = AF_BLUETOOTH;
-       *len = sizeof(struct sockaddr_sco);
 
        if (peer)
                bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
        else
                bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
 
-       return 0;
+       return sizeof(struct sockaddr_sco);
 }
 
 static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
index 6bf06e7..7770481 100644 (file)
@@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net)
 
 static struct pernet_operations br_net_ops = {
        .exit   = br_net_exit,
+       .async  = true,
 };
 
 static const struct stp_proto br_stp_proto = {
index 27f1d4f..484f541 100644 (file)
@@ -967,6 +967,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
        .exit = brnf_exit_net,
        .id   = &brnf_net_id,
        .size = sizeof(struct brnf_net),
+       .async = true,
 };
 
 static struct notifier_block brnf_notifier __read_mostly = {
index 0254c35..126a8ea 100644 (file)
@@ -255,6 +255,9 @@ static ssize_t brport_show(struct kobject *kobj,
        struct brport_attribute *brport_attr = to_brport_attr(attr);
        struct net_bridge_port *p = to_brport(kobj);
 
+       if (!brport_attr->show)
+               return -EINVAL;
+
        return brport_attr->show(p, buf);
 }
 
index 279527f..ce7152a 100644 (file)
@@ -187,17 +187,17 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par)
        expected_length += ebt_mac_wormhash_size(wh_src);
 
        if (em->match_size != EBT_ALIGN(expected_length)) {
-               pr_info("wrong size: %d against expected %d, rounded to %zd\n",
-                       em->match_size, expected_length,
-                       EBT_ALIGN(expected_length));
+               pr_err_ratelimited("wrong size: %d against expected %d, rounded to %zd\n",
+                                  em->match_size, expected_length,
+                                  EBT_ALIGN(expected_length));
                return -EINVAL;
        }
        if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
-               pr_info("dst integrity fail: %x\n", -err);
+               pr_err_ratelimited("dst integrity fail: %x\n", -err);
                return -EINVAL;
        }
        if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
-               pr_info("src integrity fail: %x\n", -err);
+               pr_err_ratelimited("src integrity fail: %x\n", -err);
                return -EINVAL;
        }
        return 0;
index 61a9f1b..165b9d6 100644 (file)
@@ -72,8 +72,8 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
        /* Check for overflow. */
        if (info->burst == 0 ||
            user2credits(info->avg * info->burst) < user2credits(info->avg)) {
-               pr_info("overflow, try lower: %u/%u\n",
-                       info->avg, info->burst);
+               pr_info_ratelimited("overflow, try lower: %u/%u\n",
+                                   info->avg, info->burst);
                return -EINVAL;
        }
 
index ac5e5e3..26730d3 100644 (file)
@@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net)
 static struct pernet_operations canbcm_pernet_ops __read_mostly = {
        .init = canbcm_pernet_init,
        .exit = canbcm_pernet_exit,
+       .async = true,
 };
 
 static int __init bcm_module_init(void)
index f2ecc43..1051eee 100644 (file)
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 }
 
 static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
-                      int *len, int peer)
+                      int peer)
 {
        struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
        struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
        addr->can_family  = AF_CAN;
        addr->can_ifindex = ro->ifindex;
 
-       *len = sizeof(*addr);
-
-       return 0;
+       return sizeof(*addr);
 }
 
 static int raw_setsockopt(struct socket *sock, int level, int optname,
index dda9d7b..40fb3ae 100644 (file)
@@ -2378,12 +2378,15 @@ EXPORT_SYMBOL(netdev_set_num_tc);
 
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
+       bool disabling;
        int rc;
 
+       disabling = txq < dev->real_num_tx_queues;
+
        if (txq < 1 || txq > dev->num_tx_queues)
                return -EINVAL;
 
@@ -2399,15 +2402,19 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
                if (dev->num_tc)
                        netif_setup_tc(dev, txq);
 
-               if (txq < dev->real_num_tx_queues) {
+               dev->real_num_tx_queues = txq;
+
+               if (disabling) {
+                       synchronize_net();
                        qdisc_reset_all_tx_gt(dev, txq);
 #ifdef CONFIG_XPS
                        netif_reset_xps_queues_gt(dev, txq);
 #endif
                }
+       } else {
+               dev->real_num_tx_queues = txq;
        }
 
-       dev->real_num_tx_queues = txq;
        return 0;
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
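
The reordering above makes the shrink path publish the smaller queue count first and only then, after a grace period, reset the now-unused qdiscs, so no in-flight sender can still select a stale queue. The generic shape of the fix (teardown_beyond() is illustrative):

	#include <linux/compiler.h>
	#include <linux/netdevice.h>

	void teardown_beyond(unsigned int new_limit);

	static void shrink_limit(unsigned int *limit, unsigned int new_limit)
	{
		WRITE_ONCE(*limit, new_limit);	/* 1) publish the smaller count */
		synchronize_net();		/* 2) wait out readers that may
						 *    still hold the old count  */
		teardown_beyond(new_limit);	/* 3) only now reset resources
						 *    beyond the new count      */
	}
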
@@ -8134,8 +8141,9 @@ void netdev_run_todo(void)
                BUG_ON(!list_empty(&dev->ptype_specific));
                WARN_ON(rcu_access_pointer(dev->ip_ptr));
                WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
                WARN_ON(dev->dn_ptr);
-
+#endif
                if (dev->priv_destructor)
                        dev->priv_destructor(dev);
                if (dev->needs_free_netdev)
@@ -8833,6 +8841,7 @@ static void __net_exit netdev_exit(struct net *net)
 static struct pernet_operations __net_initdata netdev_net_ops = {
        .init = netdev_init,
        .exit = netdev_exit,
+       .async = true,
 };
 
 static void __net_exit default_device_exit(struct net *net)
@@ -8933,6 +8942,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 static struct pernet_operations __net_initdata default_device_ops = {
        .exit = default_device_exit,
        .exit_batch = default_device_exit_batch,
+       .async = true,
 };
 
 /*
index 18d385e..88e8467 100644 (file)
@@ -2338,6 +2338,32 @@ out:
        resource->size_valid = size_valid;
 }
 
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+                              struct netlink_ext_ack *extack)
+{
+       u64 remainder;
+       int err = 0;
+
+       if (size > resource->size_params->size_max) {
+               NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+               err = -EINVAL;
+       }
+
+       if (size < resource->size_params->size_min) {
+               NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+               err = -EINVAL;
+       }
+
+       div64_u64_rem(size, resource->size_params->size_granularity, &remainder);
+       if (remainder) {
+               NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+               err = -EINVAL;
+       }
+
+       return err;
+}
+
 static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
                                       struct genl_info *info)
 {
@@ -2356,12 +2382,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
        if (!resource)
                return -EINVAL;
 
-       if (!resource->resource_ops->size_validate)
-               return -EINVAL;
-
        size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
-       err = resource->resource_ops->size_validate(devlink, size,
-                                                   info->extack);
+       err = devlink_resource_validate_size(resource, size, info->extack);
        if (err)
                return err;
 
index 0c048bd..5ace070 100644 (file)
@@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net)
 static struct pernet_operations fib_notifier_net_ops = {
        .init = fib_notifier_net_init,
        .exit = fib_notifier_net_exit,
+       .async = true,
 };
 
 static int __init fib_notifier_init(void)
index 98e1066..f6f04fc 100644 (file)
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
        if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
            !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
                return false;
+       if (fib_rule_port_range_set(&rule->sport_range))
+               return false;
+       if (fib_rule_port_range_set(&rule->dport_range))
+               return false;
        return true;
 }
 EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -51,6 +55,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
        r->pref = pref;
        r->table = table;
        r->flags = flags;
+       r->proto = RTPROT_KERNEL;
        r->fr_net = ops->fro_net;
        r->uid_range = fib_kuid_range_unset;
 
@@ -220,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
        return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
 }
 
+static int nla_get_port_range(struct nlattr *pattr,
+                             struct fib_rule_port_range *port_range)
+{
+       const struct fib_rule_port_range *pr = nla_data(pattr);
+
+       if (!fib_rule_port_range_valid(pr))
+               return -EINVAL;
+
+       port_range->start = pr->start;
+       port_range->end = pr->end;
+
+       return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+                             struct fib_rule_port_range *range)
+{
+       return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
                          struct flowi *fl, int flags,
                          struct fib_lookup_arg *arg)
@@ -424,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
                    !uid_eq(r->uid_range.end, rule->uid_range.end))
                        continue;
 
+               if (r->ip_proto != rule->ip_proto)
+                       continue;
+
+               if (!fib_rule_port_range_compare(&r->sport_range,
+                                                &rule->sport_range))
+                       continue;
+
+               if (!fib_rule_port_range_compare(&r->dport_range,
+                                                &rule->dport_range))
+                       continue;
+
                if (!ops->compare(r, frh, tb))
                        continue;
                return 1;
@@ -469,6 +505,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
        rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
                                      : fib_default_rule_pref(ops);
 
+       rule->proto = tb[FRA_PROTOCOL] ?
+               nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
        if (tb[FRA_IIFNAME]) {
                struct net_device *dev;
 
@@ -565,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                rule->uid_range = fib_kuid_range_unset;
        }
 
+       if (tb[FRA_IP_PROTO])
+               rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+       if (tb[FRA_SPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+                                        &rule->sport_range);
+               if (err)
+                       goto errout_free;
+       }
+
+       if (tb[FRA_DPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+                                        &rule->dport_range);
+               if (err)
+                       goto errout_free;
+       }
+
        if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
            rule_exists(ops, frh, tb, rule)) {
                err = -EEXIST;
@@ -630,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
        struct net *net = sock_net(skb->sk);
        struct fib_rule_hdr *frh = nlmsg_data(nlh);
+       struct fib_rule_port_range sprange = {0, 0};
+       struct fib_rule_port_range dprange = {0, 0};
        struct fib_rules_ops *ops = NULL;
        struct fib_rule *rule, *r;
        struct nlattr *tb[FRA_MAX+1];
@@ -663,7 +721,25 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                range = fib_kuid_range_unset;
        }
 
+       if (tb[FRA_SPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+                                        &sprange);
+               if (err)
+                       goto errout;
+       }
+
+       if (tb[FRA_DPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+                                        &dprange);
+               if (err)
+                       goto errout;
+       }
+
        list_for_each_entry(rule, &ops->rules_list, list) {
+               if (tb[FRA_PROTOCOL] &&
+                   (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
+                       continue;
+
                if (frh->action && (frh->action != rule->action))
                        continue;
 
@@ -704,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                     !uid_eq(rule->uid_range.end, range.end)))
                        continue;
 
+               if (tb[FRA_IP_PROTO] &&
+                   (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+                       continue;
+
+               if (fib_rule_port_range_set(&sprange) &&
+                   !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+                       continue;
+
+               if (fib_rule_port_range_set(&dprange) &&
+                   !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+                       continue;
+
                if (!ops->compare(rule, frh, tb))
                        continue;
 
@@ -781,7 +869,11 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
                         + nla_total_size(4) /* FRA_FWMARK */
                         + nla_total_size(4) /* FRA_FWMASK */
                         + nla_total_size_64bit(8) /* FRA_TUN_ID */
-                        + nla_total_size(sizeof(struct fib_kuid_range));
+                        + nla_total_size(sizeof(struct fib_kuid_range))
+                        + nla_total_size(1) /* FRA_PROTOCOL */
+                        + nla_total_size(1) /* FRA_IP_PROTO */
+                        + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+                        + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
 
        if (ops->nlmsg_payload)
                payload += ops->nlmsg_payload(rule);
@@ -812,6 +904,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
        frh->action = rule->action;
        frh->flags = rule->flags;
 
+       if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+               goto nla_put_failure;
+
        if (rule->action == FR_ACT_GOTO &&
            rcu_access_pointer(rule->ctarget) == NULL)
                frh->flags |= FIB_RULE_UNRESOLVED;
@@ -843,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
            (rule->l3mdev &&
             nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
            (uid_range_set(&rule->uid_range) &&
-            nla_put_uid_range(skb, &rule->uid_range)))
+            nla_put_uid_range(skb, &rule->uid_range)) ||
+           (fib_rule_port_range_set(&rule->sport_range) &&
+            nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+           (fib_rule_port_range_set(&rule->dport_range) &&
+            nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+           (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
                goto nla_put_failure;
 
        if (rule->suppress_ifgroup != -1) {
@@ -1030,6 +1130,7 @@ static void __net_exit fib_rules_net_exit(struct net *net)
 static struct pernet_operations fib_rules_net_ops = {
        .init = fib_rules_net_init,
        .exit = fib_rules_net_exit,
+       .async = true,
 };
 
 static int __init fib_rules_init(void)
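
The port-range matching added throughout this file leans on two small predicates: a range counts as set only when both ends are non-zero, and as valid only when it is set and ordered. A hedged reconstruction of those helpers (the real ones live in include/net/fib_rules.h of this series):

	#include <linux/types.h>

	struct fib_rule_port_range {
		__u16 start;
		__u16 end;
	};

	static inline bool
	fib_rule_port_range_set(const struct fib_rule_port_range *range)
	{
		return range->start != 0 && range->end != 0;
	}

	static inline bool
	fib_rule_port_range_valid(const struct fib_rule_port_range *a)
	{
		return a->start != 0 && a->end != 0 && a->start <= a->end;
	}
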
index 08ab4c6..33edfa8 100644 (file)
@@ -2991,7 +2991,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
        struct ip_tunnel_info *info;
 
        if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
-                              BPF_F_DONT_FRAGMENT)))
+                              BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
                return -EINVAL;
        if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
                switch (size) {
@@ -3025,6 +3025,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
                info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
        if (flags & BPF_F_ZERO_CSUM_TX)
                info->key.tun_flags &= ~TUNNEL_CSUM;
+       if (flags & BPF_F_SEQ_NUMBER)
+               info->key.tun_flags |= TUNNEL_SEQ;
 
        info->key.tun_id = cpu_to_be64(from->tunnel_id);
        info->key.tos = from->tunnel_tos;
@@ -3381,17 +3383,13 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
        struct sock *sk = bpf_sock->sk;
        int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
 
-       if (!sk_fullsock(sk))
+       if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
                return -EINVAL;
 
-#ifdef CONFIG_INET
        if (val)
                tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
 
        return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
-#else
-       return -EINVAL;
-#endif
 }
 
 static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
index 559db9e..d29f09b 100644 (file)
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
 }
 EXPORT_SYMBOL(__get_hash_from_flowi6);
 
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
-       memset(keys, 0, sizeof(*keys));
-
-       keys->addrs.v4addrs.src = fl4->saddr;
-       keys->addrs.v4addrs.dst = fl4->daddr;
-       keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-       keys->ports.src = fl4->fl4_sport;
-       keys->ports.dst = fl4->fl4_dport;
-       keys->keyid.keyid = fl4->fl4_gre_key;
-       keys->basic.ip_proto = fl4->flowi4_proto;
-
-       return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
 static const struct flow_dissector_key flow_keys_dissector_keys[] = {
        {
                .key_id = FLOW_DISSECTOR_KEY_CONTROL,
index 0a3f88f..98fd127 100644 (file)
@@ -66,6 +66,7 @@ struct net_rate_estimator {
 static void est_fetch_counters(struct net_rate_estimator *e,
                               struct gnet_stats_basic_packed *b)
 {
+       memset(b, 0, sizeof(*b));
        if (e->stats_lock)
                spin_lock(e->stats_lock);
 
index e010bb8..65b51e7 100644 (file)
@@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_proc_ops = {
        .init = dev_proc_net_init,
        .exit = dev_proc_net_exit,
+       .async = true,
 };
 
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
@@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_mc_net_ops = {
        .init = dev_mc_net_init,
        .exit = dev_mc_net_exit,
+       .async = true,
 };
 
 int __init dev_proc_init(void)
index 3cad5f5..690e78c 100644 (file)
@@ -29,7 +29,6 @@
 
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -41,6 +40,12 @@ struct net init_net = {
 EXPORT_SYMBOL(init_net);
 
 static bool init_net_initialized;
+static unsigned int nr_sync_pernet_ops;
+/*
+ * net_sem protects pernet_list, net_generic_ids, nr_sync_pernet_ops,
+ * init_net_initialized and the first_device pointer.
+ */
+DECLARE_RWSEM(net_sem);
 
 #define MIN_PERNET_OPS_ID      \
        ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +70,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
 {
        struct net_generic *ng, *old_ng;
 
-       BUG_ON(!mutex_is_locked(&net_mutex));
        BUG_ON(id < MIN_PERNET_OPS_ID);
 
        old_ng = rcu_dereference_protected(net->gen,
-                                          lockdep_is_held(&net_mutex));
+                                          lockdep_is_held(&net_sem));
        if (old_ng->s.len > id) {
                old_ng->ptr[id] = data;
                return 0;
@@ -286,7 +290,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
  */
 static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
-       /* Must be called with net_mutex held */
+       /* Must be called with net_sem held */
        const struct pernet_operations *ops, *saved_ops;
        int error = 0;
        LIST_HEAD(net_exit_list);
@@ -303,6 +307,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
                if (error < 0)
                        goto out_undo;
        }
+       rtnl_lock();
+       list_add_tail_rcu(&net->list, &net_namespace_list);
+       rtnl_unlock();
 out:
        return error;
 
@@ -331,6 +338,7 @@ static int __net_init net_defaults_init_net(struct net *net)
 
 static struct pernet_operations net_defaults_ops = {
        .init = net_defaults_init_net,
+       .async = true,
 };
 
 static __init int net_defaults_init(void)
@@ -354,7 +362,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
 }
 
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
 static struct workqueue_struct *netns_wq;
 
 static struct net *net_alloc(void)
@@ -397,6 +405,7 @@ struct net *copy_net_ns(unsigned long flags,
 {
        struct ucounts *ucounts;
        struct net *net;
+       unsigned int write;
        int rv;
 
        if (!(flags & CLONE_NEWNET))
@@ -408,32 +417,38 @@ struct net *copy_net_ns(unsigned long flags,
 
        net = net_alloc();
        if (!net) {
-               dec_net_namespaces(ucounts);
-               return ERR_PTR(-ENOMEM);
+               rv = -ENOMEM;
+               goto dec_ucounts;
        }
-
+       refcount_set(&net->passive, 1);
+       net->ucounts = ucounts;
        get_user_ns(user_ns);
+again:
+       write = READ_ONCE(nr_sync_pernet_ops);
+       if (write)
+               rv = down_write_killable(&net_sem);
+       else
+               rv = down_read_killable(&net_sem);
+       if (rv < 0)
+               goto put_userns;
 
-       rv = mutex_lock_killable(&net_mutex);
-       if (rv < 0) {
-               net_free(net);
-               dec_net_namespaces(ucounts);
-               put_user_ns(user_ns);
-               return ERR_PTR(rv);
+       if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+               up_read(&net_sem);
+               goto again;
        }
-
-       net->ucounts = ucounts;
        rv = setup_net(net, user_ns);
-       if (rv == 0) {
-               rtnl_lock();
-               list_add_tail_rcu(&net->list, &net_namespace_list);
-               rtnl_unlock();
-       }
-       mutex_unlock(&net_mutex);
+
+       if (write)
+               up_write(&net_sem);
+       else
+               up_read(&net_sem);
+
        if (rv < 0) {
-               dec_net_namespaces(ucounts);
+put_userns:
                put_user_ns(user_ns);
                net_drop_ns(net);
+dec_ucounts:
+               dec_net_namespaces(ucounts);
                return ERR_PTR(rv);
        }
        return net;
@@ -466,26 +481,33 @@ static void unhash_nsid(struct net *net, struct net *last)
        spin_unlock_bh(&net->nsid_lock);
 }
 
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
 
 static void cleanup_net(struct work_struct *work)
 {
        const struct pernet_operations *ops;
        struct net *net, *tmp, *last;
-       struct list_head net_kill_list;
+       struct llist_node *net_kill_list;
        LIST_HEAD(net_exit_list);
+       unsigned int write;
 
        /* Atomically snapshot the list of namespaces to cleanup */
-       spin_lock_irq(&cleanup_list_lock);
-       list_replace_init(&cleanup_list, &net_kill_list);
-       spin_unlock_irq(&cleanup_list_lock);
+       net_kill_list = llist_del_all(&cleanup_list);
+again:
+       write = READ_ONCE(nr_sync_pernet_ops);
+       if (write)
+               down_write(&net_sem);
+       else
+               down_read(&net_sem);
 
-       mutex_lock(&net_mutex);
+       if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+               up_read(&net_sem);
+               goto again;
+       }
 
        /* Don't let anyone else find us. */
        rtnl_lock();
-       list_for_each_entry(net, &net_kill_list, cleanup_list)
+       llist_for_each_entry(net, net_kill_list, cleanup_list)
                list_del_rcu(&net->list);
        /* Cache last net. After we unlock rtnl, no one new net
         * added to net_namespace_list can assign nsid pointer
@@ -500,7 +522,7 @@ static void cleanup_net(struct work_struct *work)
        last = list_last_entry(&net_namespace_list, struct net, list);
        rtnl_unlock();
 
-       list_for_each_entry(net, &net_kill_list, cleanup_list) {
+       llist_for_each_entry(net, net_kill_list, cleanup_list) {
                unhash_nsid(net, last);
                list_add_tail(&net->exit_list, &net_exit_list);
        }
@@ -520,7 +542,10 @@ static void cleanup_net(struct work_struct *work)
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);
 
-       mutex_unlock(&net_mutex);
+       if (write)
+               up_write(&net_sem);
+       else
+               up_read(&net_sem);
 
        /* Ensure there are no outstanding rcu callbacks using this
         * network namespace.
@@ -547,8 +572,8 @@ static void cleanup_net(struct work_struct *work)
  */
 void net_ns_barrier(void)
 {
-       mutex_lock(&net_mutex);
-       mutex_unlock(&net_mutex);
+       down_write(&net_sem);
+       up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);
 
@@ -557,13 +582,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
 void __put_net(struct net *net)
 {
        /* Cleanup the network namespace in process context */
-       unsigned long flags;
-
-       spin_lock_irqsave(&cleanup_list_lock, flags);
-       list_add(&net->cleanup_list, &cleanup_list);
-       spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
-       queue_work(netns_wq, &net_cleanup_work);
+       if (llist_add(&net->cleanup_list, &cleanup_list))
+               queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);
 
@@ -633,6 +653,7 @@ static __net_exit void net_ns_net_exit(struct net *net)
 static struct pernet_operations __net_initdata net_ns_ops = {
        .init = net_ns_net_init,
        .exit = net_ns_net_exit,
+       .async = true,
 };
 
 static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
@@ -875,17 +896,12 @@ static int __init net_ns_init(void)
 
        rcu_assign_pointer(init_net.gen, ng);
 
-       mutex_lock(&net_mutex);
+       down_write(&net_sem);
        if (setup_net(&init_net, &init_user_ns))
                panic("Could not setup the initial network namespace");
 
        init_net_initialized = true;
-
-       rtnl_lock();
-       list_add_tail_rcu(&init_net.list, &net_namespace_list);
-       rtnl_unlock();
-
-       mutex_unlock(&net_mutex);
+       up_write(&net_sem);
 
        register_pernet_subsys(&net_ns_ops);
 
@@ -989,6 +1005,9 @@ again:
                rcu_barrier();
                if (ops->id)
                        ida_remove(&net_generic_ids, *ops->id);
+       } else if (!ops->async) {
+               pr_info_once("Pernet operations %ps are synchronous.\n", ops);
+               nr_sync_pernet_ops++;
        }
 
        return error;
@@ -996,7 +1015,8 @@ again:
 
 static void unregister_pernet_operations(struct pernet_operations *ops)
 {
-       
+       if (!ops->async)
+               BUG_ON(nr_sync_pernet_ops-- == 0);
        __unregister_pernet_operations(ops);
        rcu_barrier();
        if (ops->id)
@@ -1025,9 +1045,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
 int register_pernet_subsys(struct pernet_operations *ops)
 {
        int error;
-       mutex_lock(&net_mutex);
+       down_write(&net_sem);
        error =  register_pernet_operations(first_device, ops);
-       mutex_unlock(&net_mutex);
+       up_write(&net_sem);
        return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1063,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
  */
 void unregister_pernet_subsys(struct pernet_operations *ops)
 {
-       mutex_lock(&net_mutex);
+       down_write(&net_sem);
        unregister_pernet_operations(ops);
-       mutex_unlock(&net_mutex);
+       up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 
@@ -1071,11 +1091,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 int register_pernet_device(struct pernet_operations *ops)
 {
        int error;
-       mutex_lock(&net_mutex);
+       down_write(&net_sem);
        error = register_pernet_operations(&pernet_list, ops);
        if (!error && (first_device == &pernet_list))
                first_device = &ops->list;
-       mutex_unlock(&net_mutex);
+       up_write(&net_sem);
        return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1111,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
  */
 void unregister_pernet_device(struct pernet_operations *ops)
 {
-       mutex_lock(&net_mutex);
+       down_write(&net_sem);
        if (&ops->list == first_device)
                first_device = first_device->next;
        unregister_pernet_operations(ops);
-       mutex_unlock(&net_mutex);
+       up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
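
Every ".async = true" added throughout this series is a declaration that the
operation's init/exit methods touch only per-namespace state, so they may run
under net_sem held for read; cleanup_net() can downgrade to down_read() once
nr_sync_pernet_ops drops to zero. A hypothetical subsystem opts in like this
(the foo_* names are placeholders):

	static int __net_init foo_net_init(struct net *net)
	{
		return 0;	/* set up state reachable only via this net */
	}

	static void __net_exit foo_net_exit(struct net *net)
	{
		/* tear down only this net's state */
	}

	static struct pernet_operations foo_net_ops = {
		.init  = foo_net_init,
		.exit  = foo_net_exit,
		.async = true,	/* no serialization against other pernet ops */
	};
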
 
index bc29041..67f375c 100644 (file)
@@ -454,11 +454,11 @@ static void rtnl_lock_unregistering_all(void)
 void rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
        /* Close the race with cleanup_net() */
-       mutex_lock(&net_mutex);
+       down_write(&net_sem);
        rtnl_lock_unregistering_all();
        __rtnl_link_unregister(ops);
        rtnl_unlock();
-       mutex_unlock(&net_mutex);
+       up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(rtnl_link_unregister);
 
@@ -4724,6 +4724,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net)
 static struct pernet_operations rtnetlink_net_ops = {
        .init = rtnetlink_net_init,
        .exit = rtnetlink_net_exit,
+       .async = true,
 };
 
 void __init rtnetlink_init(void)
index 09bd89c..96d36b8 100644 (file)
@@ -77,8 +77,8 @@
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
 
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
 
@@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
 {
        unsigned long max_pg, num_pg, new_pg, old_pg;
        struct user_struct *user;
@@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
 
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
 {
        if (mmp->user) {
                atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
                free_uid(mmp->user);
        }
 }
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
 
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 {
index c501499..507d8c6 100644 (file)
@@ -1049,18 +1049,21 @@ set_rcvbuf:
                break;
 
        case SO_ZEROCOPY:
-               if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+               if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+                       if (sk->sk_protocol != IPPROTO_TCP)
+                               ret = -ENOTSUPP;
+                       else if (sk->sk_state != TCP_CLOSE)
+                               ret = -EBUSY;
+               } else if (sk->sk_family != PF_RDS) {
                        ret = -ENOTSUPP;
-               else if (sk->sk_protocol != IPPROTO_TCP)
-                       ret = -ENOTSUPP;
-               else if (sk->sk_state != TCP_CLOSE)
-                       ret = -EBUSY;
-               else if (val < 0 || val > 1)
-                       ret = -EINVAL;
-               else
-                       sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
-               break;
-
+               }
+       if (!ret) {
+               if (val < 0 || val > 1)
+                       ret = -EINVAL;
+               else
+                       sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+       }
+       break;
        default:
                ret = -ENOPROTOOPT;
                break;
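
After this hunk the option is accepted on closed TCP sockets and, new here, on
PF_RDS sockets; every other family still gets ENOTSUPP. A minimal user-space
probe; the fallback define assumes the usual asm-generic value of 60:

	#include <stdio.h>
	#include <sys/socket.h>

	#ifndef SO_ZEROCOPY
	#define SO_ZEROCOPY 60	/* assumption: asm-generic/socket.h value */
	#endif

	int main(void)
	{
		int one = 1;
		int fd = socket(AF_INET, SOCK_STREAM, 0); /* still TCP_CLOSE */

		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
			perror("SO_ZEROCOPY");	/* e.g. ENOTSUPP for UDP */
		else
			puts("zerocopy enabled");
		return 0;
	}
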
@@ -1274,7 +1277,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
        {
                char address[128];
 
-               if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+               lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+               if (lv < 0)
                        return -ENOTCONN;
                if (lv < len)
                        return -EINVAL;
@@ -1773,7 +1777,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
        u32 max_segs = 1;
 
        sk_dst_set(sk, dst);
-       sk->sk_route_caps = dst->dev->features;
+       sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
        sk->sk_route_caps &= ~sk->sk_route_nocaps;
@@ -2497,7 +2501,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
 EXPORT_SYMBOL(sock_no_accept);
 
 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
-                   int *len, int peer)
+                   int peer)
 {
        return -EOPNOTSUPP;
 }
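
This is part of a tree-wide convention change: getname() implementations no
longer fill a length through int *len and return 0, they return the address
length directly or a negative errno. An adapted implementation, as a sketch
(foo_getname and its connection check are hypothetical):

	static int foo_getname(struct socket *sock, struct sockaddr *uaddr,
			       int peer)
	{
		DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);

		if (peer && sock->sk->sk_state != TCP_ESTABLISHED)
			return -ENOTCONN;

		sin->sin_family = AF_INET;
		/* ... fill sin->sin_port and sin->sin_addr ... */
		return sizeof(*sin);	/* length on success, not 0 */
	}
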
@@ -3111,6 +3115,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net)
 static struct pernet_operations net_inuse_ops = {
        .init = sock_inuse_init_net,
        .exit = sock_inuse_exit_net,
+       .async = true,
 };
 
 static __init int net_inuse_init(void)
@@ -3384,6 +3389,7 @@ static __net_exit void proto_exit_net(struct net *net)
 static __net_initdata struct pernet_operations proto_net_ops = {
        .init = proto_init_net,
        .exit = proto_exit_net,
+       .async = true,
 };
 
 static int __init proto_init(void)
index 146b50e..aee5642 100644 (file)
@@ -328,6 +328,7 @@ static void __net_exit diag_net_exit(struct net *net)
 static struct pernet_operations diag_net_ops = {
        .init = diag_net_init,
        .exit = diag_net_exit,
+       .async = true,
 };
 
 static int __init sock_diag_init(void)
index f2d0462..d714f65 100644 (file)
@@ -572,6 +572,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_core_ops = {
        .init = sysctl_core_net_init,
        .exit = sysctl_core_net_exit,
+       .async = true,
 };
 
 static __init int sysctl_core_init(void)
index 91dd09f..2ee8306 100644 (file)
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
 }
 
 
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
 {
        struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
        struct sock *sk = sock->sk;
        struct dn_scp *scp = DN_SK(sk);
 
-       *uaddr_len = sizeof(struct sockaddr_dn);
-
        lock_sock(sk);
 
        if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
 
        release_sock(sk);
 
-       return 0;
+       return sizeof(struct sockaddr_dn);
 }
 
 
@@ -1338,6 +1336,12 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use
        lock_sock(sk);
        err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
        release_sock(sk);
+#ifdef CONFIG_NETFILTER
+       /* we need to exclude all possible ENOPROTOOPTs except default case */
+       if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
+           optname != DSO_STREAM && optname != DSO_SEQPACKET)
+               err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
+#endif
 
        return err;
 }
@@ -1445,15 +1449,6 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
                dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
                break;
 
-       default:
-#ifdef CONFIG_NETFILTER
-               return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
-#endif
-       case DSO_LINKINFO:
-       case DSO_STREAM:
-       case DSO_SEQPACKET:
-               return -ENOPROTOOPT;
-
        case DSO_MAXWINDOW:
                if (optlen != sizeof(unsigned long))
                        return -EINVAL;
@@ -1501,6 +1496,12 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
                        return -EINVAL;
                scp->info_loc = u.info;
                break;
+
+       case DSO_LINKINFO:
+       case DSO_STREAM:
+       case DSO_SEQPACKET:
+       default:
+               return -ENOPROTOOPT;
        }
 
        return 0;
@@ -1514,6 +1515,20 @@ static int dn_getsockopt(struct socket *sock, int level, int optname, char __use
        lock_sock(sk);
        err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
        release_sock(sk);
+#ifdef CONFIG_NETFILTER
+       if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
+           optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
+           optname != DSO_CONREJECT) {
+               int len;
+
+               if (get_user(len, optlen))
+                       return -EFAULT;
+
+               err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
+               if (err >= 0)
+                       err = put_user(len, optlen);
+       }
+#endif
 
        return err;
 }
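
Both DECnet hunks apply the same pattern: the protocol handler runs under
lock_sock(), disowns options it does not implement with -ENOPROTOOPT, and only
then, outside the socket lock, the caller offers the option to netfilter.
Stripped to its shape (__proto_setsockopt and proto_owns_option are
hypothetical stand-ins for the handlers and optname checks above):

	lock_sock(sk);
	err = __proto_setsockopt(sock, level, optname, optval, optlen);
	release_sock(sk);

	/* fall back only for options the protocol explicitly disowned */
	if (err == -ENOPROTOOPT && !proto_owns_option(optname))
		err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
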
@@ -1579,26 +1594,6 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
                r_data = &link;
                break;
 
-       default:
-#ifdef CONFIG_NETFILTER
-       {
-               int ret, len;
-
-               if (get_user(len, optlen))
-                       return -EFAULT;
-
-               ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
-               if (ret >= 0)
-                       ret = put_user(len, optlen);
-               return ret;
-       }
-#endif
-       case DSO_STREAM:
-       case DSO_SEQPACKET:
-       case DSO_CONACCEPT:
-       case DSO_CONREJECT:
-               return -ENOPROTOOPT;
-
        case DSO_MAXWINDOW:
                if (r_len > sizeof(unsigned long))
                        r_len = sizeof(unsigned long);
@@ -1630,6 +1625,13 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
                        r_len = sizeof(unsigned char);
                r_data = &scp->info_rem;
                break;
+
+       case DSO_STREAM:
+       case DSO_SEQPACKET:
+       case DSO_CONACCEPT:
+       case DSO_CONREJECT:
+       default:
+               return -ENOPROTOOPT;
        }
 
        if (r_data) {
index 6a9d0f5..e63c554 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/sysfs.h>
 #include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
 #include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
 
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
 
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+                                      struct sk_buff *skb)
+{
+       struct dsa_switch *ds = p->dp->ds;
+       unsigned int type;
+
+       if (skb_headroom(skb) < ETH_HLEN)
+               return false;
+
+       __skb_push(skb, ETH_HLEN);
+
+       type = ptp_classify_raw(skb);
+
+       __skb_pull(skb, ETH_HLEN);
+
+       if (type == PTP_CLASS_NONE)
+               return false;
+
+       if (likely(ds->ops->port_rxtstamp))
+               return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+       return false;
+}
+
 static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
                          struct packet_type *pt, struct net_device *unused)
 {
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
        s->rx_bytes += skb->len;
        u64_stats_update_end(&s->syncp);
 
+       if (dsa_skb_defer_rx_timestamp(p, skb))
+               return 0;
+
        netif_receive_skb(skb);
 
        return 0;
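
Returning true from port_rxtstamp transfers skb ownership to the switch
driver, which is expected to fill skb_hwtstamps() and reinject the packet once
the hardware timestamp is available; returning false lets the stack deliver it
immediately. A hypothetical hook, sketching the contract only (foo_priv and
its queue are invented):

	static bool foo_port_rxtstamp(struct dsa_switch *ds, int port,
				      struct sk_buff *skb, unsigned int type)
	{
		struct foo_priv *priv = ds->priv;	/* hypothetical */

		if (type != PTP_CLASS_V2_L2)	/* only what the hw stamps */
			return false;

		/* park the skb; the completion path fills
		 * skb_hwtstamps(skb)->hwtstamp and calls netif_rx(skb)
		 */
		skb_queue_tail(&priv->rx_pending, skb);
		return true;
	}
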
index 0058914..90e6df0 100644 (file)
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
                count += ops->get_sset_count(dev, sset);
 
        if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
-               count += ds->ops->get_sset_count(ds);
+               count += ds->ops->get_sset_count(ds, cpu_dp->index);
 
        return count;
 }
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
                 * constructed earlier
                 */
                ds->ops->get_strings(ds, port, ndata);
-               count = ds->ops->get_sset_count(ds);
+               count = ds->ops->get_sset_count(ds, port);
                for (i = 0; i < count; i++) {
                        memmove(ndata + (i * len + sizeof(pfx)),
                                ndata + i * len, len - sizeof(pfx));
index f523072..18561af 100644 (file)
@@ -21,6 +21,7 @@
 #include <net/tc_act/tc_mirred.h>
 #include <linux/if_bridge.h>
 #include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
 
 #include "dsa_priv.h"
 
@@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->dp->ds;
+       int port = p->dp->index;
+
+       /* Pass through to switch driver if it supports timestamping */
+       switch (cmd) {
+       case SIOCGHWTSTAMP:
+               if (ds->ops->port_hwtstamp_get)
+                       return ds->ops->port_hwtstamp_get(ds, port, ifr);
+               break;
+       case SIOCSHWTSTAMP:
+               if (ds->ops->port_hwtstamp_set)
+                       return ds->ops->port_hwtstamp_set(ds, port, ifr);
+               break;
+       }
+
        if (!dev->phydev)
                return -ENODEV;
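
With the pass-through above, the standard hardware-timestamping ioctls reach
the switch driver when issued on a DSA slave interface; the user-space side is
plain existing UAPI:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>
	#include <linux/net_tstamp.h>
	#include <linux/sockios.h>

	static int enable_hwtstamp(int fd, const char *ifname)
	{
		struct hwtstamp_config cfg = {
			.tx_type   = HWTSTAMP_TX_ON,
			.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
		};
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (char *)&cfg;

		return ioctl(fd, SIOCSHWTSTAMP, &ifr); /* -> port_hwtstamp_set */
	}
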
 
@@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
        return NETDEV_TX_OK;
 }
 
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+                                struct sk_buff *skb)
+{
+       struct dsa_switch *ds = p->dp->ds;
+       struct sk_buff *clone;
+       unsigned int type;
+
+       type = ptp_classify_raw(skb);
+       if (type == PTP_CLASS_NONE)
+               return;
+
+       if (!ds->ops->port_txtstamp)
+               return;
+
+       clone = skb_clone_sk(skb);
+       if (!clone)
+               return;
+
+       if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+               return;
+
+       kfree_skb(clone);
+}
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct dsa_slave_priv *p = netdev_priv(dev);
@@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
        s->tx_bytes += skb->len;
        u64_stats_update_end(&s->syncp);
 
+       /* Identify PTP protocol packets, clone them, and pass them to the
+        * switch driver
+        */
+       dsa_skb_tx_timestamp(p, skb);
+
        /* Transmit function may have to reallocate the original SKB,
         * in which case it must have freed it. Only free it here on error.
         */
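
The tx contract mirrors rx: dsa_skb_tx_timestamp() above offers the driver a
clone of the outgoing PTP frame; returning true means the driver kept the
clone and will complete it with skb_complete_tx_timestamp() once the hardware
stamp is read back, returning false makes the caller free it. Sketched against
a hypothetical driver:

	static bool foo_port_txtstamp(struct dsa_switch *ds, int port,
				      struct sk_buff *clone, unsigned int type)
	{
		struct foo_priv *priv = ds->priv;	/* hypothetical */

		if (!priv->tx_tstamp_enabled[port])
			return false;		/* caller kfree_skb()s clone */

		/* later, from irq/poll context:
		 *	struct skb_shared_hwtstamps hwt = {
		 *		.hwtstamp = ns_to_ktime(hw_ns),
		 *	};
		 *	skb_complete_tx_timestamp(clone, &hwt);
		 */
		priv->tx_pending[port] = clone;
		return true;
	}
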
@@ -559,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 
                count = 4;
                if (ds->ops->get_sset_count)
-                       count += ds->ops->get_sset_count(ds);
+                       count += ds->ops->get_sset_count(ds, dp->index);
 
                return count;
        }
@@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
        return ds->ops->set_rxnfc(ds, dp->index, nfc);
 }
 
+static int dsa_slave_get_ts_info(struct net_device *dev,
+                                struct ethtool_ts_info *ts)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->dp->ds;
+
+       if (!ds->ops->get_ts_info)
+               return -EOPNOTSUPP;
+
+       return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
 static const struct ethtool_ops dsa_slave_ethtool_ops = {
        .get_drvinfo            = dsa_slave_get_drvinfo,
        .get_regs_len           = dsa_slave_get_regs_len,
@@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
        .set_link_ksettings     = phy_ethtool_set_link_ksettings,
        .get_rxnfc              = dsa_slave_get_rxnfc,
        .set_rxnfc              = dsa_slave_set_rxnfc,
+       .get_ts_info            = dsa_slave_get_ts_info,
 };
 
 /* legacy way, bypassing the bridge *****************************************/
index 974765b..e4f3053 100644 (file)
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
        /* We need an ipv6hdr as minimum len when calling xmit */
        ldev->hard_header_len   = sizeof(struct ipv6hdr);
        ldev->flags             = IFF_BROADCAST | IFF_MULTICAST;
+       ldev->priv_flags        |= IFF_NO_QUEUE;
 
        ldev->netdev_ops        = &lowpan_netdev_ops;
        ldev->header_ops        = &lowpan_header_ops;
index cb7176c..9104943 100644 (file)
@@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg802154_pernet_ops = {
        .exit = cfg802154_pernet_exit,
+       .async = true,
 };
 
 static int __init wpan_phy_class_init(void)
index f48fe6f..80dad30 100644 (file)
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
          Network), but can be distributed all over the Internet. If you want
          to do that, say Y here and to "IP multicast routing" below.
 
+config IP_MROUTE_COMMON
+       bool
+       depends on IP_MROUTE || IPV6_MROUTE
+
 config IP_MROUTE
        bool "IP: multicast routing"
        depends on IP_MULTICAST
+       select IP_MROUTE_COMMON
        help
          This is used if you want your machine to act as a router for IP
          packets that have several destination addresses. It is needed on the
index 47a0a66..a07b7dd 100644 (file)
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
 gre-y := gre_demux.o
 obj-$(CONFIG_NET_FOU) += fou.o
index e4329e1..e8c7fad 100644 (file)
@@ -723,7 +723,7 @@ EXPORT_SYMBOL(inet_accept);
  *     This does both peername and sockname.
  */
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
-                       int *uaddr_len, int peer)
+                       int peer)
 {
        struct sock *sk         = sock->sk;
        struct inet_sock *inet  = inet_sk(sk);
@@ -745,8 +745,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
                sin->sin_addr.s_addr = addr;
        }
        memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-       *uaddr_len = sizeof(*sin);
-       return 0;
+       return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet_getname);
 
@@ -1736,6 +1735,7 @@ static __net_exit void ipv4_mib_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
        .init = ipv4_mib_init_net,
        .exit = ipv4_mib_exit_net,
+       .async = true,
 };
 
 static int __init init_ipv4_mibs(void)
@@ -1789,6 +1789,7 @@ static __net_exit void inet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations af_inet_ops = {
        .init = inet_init_net,
        .exit = inet_exit_net,
+       .async = true,
 };
 
 static int __init init_inet_pernet_ops(void)
index f28f06c..7dc9de8 100644 (file)
@@ -1447,6 +1447,7 @@ static void __net_exit arp_net_exit(struct net *net)
 static struct pernet_operations arp_net_ops = {
        .init = arp_net_init,
        .exit = arp_net_exit,
+       .async = true,
 };
 
 static int __init arp_proc_init(void)
index 40f0017..5ae0d1f 100644 (file)
@@ -2469,6 +2469,7 @@ static __net_exit void devinet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
+       .async = true,
 };
 
 static struct rtnl_af_ops inet_af_ops __read_mostly = {
index f05afaf..ac71c3d 100644 (file)
@@ -1362,6 +1362,7 @@ static void __net_exit fib_net_exit(struct net *net)
 static struct pernet_operations fib_net_ops = {
        .init = fib_net_init,
        .exit = fib_net_exit,
+       .async = true,
 };
 
 void __init ip_fib_init(void)
index 35d646a..737d11b 100644 (file)
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
        if (r->tos && (r->tos != fl4->flowi4_tos))
                return 0;
 
+       if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->sport_range) &&
+           !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->dport_range) &&
+           !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+               return 0;
+
        return 1;
 }
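
The helpers used here arrive with this series in fib_rules.h; as I read them,
a range participates in matching only when both bounds are non-zero, and the
comparison is inclusive on host-order port numbers. Their approximate shape:

	static inline bool
	fib_rule_port_range_set(const struct fib_rule_port_range *r)
	{
		return r->start != 0 && r->end != 0;
	}

	static inline bool
	fib_rule_port_inrange(const struct fib_rule_port_range *r, __be16 port)
	{
		return ntohs(port) >= r->start && ntohs(port) <= r->end;
	}
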
 
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        }
 #endif
 
+       if (fib_rule_requires_fldissect(rule))
+               net->ipv4.fib_rules_require_fldissect++;
+
        rule4->src_len = frh->src_len;
        rule4->srcmask = inet_make_mask(rule4->src_len);
        rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
                net->ipv4.fib_num_tclassid_users--;
 #endif
        net->ipv4.fib_has_custom_rules = true;
+
+       if (net->ipv4.fib_rules_require_fldissect &&
+           fib_rule_requires_fldissect(rule))
+               net->ipv4.fib_rules_require_fldissect--;
 errout:
        return err;
 }
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
                goto fail;
        net->ipv4.rules_ops = ops;
        net->ipv4.fib_has_custom_rules = false;
+       net->ipv4.fib_rules_require_fldissect = 0;
        return 0;
 
 fail:
index c586597..e7c602c 100644 (file)
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
                fnhe = rcu_dereference_protected(hash[i].chain, 1);
                while (fnhe) {
                        struct fib_nh_exception *next;
-                       
+
                        next = rcu_dereference_protected(fnhe->fnhe_next, 1);
 
                        rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -646,6 +646,11 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
                                            fi->fib_nh, cfg, extack))
                                return 1;
                }
+#ifdef CONFIG_IP_ROUTE_CLASSID
+               if (cfg->fc_flow &&
+                   cfg->fc_flow != fi->fib_nh->nh_tclassid)
+                       return 1;
+#endif
                if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
                    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
                        return 0;
@@ -1760,14 +1765,12 @@ void fib_select_multipath(struct fib_result *res, int hash)
 void fib_select_path(struct net *net, struct fib_result *res,
                     struct flowi4 *fl4, const struct sk_buff *skb)
 {
-       bool oif_check;
-
-       oif_check = (fl4->flowi4_oif == 0 ||
-                    fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+       if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+               goto check_saddr;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-       if (res->fi->fib_nhs > 1 && oif_check) {
-               int h = fib_multipath_hash(res->fi, fl4, skb);
+       if (res->fi->fib_nhs > 1) {
+               int h = fib_multipath_hash(net, fl4, skb, NULL);
 
                fib_select_multipath(res, h);
        }
@@ -1775,10 +1778,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
 #endif
        if (!res->prefixlen &&
            res->table->tb_num_default > 1 &&
-           res->type == RTN_UNICAST && oif_check)
+           res->type == RTN_UNICAST)
                fib_select_default(fl4, res);
 
+check_saddr:
        if (!fl4->saddr)
                fl4->saddr = FIB_RES_PREFSRC(net, *res);
 }
-EXPORT_SYMBOL_GPL(fib_select_path);
index 5530cd6..62243a8 100644 (file)
@@ -50,6 +50,7 @@
 
 #define VERSION "0.409"
 
+#include <linux/cache.h>
 #include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
  */
 static const int sync_pages = 128;
 
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
 
 static inline struct tnode *tn_info(struct key_vector *kv)
 {
index 1617604..cc56efa 100644 (file)
@@ -1257,6 +1257,7 @@ fail:
 static struct pernet_operations __net_initdata icmp_sk_ops = {
        .init = icmp_sk_init,
        .exit = icmp_sk_exit,
+       .async = true,
 };
 
 int __init icmp_init(void)
index f240258..c274376 100644 (file)
@@ -3028,6 +3028,7 @@ static void __net_exit igmp_net_exit(struct net *net)
 static struct pernet_operations igmp_net_ops = {
        .init = igmp_net_init,
        .exit = igmp_net_exit,
+       .async = true,
 };
 #endif
 
index 914d569..1f04bd9 100644 (file)
@@ -6,6 +6,7 @@
  *  Authors:   Andrey V. Savochkin <saw@msu.ru>
  */
 
+#include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -51,7 +52,7 @@
  *             daddr: unchangeable
  */
 
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
 
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
index bbf1b94..5e843ae 100644 (file)
@@ -885,6 +885,7 @@ static void __net_exit ipv4_frags_exit_net(struct net *net)
 static struct pernet_operations ip4_frags_ops = {
        .init = ipv4_frags_init_net,
        .exit = ipv4_frags_exit_net,
+       .async = true,
 };
 
 void __init ipfrag_init(void)
index 45d97e9..95fd225 100644 (file)
@@ -522,6 +522,7 @@ err_free_skb:
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                        __be16 proto)
 {
+       struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
        if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
                goto err_free_rt;
 
-       flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+       flags = tun_info->key.tun_flags &
+               (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
        gre_build_header(skb, tunnel_hlen, flags, proto,
-                        tunnel_id_to_key32(tun_info->key.tun_id), 0);
+                        tunnel_id_to_key32(tun_info->key.tun_id),
+                        (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 
        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 
@@ -1044,6 +1047,7 @@ static struct pernet_operations ipgre_net_ops = {
        .exit_batch = ipgre_exit_batch_net,
        .id   = &ipgre_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1322,6 +1326,12 @@ static void ipgre_tap_setup(struct net_device *dev)
        ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
+bool is_gretap_dev(const struct net_device *dev)
+{
+       return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[],
                         struct netlink_ext_ack *extack)
@@ -1623,6 +1633,7 @@ static struct pernet_operations ipgre_tap_net_ops = {
        .exit_batch = ipgre_tap_exit_batch_net,
        .id   = &gre_tap_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __net_init erspan_init_net(struct net *net)
@@ -1641,6 +1652,7 @@ static struct pernet_operations erspan_net_ops = {
        .exit_batch = erspan_exit_batch_net,
        .id   = &erspan_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __init ipgre_init(void)
index 008be04..74c962b 100644 (file)
@@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
                        src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
                        if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
                                return -EINVAL;
-                       ipc->oif = src_info->ipi6_ifindex;
+                       if (src_info->ipi6_ifindex)
+                               ipc->oif = src_info->ipi6_ifindex;
                        ipc->addr = src_info->ipi6_addr.s6_addr32[3];
                        continue;
                }
@@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
                                return -EINVAL;
                        info = (struct in_pktinfo *)CMSG_DATA(cmsg);
-                       ipc->oif = info->ipi_ifindex;
+                       if (info->ipi_ifindex)
+                               ipc->oif = info->ipi_ifindex;
                        ipc->addr = info->ipi_spec_dst.s_addr;
                        break;
                }
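
The practical effect of both guards: a cmsg carrying an ifindex of 0 no longer
wipes out an interface choice the caller already made via SO_BINDTODEVICE,
IP_UNICAST_IF or the flow itself. A user-space fragment for reference,
assuming msg, its control buffer and src_addr are already set up:

	struct in_pktinfo pi = {
		.ipi_ifindex  = 0,		/* 0 now means "don't override" */
		.ipi_spec_dst = src_addr,	/* choose source address only */
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

	cm->cmsg_level = IPPROTO_IP;
	cm->cmsg_type  = IP_PKTINFO;
	cm->cmsg_len   = CMSG_LEN(sizeof(pi));
	memcpy(CMSG_DATA(cm), &pi, sizeof(pi));
	/* sendmsg() keeps the socket's bound device but uses src_addr */
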
@@ -1567,10 +1569,7 @@ int ip_getsockopt(struct sock *sk, int level,
                if (get_user(len, optlen))
                        return -EFAULT;
 
-               lock_sock(sk);
-               err = nf_getsockopt(sk, PF_INET, optname, optval,
-                               &len);
-               release_sock(sk);
+               err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
                if (err >= 0)
                        err = put_user(len, optlen);
                return err;
@@ -1602,9 +1601,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
                if (get_user(len, optlen))
                        return -EFAULT;
 
-               lock_sock(sk);
                err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
-               release_sock(sk);
                if (err >= 0)
                        err = put_user(len, optlen);
                return err;
index d786a84..b2117d8 100644 (file)
@@ -290,22 +290,6 @@ failed:
        return ERR_PTR(err);
 }
 
-static inline void init_tunnel_flow(struct flowi4 *fl4,
-                                   int proto,
-                                   __be32 daddr, __be32 saddr,
-                                   __be32 key, __u8 tos, int oif,
-                                   __u32 mark)
-{
-       memset(fl4, 0, sizeof(*fl4));
-       fl4->flowi4_oif = oif;
-       fl4->daddr = daddr;
-       fl4->saddr = saddr;
-       fl4->flowi4_tos = tos;
-       fl4->flowi4_proto = proto;
-       fl4->fl4_gre_key = key;
-       fl4->flowi4_mark = mark;
-}
-
 static int ip_tunnel_bind_dev(struct net_device *dev)
 {
        struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
                struct flowi4 fl4;
                struct rtable *rt;
 
-               init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
-                                iph->saddr, tunnel->parms.o_key,
-                                RT_TOS(iph->tos), tunnel->parms.link,
-                                tunnel->fwmark);
+               ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+                                   iph->saddr, tunnel->parms.o_key,
+                                   RT_TOS(iph->tos), tunnel->parms.link,
+                                   tunnel->fwmark);
                rt = ip_route_output_key(tunnel->net, &fl4);
 
                if (!IS_ERR(rt)) {
@@ -581,8 +565,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
        }
-       init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
-                        RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+       ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+                           RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
        if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
                goto tx_error;
        rt = ip_route_output_key(tunnel->net, &fl4);
@@ -711,14 +695,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
        }
 
        if (tunnel->fwmark) {
-               init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-                                tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-                                tunnel->fwmark);
+               ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+                                   tunnel->parms.o_key, RT_TOS(tos),
+                                   tunnel->parms.link, tunnel->fwmark);
        }
        else {
-               init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-                                tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-                                skb->mark);
+               ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+                                   tunnel->parms.o_key, RT_TOS(tos),
+                                   tunnel->parms.link, skb->mark);
        }
 
        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
index 51b1669..b10bf56 100644 (file)
@@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = {
        .exit_batch = vti_exit_batch_net,
        .id   = &vti_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
index c891235..9c5a4d1 100644 (file)
@@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = {
        .exit_batch = ipip_exit_batch_net,
        .id   = &ipip_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __init ipip_init(void)
index b05689b..d752a70 100644 (file)
@@ -28,9 +28,9 @@
 
 #include <linux/uaccess.h>
 #include <linux/types.h>
+#include <linux/cache.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
@@ -52,7 +52,6 @@
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/route.h>
-#include <net/sock.h>
 #include <net/icmp.h>
 #include <net/udp.h>
 #include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
  * In this case data path is free of exclusive locks at all.
  */
 
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
 
 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-                             struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
                                 int cmd);
 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
 #define ipmr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+                                          struct mr_table *mrt)
+{
+       struct mr_table *ret;
+
+       if (!mrt)
+               ret = list_entry_rcu(net->ipv4.mr_tables.next,
+                                    struct mr_table, list);
+       else
+               ret = list_entry_rcu(mrt->list.next,
+                                    struct mr_table, list);
+
+       if (&ret->list == &net->ipv4.mr_tables)
+               return NULL;
+       return ret;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
        struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
 #define ipmr_for_each_table(mrt, net) \
        for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+                                          struct mr_table *mrt)
+{
+       if (!mrt)
+               return net->ipv4.mrt;
+       return NULL;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
        return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
 }
 
 static const struct rhashtable_params ipmr_rht_params = {
-       .head_offset = offsetof(struct mfc_cache, mnode),
+       .head_offset = offsetof(struct mr_mfc, mnode),
        .key_offset = offsetof(struct mfc_cache, cmparg),
        .key_len = sizeof(struct mfc_cache_cmp_arg),
        .nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
        .automatic_shrinking = true,
 };
 
+static void ipmr_new_table_set(struct mr_table *mrt,
+                              struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+       list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+       .mfc_mcastgrp = htonl(INADDR_ANY),
+       .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+       .rht_params = &ipmr_rht_params,
+       .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 {
        struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
        if (mrt)
                return mrt;
 
-       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-       if (!mrt)
-               return ERR_PTR(-ENOMEM);
-       write_pnet(&mrt->net, net);
-       mrt->id = id;
-
-       rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
-       INIT_LIST_HEAD(&mrt->mfc_cache_list);
-       INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
-       timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
-       mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
-       list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
-       return mrt;
+       return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+                             ipmr_expire_process, ipmr_new_table_set);
 }
 
 static void ipmr_free_table(struct mr_table *mrt)
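
The boilerplate deleted above moves into mr_table_alloc() in the new shared
ipmr_base.c so that ip6mr can reuse it; ipmr now only supplies its hashtable
parameters, its (*,*) compare key, the expire callback and a hook that links
the table into the namespace. Its signature, reconstructed from the call site
above:

	struct mr_table *mr_table_alloc(struct net *net, u32 id,
					struct mr_table_ops *ops,
					void (*expire_func)(struct timer_list *t),
					void (*table_set)(struct mr_table *mrt,
							  struct net *net));
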
@@ -760,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 
 static void ipmr_cache_free_rcu(struct rcu_head *head)
 {
-       struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+       struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 
-       kmem_cache_free(mrt_cachep, c);
+       kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
 }
 
 void ipmr_cache_free(struct mfc_cache *c)
 {
-       call_rcu(&c->rcu, ipmr_cache_free_rcu);
+       call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
 }
 EXPORT_SYMBOL(ipmr_cache_free);
 
@@ -782,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 
        atomic_dec(&mrt->cache_resolve_queue_len);
 
-       while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));
@@ -806,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 static void ipmr_expire_process(struct timer_list *t)
 {
        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
-       unsigned long now;
+       struct mr_mfc *c, *next;
        unsigned long expires;
-       struct mfc_cache *c, *next;
+       unsigned long now;
 
        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t)
                }
 
                list_del(&c->list);
-               mroute_netlink_event(mrt, c, RTM_DELROUTE);
-               ipmr_destroy_unres(mrt, c);
+               mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+               ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
        }
 
        if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +867,7 @@ out:
 }
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
                                   unsigned char *ttls)
 {
        int vifi;
@@ -944,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        ip_rt_multicast_event(in_dev);
 
        /* Fill in the VIF structures */
+       vif_device_init(v, dev, vifc->vifc_rate_limit,
+                       vifc->vifc_threshold,
+                       vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+                       (VIFF_TUNNEL | VIFF_REGISTER));
 
        attr.orig_dev = dev;
        if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        } else {
                v->dev_parent_id.id_len = 0;
        }
-       v->rate_limit = vifc->vifc_rate_limit;
+
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
-       v->flags = vifc->vifc_flags;
-       if (!mrtsock)
-               v->flags |= VIFF_STATIC;
-       v->threshold = vifc->vifc_threshold;
-       v->bytes_in = 0;
-       v->bytes_out = 0;
-       v->pkt_in = 0;
-       v->pkt_out = 0;
-       v->link = dev->ifindex;
-       if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
-               v->link = dev_get_iflink(dev);
 
        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
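
vif_device_init() absorbs the field-by-field setup deleted in this hunk; the
final mask argument selects which flags make the vif report dev_get_iflink()
rather than dev->ifindex as its link. A paraphrase of the helper (the real
body lives in ipmr_base.c; v->dev itself is still published later, under
mrt_lock):

	void vif_device_init(struct vif_device *v, struct net_device *dev,
			     unsigned long rate_limit, unsigned char threshold,
			     unsigned short flags, unsigned short get_iflink_mask)
	{
		v->dev = NULL;	/* assigned by the caller under mrt_lock */
		v->bytes_in = 0;
		v->bytes_out = 0;
		v->pkt_in = 0;
		v->pkt_out = 0;
		v->rate_limit = rate_limit;
		v->flags = flags;
		v->threshold = threshold;
		if (v->flags & get_iflink_mask)
			v->link = dev_get_iflink(dev);
		else
			v->link = dev->ifindex;
	}
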
@@ -988,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = origin
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               return c;
-
-       return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
-                                                   int vifi)
-{
-       struct mfc_cache_cmp_arg arg = {
-                       .mfc_mcastgrp = htonl(INADDR_ANY),
-                       .mfc_origin = htonl(INADDR_ANY)
-       };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
 
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               if (c->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-
-       return NULL;
+       return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
@@ -1025,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = htonl(INADDR_ANY)
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c, *proxy;
 
        if (mcastgrp == htonl(INADDR_ANY))
-               goto skip;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode) {
-               if (c->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-
-               /* It's ok if the vifi is part of the static tree */
-               proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
-               if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-       }
-
-skip:
-       return ipmr_cache_find_any_parent(mrt, vifi);
+               return mr_mfc_find_any_parent(mrt, vifi);
+       return mr_mfc_find_any(mrt, vifi, &arg);
 }
 
 /* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = origin,
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               if (parent == -1 || parent == c->mfc_parent)
-                       return c;
 
-       return NULL;
+       return mr_mfc_find_parent(mrt, &arg, parent);
 }
 
 /* Allocate a multicast cache entry */
@@ -1072,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void)
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 
        if (c) {
-               c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-               c->mfc_un.res.minvif = MAXVIFS;
-               refcount_set(&c->mfc_un.res.refcount, 1);
+               c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+               c->_c.mfc_un.res.minvif = MAXVIFS;
+               refcount_set(&c->_c.mfc_un.res.refcount, 1);
        }
        return c;
 }
@@ -1084,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 
        if (c) {
-               skb_queue_head_init(&c->mfc_un.unres.unresolved);
-               c->mfc_un.unres.expires = jiffies + 10*HZ;
+               skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+               c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
        }
        return c;
 }
@@ -1098,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
        struct nlmsgerr *e;
 
        /* Play the pending entries through our router */
-       while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+       while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));
 
-                       if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+                       if (mr_fill_mroute(mrt, skb, &c->_c,
+                                          nlmsg_data(nlh)) > 0) {
                                nlh->nlmsg_len = skb_tail_pointer(skb) -
                                                 (u8 *)nlh;
                        } else {
@@ -1211,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
        int err;
 
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+       list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
                if (c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr) {
                        found = true;
@@ -1230,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                }
 
                /* Fill in the new cache entry */
-               c->mfc_parent   = -1;
+               c->_c.mfc_parent = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;
 
                /* Reflect first query at mrouted. */
                err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
@@ -1248,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                }
 
                atomic_inc(&mrt->cache_resolve_queue_len);
-               list_add(&c->list, &mrt->mfc_unres_queue);
+               list_add(&c->_c.list, &mrt->mfc_unres_queue);
                mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 
                if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
-                       mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+                       mod_timer(&mrt->ipmr_expire_timer,
+                                 c->_c.mfc_un.unres.expires);
        }
 
        /* See if we can append the packet */
-       if (c->mfc_un.unres.unresolved.qlen > 3) {
+       if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
@@ -1264,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                        skb->dev = dev;
                        skb->skb_iif = dev->ifindex;
                }
-               skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+               skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
                err = 0;
        }
 
@@ -1286,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
        rcu_read_unlock();
        if (!c)
                return -ENOENT;
-       rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
-       list_del_rcu(&c->list);
+       rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+       list_del_rcu(&c->_c.list);
        call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
        mroute_netlink_event(mrt, c, RTM_DELROUTE);
        ipmr_cache_put(c);
@@ -1299,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
                        struct mfcctl *mfc, int mrtsock, int parent)
 {
        struct mfc_cache *uc, *c;
+       struct mr_mfc *_uc;
        bool found;
        int ret;
 
@@ -1312,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
        rcu_read_unlock();
        if (c) {
                write_lock_bh(&mrt_lock);
-               c->mfc_parent = mfc->mfcc_parent;
-               ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+               c->_c.mfc_parent = mfc->mfcc_parent;
+               ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
                if (!mrtsock)
-                       c->mfc_flags |= MFC_STATIC;
+                       c->_c.mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
                                              mrt->id);
@@ -1333,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 
        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
-       c->mfc_parent = mfc->mfcc_parent;
-       ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+       c->_c.mfc_parent = mfc->mfcc_parent;
+       ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
        if (!mrtsock)
-               c->mfc_flags |= MFC_STATIC;
+               c->_c.mfc_flags |= MFC_STATIC;
 
-       ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+       ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
                                  ipmr_rht_params);
        if (ret) {
                pr_err("ipmr: rhtable insert error %d\n", ret);
                ipmr_cache_free(c);
                return ret;
        }
-       list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+       list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
        /* Check to see if we resolved a queued list. If so we
         * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+       list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+               uc = (struct mfc_cache *)_uc;
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
-                       list_del(&uc->list);
+                       list_del(&_uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
@@ -1377,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
        struct net *net = read_pnet(&mrt->net);
-       struct mfc_cache *c, *tmp;
+       struct mr_mfc *c, *tmp;
+       struct mfc_cache *cache;
        LIST_HEAD(list);
        int i;
 
@@ -1395,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
                        continue;
                rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
                list_del_rcu(&c->list);
-               call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+               cache = (struct mfc_cache *)c;
+               call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
                                              mrt->id);
-               mroute_netlink_event(mrt, c, RTM_DELROUTE);
-               ipmr_cache_put(c);
+               mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+               ipmr_cache_put(cache);
        }
 
        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
-                       mroute_netlink_event(mrt, c, RTM_DELROUTE);
-                       ipmr_destroy_unres(mrt, c);
+                       cache = (struct mfc_cache *)c;
+                       mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+                       ipmr_destroy_unres(mrt, cache);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
@@ -1698,9 +1677,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1751,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1977,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct net_device *dev, struct sk_buff *skb,
-                         struct mfc_cache *cache, int local)
+                         struct mfc_cache *c, int local)
 {
        int true_vifi = ipmr_find_vif(mrt, dev);
        int psend = -1;
        int vif, ct;
 
-       vif = cache->mfc_parent;
-       cache->mfc_un.res.pkt++;
-       cache->mfc_un.res.bytes += skb->len;
-       cache->mfc_un.res.lastuse = jiffies;
+       vif = c->_c.mfc_parent;
+       c->_c.mfc_un.res.pkt++;
+       c->_c.mfc_un.res.bytes += skb->len;
+       c->_c.mfc_un.res.lastuse = jiffies;
 
-       if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+       if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
                struct mfc_cache *cache_proxy;
 
                /* For an (*,G) entry, we only check that the incoming
                 * interface is part of the static tree.
                 */
-               cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+               cache_proxy = mr_mfc_find_any_parent(mrt, vif);
                if (cache_proxy &&
-                   cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+                   cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
                        goto forward;
        }
 
@@ -2038,7 +2017,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                        goto dont_forward;
                }
 
-               cache->mfc_un.res.wrong_if++;
+               c->_c.mfc_un.res.wrong_if++;
 
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +2026,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                     * large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
-                    cache->mfc_un.res.ttls[true_vifi] < 255) &&
+                    c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
-                              cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-                       cache->mfc_un.res.last_assert = jiffies;
+                              c->_c.mfc_un.res.last_assert +
+                              MFC_ASSERT_THRESH)) {
+                       c->_c.mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
@@ -2061,33 +2041,33 @@ forward:
        mrt->vif_table[vif].bytes_in += skb->len;
 
        /* Forward the frame */
-       if (cache->mfc_origin == htonl(INADDR_ANY) &&
-           cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+       if (c->mfc_origin == htonl(INADDR_ANY) &&
+           c->mfc_mcastgrp == htonl(INADDR_ANY)) {
                if (true_vifi >= 0 &&
-                   true_vifi != cache->mfc_parent &&
+                   true_vifi != c->_c.mfc_parent &&
                    ip_hdr(skb)->ttl >
-                               cache->mfc_un.res.ttls[cache->mfc_parent]) {
+                               c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
                        /* It's an (*,*) entry and the packet is not coming from
                         * the upstream: forward the packet to the upstream
                         * only.
                         */
-                       psend = cache->mfc_parent;
+                       psend = c->_c.mfc_parent;
                        goto last_forward;
                }
                goto dont_forward;
        }
-       for (ct = cache->mfc_un.res.maxvif - 1;
-            ct >= cache->mfc_un.res.minvif; ct--) {
+       for (ct = c->_c.mfc_un.res.maxvif - 1;
+            ct >= c->_c.mfc_un.res.minvif; ct--) {
                /* For (*,G) entry, don't forward to the incoming interface */
-               if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+               if ((c->mfc_origin != htonl(INADDR_ANY) ||
                     ct != true_vifi) &&
-                   ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+                   ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
                                if (skb2)
                                        ipmr_queue_xmit(net, mrt, true_vifi,
-                                                       skb2, cache, psend);
+                                                       skb2, c, psend);
                        }
                        psend = ct;
                }
@@ -2099,9 +2079,9 @@ last_forward:
 
                        if (skb2)
                                ipmr_queue_xmit(net, mrt, true_vifi, skb2,
-                                               cache, psend);
+                                               c, psend);
                } else {
-                       ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+                       ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
                        return;
                }
        }
@@ -2299,62 +2279,6 @@ drop:
 }
 #endif
 
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-                             struct mfc_cache *c, struct rtmsg *rtm)
-{
-       struct rta_mfc_stats mfcs;
-       struct nlattr *mp_attr;
-       struct rtnexthop *nhp;
-       unsigned long lastuse;
-       int ct;
-
-       /* If cache is unresolved, don't try to parse IIF and OIF */
-       if (c->mfc_parent >= MAXVIFS) {
-               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-               return -ENOENT;
-       }
-
-       if (VIF_EXISTS(mrt, c->mfc_parent) &&
-           nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
-               return -EMSGSIZE;
-
-       if (c->mfc_flags & MFC_OFFLOAD)
-               rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
-       if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
-               return -EMSGSIZE;
-
-       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-               if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
-                       if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
-                               nla_nest_cancel(skb, mp_attr);
-                               return -EMSGSIZE;
-                       }
-
-                       nhp->rtnh_flags = 0;
-                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-                       nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
-                       nhp->rtnh_len = sizeof(*nhp);
-               }
-       }
-
-       nla_nest_end(skb, mp_attr);
-
-       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
-       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-       mfcs.mfcs_packets = c->mfc_un.res.pkt;
-       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
-       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-                             RTA_PAD))
-               return -EMSGSIZE;
-
-       rtm->rtm_type = RTN_MULTICAST;
-       return 1;
-}
-
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
                   __be32 saddr, __be32 daddr,
                   struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2336,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
        }
 
        read_lock(&mrt_lock);
-       err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+       err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
        read_unlock(&mrt_lock);
        rcu_read_unlock();
        return err;
@@ -2440,7 +2364,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                goto nla_put_failure;
        rtm->rtm_type     = RTN_MULTICAST;
        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-       if (c->mfc_flags & MFC_STATIC)
+       if (c->_c.mfc_flags & MFC_STATIC)
                rtm->rtm_protocol = RTPROT_STATIC;
        else
                rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2373,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
        if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
            nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
                goto nla_put_failure;
-       err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+       err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
        /* do not break the dump if cache is unresolved */
        if (err < 0 && err != -ENOENT)
                goto nla_put_failure;
@@ -2462,6 +2386,14 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                            u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+                            int flags)
+{
+       return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+                               cmd, flags);
+}
+
 static size_t mroute_msgsize(bool unresolved, int maxvif)
 {
        size_t len =
@@ -2490,7 +2422,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+       skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+                                      mrt->maxvif),
                        GFP_ATOMIC);
        if (!skb)
                goto errout;
@@ -2634,62 +2567,8 @@ errout_free:
 
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct net *net = sock_net(skb->sk);
-       struct mr_table *mrt;
-       struct mfc_cache *mfc;
-       unsigned int t = 0, s_t;
-       unsigned int e = 0, s_e;
-
-       s_t = cb->args[0];
-       s_e = cb->args[1];
-
-       rcu_read_lock();
-       ipmr_for_each_table(mrt, net) {
-               if (t < s_t)
-                       goto next_table;
-               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
-                       if (e < s_e)
-                               goto next_entry;
-                       if (ipmr_fill_mroute(mrt, skb,
-                                            NETLINK_CB(cb->skb).portid,
-                                            cb->nlh->nlmsg_seq,
-                                            mfc, RTM_NEWROUTE,
-                                            NLM_F_MULTI) < 0)
-                               goto done;
-next_entry:
-                       e++;
-               }
-               e = 0;
-               s_e = 0;
-
-               spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
-                       if (e < s_e)
-                               goto next_entry2;
-                       if (ipmr_fill_mroute(mrt, skb,
-                                            NETLINK_CB(cb->skb).portid,
-                                            cb->nlh->nlmsg_seq,
-                                            mfc, RTM_NEWROUTE,
-                                            NLM_F_MULTI) < 0) {
-                               spin_unlock_bh(&mfc_unres_lock);
-                               goto done;
-                       }
-next_entry2:
-                       e++;
-               }
-               spin_unlock_bh(&mfc_unres_lock);
-               e = 0;
-               s_e = 0;
-next_table:
-               t++;
-       }
-done:
-       rcu_read_unlock();
-
-       cb->args[1] = e;
-       cb->args[0] = t;
-
-       return skb->len;
+       return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+                               _ipmr_fill_mroute, &mfc_unres_lock);
 }
 
 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
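
The shared dumper needs a table iterator, which this excerpt does not show; the contract the loop in mr_rtm_dumproute() relies on is "NULL in, first table out; table in, next table out; NULL when exhausted". A hedged sketch of an ipmr_mr_table_iter() satisfying that contract (assumes the per-netns mr_tables list used elsewhere in this file):

    static struct mr_table *ipmr_mr_table_iter(struct net *net,
                                               struct mr_table *mrt)
    {
            struct mr_table *ret;

            if (!mrt)
                    ret = list_entry_rcu(net->ipv4.mr_tables.next,
                                         struct mr_table, list);
            else
                    ret = list_entry_rcu(mrt->list.next,
                                         struct mr_table, list);

            if (&ret->list == &net->ipv4.mr_tables)
                    return NULL;
            return ret;
    }
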
@@ -2946,31 +2825,11 @@ out:
 /* The /proc interfaces to multicast routing :
  * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
  */
-struct ipmr_vif_iter {
-       struct seq_net_private p;
-       struct mr_table *mrt;
-       int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
-                                          struct ipmr_vif_iter *iter,
-                                          loff_t pos)
-{
-       struct mr_table *mrt = iter->mrt;
-
-       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-               if (!VIF_EXISTS(mrt, iter->ct))
-                       continue;
-               if (pos-- == 0)
-                       return &mrt->vif_table[iter->ct];
-       }
-       return NULL;
-}
 
 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;
 
@@ -2981,26 +2840,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        iter->mrt = mrt;
 
        read_lock(&mrt_lock);
-       return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_vif_iter *iter = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr_table *mrt = iter->mrt;
-
-       ++*pos;
-       if (v == SEQ_START_TOKEN)
-               return ipmr_vif_seq_idx(net, iter, 0);
-
-       while (++iter->ct < mrt->maxvif) {
-               if (!VIF_EXISTS(mrt, iter->ct))
-                       continue;
-               return &mrt->vif_table[iter->ct];
-       }
-       return NULL;
+       return mr_vif_seq_start(seq, pos);
 }
 
 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2851,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct mr_table *mrt = iter->mrt;
 
        if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2859,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
        } else {
                const struct vif_device *vif = v;
-               const char *name =  vif->dev ? vif->dev->name : "none";
+               const char *name =  vif->dev ?
+                                   vif->dev->name : "none";
 
                seq_printf(seq,
                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2874,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
-       .next  = ipmr_vif_seq_next,
+       .next  = mr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
 };
@@ -3041,7 +2882,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
 static int ipmr_vif_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_vif_seq_ops,
-                           sizeof(struct ipmr_vif_iter));
+                           sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2892,8 @@ static const struct file_operations ipmr_vif_fops = {
        .release = seq_release_net,
 };
 
-struct ipmr_mfc_iter {
-       struct seq_net_private p;
-       struct mr_table *mrt;
-       struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
-                                         struct ipmr_mfc_iter *it, loff_t pos)
-{
-       struct mr_table *mrt = it->mrt;
-       struct mfc_cache *mfc;
-
-       rcu_read_lock();
-       it->cache = &mrt->mfc_cache_list;
-       list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
-               if (pos-- == 0)
-                       return mfc;
-       rcu_read_unlock();
-
-       spin_lock_bh(&mfc_unres_lock);
-       it->cache = &mrt->mfc_unres_queue;
-       list_for_each_entry(mfc, it->cache, list)
-               if (pos-- == 0)
-                       return mfc;
-       spin_unlock_bh(&mfc_unres_lock);
-
-       it->cache = NULL;
-       return NULL;
-}
-
-
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;
 
@@ -3092,54 +2901,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
        if (!mrt)
                return ERR_PTR(-ENOENT);
 
-       it->mrt = mrt;
-       it->cache = NULL;
-       return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr_table *mrt = it->mrt;
-       struct mfc_cache *mfc = v;
-
-       ++*pos;
-
-       if (v == SEQ_START_TOKEN)
-               return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-       if (mfc->list.next != it->cache)
-               return list_entry(mfc->list.next, struct mfc_cache, list);
-
-       if (it->cache == &mrt->mfc_unres_queue)
-               goto end_of_list;
-
-       /* exhausted cache_array, show unresolved */
-       rcu_read_unlock();
-       it->cache = &mrt->mfc_unres_queue;
-
-       spin_lock_bh(&mfc_unres_lock);
-       if (!list_empty(it->cache))
-               return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
-       spin_unlock_bh(&mfc_unres_lock);
-       it->cache = NULL;
-
-       return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct mr_table *mrt = it->mrt;
-
-       if (it->cache == &mrt->mfc_unres_queue)
-               spin_unlock_bh(&mfc_unres_lock);
-       else if (it->cache == &mrt->mfc_cache_list)
-               rcu_read_unlock();
+       return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2913,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
        } else {
                const struct mfc_cache *mfc = v;
-               const struct ipmr_mfc_iter *it = seq->private;
+               const struct mr_mfc_iter *it = seq->private;
                const struct mr_table *mrt = it->mrt;
 
                seq_printf(seq, "%08X %08X %-3hd",
                           (__force u32) mfc->mfc_mcastgrp,
                           (__force u32) mfc->mfc_origin,
-                          mfc->mfc_parent);
+                          mfc->_c.mfc_parent);
 
                if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
-                                  mfc->mfc_un.res.pkt,
-                                  mfc->mfc_un.res.bytes,
-                                  mfc->mfc_un.res.wrong_if);
-                       for (n = mfc->mfc_un.res.minvif;
-                            n < mfc->mfc_un.res.maxvif; n++) {
+                                  mfc->_c.mfc_un.res.pkt,
+                                  mfc->_c.mfc_un.res.bytes,
+                                  mfc->_c.mfc_un.res.wrong_if);
+                       for (n = mfc->_c.mfc_un.res.minvif;
+                            n < mfc->_c.mfc_un.res.maxvif; n++) {
                                if (VIF_EXISTS(mrt, n) &&
-                                   mfc->mfc_un.res.ttls[n] < 255)
+                                   mfc->_c.mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
                                           " %2d:%-3d",
-                                          n, mfc->mfc_un.res.ttls[n]);
+                                          n, mfc->_c.mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
@@ -3185,15 +2947,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
-       .next  = ipmr_mfc_seq_next,
-       .stop  = ipmr_mfc_seq_stop,
+       .next  = mr_mfc_seq_next,
+       .stop  = mr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-                           sizeof(struct ipmr_mfc_iter));
+                           sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ipmr_mfc_fops = {
@@ -3229,7 +2991,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
 
        ipmr_for_each_table(mrt, net) {
                struct vif_device *v = &mrt->vif_table[0];
-               struct mfc_cache *mfc;
+               struct mr_mfc *mfc;
                int vifi;
 
                /* Notify on table VIF entries */
@@ -3246,7 +3008,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
                /* Notify on table MFC entries */
                list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
                        call_ipmr_mfc_entry_notifier(nb, net,
-                                                    FIB_EVENT_ENTRY_ADD, mfc,
+                                                    FIB_EVENT_ENTRY_ADD,
+                                                    (struct mfc_cache *)mfc,
                                                     mrt->id);
        }
 
@@ -3327,6 +3090,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
 static struct pernet_operations ipmr_net_ops = {
        .init = ipmr_net_init,
        .exit = ipmr_net_exit,
+       .async = true,
 };
 
 int __init ip_mr_init(void)
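
The .async = true line recurs across this series' pernet_operations. Hedged reading: it marks an ops whose init/exit touch only their own per-net state, letting the registration machinery run them without serializing against every other pernet operation, so namespace setup and teardown can proceed in parallel. Registration itself is unchanged; a sketch with hypothetical callbacks:

    static int example_net_init(struct net *net)   /* hypothetical */
    {
            return 0;       /* per-subsystem setup only */
    }

    static void example_net_exit(struct net *net)  /* hypothetical */
    {
    }

    static struct pernet_operations example_net_ops = {
            .init  = example_net_init,
            .exit  = example_net_exit,
            .async = true,  /* no cross-subsystem dependencies */
    };

    static int __init example_module_init(void)
    {
            return register_pernet_subsys(&example_net_ops);
    }
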
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644 (file)
index 0000000..8ba55bf
--- /dev/null
@@ -0,0 +1,323 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+                    struct net_device *dev,
+                    unsigned long rate_limit,
+                    unsigned char threshold,
+                    unsigned short flags,
+                    unsigned short get_iflink_mask)
+{
+       v->dev = NULL;
+       v->bytes_in = 0;
+       v->bytes_out = 0;
+       v->pkt_in = 0;
+       v->pkt_out = 0;
+       v->rate_limit = rate_limit;
+       v->flags = flags;
+       v->threshold = threshold;
+       if (v->flags & get_iflink_mask)
+               v->link = dev_get_iflink(dev);
+       else
+               v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net))
+{
+       struct mr_table *mrt;
+
+       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+       if (!mrt)
+               return NULL;
+       mrt->id = id;
+       write_pnet(&mrt->net, net);
+
+       mrt->ops = *ops;
+       rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+       INIT_LIST_HEAD(&mrt->mfc_cache_list);
+       INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+       timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+       mrt->mroute_reg_vif_num = -1;
+       table_set(mrt, net);
+       return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+       struct rhlist_head *tmp, *list;
+       struct mr_mfc *c;
+
+       list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(c, tmp, list, mnode)
+               if (parent == -1 || parent == c->mfc_parent)
+                       return c;
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+       struct rhlist_head *tmp, *list;
+       struct mr_mfc *c;
+
+       list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+                              *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(c, tmp, list, mnode)
+               if (c->mfc_un.res.ttls[vifi] < 255)
+                       return c;
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+       struct rhlist_head *tmp, *list;
+       struct mr_mfc *c, *proxy;
+
+       list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+               if (c->mfc_un.res.ttls[vifi] < 255)
+                       return c;
+
+               /* It's ok if the vifi is part of the static tree */
+               proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+               if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+                       return c;
+       }
+
+       return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+       struct mr_table *mrt = iter->mrt;
+
+       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+               if (!VIF_EXISTS(mrt, iter->ct))
+                       continue;
+               if (pos-- == 0)
+                       return &mrt->vif_table[iter->ct];
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct mr_vif_iter *iter = seq->private;
+       struct net *net = seq_file_net(seq);
+       struct mr_table *mrt = iter->mrt;
+
+       ++*pos;
+       if (v == SEQ_START_TOKEN)
+               return mr_vif_seq_idx(net, iter, 0);
+
+       while (++iter->ct < mrt->maxvif) {
+               if (!VIF_EXISTS(mrt, iter->ct))
+                       continue;
+               return &mrt->vif_table[iter->ct];
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+                    struct mr_mfc_iter *it, loff_t pos)
+{
+       struct mr_table *mrt = it->mrt;
+       struct mr_mfc *mfc;
+
+       rcu_read_lock();
+       it->cache = &mrt->mfc_cache_list;
+       list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+               if (pos-- == 0)
+                       return mfc;
+       rcu_read_unlock();
+
+       spin_lock_bh(it->lock);
+       it->cache = &mrt->mfc_unres_queue;
+       list_for_each_entry(mfc, it->cache, list)
+               if (pos-- == 0)
+                       return mfc;
+       spin_unlock_bh(it->lock);
+
+       it->cache = NULL;
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                     loff_t *pos)
+{
+       struct mr_mfc_iter *it = seq->private;
+       struct net *net = seq_file_net(seq);
+       struct mr_table *mrt = it->mrt;
+       struct mr_mfc *c = v;
+
+       ++*pos;
+
+       if (v == SEQ_START_TOKEN)
+               return mr_mfc_seq_idx(net, seq->private, 0);
+
+       if (c->list.next != it->cache)
+               return list_entry(c->list.next, struct mr_mfc, list);
+
+       if (it->cache == &mrt->mfc_unres_queue)
+               goto end_of_list;
+
+       /* exhausted cache_array, show unresolved */
+       rcu_read_unlock();
+       it->cache = &mrt->mfc_unres_queue;
+
+       spin_lock_bh(it->lock);
+       if (!list_empty(it->cache))
+               return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+       spin_unlock_bh(it->lock);
+       it->cache = NULL;
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                  struct mr_mfc *c, struct rtmsg *rtm)
+{
+       struct rta_mfc_stats mfcs;
+       struct nlattr *mp_attr;
+       struct rtnexthop *nhp;
+       unsigned long lastuse;
+       int ct;
+
+       /* If cache is unresolved, don't try to parse IIF and OIF */
+       if (c->mfc_parent >= MAXVIFS) {
+               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+               return -ENOENT;
+       }
+
+       if (VIF_EXISTS(mrt, c->mfc_parent) &&
+           nla_put_u32(skb, RTA_IIF,
+                       mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+               return -EMSGSIZE;
+
+       if (c->mfc_flags & MFC_OFFLOAD)
+               rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+       mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+       if (!mp_attr)
+               return -EMSGSIZE;
+
+       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+               if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+                       struct vif_device *vif;
+
+                       nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+                       if (!nhp) {
+                               nla_nest_cancel(skb, mp_attr);
+                               return -EMSGSIZE;
+                       }
+
+                       nhp->rtnh_flags = 0;
+                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+                       vif = &mrt->vif_table[ct];
+                       nhp->rtnh_ifindex = vif->dev->ifindex;
+                       nhp->rtnh_len = sizeof(*nhp);
+               }
+       }
+
+       nla_nest_end(skb, mp_attr);
+
+       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+       mfcs.mfcs_packets = c->mfc_un.res.pkt;
+       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+                             RTA_PAD))
+               return -EMSGSIZE;
+
+       rtm->rtm_type = RTN_MULTICAST;
+       return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                    struct mr_table *(*iter)(struct net *net,
+                                             struct mr_table *mrt),
+                    int (*fill)(struct mr_table *mrt,
+                                struct sk_buff *skb,
+                                u32 portid, u32 seq, struct mr_mfc *c,
+                                int cmd, int flags),
+                    spinlock_t *lock)
+{
+       unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+       struct net *net = sock_net(skb->sk);
+       struct mr_table *mrt;
+       struct mr_mfc *mfc;
+
+       rcu_read_lock();
+       for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+               if (t < s_t)
+                       goto next_table;
+               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+                       if (e < s_e)
+                               goto next_entry;
+                       if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+                                cb->nlh->nlmsg_seq, mfc,
+                                RTM_NEWROUTE, NLM_F_MULTI) < 0)
+                               goto done;
+next_entry:
+                       e++;
+               }
+               e = 0;
+               s_e = 0;
+
+               spin_lock_bh(lock);
+               list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+                       if (e < s_e)
+                               goto next_entry2;
+                       if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+                                cb->nlh->nlmsg_seq, mfc,
+                                RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+                               spin_unlock_bh(lock);
+                               goto done;
+                       }
+next_entry2:
+                       e++;
+               }
+               spin_unlock_bh(lock);
+               e = 0;
+               s_e = 0;
+next_table:
+               t++;
+       }
+done:
+       rcu_read_unlock();
+
+       cb->args[1] = e;
+       cb->args[0] = t;
+
+       return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
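
mr_table_alloc() leaves two family hooks: the expiry timer callback and table_set(), which links the fresh table into per-family bookkeeping. A hedged usage sketch (every example_* name is hypothetical; only the mr_table_alloc() signature comes from the code above):

    static struct mr_table_ops example_mr_table_ops;  /* rht params, cmparg */

    static void example_expire_process(struct timer_list *t)
    {
            /* walk mfc_unres_queue, drop timed-out unresolved entries */
    }

    static void example_table_set(struct mr_table *mrt, struct net *net)
    {
            /* e.g. link mrt into a per-netns table list */
    }

    static struct mr_table *example_new_table(struct net *net, u32 id)
    {
            /* returns NULL on allocation failure; caller must check */
            return mr_table_alloc(net, id, &example_mr_table_ops,
                                  example_expire_process, example_table_set);
    }
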
index 4ffe302..e3e420f 100644 (file)
@@ -252,6 +252,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
                        }
                        if (table_base + v
                            != arpt_next_entry(e)) {
+                               if (unlikely(stackidx >= private->stacksize)) {
+                                       verdict = NF_DROP;
+                                       break;
+                               }
                                jumpstack[stackidx++] = e;
                        }
 
index 9a71f31..d4f7584 100644 (file)
@@ -330,8 +330,13 @@ ipt_do_table(struct sk_buff *skb,
                                continue;
                        }
                        if (table_base + v != ipt_next_entry(e) &&
-                           !(e->ip.flags & IPT_F_GOTO))
+                           !(e->ip.flags & IPT_F_GOTO)) {
+                               if (unlikely(stackidx >= private->stacksize)) {
+                                       verdict = NF_DROP;
+                                       break;
+                               }
                                jumpstack[stackidx++] = e;
+                       }
 
                        e = get_entry(table_base, v);
                        continue;
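
This hunk and the arp_tables one above add the same defensive check: confirm the per-cpu rule jump stack has room before pushing a return entry, and drop the packet rather than write past the allocation. Distilled into a helper (sketch only; the tree open-codes it as shown):

    static bool jumpstack_push(void **jumpstack, unsigned int *stackidx,
                               unsigned int stacksize, void *e)
    {
            if (*stackidx >= stacksize)
                    return false;   /* caller sets verdict = NF_DROP */
            jumpstack[(*stackidx)++] = e;
            return true;
    }
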
@@ -1911,6 +1916,7 @@ static void __net_exit ip_tables_net_exit(struct net *net)
 static struct pernet_operations ip_tables_net_ops = {
        .init = ip_tables_net_init,
        .exit = ip_tables_net_exit,
+       .async = true,
 };
 
 static int __init ip_tables_init(void)
index 3a84a60..08b3e48 100644 (file)
@@ -107,12 +107,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
 
        local_bh_disable();
        if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
-               list_del_rcu(&c->list);
-               spin_unlock(&cn->lock);
-               local_bh_enable();
-
-               unregister_netdevice_notifier(&c->notifier);
-
                /* In case anyone still accesses the file, the open/close
                 * functions are also incrementing the refcount on their own,
                 * so it's safe to remove the entry even if it's in use. */
@@ -120,6 +114,12 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
                if (cn->procdir)
                        proc_remove(c->pde);
 #endif
+               list_del_rcu(&c->list);
+               spin_unlock(&cn->lock);
+               local_bh_enable();
+
+               unregister_netdevice_notifier(&c->notifier);
+
                return;
        }
        local_bh_enable();
@@ -154,8 +154,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
 #endif
                if (unlikely(!refcount_inc_not_zero(&c->refcount)))
                        c = NULL;
-               else if (entry)
-                       refcount_inc(&c->entries);
+               else if (entry) {
+                       if (unlikely(!refcount_inc_not_zero(&c->entries))) {
+                               clusterip_config_put(c);
+                               c = NULL;
+                       }
+               }
        }
        rcu_read_unlock_bh();
 
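The CLUSTERIP lookup fix closes a resurrection race: another CPU may already have dropped c->entries to zero and begun teardown, so a plain refcount_inc() would revive a dying config. The general pattern, as a hedged sketch (config_get_live() is an illustrative name, not a tree function):

    /* Take a reference only while the object is provably still live. */
    static struct clusterip_config *config_get_live(struct clusterip_config *c)
    {
            if (!refcount_inc_not_zero(&c->entries)) {
                    clusterip_config_put(c);  /* undo the ref taken earlier */
                    return NULL;              /* treat as a lookup miss */
            }
            return c;
    }
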
@@ -836,6 +840,7 @@ static struct pernet_operations clusterip_net_ops = {
        .exit = clusterip_net_exit,
        .id   = &clusterip_net_id,
        .size = sizeof(struct clusterip_net),
+       .async = true,
 };
 
 static int __init clusterip_tg_init(void)
index 2707652..aaaf9a8 100644 (file)
@@ -98,17 +98,15 @@ static int ecn_tg_check(const struct xt_tgchk_param *par)
        const struct ipt_ECN_info *einfo = par->targinfo;
        const struct ipt_entry *e = par->entryinfo;
 
-       if (einfo->operation & IPT_ECN_OP_MASK) {
-               pr_info("unsupported ECN operation %x\n", einfo->operation);
+       if (einfo->operation & IPT_ECN_OP_MASK)
                return -EINVAL;
-       }
-       if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
-               pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
+
+       if (einfo->ip_ect & ~IPT_ECN_IP_MASK)
                return -EINVAL;
-       }
+
        if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
            (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
-               pr_info("cannot use TCP operations on a non-tcp rule\n");
+               pr_info_ratelimited("cannot use operation on non-tcp rule\n");
                return -EINVAL;
        }
        return 0;
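
This and the following netfilter hunks share one rationale: these checkentry validators run when a ruleset is loaded, which an unprivileged user can trigger inside a user namespace, so an unbounded pr_info() per failure is a log-flooding vector. Messages worth keeping become ratelimited; purely redundant ones are dropped. Schematically (condition and message hypothetical):

    if (invalid_option)
            pr_info_ratelimited("xt_example: unsupported option\n");
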
index 8bd0d7b..e8bed33 100644 (file)
@@ -74,13 +74,13 @@ static int reject_tg_check(const struct xt_tgchk_param *par)
        const struct ipt_entry *e = par->entryinfo;
 
        if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
-               pr_info("ECHOREPLY no longer supported.\n");
+               pr_info_ratelimited("ECHOREPLY no longer supported.\n");
                return -EINVAL;
        } else if (rejinfo->with == IPT_TCP_RESET) {
                /* Must specify that it's a TCP packet */
                if (e->ip.proto != IPPROTO_TCP ||
                    (e->ip.invflags & XT_INV_PROTO)) {
-                       pr_info("TCP_RESET invalid for non-tcp\n");
+                       pr_info_ratelimited("TCP_RESET invalid for non-tcp\n");
                        return -EINVAL;
                }
        }
index 37fb955..fd01f13 100644 (file)
@@ -105,14 +105,14 @@ static int rpfilter_check(const struct xt_mtchk_param *par)
        const struct xt_rpfilter_info *info = par->matchinfo;
        unsigned int options = ~XT_RPFILTER_OPTION_MASK;
        if (info->flags & options) {
-               pr_info("unknown options encountered");
+               pr_info_ratelimited("unknown options\n");
                return -EINVAL;
        }
 
        if (strcmp(par->table, "mangle") != 0 &&
            strcmp(par->table, "raw") != 0) {
-               pr_info("match only valid in the \'raw\' "
-                       "or \'mangle\' tables, not \'%s\'.\n", par->table);
+               pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n",
+                                   par->table);
                return -EINVAL;
        }
 
index 9ac92ea..c1c136a 100644 (file)
@@ -87,6 +87,7 @@ static void __net_exit iptable_filter_net_exit(struct net *net)
 static struct pernet_operations iptable_filter_net_ops = {
        .init = iptable_filter_net_init,
        .exit = iptable_filter_net_exit,
+       .async = true,
 };
 
 static int __init iptable_filter_init(void)
index a0d3ad6..57244b6 100644 (file)
@@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net)
 
 static struct pernet_operations defrag4_net_ops = {
        .exit = defrag4_net_exit,
+       .async = true,
 };
 
 static int __init nf_defrag_init(void)
index b8f0db5..0164def 100644 (file)
@@ -1204,6 +1204,7 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v4_net_ops = {
        .init = ping_v4_proc_init_net,
        .exit = ping_v4_proc_exit_net,
+       .async = true,
 };
 
 int __init ping_proc_init(void)
index dc5edc8..d97e83b 100644 (file)
@@ -549,10 +549,10 @@ static __net_exit void ip_proc_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ip_proc_ops = {
        .init = ip_proc_init_net,
        .exit = ip_proc_exit_net,
+       .async = true,
 };
 
 int __init ip_misc_proc_init(void)
 {
        return register_pernet_subsys(&ip_proc_ops);
 }
-
index 9b367fc..54648d2 100644 (file)
@@ -1156,6 +1156,7 @@ static __net_exit void raw_exit_net(struct net *net)
 static __net_initdata struct pernet_operations raw_net_ops = {
        .init = raw_init_net,
        .exit = raw_exit_net,
+       .async = true,
 };
 
 int __init raw_proc_init(void)
index 49cc1c1..6a7b3cb 100644 (file)
@@ -417,6 +417,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
        .init = ip_rt_do_proc_init,
        .exit = ip_rt_do_proc_exit,
+       .async = true,
 };
 
 static int __init ip_rt_proc_init(void)
@@ -1508,7 +1509,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
                rt->rt_pmtu = 0;
                rt->rt_gateway = 0;
                rt->rt_uses_gateway = 0;
-               rt->rt_table_id = 0;
                INIT_LIST_HEAD(&rt->rt_uncached);
 
                rt->dst.output = ip_output;
@@ -1644,19 +1644,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
        spin_unlock_bh(&fnhe_lock);
 }
 
-static void set_lwt_redirect(struct rtable *rth)
-{
-       if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
-               rth->dst.lwtstate->orig_output = rth->dst.output;
-               rth->dst.output = lwtunnel_output;
-       }
-
-       if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
-               rth->dst.lwtstate->orig_input = rth->dst.input;
-               rth->dst.input = lwtunnel_input;
-       }
-}
-
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
                           const struct fib_result *res,
@@ -1739,15 +1726,13 @@ rt_cache:
        }
 
        rth->rt_is_input = 1;
-       if (res->table)
-               rth->rt_table_id = res->table->tb_id;
        RT_CACHE_STAT_INC(in_slow_tot);
 
        rth->dst.input = ip_forward;
 
        rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
                       do_cache);
-       set_lwt_redirect(rth);
+       lwtunnel_set_redirect(&rth->dst);
        skb_dst_set(skb, &rth->dst);
 out:
        err = 0;
@@ -1763,44 +1748,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
                                 struct flow_keys *hash_keys)
 {
        const struct iphdr *outer_iph = ip_hdr(skb);
+       const struct iphdr *key_iph = outer_iph;
        const struct iphdr *inner_iph;
        const struct icmphdr *icmph;
        struct iphdr _inner_iph;
        struct icmphdr _icmph;
 
-       hash_keys->addrs.v4addrs.src = outer_iph->saddr;
-       hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
        if (likely(outer_iph->protocol != IPPROTO_ICMP))
-               return;
+               goto out;
 
        if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
-               return;
+               goto out;
 
        icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
                                   &_icmph);
        if (!icmph)
-               return;
+               goto out;
 
        if (icmph->type != ICMP_DEST_UNREACH &&
            icmph->type != ICMP_REDIRECT &&
            icmph->type != ICMP_TIME_EXCEEDED &&
            icmph->type != ICMP_PARAMETERPROB)
-               return;
+               goto out;
 
        inner_iph = skb_header_pointer(skb,
                                       outer_iph->ihl * 4 + sizeof(_icmph),
                                       sizeof(_inner_iph), &_inner_iph);
        if (!inner_iph)
-               return;
-       hash_keys->addrs.v4addrs.src = inner_iph->saddr;
-       hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+               goto out;
+
+       key_iph = inner_iph;
+out:
+       hash_keys->addrs.v4addrs.src = key_iph->saddr;
+       hash_keys->addrs.v4addrs.dst = key_iph->daddr;
 }
 
 /* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-                      const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+                      const struct sk_buff *skb, struct flow_keys *flkeys)
 {
-       struct net *net = fi->fib_net;
        struct flow_keys hash_keys;
        u32 mhash;
 
@@ -1824,13 +1810,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
                        /* short-circuit if we already have L4 hash present */
                        if (skb->l4_hash)
                                return skb_get_hash_raw(skb) >> 1;
+
                        memset(&hash_keys, 0, sizeof(hash_keys));
-                       skb_flow_dissect_flow_keys(skb, &keys, flag);
-                       hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
-                       hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
-                       hash_keys.ports.src = keys.ports.src;
-                       hash_keys.ports.dst = keys.ports.dst;
-                       hash_keys.basic.ip_proto = keys.basic.ip_proto;
+
+                       if (!flkeys) {
+                               skb_flow_dissect_flow_keys(skb, &keys, flag);
+                               flkeys = &keys;
+                       }
+
+                       hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+                       hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+                       hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+                       hash_keys.ports.src = flkeys->ports.src;
+                       hash_keys.ports.dst = flkeys->ports.dst;
+                       hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
                } else {
                        memset(&hash_keys, 0, sizeof(hash_keys));
                        hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1846,17 +1839,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 
        return mhash >> 1;
 }
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
 static int ip_mkroute_input(struct sk_buff *skb,
                            struct fib_result *res,
                            struct in_device *in_dev,
-                           __be32 daddr, __be32 saddr, u32 tos)
+                           __be32 daddr, __be32 saddr, u32 tos,
+                           struct flow_keys *hkeys)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi && res->fi->fib_nhs > 1) {
-               int h = fib_multipath_hash(res->fi, NULL, skb);
+               int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
 
                fib_select_multipath(res, h);
        }
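
Taken together, the route.c hunks thread a single flow dissection through the input path: the keys are computed once, early, and handed down so the multipath hash no longer re-dissects the skb. The resulting call shape (helper names from the hunks; bodies elided):

    /*
     * ip_route_input_slow()
     *   fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)
     *   fib_lookup(net, &fl4, res, 0)
     *   ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys)
     *     fib_multipath_hash(net, NULL, skb, flkeys)   -- reuses the keys
     */
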
@@ -1882,13 +1875,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                               struct fib_result *res)
 {
        struct in_device *in_dev = __in_dev_get_rcu(dev);
+       struct flow_keys *flkeys = NULL, _flkeys;
+       struct net    *net = dev_net(dev);
        struct ip_tunnel_info *tun_info;
-       struct flowi4   fl4;
+       int             err = -EINVAL;
        unsigned int    flags = 0;
        u32             itag = 0;
        struct rtable   *rth;
-       int             err = -EINVAL;
-       struct net    *net = dev_net(dev);
+       struct flowi4   fl4;
        bool do_cache;
 
        /* IP on this device is disabled. */
@@ -1947,6 +1941,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        fl4.daddr = daddr;
        fl4.saddr = saddr;
        fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+               flkeys = &_flkeys;
+
        err = fib_lookup(net, &fl4, res, 0);
        if (err != 0) {
                if (!IN_DEV_FORWARD(in_dev))
@@ -1972,7 +1970,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (res->type != RTN_UNICAST)
                goto martian_destination;
 
-       err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+       err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:   return err;
 
 brd_input:
@@ -2014,8 +2012,6 @@ local_input:
        rth->dst.tclassid = itag;
 #endif
        rth->rt_is_input = 1;
-       if (res->table)
-               rth->rt_table_id = res->table->tb_id;
 
        RT_CACHE_STAT_INC(in_slow_tot);
        if (res->type == RTN_UNREACHABLE) {
@@ -2244,8 +2240,6 @@ add:
                return ERR_PTR(-ENOBUFS);
 
        rth->rt_iif = orig_oif;
-       if (res->table)
-               rth->rt_table_id = res->table->tb_id;
 
        RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2267,7 +2261,7 @@ add:
        }
 
        rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
-       set_lwt_redirect(rth);
+       lwtunnel_set_redirect(&rth->dst);
 
        return rth;
 }
@@ -2775,7 +2769,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                rt->rt_flags |= RTCF_NOTIFY;
 
        if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
-               table_id = rt->rt_table_id;
+               table_id = res.table ? res.table->tb_id : 0;
 
        if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
                if (!res.fi) {
@@ -2994,6 +2988,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_route_ops = {
        .init = sysctl_route_net_init,
        .exit = sysctl_route_net_exit,
+       .async = true,
 };
 #endif
 
@@ -3007,6 +3002,7 @@ static __net_init int rt_genid_init(struct net *net)
 
 static __net_initdata struct pernet_operations rt_genid_ops = {
        .init = rt_genid_init,
+       .async = true,
 };
 
 static int __net_init ipv4_inetpeer_init(struct net *net)
@@ -3032,6 +3028,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net)
 static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
        .init   =       ipv4_inetpeer_init,
        .exit   =       ipv4_inetpeer_exit,
+       .async  =       true,
 };
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
index 93e1721..011de9a 100644 (file)
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 
        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (write && ret == 0)
-               call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+               call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
 
        return ret;
 }
@@ -1219,6 +1219,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
        .init = ipv4_sysctl_init_net,
        .exit = ipv4_sysctl_exit_net,
+       .async = true,
 };
 
 static __init int sysctl_ipv4_init(void)
index 48636ae..a335397 100644 (file)
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
        sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 
        sk_sockets_allocated_inc(sk);
+       sk->sk_route_forced_caps = NETIF_F_GSO;
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
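Forcing NETIF_F_GSO into sk_route_forced_caps is what makes the sk_can_gso() and sk_check_csum_caps() tests below removable: TCP now acts as if every route supports GSO, with software GSO as the fallback when the device lacks it. Hedged sketch of the mechanism, with the forced bits OR-ed back in whenever route capabilities are recomputed (paraphrased, not the exact tree code):

    void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
    {
            sk_dst_set(sk, dst);
            sk->sk_route_caps = dst->dev->features;
            sk->sk_route_caps |= sk->sk_route_forced_caps;  /* always GSO */
            /* checksum/GSO refinements elided */
    }
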
@@ -897,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
        struct tcp_sock *tp = tcp_sk(sk);
        u32 new_size_goal, size_goal;
 
-       if (!large_allowed || !sk_can_gso(sk))
+       if (!large_allowed)
                return mss_now;
 
        /* Note : tcp_tso_autosize() will eventually split this later */
@@ -1062,8 +1063,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
                        size_t size, int flags)
 {
-       if (!(sk->sk_route_caps & NETIF_F_SG) ||
-           !sk_check_csum_caps(sk))
+       if (!(sk->sk_route_caps & NETIF_F_SG))
                return sock_no_sendpage_locked(sk, page, offset, size, flags);
 
        tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
@@ -1102,27 +1102,11 @@ static int linear_payload_sz(bool first_skb)
        return 0;
 }
 
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
 {
-       const struct tcp_sock *tp = tcp_sk(sk);
-       int tmp = tp->mss_cache;
-
-       if (sg) {
-               if (zc)
-                       return 0;
-
-               if (sk_can_gso(sk)) {
-                       tmp = linear_payload_sz(first_skb);
-               } else {
-                       int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
-                       if (tmp >= pgbreak &&
-                           tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
-                               tmp = pgbreak;
-               }
-       }
-
-       return tmp;
+       if (zc)
+               return 0;
+       return linear_payload_sz(first_skb);
 }
 
 void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1171,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
        int flags, err, copied = 0;
        int mss_now = 0, size_goal, copied_syn = 0;
        bool process_backlog = false;
-       bool sg, zc = false;
+       bool zc = false;
        long timeo;
 
        flags = msg->msg_flags;
@@ -1205,7 +1189,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
                        goto out_err;
                }
 
-               zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+               zc = sk->sk_route_caps & NETIF_F_SG;
                if (!zc)
                        uarg->zerocopy = 0;
        }
@@ -1268,18 +1252,12 @@ restart:
        if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
                goto do_error;
 
-       sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
        while (msg_data_left(msg)) {
                int copy = 0;
-               int max = size_goal;
 
                skb = tcp_write_queue_tail(sk);
-               if (skb) {
-                       if (skb->ip_summed == CHECKSUM_NONE)
-                               max = mss_now;
-                       copy = max - skb->len;
-               }
+               if (skb)
+                       copy = size_goal - skb->len;
 
                if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
                        bool first_skb;
@@ -1297,22 +1275,17 @@ new_segment:
                                goto restart;
                        }
                        first_skb = tcp_rtx_and_write_queues_empty(sk);
-                       linear = select_size(sk, sg, first_skb, zc);
+                       linear = select_size(first_skb, zc);
                        skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
                                                  first_skb);
                        if (!skb)
                                goto wait_for_memory;
 
                        process_backlog = true;
-                       /*
-                        * Check whether we can use HW checksum.
-                        */
-                       if (sk_check_csum_caps(sk))
-                               skb->ip_summed = CHECKSUM_PARTIAL;
+                       skb->ip_summed = CHECKSUM_PARTIAL;
 
                        skb_entail(sk, skb);
                        copy = size_goal;
-                       max = size_goal;
 
                        /* All packets are restored as if they have
                         * already been sent. skb_mstamp isn't set to
@@ -1343,7 +1316,7 @@ new_segment:
 
                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
-                               if (i >= sysctl_max_skb_frags || !sg) {
+                               if (i >= sysctl_max_skb_frags) {
                                        tcp_mark_push(tp, skb);
                                        goto new_segment;
                                }
@@ -1396,7 +1369,7 @@ new_segment:
                        goto out;
                }
 
-               if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+               if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
                        continue;
 
                if (forced_push(tp)) {
index a471f69..c92014c 100644 (file)
@@ -97,10 +97,9 @@ struct bbr {
                packet_conservation:1,  /* use packet conservation? */
                restore_cwnd:1,      /* decided to revert cwnd to old value */
                round_start:1,       /* start of packet-timed tx->ack round? */
-               tso_segs_goal:7,     /* segments we want in each skb we send */
                idle_restart:1,      /* restarting after idle? */
                probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
-               unused:5,
+               unused:12,
                lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
                lt_rtt_cnt:7,        /* round trips in long-term interval */
                lt_use_bw:1;         /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
                sk->sk_pacing_rate = rate;
 }
 
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
 {
-       struct bbr *bbr = inet_csk_ca(sk);
-
-       return bbr->tso_segs_goal;
+       return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct bbr *bbr = inet_csk_ca(sk);
-       u32 min_segs;
+       u32 segs, bytes;
+
+       /* Sort of tcp_tso_autosize() but ignoring
+        * driver provided sk_gso_max_size.
+        */
+       bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+                     GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+       segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
-       min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
-       bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
-                                0x7FU);
+       return min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
        cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
 
        /* Allow enough full-sized skbs in flight to utilize end systems. */
-       cwnd += 3 * bbr->tso_segs_goal;
+       cwnd += 3 * bbr_tso_segs_goal(sk);
 
        /* Reduce delayed ACKs by rounding up cwnd to the next even number. */
        cwnd = (cwnd + 1) & ~1U;
@@ -824,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
 
        bw = bbr_bw(sk);
        bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-       bbr_set_tso_segs_goal(sk);
        bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
 }
 
@@ -834,7 +834,6 @@ static void bbr_init(struct sock *sk)
        struct bbr *bbr = inet_csk_ca(sk);
 
        bbr->prior_cwnd = 0;
-       bbr->tso_segs_goal = 0;  /* default segs per skb until first ACK */
        bbr->rtt_cnt = 0;
        bbr->next_rtt_delivered = 0;
        bbr->prev_ca_state = TCP_CA_Open;
@@ -936,7 +935,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .undo_cwnd      = bbr_undo_cwnd,
        .cwnd_event     = bbr_cwnd_event,
        .ssthresh       = bbr_ssthresh,
-       .tso_segs_goal  = bbr_tso_segs_goal,
+       .min_tso_segs   = bbr_min_tso_segs,
        .get_info       = bbr_get_info,
        .set_state      = bbr_set_state,
 };
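
The tcp_bbr.c hunks drop the cached 7-bit tso_segs_goal field and recompute the goal from the pacing rate whenever it is needed, which also frees the bits returned to "unused". A userspace sketch of the arithmetic in bbr_tso_segs_goal(); the two macros below are illustrative placeholders, not the exact kernel values:

#include <stdint.h>

#define GSO_MAX_SIZE	65536u	/* placeholder */
#define MAX_TCP_HEADER	320u	/* placeholder */

/* Derive the per-skb byte budget from the pacing rate, convert it to
 * segments, apply the BBR minimum, and clamp to the 7-bit ceiling the
 * old bitfield imposed (0x7F).  Assumes mss > 0.
 */
static uint32_t tso_segs_goal(uint64_t pacing_rate, unsigned int pacing_shift,
			      uint32_t mss, uint32_t min_segs)
{
	uint64_t budget = pacing_rate >> pacing_shift;
	uint32_t cap = GSO_MAX_SIZE - 1 - MAX_TCP_HEADER;
	uint32_t bytes = budget < cap ? (uint32_t)budget : cap;
	uint32_t segs = bytes / mss;

	if (segs < min_segs)
		segs = min_segs;
	return segs < 0x7F ? segs : 0x7F;
}
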
index 575d3c1..06b9c47 100644 (file)
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
        int len;
        int in_sack;
 
-       if (!sk_can_gso(sk))
-               goto fallback;
-
        /* Normally R but no L won't result in plain S */
        if (!dup_sack &&
            (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@@ -5870,10 +5867,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
        tp->rx_opt.saw_tstamp = 0;
        req = tp->fastopen_rsk;
        if (req) {
+               bool req_stolen;
+
                WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
                    sk->sk_state != TCP_FIN_WAIT1);
 
-               if (!tcp_check_req(sk, skb, req, true))
+               if (!tcp_check_req(sk, skb, req, true, &req_stolen))
                        goto discard;
        }
 
index f8ad397..2c6aec2 100644 (file)
@@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
        struct tcphdr *th = tcp_hdr(skb);
 
-       if (skb->ip_summed == CHECKSUM_PARTIAL) {
-               th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
-               skb->csum_start = skb_transport_header(skb) - skb->head;
-               skb->csum_offset = offsetof(struct tcphdr, check);
-       } else {
-               th->check = tcp_v4_check(skb->len, saddr, daddr,
-                                        csum_partial(th,
-                                                     th->doff << 2,
-                                                     skb->csum));
-       }
+       th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+       skb->csum_start = skb_transport_header(skb) - skb->head;
+       skb->csum_offset = offsetof(struct tcphdr, check);
 }
 
 /* This routine computes an IPv4 TCP checksum. */
@@ -1672,6 +1665,7 @@ process:
 
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                struct request_sock *req = inet_reqsk(sk);
+               bool req_stolen = false;
                struct sock *nsk;
 
                sk = req->rsk_listener;
@@ -1694,10 +1688,20 @@ process:
                        th = (const struct tcphdr *)skb->data;
                        iph = ip_hdr(skb);
                        tcp_v4_fill_cb(skb, iph, th);
-                       nsk = tcp_check_req(sk, skb, req, false);
+                       nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
                }
                if (!nsk) {
                        reqsk_put(req);
+                       if (req_stolen) {
+                               /* Another cpu got exclusive access to req
+                                * and created a full blown socket.
+                                * Try to feed this packet to this socket
+                                * instead of discarding it.
+                                */
+                               tcp_v4_restore_cb(skb);
+                               sock_put(sk);
+                               goto lookup;
+                       }
                        goto discard_and_relse;
                }
                if (nsk == sk) {
@@ -2387,6 +2391,7 @@ static void __net_exit tcp4_proc_exit_net(struct net *net)
 static struct pernet_operations tcp4_net_ops = {
        .init = tcp4_proc_init_net,
        .exit = tcp4_proc_exit_net,
+       .async = true,
 };
 
 int __init tcp4_proc_init(void)
@@ -2573,6 +2578,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
        .init      = tcp_sk_init,
        .exit      = tcp_sk_exit,
        .exit_batch = tcp_sk_exit_batch,
+       .async     = true,
 };
 
 void __init tcp_v4_init(void)
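
The req_stolen plumbing above closes a race in the listener path: tcp_check_req() can fail because another CPU took ownership of the request and already created the child socket, in which case the packet should be fed to that child rather than dropped. A self-contained sketch of the retry flow; every type and helper here is a hypothetical stand-in, and only the control flow mirrors the tcp_v4_rcv() hunk:

#include <stdbool.h>
#include <stddef.h>

struct pkt  { int id; };
struct sock { bool new_syn_recv; };

static struct sock *lookup(struct pkt *p)	{ (void)p; return NULL; }
static struct sock *check_req(struct sock *sk, struct pkt *p, bool *stolen)
{ (void)p; *stolen = false; return sk; }
static void sock_release(struct sock *sk)	{ (void)sk; }

static struct sock *deliver(struct pkt *p)
{
	struct sock *sk, *nsk;
	bool req_stolen;

again:
	sk = lookup(p);
	if (sk && sk->new_syn_recv) {
		nsk = check_req(sk, p, &req_stolen);
		if (!nsk) {
			sock_release(sk);
			if (req_stolen)
				goto again;	/* child exists now; find it */
			return NULL;		/* genuine failure: discard */
		}
		sk = nsk;
	}
	return sk;
}
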
index 03b51cd..aa6fea9 100644 (file)
@@ -1024,6 +1024,7 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
        .init           =       tcp_net_metrics_init,
        .exit_batch     =       tcp_net_metrics_exit_batch,
+       .async          =       true,
 };
 
 void __init tcp_metrics_init(void)
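
Many hunks in this series add ".async = true" to pernet_operations, marking the subsystem's per-namespace init/exit as safe to run without serializing on the global net_mutex. A kernel-style sketch of a subsystem so marked; example_net_init/exit are hypothetical:

static int __net_init example_net_init(struct net *net)
{
	return 0;	/* hypothetical per-netns setup */
}

static void __net_exit example_net_exit(struct net *net)
{
	/* hypothetical per-netns teardown */
}

static struct pernet_operations example_net_ops = {
	.init  = example_net_init,
	.exit  = example_net_exit,
	.async = true,	/* setup/teardown may run unserialized */
};
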
index a8384b0..e7e3643 100644 (file)
@@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
 
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                           struct request_sock *req,
-                          bool fastopen)
+                          bool fastopen, bool *req_stolen)
 {
        struct tcp_options_received tmp_opt;
        struct sock *child;
@@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
        sock_rps_save_rxhash(child, skb);
        tcp_synack_rtt_meas(child, req);
+       *req_stolen = !own_req;
        return inet_csk_complete_hashdance(sk, child, req, own_req);
 
 listen_overflow:
index e9f985e..383cac0 100644 (file)
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 /* Initialize TSO segments for a packet. */
 static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 {
-       if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+       if (skb->len <= mss_now) {
                /* Avoid the costly divide in the normal
                 * non-TSO case.
                 */
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
        TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
        tcp_skb_fragment_eor(skb, buff);
 
-       if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
-               /* Copy and checksum data tail into the new buffer. */
-               buff->csum = csum_partial_copy_nocheck(skb->data + len,
-                                                      skb_put(buff, nsize),
-                                                      nsize, 0);
-
-               skb_trim(skb, len);
-
-               skb->csum = csum_block_sub(skb->csum, buff->csum, len);
-       } else {
-               skb->ip_summed = CHECKSUM_PARTIAL;
-               skb_split(skb, buff, len);
-       }
+       skb_split(skb, buff, len);
 
-       buff->ip_summed = skb->ip_summed;
+       buff->ip_summed = CHECKSUM_PARTIAL;
 
        buff->tstamp = skb->tstamp;
        tcp_fragment_tstamp(skb, buff);
@@ -1715,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-                    int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+                           int min_tso_segs)
 {
        u32 bytes, segs;
 
@@ -1730,9 +1718,8 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
         */
        segs = max_t(u32, bytes / mss_now, min_tso_segs);
 
-       return min_t(u32, segs, sk->sk_gso_max_segs);
+       return segs;
 }
-EXPORT_SYMBOL(tcp_tso_autosize);
 
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
@@ -1740,11 +1727,14 @@ EXPORT_SYMBOL(tcp_tso_autosize);
 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
        const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-       u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+       u32 min_tso, tso_segs;
+
+       min_tso = ca_ops->min_tso_segs ?
+                       ca_ops->min_tso_segs(sk) :
+                       sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
 
-       return tso_segs ? :
-               tcp_tso_autosize(sk, mss_now,
-                                sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+       tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+       return min_t(u32, tso_segs, sk->sk_gso_max_segs);
 }
 
 /* Returns the portion of skb which can be sent right away */
@@ -1901,7 +1891,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 
        tcp_skb_fragment_eor(skb, buff);
 
-       buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+       buff->ip_summed = CHECKSUM_PARTIAL;
        skb_split(skb, buff, len);
        tcp_fragment_tstamp(skb, buff);
 
@@ -2027,6 +2017,24 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
        }
 }
 
+static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+{
+       struct sk_buff *skb, *next;
+
+       skb = tcp_send_head(sk);
+       tcp_for_write_queue_from_safe(skb, next, sk) {
+               if (len <= skb->len)
+                       break;
+
+               if (unlikely(TCP_SKB_CB(skb)->eor))
+                       return false;
+
+               len -= skb->len;
+       }
+
+       return true;
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets.  This discovers routing
@@ -2099,6 +2107,9 @@ static int tcp_mtu_probe(struct sock *sk)
                        return 0;
        }
 
+       if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
+               return -1;
+
        /* We're allowed to probe.  Build it now. */
        nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
        if (!nskb)
@@ -2113,7 +2124,7 @@ static int tcp_mtu_probe(struct sock *sk)
        TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
        TCP_SKB_CB(nskb)->sacked = 0;
        nskb->csum = 0;
-       nskb->ip_summed = skb->ip_summed;
+       nskb->ip_summed = CHECKSUM_PARTIAL;
 
        tcp_insert_write_queue_before(nskb, skb, sk);
        tcp_highest_sack_replace(sk, skb, nskb);
@@ -2121,19 +2132,16 @@ static int tcp_mtu_probe(struct sock *sk)
        len = 0;
        tcp_for_write_queue_from_safe(skb, next, sk) {
                copy = min_t(int, skb->len, probe_size - len);
-               if (nskb->ip_summed) {
-                       skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
-               } else {
-                       __wsum csum = skb_copy_and_csum_bits(skb, 0,
-                                                            skb_put(nskb, copy),
-                                                            copy, 0);
-                       nskb->csum = csum_block_add(nskb->csum, csum, len);
-               }
+               skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 
                if (skb->len <= copy) {
                        /* We've eaten all the data from this skb.
                         * Throw it away. */
                        TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+                       /* If this is the last SKB we copy and eor is set,
+                        * we need to propagate it to the new skb.
+                        */
+                       TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
                        tcp_unlink_write_queue(skb, sk);
                        sk_wmem_free_skb(sk, skb);
                } else {
@@ -2141,9 +2149,6 @@ static int tcp_mtu_probe(struct sock *sk)
                                                   ~(TCPHDR_FIN|TCPHDR_PSH);
                        if (!skb_shinfo(skb)->nr_frags) {
                                skb_pull(skb, copy);
-                               if (skb->ip_summed != CHECKSUM_PARTIAL)
-                                       skb->csum = csum_partial(skb->data,
-                                                                skb->len, 0);
                        } else {
                                __pskb_trim_head(skb, copy);
                                tcp_set_skb_tso_segs(skb, mss_now);
@@ -2721,12 +2726,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
        }
        tcp_highest_sack_replace(sk, next_skb, skb);
 
-       if (next_skb->ip_summed == CHECKSUM_PARTIAL)
-               skb->ip_summed = CHECKSUM_PARTIAL;
-
-       if (skb->ip_summed != CHECKSUM_PARTIAL)
-               skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
        /* Update sequence range on original skb. */
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
index ec35eaa..c063001 100644 (file)
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
        for (handler = rcu_dereference(head);           \
             handler != NULL;                           \
             handler = rcu_dereference(handler->next))  \
-       
+
 static int tunnel4_rcv(struct sk_buff *skb)
 {
        struct xfrm_tunnel *handler;
index bfaefe5..3013404 100644 (file)
@@ -2024,6 +2024,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
                err = udplite_checksum_init(skb, uh);
                if (err)
                        return err;
+
+               if (UDP_SKB_CB(skb)->partial_cov) {
+                       skb->csum = inet_compute_pseudo(skb, proto);
+                       return 0;
+               }
        }
 
        /* Note, we are only interested in != 0 or == 0, thus the
@@ -2757,6 +2762,7 @@ static void __net_exit udp4_proc_exit_net(struct net *net)
 static struct pernet_operations udp4_net_ops = {
        .init = udp4_proc_init_net,
        .exit = udp4_proc_exit_net,
+       .async = true,
 };
 
 int __init udp4_proc_init(void)
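
For UDP-Lite's partial checksum coverage, the full-length checksum conversion done for plain UDP does not apply, so udp4_csum_init() now seeds skb->csum with the pseudo-header sum and returns early; the covered range is verified later. A host-order toy version of that pseudo-header sum (the kernel works on an unfolded 32-bit __wsum; this toy folds eagerly):

#include <stdint.h>

static uint16_t fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* IPv4 pseudo-header: saddr, daddr, zero+proto, L4 length. */
static uint16_t pseudo_hdr_sum(uint32_t saddr, uint32_t daddr,
			       uint8_t proto, uint16_t len)
{
	uint32_t sum = 0;

	sum += saddr >> 16;
	sum += saddr & 0xffff;
	sum += daddr >> 16;
	sum += daddr & 0xffff;
	sum += proto;
	sum += len;

	return fold32(sum);
}
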
index f96614e..72f2c38 100644 (file)
@@ -104,6 +104,7 @@ static void __net_exit udplite4_proc_exit_net(struct net *net)
 static struct pernet_operations udplite4_net_ops = {
        .init = udplite4_proc_init_net,
        .exit = udplite4_proc_exit_net,
+       .async = true,
 };
 
 static __init int udplite4_proc_init(void)
index 05017e2..0c752dc 100644 (file)
@@ -100,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
        xdst->u.rt.rt_gateway = rt->rt_gateway;
        xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
        xdst->u.rt.rt_pmtu = rt->rt_pmtu;
-       xdst->u.rt.rt_table_id = rt->rt_table_id;
        INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
        return 0;
@@ -365,6 +364,7 @@ static void __net_exit xfrm4_net_exit(struct net *net)
 static struct pernet_operations __net_initdata xfrm4_net_ops = {
        .init   = xfrm4_net_init,
        .exit   = xfrm4_net_exit,
+       .async  = true,
 };
 
 static void __init xfrm4_policy_init(void)
@@ -379,4 +379,3 @@ void __init xfrm4_init(void)
        xfrm4_protocol_init();
        register_pernet_subsys(&xfrm4_net_ops);
 }
-
index ea71e4b..6794ddf 100644 (file)
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
 config IPV6_MROUTE
        bool "IPv6: multicast routing"
        depends on IPV6
+       select IP_MROUTE_COMMON
        ---help---
          Experimental support for IPv6 multicast forwarding.
          If unsure, say N.
index e1846b9..b5fd116 100644 (file)
@@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
 #endif
 }
 
+static bool ipv6_allow_optimistic_dad(struct net *net,
+                                     struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+       if (!idev)
+               return false;
+       if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+               return false;
+
+       return true;
+#else
+       return false;
+#endif
+}
+
 static int ipv6_get_saddr_eval(struct net *net,
                               struct ipv6_saddr_score *score,
                               struct ipv6_saddr_dst *dst,
@@ -1968,6 +1983,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
                spin_lock_bh(&ifp->lock);
                addrconf_del_dad_work(ifp);
                ifp->flags |= IFA_F_TENTATIVE;
+               if (dad_failed)
+                       ifp->flags &= ~IFA_F_OPTIMISTIC;
                spin_unlock_bh(&ifp->lock);
                if (dad_failed)
                        ipv6_ifa_notify(0, ifp);
@@ -4257,6 +4274,7 @@ static void __net_exit if6_proc_net_exit(struct net *net)
 static struct pernet_operations if6_proc_net_ops = {
        .init = if6_proc_net_init,
        .exit = if6_proc_net_exit,
+       .async = true,
 };
 
 int __init if6_proc_init(void)
@@ -4500,6 +4518,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
            (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
                return -EINVAL;
 
+       if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+               ifa_flags &= ~IFA_F_OPTIMISTIC;
+
        timeout = addrconf_timeout_fixup(valid_lft, HZ);
        if (addrconf_finite_timeout(timeout)) {
                expires = jiffies_to_clock_t(timeout * HZ);
@@ -4573,6 +4594,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct in6_addr *pfx, *peer_pfx;
        struct inet6_ifaddr *ifa;
        struct net_device *dev;
+       struct inet6_dev *idev;
        u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
        u32 ifa_flags;
        int err;
@@ -4606,7 +4628,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        /* We ignore other flags so far. */
        ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
-                    IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+                    IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+       idev = ipv6_find_idev(dev);
+       if (IS_ERR(idev))
+               return PTR_ERR(idev);
+
+       if (!ipv6_allow_optimistic_dad(net, idev))
+               ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+       if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+               NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+               return -EINVAL;
+       }
 
        ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
        if (!ifa) {
@@ -6550,6 +6584,7 @@ static void __net_exit addrconf_exit_net(struct net *net)
 static struct pernet_operations addrconf_ops = {
        .init = addrconf_init_net,
        .exit = addrconf_exit_net,
+       .async = true,
 };
 
 static struct rtnl_af_ops inet6_ops __read_mostly = {
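
The addrconf hunks let userspace request IFA_F_OPTIMISTIC explicitly: the flag is accepted from netlink, stripped where optimistic DAD is disabled or the address is past DAD, and rejected in combination with IFA_F_NODAD. A compact sketch of that validation; the flag values match the uapi header but should be treated as assumptions here:

#include <stdbool.h>

#define IFA_F_NODAD		0x02
#define IFA_F_OPTIMISTIC	0x04

/* Returns the sanitized flags, or -1 for the contradictory request
 * (NODAD skips DAD entirely; OPTIMISTIC means "use the address while
 * DAD runs", so both at once make no sense).
 */
static int sanitize_ifa_flags(unsigned int flags, bool optimistic_enabled)
{
	if (!optimistic_enabled)
		flags &= ~IFA_F_OPTIMISTIC;

	if ((flags & IFA_F_NODAD) && (flags & IFA_F_OPTIMISTIC))
		return -1;

	return (int)flags;
}
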
index 1d6ced3..ba2e636 100644 (file)
@@ -344,6 +344,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
 static struct pernet_operations ipv6_addr_label_ops = {
        .init = ip6addrlbl_net_init,
        .exit = ip6addrlbl_net_exit,
+       .async = true,
 };
 
 int __init ipv6_addr_label_init(void)
index 4169177..dbbe040 100644 (file)
@@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
  */
 
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
-                int *uaddr_len, int peer)
+                int peer)
 {
        struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
        struct sock *sk = sock->sk;
@@ -500,8 +500,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
        }
        sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
                                                 sk->sk_bound_dev_if);
-       *uaddr_len = sizeof(*sin);
-       return 0;
+       return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet6_getname);
 
@@ -858,6 +857,7 @@ static void __net_exit inet6_net_exit(struct net *net)
 static struct pernet_operations inet6_net_ops = {
        .init = inet6_net_init,
        .exit = inet6_net_exit,
+       .async = true,
 };
 
 static const struct ipv6_stub ipv6_stub_impl = {
index 8e085cc..c61718d 100644 (file)
@@ -78,7 +78,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
        if (ifindex == 0) {
                struct rt6_info *rt;
 
-               rt = rt6_lookup(net, addr, NULL, 0, 0);
+               rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
                if (rt) {
                        dev = rt->dst.dev;
                        ip6_rt_put(rt);
@@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net)
        remove_proc_entry("anycast6", net->proc_net);
 }
 #endif
-
index 11025f8..b643f5c 100644 (file)
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
        return nexthdr;
 }
 EXPORT_SYMBOL(ipv6_find_hdr);
-
index b240f24..00ef946 100644 (file)
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
 }
 
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+                                  const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup)
 {
        if (net->ipv6.fib6_has_custom_rules) {
                struct fib_lookup_arg arg = {
                        .lookup_ptr = lookup,
+                       .lookup_data = skb,
                        .flags = FIB_LOOKUP_NOREF,
                };
 
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
        } else {
                struct rt6_info *rt;
 
-               rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+               rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
                if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
                        return &rt->dst;
                ip6_rt_put(rt);
-               rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+               rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
                if (rt->dst.error != -EAGAIN)
                        return &rt->dst;
                ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
                goto out;
        }
 
-       rt = lookup(net, table, flp6, flags);
+       rt = lookup(net, table, flp6, arg->lookup_data, flags);
        if (rt != net->ipv6.ip6_null_entry) {
                struct fib6_rule *r = (struct fib6_rule *)rule;
 
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
        if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
                return 0;
 
+       if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->sport_range) &&
+           !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->dport_range) &&
+           !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+               return 0;
+
        return 1;
 }
 
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        rule6->dst.plen = frh->dst_len;
        rule6->tclass = frh->tos;
 
+       if (fib_rule_requires_fldissect(rule))
+               net->ipv6.fib6_rules_require_fldissect++;
+
        net->ipv6.fib6_has_custom_rules = true;
        err = 0;
 errout:
        return err;
 }
 
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+       struct net *net = rule->fr_net;
+
+       if (net->ipv6.fib6_rules_require_fldissect &&
+           fib_rule_requires_fldissect(rule))
+               net->ipv6.fib6_rules_require_fldissect--;
+
+       return 0;
+}
+
 static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
                             struct nlattr **tb)
 {
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
        .match                  = fib6_rule_match,
        .suppress               = fib6_rule_suppress,
        .configure              = fib6_rule_configure,
+       .delete                 = fib6_rule_delete,
        .compare                = fib6_rule_compare,
        .fill                   = fib6_rule_fill,
        .nlmsg_payload          = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
                goto out_fib6_rules_ops;
 
        net->ipv6.fib6_rules_ops = ops;
+       net->ipv6.fib6_rules_require_fldissect = 0;
 out:
        return err;
 
@@ -368,6 +397,7 @@ static void __net_exit fib6_rules_net_exit(struct net *net)
 static struct pernet_operations fib6_rules_net_ops = {
        .init = fib6_rules_net_init,
        .exit = fib6_rules_net_exit,
+       .async = true,
 };
 
 int __init fib6_rules_init(void)
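
fib6_rule_match() grows L4 awareness: an IP protocol test plus source and destination port ranges, with a per-netns counter (fib6_rules_require_fldissect) so flow dissection runs only while some rule needs it. A sketch of the range test; the "set" and inclusive-match semantics are assumptions mirroring fib_rule_port_range_set()/fib_rule_port_inrange():

#include <stdbool.h>
#include <stdint.h>

struct port_range { uint16_t start, end; };

static bool range_set(const struct port_range *r)
{
	return r->start != 0 || r->end != 0;
}

static bool port_inrange(const struct port_range *r, uint16_t port)
{
	return port >= r->start && port <= r->end;
}

static bool l4_match(uint8_t rule_proto, uint8_t pkt_proto,
		     const struct port_range *sport, uint16_t pkt_sport)
{
	if (rule_proto && rule_proto != pkt_proto)
		return false;
	if (range_set(sport) && !port_inrange(sport, pkt_sport))
		return false;
	return true;
}
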
index 6ae5dd3..6f84668 100644 (file)
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        fl6.fl6_icmp_type = type;
        fl6.fl6_icmp_code = code;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
-       fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+       fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
        sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
        skb_pull(skb2, nhs);
        skb_reset_network_header(skb2);
 
-       rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+       rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+                       skb, 0);
 
        if (rt && rt->dst.dev)
                skb2->dev = rt->dst.dev;
@@ -997,6 +998,7 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
 static struct pernet_operations icmpv6_sk_ops = {
        .init = icmpv6_sk_init,
        .exit = icmpv6_sk_exit,
+       .async = true,
 };
 
 int __init icmpv6_init(void)
index 44c39c5..e438699 100644 (file)
@@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = {
        .exit = ila_exit_net,
        .id   = &ila_net_id,
        .size = sizeof(struct ila_net),
+       .async = true,
 };
 
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
index ec43d18..547515e 100644 (file)
@@ -73,6 +73,11 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
                err = udplite_checksum_init(skb, uh);
                if (err)
                        return err;
+
+               if (UDP_SKB_CB(skb)->partial_cov) {
+                       skb->csum = ip6_compute_pseudo(skb, proto);
+                       return 0;
+               }
        }
 
        /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
index 92b8d8c..2f995e9 100644 (file)
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 }
 
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+                                  const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup)
 {
        struct rt6_info *rt;
 
-       rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+       rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
        if (rt->dst.error == -EAGAIN) {
                ip6_rt_put(rt);
                rt = net->ipv6.ip6_null_entry;
@@ -2160,6 +2161,7 @@ static void fib6_net_exit(struct net *net)
 static struct pernet_operations fib6_net_ops = {
        .init = fib6_net_init,
        .exit = fib6_net_exit,
+       .async = true,
 };
 
 int __init fib6_init(void)
index 3dab664..6ddf522 100644 (file)
@@ -873,6 +873,7 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net)
 static struct pernet_operations ip6_flowlabel_net_ops = {
        .init = ip6_flowlabel_proc_init,
        .exit = ip6_flowlabel_net_exit,
+       .async = true,
 };
 
 int ip6_flowlabel_init(void)
index 3c35312..18a3dfb 100644 (file)
@@ -695,9 +695,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
        else
                fl6->daddr = tunnel->parms.raddr;
 
-       if (tunnel->parms.o_flags & TUNNEL_SEQ)
-               tunnel->o_seqno++;
-
        /* Push GRE header. */
        protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 
@@ -720,14 +717,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
                fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 
                dsfield = key->tos;
-               flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+               flags = key->tun_flags &
+                       (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
                tunnel->tun_hlen = gre_calc_hlen(flags);
 
                gre_build_header(skb, tunnel->tun_hlen,
                                 flags, protocol,
-                                tunnel_id_to_key32(tun_info->key.tun_id), 0);
+                                tunnel_id_to_key32(tun_info->key.tun_id),
+                                (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+                                                     : 0);
 
        } else {
+               if (tunnel->parms.o_flags & TUNNEL_SEQ)
+                       tunnel->o_seqno++;
+
                gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
                                 protocol, tunnel->parms.o_key,
                                 htonl(tunnel->o_seqno));
@@ -1053,7 +1056,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 
                struct rt6_info *rt = rt6_lookup(t->net,
                                                 &p->raddr, &p->laddr,
-                                                p->link, strict);
+                                                p->link, NULL, strict);
 
                if (!rt)
                        return;
@@ -1517,6 +1520,7 @@ static struct pernet_operations ip6gre_net_ops = {
        .exit_batch = ip6gre_exit_batch_net,
        .id   = &ip6gre_net_id,
        .size = sizeof(struct ip6gre_net),
+       .async = true,
 };
 
 static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1784,6 +1788,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
        netif_keep_dst(dev);
 }
 
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+       return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
 static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
                                       struct ip_tunnel_encap *ipencap)
 {
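
In the collect_md path above, TUNNEL_SEQ now comes from the per-flow tunnel metadata rather than the static tunnel parms, so a sequence number is emitted, and the counter advanced, only for flows that actually request it. A hedged userspace sketch; the TUNNEL_SEQ value is a placeholder:

#include <arpa/inet.h>
#include <stdint.h>

#define TUNNEL_SEQ 0x1000	/* placeholder bit, not the kernel value */

/* Returns the seqno field for the GRE header, or 0 when the flow did
 * not request sequencing; the counter advances only when it did.
 */
static uint32_t seqno_for_header(uint16_t flags, uint32_t *o_seqno)
{
	return (flags & TUNNEL_SEQ) ? htonl((*o_seqno)++) : 0;
}
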
index 997c7f1..a6eb0e6 100644 (file)
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
-                   ((mroute6_socket(net, skb) &&
+                   ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
index 4b15fe9..1124f31 100644 (file)
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
                /* Try to guess incoming interface */
                rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
-                               NULL, 0, 0);
+                               NULL, 0, skb2, 0);
 
                if (rt && rt->dst.dev)
                        skb2->dev = rt->dst.dev;
@@ -1444,7 +1444,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 
                struct rt6_info *rt = rt6_lookup(t->net,
                                                 &p->raddr, &p->laddr,
-                                                p->link, strict);
+                                                p->link, NULL, strict);
 
                if (!rt)
                        return;
@@ -2250,6 +2250,7 @@ static struct pernet_operations ip6_tnl_net_ops = {
        .exit_batch = ip6_tnl_exit_batch_net,
        .id   = &ip6_tnl_net_id,
        .size = sizeof(struct ip6_tnl_net),
+       .async = true,
 };
 
 /**
index fa3ae1c..a482b85 100644 (file)
@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t)
                              (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
                struct rt6_info *rt = rt6_lookup(t->net,
                                                 &p->raddr, &p->laddr,
-                                                p->link, strict);
+                                                p->link, NULL, strict);
 
                if (rt)
                        tdev = rt->dst.dev;
@@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = {
        .exit_batch = vti6_exit_batch_net,
        .id   = &vti6_net_id,
        .size = sizeof(struct vti6_net),
+       .async = true,
 };
 
 static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
index 9f6cace..2a38f9d 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/compat.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
 #include <net/raw.h>
 #include <linux/notifier.h>
 #include <linux/if_arp.h>
 #include <net/ip6_checksum.h>
 #include <linux/netconf.h>
 
-struct mr6_table {
-       struct list_head        list;
-       possible_net_t          net;
-       u32                     id;
-       struct sock             *mroute6_sk;
-       struct timer_list       ipmr_expire_timer;
-       struct list_head        mfc6_unres_queue;
-       struct list_head        mfc6_cache_array[MFC6_LINES];
-       struct mif_device       vif6_table[MAXMIFS];
-       int                     maxvif;
-       atomic_t                cache_resolve_queue_len;
-       bool                    mroute_do_assert;
-       bool                    mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
-       int                     mroute_reg_vif_num;
-#endif
-};
-
 struct ip6mr_rule {
        struct fib_rule         common;
 };
 
 struct ip6mr_result {
-       struct mr6_table        *mrt;
+       struct mr_table *mrt;
 };
 
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
 
 static DEFINE_RWLOCK(mrt_lock);
 
-/*
- *     Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
                           struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-                              struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
                              int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
                               struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 #define ip6mr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+                                           struct mr_table *mrt)
+{
+       struct mr_table *ret;
+
+       if (!mrt)
+               ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+                                    struct mr_table, list);
+       else
+               ret = list_entry_rcu(mrt->list.next,
+                                    struct mr_table, list);
+
+       if (&ret->list == &net->ipv6.mr6_tables)
+               return NULL;
+       return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        ip6mr_for_each_table(mrt, net) {
                if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-                           struct mr6_table **mrt)
+                           struct mr_table **mrt)
 {
        int err;
        struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
                             int flags, struct fib_lookup_arg *arg)
 {
        struct ip6mr_result *res = arg->result;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        switch (rule->action) {
        case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 static int __net_init ip6mr_rules_init(struct net *net)
 {
        struct fib_rules_ops *ops;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        int err;
 
        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
 
 static void __net_exit ip6mr_rules_exit(struct net *net)
 {
-       struct mr6_table *mrt, *next;
+       struct mr_table *mrt, *next;
 
        rtnl_lock();
        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 #define ip6mr_for_each_table(mrt, net) \
        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+                                           struct mr_table *mrt)
+{
+       if (!mrt)
+               return net->ipv6.mrt6;
+       return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
        return net->ipv6.mrt6;
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-                           struct mr6_table **mrt)
+                           struct mr_table **mrt)
 {
        *mrt = net->ipv6.mrt6;
        return 0;
@@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 }
 #endif
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+                         const void *ptr)
 {
-       struct mr6_table *mrt;
-       unsigned int i;
+       const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+       struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 
-       mrt = ip6mr_get_table(net, id);
-       if (mrt)
-               return mrt;
-
-       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-       if (!mrt)
-               return NULL;
-       mrt->id = id;
-       write_pnet(&mrt->net, net);
-
-       /* Forwarding cache */
-       for (i = 0; i < MFC6_LINES; i++)
-               INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
-       INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+       return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+              !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
 
-       timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+       .head_offset = offsetof(struct mr_mfc, mnode),
+       .key_offset = offsetof(struct mfc6_cache, cmparg),
+       .key_len = sizeof(struct mfc6_cache_cmp_arg),
+       .nelem_hint = 3,
+       .locks_mul = 1,
+       .obj_cmpfn = ip6mr_hash_cmp,
+       .automatic_shrinking = true,
+};
 
-#ifdef CONFIG_IPV6_PIMSM_V2
-       mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+                               struct net *net)
+{
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 #endif
-       return mrt;
 }
 
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
-       del_timer_sync(&mrt->ipmr_expire_timer);
-       mroute_clean_tables(mrt, true);
-       kfree(mrt);
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
-       struct seq_net_private p;
-       struct mr6_table *mrt;
-       struct list_head *cache;
-       int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+       .mf6c_origin = IN6ADDR_ANY_INIT,
+       .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 };
 
+static struct mr_table_ops ip6mr_mr_table_ops = {
+       .rht_params = &ip6mr_rht_params,
+       .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
 
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
-                                          struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 {
-       struct mr6_table *mrt = it->mrt;
-       struct mfc6_cache *mfc;
-
-       read_lock(&mrt_lock);
-       for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
-               it->cache = &mrt->mfc6_cache_array[it->ct];
-               list_for_each_entry(mfc, it->cache, list)
-                       if (pos-- == 0)
-                               return mfc;
-       }
-       read_unlock(&mrt_lock);
+       struct mr_table *mrt;
 
-       spin_lock_bh(&mfc_unres_lock);
-       it->cache = &mrt->mfc6_unres_queue;
-       list_for_each_entry(mfc, it->cache, list)
-               if (pos-- == 0)
-                       return mfc;
-       spin_unlock_bh(&mfc_unres_lock);
+       mrt = ip6mr_get_table(net, id);
+       if (mrt)
+               return mrt;
 
-       it->cache = NULL;
-       return NULL;
+       return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+                             ipmr_expire_process, ip6mr_new_table_set);
 }
 
-/*
- *     The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
-       struct seq_net_private p;
-       struct mr6_table *mrt;
-       int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
-                                           struct ipmr_vif_iter *iter,
-                                           loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
 {
-       struct mr6_table *mrt = iter->mrt;
-
-       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-               if (!MIF_EXISTS(mrt, iter->ct))
-                       continue;
-               if (pos-- == 0)
-                       return &mrt->vif6_table[iter->ct];
-       }
-       return NULL;
+       del_timer_sync(&mrt->ipmr_expire_timer);
+       mroute_clean_tables(mrt, true);
+       rhltable_destroy(&mrt->mfc_hash);
+       kfree(mrt);
 }
 
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
        if (!mrt)
@@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        iter->mrt = mrt;
 
        read_lock(&mrt_lock);
-       return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_vif_iter *iter = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt = iter->mrt;
-
-       ++*pos;
-       if (v == SEQ_START_TOKEN)
-               return ip6mr_vif_seq_idx(net, iter, 0);
-
-       while (++iter->ct < mrt->maxvif) {
-               if (!MIF_EXISTS(mrt, iter->ct))
-                       continue;
-               return &mrt->vif6_table[iter->ct];
-       }
-       return NULL;
+       return mr_vif_seq_start(seq, pos);
 }
 
 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
-       struct ipmr_vif_iter *iter = seq->private;
-       struct mr6_table *mrt = iter->mrt;
+       struct mr_vif_iter *iter = seq->private;
+       struct mr_table *mrt = iter->mrt;
 
        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
        } else {
-               const struct mif_device *vif = v;
+               const struct vif_device *vif = v;
                const char *name = vif->dev ? vif->dev->name : "none";
 
                seq_printf(seq,
                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
-                          vif - mrt->vif6_table,
+                          vif - mrt->vif_table,
                           name, vif->bytes_in, vif->pkt_in,
                           vif->bytes_out, vif->pkt_out,
                           vif->flags);
@@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ip6mr_vif_seq_ops = {
        .start = ip6mr_vif_seq_start,
-       .next  = ip6mr_vif_seq_next,
+       .next  = mr_vif_seq_next,
        .stop  = ip6mr_vif_seq_stop,
        .show  = ip6mr_vif_seq_show,
 };
@@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
 static int ip6mr_vif_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
-                           sizeof(struct ipmr_vif_iter));
+                           sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = {
 
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
        if (!mrt)
                return ERR_PTR(-ENOENT);
 
-       it->mrt = mrt;
-       it->cache = NULL;
-       return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct mfc6_cache *mfc = v;
-       struct ipmr_mfc_iter *it = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt = it->mrt;
-
-       ++*pos;
-
-       if (v == SEQ_START_TOKEN)
-               return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-       if (mfc->list.next != it->cache)
-               return list_entry(mfc->list.next, struct mfc6_cache, list);
-
-       if (it->cache == &mrt->mfc6_unres_queue)
-               goto end_of_list;
-
-       BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
-       while (++it->ct < MFC6_LINES) {
-               it->cache = &mrt->mfc6_cache_array[it->ct];
-               if (list_empty(it->cache))
-                       continue;
-               return list_first_entry(it->cache, struct mfc6_cache, list);
-       }
-
-       /* exhausted cache_array, show unresolved */
-       read_unlock(&mrt_lock);
-       it->cache = &mrt->mfc6_unres_queue;
-       it->ct = 0;
-
-       spin_lock_bh(&mfc_unres_lock);
-       if (!list_empty(it->cache))
-               return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
-       spin_unlock_bh(&mfc_unres_lock);
-       it->cache = NULL;
-
-       return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct mr6_table *mrt = it->mrt;
-
-       if (it->cache == &mrt->mfc6_unres_queue)
-               spin_unlock_bh(&mfc_unres_lock);
-       else if (it->cache == &mrt->mfc6_cache_array[it->ct])
-               read_unlock(&mrt_lock);
+       return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
        } else {
                const struct mfc6_cache *mfc = v;
-               const struct ipmr_mfc_iter *it = seq->private;
-               struct mr6_table *mrt = it->mrt;
+               const struct mr_mfc_iter *it = seq->private;
+               struct mr_table *mrt = it->mrt;
 
                seq_printf(seq, "%pI6 %pI6 %-3hd",
                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
-                          mfc->mf6c_parent);
+                          mfc->_c.mfc_parent);
 
-               if (it->cache != &mrt->mfc6_unres_queue) {
+               if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
-                                  mfc->mfc_un.res.pkt,
-                                  mfc->mfc_un.res.bytes,
-                                  mfc->mfc_un.res.wrong_if);
-                       for (n = mfc->mfc_un.res.minvif;
-                            n < mfc->mfc_un.res.maxvif; n++) {
-                               if (MIF_EXISTS(mrt, n) &&
-                                   mfc->mfc_un.res.ttls[n] < 255)
+                                  mfc->_c.mfc_un.res.pkt,
+                                  mfc->_c.mfc_un.res.bytes,
+                                  mfc->_c.mfc_un.res.wrong_if);
+                       for (n = mfc->_c.mfc_un.res.minvif;
+                            n < mfc->_c.mfc_un.res.maxvif; n++) {
+                               if (VIF_EXISTS(mrt, n) &&
+                                   mfc->_c.mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
-                                                  " %2d:%-3d",
-                                                  n, mfc->mfc_un.res.ttls[n]);
+                                                  " %2d:%-3d", n,
+                                                  mfc->_c.mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
@@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
-       .next  = ipmr_mfc_seq_next,
-       .stop  = ipmr_mfc_seq_stop,
+       .next  = mr_mfc_seq_next,
+       .stop  = mr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-                           sizeof(struct ipmr_mfc_iter));
+                           sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb)
        struct ipv6hdr   *encap;
        struct net_device  *reg_dev = NULL;
        struct net *net = dev_net(skb->dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->dev->ifindex,
                .flowi6_mark    = skb->mark,
@@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
-               reg_dev = mrt->vif6_table[reg_vif_num].dev;
+               reg_dev = mrt->vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);
@@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
 {
        struct net *net = dev_net(dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_oif     = dev->ifindex,
                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev)
        dev->features           |= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 {
        struct net_device *dev;
        char name[IFNAMSIZ];
@@ -773,17 +657,17 @@ failure:
  *     Delete a VIF entry
  */
 
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
                       struct list_head *head)
 {
-       struct mif_device *v;
+       struct vif_device *v;
        struct net_device *dev;
        struct inet6_dev *in6_dev;
 
        if (vifi < 0 || vifi >= mrt->maxvif)
                return -EADDRNOTAVAIL;
 
-       v = &mrt->vif6_table[vifi];
+       v = &mrt->vif_table[vifi];
 
        write_lock_bh(&mrt_lock);
        dev = v->dev;
@@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
        if (vifi + 1 == mrt->maxvif) {
                int tmp;
                for (tmp = vifi - 1; tmp >= 0; tmp--) {
-                       if (MIF_EXISTS(mrt, tmp))
+                       if (VIF_EXISTS(mrt, tmp))
                                break;
                }
                mrt->maxvif = tmp + 1;
@@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
        return 0;
 }
 
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+       struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+       kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
 static inline void ip6mr_cache_free(struct mfc6_cache *c)
 {
-       kmem_cache_free(mrt_cachep, c);
+       call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 }
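Freeing an MFC entry is now deferred through call_rcu(): ip6mr_cache_free()
only schedules ip6mr_cache_free_rcu(), which runs the real kmem_cache_free()
after every reader that might still hold a pointer has left its RCU read-side
critical section. A minimal sketch of the reader this protects (illustrative;
origin, mcastgrp and pkt stand in for caller state):

	struct mfc6_cache *c;
	unsigned long pkt;

	rcu_read_lock();
	c = ip6mr_cache_find(mrt, &origin, &mcastgrp); /* lockless lookup */
	if (c)
		pkt = c->_c.mfc_un.res.pkt;    /* valid until the unlock */
	rcu_read_unlock();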
 
 /* Destroy an unresolved cache entry, killing queued skbs
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 {
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
 
        atomic_dec(&mrt->cache_resolve_queue_len);
 
-       while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
                if (ipv6_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct ipv6hdr));
@@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 
 /* Timer process for all the unresolved queue. */
 
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
 {
        unsigned long now = jiffies;
        unsigned long expires = 10 * HZ;
-       struct mfc6_cache *c, *next;
+       struct mr_mfc *c, *next;
 
-       list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        /* not yet... */
                        unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
                }
 
                list_del(&c->list);
-               mr6_netlink_event(mrt, c, RTM_DELROUTE);
-               ip6mr_destroy_unres(mrt, c);
+               mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+               ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
        }
 
-       if (!list_empty(&mrt->mfc6_unres_queue))
+       if (!list_empty(&mrt->mfc_unres_queue))
                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 }
 
 static void ipmr_expire_process(struct timer_list *t)
 {
-       struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+       struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 
        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
                return;
        }
 
-       if (!list_empty(&mrt->mfc6_unres_queue))
+       if (!list_empty(&mrt->mfc_unres_queue))
                ipmr_do_expire_process(mrt);
 
        spin_unlock(&mfc_unres_lock);
@@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t)
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+                                   struct mr_mfc *cache,
                                    unsigned char *ttls)
 {
        int vifi;
@@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
-               if (MIF_EXISTS(mrt, vifi) &&
+               if (VIF_EXISTS(mrt, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
        cache->mfc_un.res.lastuse = jiffies;
 }
 
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
                    struct mif6ctl *vifc, int mrtsock)
 {
        int vifi = vifc->mif6c_mifi;
-       struct mif_device *v = &mrt->vif6_table[vifi];
+       struct vif_device *v = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct inet6_dev *in6_dev;
        int err;
 
        /* Is vif busy ? */
-       if (MIF_EXISTS(mrt, vifi))
+       if (VIF_EXISTS(mrt, vifi))
                return -EADDRINUSE;
 
        switch (vifc->mif6c_flags) {
@@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
                                             dev->ifindex, &in6_dev->cnf);
        }
 
-       /*
-        *      Fill in the VIF structures
-        */
-       v->rate_limit = vifc->vifc_rate_limit;
-       v->flags = vifc->mif6c_flags;
-       if (!mrtsock)
-               v->flags |= VIFF_STATIC;
-       v->threshold = vifc->vifc_threshold;
-       v->bytes_in = 0;
-       v->bytes_out = 0;
-       v->pkt_in = 0;
-       v->pkt_out = 0;
-       v->link = dev->ifindex;
-       if (v->flags & MIFF_REGISTER)
-               v->link = dev_get_iflink(dev);
+       /* Fill in the VIF structures */
+       vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+                       vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+                       MIFF_REGISTER);
 
        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
@@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
        return 0;
 }
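The dozen open-coded field assignments above are replaced by vif_device_init()
from the family-agnostic mroute code. Its definition is outside this excerpt;
judging purely from the call site, a plausible shape is (a sketch, not the
authoritative helper):

	static void vif_device_init(struct vif_device *v, struct net_device *dev,
				    unsigned long rate_limit,
				    unsigned char threshold,
				    unsigned short flags,
				    unsigned short get_iflink_mask)
	{
		v->dev = NULL;
		v->bytes_in = 0;
		v->bytes_out = 0;
		v->pkt_in = 0;
		v->pkt_out = 0;
		v->rate_limit = rate_limit;
		v->flags = flags;
		v->threshold = threshold;
		if (v->flags & get_iflink_mask)
			v->link = dev_get_iflink(dev);
		else
			v->link = dev->ifindex;
	}

Passing MIFF_REGISTER as the mask is presumably what makes the helper
shareable: the IPv4 caller can pass its own VIFF_REGISTER instead.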
 
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
                                           const struct in6_addr *origin,
                                           const struct in6_addr *mcastgrp)
 {
-       int line = MFC6_HASH(mcastgrp, origin);
-       struct mfc6_cache *c;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
-                       return c;
-       }
-       return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
-                                                     mifi_t mifi)
-{
-       int line = MFC6_HASH(&in6addr_any, &in6addr_any);
-       struct mfc6_cache *c;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-               if (ipv6_addr_any(&c->mf6c_origin) &&
-                   ipv6_addr_any(&c->mf6c_mcastgrp) &&
-                   (c->mfc_un.res.ttls[mifi] < 255))
-                       return c;
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = *origin,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
 
-       return NULL;
+       return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
                                               struct in6_addr *mcastgrp,
                                               mifi_t mifi)
 {
-       int line = MFC6_HASH(mcastgrp, &in6addr_any);
-       struct mfc6_cache *c, *proxy;
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = in6addr_any,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
 
        if (ipv6_addr_any(mcastgrp))
-               goto skip;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-               if (ipv6_addr_any(&c->mf6c_origin) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
-                       if (c->mfc_un.res.ttls[mifi] < 255)
-                               return c;
-
-                       /* It's ok if the mifi is part of the static tree */
-                       proxy = ip6mr_cache_find_any_parent(mrt,
-                                                           c->mf6c_parent);
-                       if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
-                               return c;
-               }
+               return mr_mfc_find_any_parent(mrt, mifi);
+       return mr_mfc_find_any(mrt, mifi, &arg);
+}
 
-skip:
-       return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+                       const struct in6_addr *origin,
+                       const struct in6_addr *mcastgrp,
+                       int parent)
+{
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = *origin,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
+
+       return mr_mfc_find_parent(mrt, &arg, parent);
 }
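All three lookups now funnel into the shared mr_mfc_find*() helpers, keyed by
struct mfc6_cache_cmp_arg instead of walking MFC6_HASH chains under mrt_lock.
The key type is defined earlier in the file; from the initializers above it is
essentially just the (group, origin) address pair (field order assumed):

	struct mfc6_cache_cmp_arg {
		struct in6_addr mf6c_mcastgrp;
		struct in6_addr mf6c_origin;
	};

mr_mfc_find() resolves an exact (S,G), mr_mfc_find_any() a (*,G) including the
static-tree proxy check that used to be open-coded here, and
mr_mfc_find_any_parent() the (*,*,oif) wildcard.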
 
-/*
- *     Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
 static struct mfc6_cache *ip6mr_cache_alloc(void)
 {
        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (!c)
                return NULL;
-       c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-       c->mfc_un.res.minvif = MAXMIFS;
+       c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+       c->_c.mfc_un.res.minvif = MAXMIFS;
        return c;
 }
 
@@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (!c)
                return NULL;
-       skb_queue_head_init(&c->mfc_un.unres.unresolved);
-       c->mfc_un.unres.expires = jiffies + 10 * HZ;
+       skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+       c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
        return c;
 }
 
@@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
  *     A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
                                struct mfc6_cache *uc, struct mfc6_cache *c)
 {
        struct sk_buff *skb;
@@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
         *      Play the pending entries through our router
         */
 
-       while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+       while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
                if (ipv6_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct ipv6hdr));
 
-                       if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+                       if (mr_fill_mroute(mrt, skb, &c->_c,
+                                          nlmsg_data(nlh)) > 0) {
                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
  *     Called under mrt_lock.
  */
 
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert)
 {
+       struct sock *mroute6_sk;
        struct sk_buff *skb;
        struct mrt6msg *msg;
        int ret;
@@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
 
-       if (!mrt->mroute6_sk) {
+       rcu_read_lock();
+       mroute6_sk = rcu_dereference(mrt->mroute_sk);
+       if (!mroute6_sk) {
+               rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }
 
        mrt6msg_netlink_event(mrt, skb);
 
-       /*
-        *      Deliver to user space multicast routing algorithms
-        */
-       ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+       /* Deliver to user space multicast routing algorithms */
+       ret = sock_queue_rcv_skb(mroute6_sk, skb);
+       rcu_read_unlock();
        if (ret < 0) {
                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
                kfree_skb(skb);
@@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
        return ret;
 }
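With mroute6_sk turned into the RCU-managed mrt->mroute_sk, the report path
reaches the user-space daemon without taking mrt_lock: it dereferences the
socket under rcu_read_lock() and keeps the read section open across
sock_queue_rcv_skb(). The writer side this pairs with appears in
ip6mr_sk_done() further down and is, in outline:

	write_lock_bh(&mrt_lock);
	RCU_INIT_POINTER(mrt->mroute_sk, NULL);  /* unpublish */
	write_unlock_bh(&mrt_lock);
	/* and once the locks are dropped: */
	synchronize_rcu();                       /* wait out in-flight readers */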
 
-/*
- *     Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+                                 struct sk_buff *skb)
 {
+       struct mfc6_cache *c;
        bool found = false;
        int err;
-       struct mfc6_cache *c;
 
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
                        found = true;
@@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
                        return -ENOBUFS;
                }
 
-               /*
-                *      Fill in the new cache entry
-                */
-               c->mf6c_parent = -1;
+               /* Fill in the new cache entry */
+               c->_c.mfc_parent = -1;
                c->mf6c_origin = ipv6_hdr(skb)->saddr;
                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
 
@@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
                }
 
                atomic_inc(&mrt->cache_resolve_queue_len);
-               list_add(&c->list, &mrt->mfc6_unres_queue);
+               list_add(&c->_c.list, &mrt->mfc_unres_queue);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 
                ipmr_do_expire_process(mrt);
        }
 
-       /*
-        *      See if we can append the packet
-        */
-       if (c->mfc_un.unres.unresolved.qlen > 3) {
+       /* See if we can append the packet */
+       if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
-               skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+               skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
                err = 0;
        }
 
@@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
  *     MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
                            int parent)
 {
-       int line;
-       struct mfc6_cache *c, *next;
-
-       line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+       struct mfc6_cache *c;
 
-       list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp,
-                                   &mfc->mf6cc_mcastgrp.sin6_addr) &&
-                   (parent == -1 || parent == c->mf6c_parent)) {
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
+       /* The entries are added/deleted only under RTNL */
+       rcu_read_lock();
+       c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+                                   &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+       rcu_read_unlock();
+       if (!c)
+               return -ENOENT;
+       rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+       list_del_rcu(&c->_c.list);
 
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_cache_free(c);
-                       return 0;
-               }
-       }
-       return -ENOENT;
+       mr6_netlink_event(mrt, c, RTM_DELROUTE);
+       ip6mr_cache_free(c);
+       return 0;
 }
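Deletion no longer scans a hash chain under mrt_lock. Entries are only added
or removed under RTNL, so the lookup needs nothing beyond RCU, and the unlink
sequence is ordered so lockless readers see either a fully linked entry or
none at all:

	/* mirrors ip6mr_mfc_delete() above, annotated */
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode,
			ip6mr_rht_params);       /* no new lookups find it */
	list_del_rcu(&c->_c.list);               /* dump walkers skip it */
	mr6_netlink_event(mrt, c, RTM_DELROUTE); /* tell userspace */
	ip6mr_cache_free(c);                     /* freed after a grace period */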
 
 static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);
-       struct mr6_table *mrt;
-       struct mif_device *v;
+       struct mr_table *mrt;
+       struct vif_device *v;
        int ct;
 
        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
 
        ip6mr_for_each_table(mrt, net) {
-               v = &mrt->vif6_table[0];
+               v = &mrt->vif_table[0];
                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
                        if (v->dev == dev)
                                mif6_delete(mrt, ct, 1, NULL);
@@ -1397,6 +1251,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 static struct pernet_operations ip6mr_net_ops = {
        .init = ip6mr_net_init,
        .exit = ip6mr_net_exit,
+       .async = true,
 };
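The .async = true opt-in (repeated for the other pernet_operations touched
below) appears to mark these init/exit handlers as safe to run without
serializing on the global net_mutex, so namespace setup and teardown for this
subsystem can proceed in parallel. In isolation the pattern is just:

	static struct pernet_operations example_net_ops = {
		.init  = example_net_init,  /* hypothetical callbacks */
		.exit  = example_net_exit,
		.async = true,              /* skip net_mutex serialization */
	};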
 
 int __init ip6_mr_init(void)
@@ -1452,14 +1307,14 @@ void ip6_mr_cleanup(void)
        kmem_cache_destroy(mrt_cachep);
 }
 
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
                         struct mf6cctl *mfc, int mrtsock, int parent)
 {
-       bool found = false;
-       int line;
-       struct mfc6_cache *uc, *c;
        unsigned char ttls[MAXMIFS];
-       int i;
+       struct mfc6_cache *uc, *c;
+       struct mr_mfc *_uc;
+       bool found;
+       int i, err;
 
        if (mfc->mf6cc_parent >= MAXMIFS)
                return -ENFILE;
@@ -1468,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
        for (i = 0; i < MAXMIFS; i++) {
                if (IF_ISSET(i, &mfc->mf6cc_ifset))
                        ttls[i] = 1;
-
-       }
-
-       line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp,
-                                   &mfc->mf6cc_mcastgrp.sin6_addr) &&
-                   (parent == -1 || parent == mfc->mf6cc_parent)) {
-                       found = true;
-                       break;
-               }
        }
 
-       if (found) {
+       /* The entries are added/deleted only under RTNL */
+       rcu_read_lock();
+       c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+                                   &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+       rcu_read_unlock();
+       if (c) {
                write_lock_bh(&mrt_lock);
-               c->mf6c_parent = mfc->mf6cc_parent;
-               ip6mr_update_thresholds(mrt, c, ttls);
+               c->_c.mfc_parent = mfc->mf6cc_parent;
+               ip6mr_update_thresholds(mrt, &c->_c, ttls);
                if (!mrtsock)
-                       c->mfc_flags |= MFC_STATIC;
+                       c->_c.mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
                return 0;
@@ -1504,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 
        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
-       c->mf6c_parent = mfc->mf6cc_parent;
-       ip6mr_update_thresholds(mrt, c, ttls);
+       c->_c.mfc_parent = mfc->mf6cc_parent;
+       ip6mr_update_thresholds(mrt, &c->_c, ttls);
        if (!mrtsock)
-               c->mfc_flags |= MFC_STATIC;
+               c->_c.mfc_flags |= MFC_STATIC;
 
-       write_lock_bh(&mrt_lock);
-       list_add(&c->list, &mrt->mfc6_cache_array[line]);
-       write_unlock_bh(&mrt_lock);
+       err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+                                 ip6mr_rht_params);
+       if (err) {
+               pr_err("ip6mr: rhtable insert error %d\n", err);
+               ip6mr_cache_free(c);
+               return err;
+       }
+       list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
 
-       /*
-        *      Check to see if we resolved a queued list. If so we
-        *      need to send on the frames and tidy up.
+       /* Check to see if we resolved a queued list. If so we
+        * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+               uc = (struct mfc6_cache *)_uc;
                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
-                       list_del(&uc->list);
+                       list_del(&_uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
                }
        }
-       if (list_empty(&mrt->mfc6_unres_queue))
+       if (list_empty(&mrt->mfc_unres_queue))
                del_timer(&mrt->ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);
 
@@ -1544,61 +1396,54 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
  *     Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
-       int i;
+       struct mr_mfc *c, *tmp;
        LIST_HEAD(list);
-       struct mfc6_cache *c, *next;
+       int i;
 
-       /*
-        *      Shut down all active vif entries
-        */
+       /* Shut down all active vif entries */
        for (i = 0; i < mrt->maxvif; i++) {
-               if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+               if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
                        continue;
                mif6_delete(mrt, i, 0, &list);
        }
        unregister_netdevice_many(&list);
 
-       /*
-        *      Wipe the cache
-        */
-       for (i = 0; i < MFC6_LINES; i++) {
-               list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
-                       if (!all && (c->mfc_flags & MFC_STATIC))
-                               continue;
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
-
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_cache_free(c);
-               }
+       /* Wipe the cache */
+       list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+               if (!all && (c->mfc_flags & MFC_STATIC))
+                       continue;
+               rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+               list_del_rcu(&c->list);
+               mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+               ip6mr_cache_free((struct mfc6_cache *)c);
        }
 
        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+               list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_destroy_unres(mrt, c);
+                       mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+                                         RTM_DELROUTE);
+                       ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
 }
 
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
 {
        int err = 0;
        struct net *net = sock_net(sk);
 
        rtnl_lock();
        write_lock_bh(&mrt_lock);
-       if (likely(mrt->mroute6_sk == NULL)) {
-               mrt->mroute6_sk = sk;
-               net->ipv6.devconf_all->mc_forwarding++;
-       } else {
+       if (rtnl_dereference(mrt->mroute_sk)) {
                err = -EADDRINUSE;
+       } else {
+               rcu_assign_pointer(mrt->mroute_sk, sk);
+               net->ipv6.devconf_all->mc_forwarding++;
        }
        write_unlock_bh(&mrt_lock);
 
@@ -1616,7 +1461,7 @@ int ip6mr_sk_done(struct sock *sk)
 {
        int err = -EACCES;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1624,9 +1469,9 @@ int ip6mr_sk_done(struct sock *sk)
 
        rtnl_lock();
        ip6mr_for_each_table(mrt, net) {
-               if (sk == mrt->mroute6_sk) {
+               if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        write_lock_bh(&mrt_lock);
-                       mrt->mroute6_sk = NULL;
+                       RCU_INIT_POINTER(mrt->mroute_sk, NULL);
                        net->ipv6.devconf_all->mc_forwarding--;
                        write_unlock_bh(&mrt_lock);
                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1640,13 +1485,14 @@ int ip6mr_sk_done(struct sock *sk)
                }
        }
        rtnl_unlock();
+       synchronize_rcu();
 
        return err;
 }
 
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
                .flowi6_oif     = skb->dev->ifindex,
@@ -1656,8 +1502,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
                return NULL;
 
-       return mrt->mroute6_sk;
+       return rcu_access_pointer(mrt->mroute_sk);
 }
+EXPORT_SYMBOL(mroute6_is_socket);
 
 /*
  *     Socket options and virtual interface manipulation. The whole
@@ -1673,7 +1520,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
        struct mf6cctl mfc;
        mifi_t mifi;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1684,7 +1531,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                return -ENOENT;
 
        if (optname != MRT6_INIT) {
-               if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+               if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+                   !ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EACCES;
        }
 
@@ -1706,7 +1554,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                if (vif.mif6c_mifi >= MAXMIFS)
                        return -ENFILE;
                rtnl_lock();
-               ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+               ret = mif6_add(net, mrt, &vif,
+                              sk == rtnl_dereference(mrt->mroute_sk));
                rtnl_unlock();
                return ret;
 
@@ -1741,7 +1590,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
                else
                        ret = ip6mr_mfc_add(net, mrt, &mfc,
-                                           sk == mrt->mroute6_sk, parent);
+                                           sk ==
+                                           rtnl_dereference(mrt->mroute_sk),
+                                           parent);
                rtnl_unlock();
                return ret;
 
@@ -1793,7 +1644,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
                if (v != RT_TABLE_DEFAULT && v >= 100000000)
                        return -EINVAL;
-               if (sk == mrt->mroute6_sk)
+               if (sk == rcu_access_pointer(mrt->mroute_sk))
                        return -EBUSY;
 
                rtnl_lock();
@@ -1824,7 +1675,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
        int olr;
        int val;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1872,10 +1723,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 {
        struct sioc_sg_req6 sr;
        struct sioc_mif_req6 vr;
-       struct mif_device *vif;
+       struct vif_device *vif;
        struct mfc6_cache *c;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
        if (!mrt)
@@ -1888,8 +1739,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
                if (vr.mifi >= mrt->maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
-               vif = &mrt->vif6_table[vr.mifi];
-               if (MIF_EXISTS(mrt, vr.mifi)) {
+               vif = &mrt->vif_table[vr.mifi];
+               if (VIF_EXISTS(mrt, vr.mifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
@@ -1906,19 +1757,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
 
-               read_lock(&mrt_lock);
+               rcu_read_lock();
                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
-                       read_unlock(&mrt_lock);
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+                       rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
@@ -1946,10 +1797,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 {
        struct compat_sioc_sg_req6 sr;
        struct compat_sioc_mif_req6 vr;
-       struct mif_device *vif;
+       struct vif_device *vif;
        struct mfc6_cache *c;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
        if (!mrt)
@@ -1962,8 +1813,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                if (vr.mifi >= mrt->maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
-               vif = &mrt->vif6_table[vr.mifi];
-               if (MIF_EXISTS(mrt, vr.mifi)) {
+               vif = &mrt->vif_table[vr.mifi];
+               if (VIF_EXISTS(mrt, vr.mifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
@@ -1980,19 +1831,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
 
-               read_lock(&mrt_lock);
+               rcu_read_lock();
                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
-                       read_unlock(&mrt_lock);
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+                       rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
@@ -2013,11 +1864,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
  *     Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
        struct ipv6hdr *ipv6h;
-       struct mif_device *vif = &mrt->vif6_table[vifi];
+       struct vif_device *vif = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct dst_entry *dst;
        struct flowi6 fl6;
@@ -2087,46 +1938,50 @@ out_free:
        return 0;
 }
 
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
 {
        int ct;
 
        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
-               if (mrt->vif6_table[ct].dev == dev)
+               if (mrt->vif_table[ct].dev == dev)
                        break;
        }
        return ct;
 }
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
-                          struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+                          struct sk_buff *skb, struct mfc6_cache *c)
 {
        int psend = -1;
        int vif, ct;
        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
-       vif = cache->mf6c_parent;
-       cache->mfc_un.res.pkt++;
-       cache->mfc_un.res.bytes += skb->len;
-       cache->mfc_un.res.lastuse = jiffies;
+       vif = c->_c.mfc_parent;
+       c->_c.mfc_un.res.pkt++;
+       c->_c.mfc_un.res.bytes += skb->len;
+       c->_c.mfc_un.res.lastuse = jiffies;
 
-       if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+       if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
                struct mfc6_cache *cache_proxy;
 
                /* For an (*,G) entry, we only check that the incoming
                 * interface is part of the static tree.
                 */
-               cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+               rcu_read_lock();
+               cache_proxy = mr_mfc_find_any_parent(mrt, vif);
                if (cache_proxy &&
-                   cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+                   cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+                       rcu_read_unlock();
                        goto forward;
+               }
+               rcu_read_unlock();
        }
 
        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
-       if (mrt->vif6_table[vif].dev != skb->dev) {
-               cache->mfc_un.res.wrong_if++;
+       if (mrt->vif_table[vif].dev != skb->dev) {
+               c->_c.mfc_un.res.wrong_if++;
 
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2135,52 +1990,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
-                    cache->mfc_un.res.ttls[true_vifi] < 255) &&
+                    c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
-                              cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-                       cache->mfc_un.res.last_assert = jiffies;
+                              c->_c.mfc_un.res.last_assert +
+                              MFC_ASSERT_THRESH)) {
+                       c->_c.mfc_un.res.last_assert = jiffies;
                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
                }
                goto dont_forward;
        }
 
 forward:
-       mrt->vif6_table[vif].pkt_in++;
-       mrt->vif6_table[vif].bytes_in += skb->len;
+       mrt->vif_table[vif].pkt_in++;
+       mrt->vif_table[vif].bytes_in += skb->len;
 
        /*
         *      Forward the frame
         */
-       if (ipv6_addr_any(&cache->mf6c_origin) &&
-           ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+       if (ipv6_addr_any(&c->mf6c_origin) &&
+           ipv6_addr_any(&c->mf6c_mcastgrp)) {
                if (true_vifi >= 0 &&
-                   true_vifi != cache->mf6c_parent &&
+                   true_vifi != c->_c.mfc_parent &&
                    ipv6_hdr(skb)->hop_limit >
-                               cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+                               c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
                        /* It's an (*,*) entry and the packet is not coming from
                         * the upstream: forward the packet to the upstream
                         * only.
                         */
-                       psend = cache->mf6c_parent;
+                       psend = c->_c.mfc_parent;
                        goto last_forward;
                }
                goto dont_forward;
        }
-       for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+       for (ct = c->_c.mfc_un.res.maxvif - 1;
+            ct >= c->_c.mfc_un.res.minvif; ct--) {
                /* For (*,G) entry, don't forward to the incoming interface */
-               if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
-                   ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+               if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+                   ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
-                                       ip6mr_forward2(net, mrt, skb2, cache, psend);
+                                       ip6mr_forward2(net, mrt, skb2,
+                                                      c, psend);
                        }
                        psend = ct;
                }
        }
 last_forward:
        if (psend != -1) {
-               ip6mr_forward2(net, mrt, skb, cache, psend);
+               ip6mr_forward2(net, mrt, skb, c, psend);
                return;
        }
 
@@ -2197,7 +2055,7 @@ int ip6_mr_input(struct sk_buff *skb)
 {
        struct mfc6_cache *cache;
        struct net *net = dev_net(skb->dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->dev->ifindex,
                .flowi6_mark    = skb->mark,
@@ -2247,66 +2105,11 @@ int ip6_mr_input(struct sk_buff *skb)
        return 0;
 }
 
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-                              struct mfc6_cache *c, struct rtmsg *rtm)
-{
-       struct rta_mfc_stats mfcs;
-       struct nlattr *mp_attr;
-       struct rtnexthop *nhp;
-       unsigned long lastuse;
-       int ct;
-
-       /* If cache is unresolved, don't try to parse IIF and OIF */
-       if (c->mf6c_parent >= MAXMIFS) {
-               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-               return -ENOENT;
-       }
-
-       if (MIF_EXISTS(mrt, c->mf6c_parent) &&
-           nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
-               return -EMSGSIZE;
-       mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
-       if (!mp_attr)
-               return -EMSGSIZE;
-
-       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-               if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
-                       nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
-                       if (!nhp) {
-                               nla_nest_cancel(skb, mp_attr);
-                               return -EMSGSIZE;
-                       }
-
-                       nhp->rtnh_flags = 0;
-                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-                       nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
-                       nhp->rtnh_len = sizeof(*nhp);
-               }
-       }
-
-       nla_nest_end(skb, mp_attr);
-
-       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
-       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-       mfcs.mfcs_packets = c->mfc_un.res.pkt;
-       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
-       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-                             RTA_PAD))
-               return -EMSGSIZE;
-
-       rtm->rtm_type = RTN_MULTICAST;
-       return 1;
-}
-
 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
                    u32 portid)
 {
        int err;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct mfc6_cache *cache;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
@@ -2367,15 +2170,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
                return err;
        }
 
-       if (rtm->rtm_flags & RTM_F_NOTIFY)
-               cache->mfc_flags |= MFC_NOTIFY;
-
-       err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+       err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
        read_unlock(&mrt_lock);
        return err;
 }
 
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
                             int flags)
 {
@@ -2397,7 +2197,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
                goto nla_put_failure;
        rtm->rtm_type = RTN_MULTICAST;
        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-       if (c->mfc_flags & MFC_STATIC)
+       if (c->_c.mfc_flags & MFC_STATIC)
                rtm->rtm_protocol = RTPROT_STATIC;
        else
                rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2406,7 +2206,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
                goto nla_put_failure;
-       err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+       err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
        /* do not break the dump if cache is unresolved */
        if (err < 0 && err != -ENOENT)
                goto nla_put_failure;
@@ -2419,6 +2219,14 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                             u32 portid, u32 seq, struct mr_mfc *c,
+                             int cmd, int flags)
+{
+       return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+                                cmd, flags);
+}
+
 static int mr6_msgsize(bool unresolved, int maxvif)
 {
        size_t len =
@@ -2440,14 +2248,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
        return len;
 }
 
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
                              int cmd)
 {
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+       skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
                        GFP_ATOMIC);
        if (!skb)
                goto errout;
@@ -2482,7 +2290,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
        return len;
 }
 
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
 {
        struct net *net = read_pnet(&mrt->net);
        struct nlmsghdr *nlh;
@@ -2532,65 +2340,6 @@ errout:
 
 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct net *net = sock_net(skb->sk);
-       struct mr6_table *mrt;
-       struct mfc6_cache *mfc;
-       unsigned int t = 0, s_t;
-       unsigned int h = 0, s_h;
-       unsigned int e = 0, s_e;
-
-       s_t = cb->args[0];
-       s_h = cb->args[1];
-       s_e = cb->args[2];
-
-       read_lock(&mrt_lock);
-       ip6mr_for_each_table(mrt, net) {
-               if (t < s_t)
-                       goto next_table;
-               if (t > s_t)
-                       s_h = 0;
-               for (h = s_h; h < MFC6_LINES; h++) {
-                       list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
-                               if (e < s_e)
-                                       goto next_entry;
-                               if (ip6mr_fill_mroute(mrt, skb,
-                                                     NETLINK_CB(cb->skb).portid,
-                                                     cb->nlh->nlmsg_seq,
-                                                     mfc, RTM_NEWROUTE,
-                                                     NLM_F_MULTI) < 0)
-                                       goto done;
-next_entry:
-                               e++;
-                       }
-                       e = s_e = 0;
-               }
-               spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
-                       if (e < s_e)
-                               goto next_entry2;
-                       if (ip6mr_fill_mroute(mrt, skb,
-                                             NETLINK_CB(cb->skb).portid,
-                                             cb->nlh->nlmsg_seq,
-                                             mfc, RTM_NEWROUTE,
-                                             NLM_F_MULTI) < 0) {
-                               spin_unlock_bh(&mfc_unres_lock);
-                               goto done;
-                       }
-next_entry2:
-                       e++;
-               }
-               spin_unlock_bh(&mfc_unres_lock);
-               e = s_e = 0;
-               s_h = 0;
-next_table:
-               t++;
-       }
-done:
-       read_unlock(&mrt_lock);
-
-       cb->args[2] = e;
-       cb->args[1] = h;
-       cb->args[0] = t;
-
-       return skb->len;
+       return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+                               _ip6mr_fill_mroute, &mfc_unres_lock);
 }
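The roughly sixty-line dump loop collapses into the shared mr_rtm_dumproute(),
which keeps the same three-level resume state in cb->args (table, position,
entry) and calls back into family code to fill each route. As used here, its
shape is (hedged; the definition lives in the common mroute code):

	int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
			     struct mr_table *(*iter)(struct net *net,
						      struct mr_table *mrt),
			     int (*fill)(struct mr_table *mrt,
					 struct sk_buff *skb,
					 u32 portid, u32 seq, struct mr_mfc *c,
					 int cmd, int flags),
			     spinlock_t *lock);

The _ip6mr_fill_mroute() trampoline above exists only to convert the generic
struct mr_mfc pointer back to struct mfc6_cache for the existing filler.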
index d78d41f..4d780c7 100644 (file)
@@ -1367,10 +1367,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                if (get_user(len, optlen))
                        return -EFAULT;
 
-               lock_sock(sk);
-               err = nf_getsockopt(sk, PF_INET6, optname, optval,
-                               &len);
-               release_sock(sk);
+               err = nf_getsockopt(sk, PF_INET6, optname, optval, &len);
                if (err >= 0)
                        err = put_user(len, optlen);
        }
@@ -1409,10 +1406,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
                if (get_user(len, optlen))
                        return -EFAULT;
 
-               lock_sock(sk);
-               err = compat_nf_getsockopt(sk, PF_INET6,
-                                          optname, optval, &len);
-               release_sock(sk);
+               err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len);
                if (err >= 0)
                        err = put_user(len, optlen);
        }
@@ -1421,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 }
 EXPORT_SYMBOL(compat_ipv6_getsockopt);
 #endif
-
index 9b9d2ff..d1a0cef 100644 (file)
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
        if (ifindex == 0) {
                struct rt6_info *rt;
-               rt = rt6_lookup(net, addr, NULL, 0, 0);
+               rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
                if (rt) {
                        dev = rt->dst.dev;
                        ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
        struct inet6_dev *idev = NULL;
 
        if (ifindex == 0) {
-               struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+               struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
 
                if (rt) {
                        dev = rt->dst.dev;
@@ -2997,6 +2997,7 @@ static void __net_exit igmp6_net_exit(struct net *net)
 static struct pernet_operations igmp6_net_ops = {
        .init = igmp6_net_init,
        .exit = igmp6_net_exit,
+       .async = true,
 };
 
 int __init igmp6_init(void)
index f61a5b6..0a19ce3 100644 (file)
@@ -1882,6 +1882,7 @@ static void __net_exit ndisc_net_exit(struct net *net)
 static struct pernet_operations ndisc_net_ops = {
        .init = ndisc_net_init,
        .exit = ndisc_net_exit,
+       .async = true,
 };
 
 int __init ndisc_init(void)
index af4c917..62358b9 100644 (file)
@@ -352,6 +352,10 @@ ip6t_do_table(struct sk_buff *skb,
                        }
                        if (table_base + v != ip6t_next_entry(e) &&
                            !(e->ipv6.flags & IP6T_F_GOTO)) {
+                               if (unlikely(stackidx >= private->stacksize)) {
+                                       verdict = NF_DROP;
+                                       break;
+                               }
                                jumpstack[stackidx++] = e;
                        }
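ip6t_do_table() now bounds-checks before pushing a return address onto the
per-cpu jumpstack: should a corrupt or miscomputed ruleset nest deeper than
the stacksize fixed at translation time, the packet is dropped instead of the
stack overflowing. Annotated:

	if (unlikely(stackidx >= private->stacksize)) {
		verdict = NF_DROP;      /* fail closed, don't overflow */
		break;
	}
	jumpstack[stackidx++] = e;      /* in-bounds by the check above */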
 
index fa51a20..38dea8f 100644 (file)
@@ -85,14 +85,14 @@ static int reject_tg6_check(const struct xt_tgchk_param *par)
        const struct ip6t_entry *e = par->entryinfo;
 
        if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
-               pr_info("ECHOREPLY is not supported.\n");
+               pr_info_ratelimited("ECHOREPLY is not supported\n");
                return -EINVAL;
        } else if (rejinfo->with == IP6T_TCP_RESET) {
                /* Must specify that it's a TCP packet */
                if (!(e->ipv6.flags & IP6T_F_PROTO) ||
                    e->ipv6.proto != IPPROTO_TCP ||
                    (e->ipv6.invflags & XT_INV_PROTO)) {
-                       pr_info("TCP_RESET illegal for non-tcp\n");
+                       pr_info_ratelimited("TCP_RESET illegal for non-tcp\n");
                        return -EINVAL;
                }
        }
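These checkentry messages (and the rpfilter and srh ones below) switch from
pr_info()/pr_err() to pr_info_ratelimited(), since the paths are reachable by
anyone able to load a ruleset and unthrottled printks are a log-flooding
vector. Conceptually the ratelimited variant is just (simplified from
include/linux/printk.h):

	#define pr_info_ratelimited(fmt, ...) \
		printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)

where printk_ratelimited() keeps a static ratelimit state per call site and
silently drops messages beyond the default burst.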
index b12e61b..910a273 100644 (file)
@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
                lookup_flags |= RT6_LOOKUP_F_IFACE;
        }
 
-       rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+       rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
        if (rt->dst.error)
                goto out;
 
@@ -103,14 +103,14 @@ static int rpfilter_check(const struct xt_mtchk_param *par)
        unsigned int options = ~XT_RPFILTER_OPTION_MASK;
 
        if (info->flags & options) {
-               pr_info("unknown options encountered");
+               pr_info_ratelimited("unknown options\n");
                return -EINVAL;
        }
 
        if (strcmp(par->table, "mangle") != 0 &&
            strcmp(par->table, "raw") != 0) {
-               pr_info("match only valid in the \'raw\' "
-                       "or \'mangle\' tables, not \'%s\'.\n", par->table);
+               pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n",
+                                   par->table);
                return -EINVAL;
        }
 
index 9642164..33719d5 100644 (file)
@@ -122,12 +122,14 @@ static int srh_mt6_check(const struct xt_mtchk_param *par)
        const struct ip6t_srh *srhinfo = par->matchinfo;
 
        if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
-               pr_err("unknown srh match flags  %X\n", srhinfo->mt_flags);
+               pr_info_ratelimited("unknown srh match flags  %X\n",
+                                   srhinfo->mt_flags);
                return -EINVAL;
        }
 
        if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
-               pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags);
+               pr_info_ratelimited("unknown srh invflags %X\n",
+                                   srhinfo->mt_invflags);
                return -EINVAL;
        }
 
index c87b483..32f98bc 100644 (file)
@@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net)
 
 static struct pernet_operations defrag6_net_ops = {
        .exit = defrag6_net_exit,
+       .async = true,
 };
 
 static int __init nf_defrag_init(void)
index cc5174c..3230b3d 100644 (file)
@@ -181,7 +181,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
        *dest = 0;
  again:
-       rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+       rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+                                     lookup_flags);
        if (rt->dst.error)
                goto put_rt_err;
 
index d12c55d..318c6e9 100644 (file)
@@ -240,6 +240,7 @@ static void __net_init ping_v6_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v6_net_ops = {
        .init = ping_v6_proc_init_net,
        .exit = ping_v6_proc_exit_net,
+       .async = true,
 };
 #endif
 
index b678142..1678cf0 100644 (file)
@@ -343,6 +343,7 @@ static void __net_exit ipv6_proc_exit_net(struct net *net)
 static struct pernet_operations ipv6_proc_ops = {
        .init = ipv6_proc_init_net,
        .exit = ipv6_proc_exit_net,
+       .async = true,
 };
 
 int __init ipv6_misc_proc_init(void)
@@ -354,4 +355,3 @@ void ipv6_misc_proc_exit(void)
 {
        unregister_pernet_subsys(&ipv6_proc_ops);
 }
-
index 4c25339..10a4ac4 100644 (file)
@@ -1332,6 +1332,7 @@ static void __net_exit raw6_exit_net(struct net *net)
 static struct pernet_operations raw6_net_ops = {
        .init = raw6_init_net,
        .exit = raw6_exit_net,
+       .async = true,
 };
 
 int __init raw6_proc_init(void)
index afbc000..b5da69c 100644 (file)
@@ -733,6 +733,7 @@ static void __net_exit ipv6_frags_exit_net(struct net *net)
 static struct pernet_operations ip6_frags_ops = {
        .init = ipv6_frags_init_net,
        .exit = ipv6_frags_exit_net,
+       .async = true,
 };
 
 int __init ipv6_frag_init(void)
index 9dcfadd..f0ae584 100644 (file)
@@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
        return false;
 }
 
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+                                            struct rt6_info *match,
                                             struct flowi6 *fl6, int oif,
+                                            const struct sk_buff *skb,
                                             int strict)
 {
        struct rt6_info *sibling, *next_sibling;
@@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
         * case it will always be non-zero. Otherwise now is the time to do it.
         */
        if (!fl6->mp_hash)
-               fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+               fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
        if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
                return match;
@@ -914,7 +916,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                                             struct fib6_table *table,
-                                            struct flowi6 *fl6, int flags)
+                                            struct flowi6 *fl6,
+                                            const struct sk_buff *skb,
+                                            int flags)
 {
        struct rt6_info *rt, *rt_cache;
        struct fib6_node *fn;
@@ -929,8 +933,8 @@ restart:
                rt = rt6_device_match(net, rt, &fl6->saddr,
                                      fl6->flowi6_oif, flags);
                if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-                       rt = rt6_multipath_select(rt, fl6,
-                                                 fl6->flowi6_oif, flags);
+                       rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+                                                 skb, flags);
        }
        if (rt == net->ipv6.ip6_null_entry) {
                fn = fib6_backtrack(fn, &fl6->saddr);
@@ -954,14 +958,15 @@ restart:
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
-                                   int flags)
+                                  const struct sk_buff *skb, int flags)
 {
-       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+       return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
 }
 EXPORT_SYMBOL_GPL(ip6_route_lookup);
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
-                           const struct in6_addr *saddr, int oif, int strict)
+                           const struct in6_addr *saddr, int oif,
+                           const struct sk_buff *skb, int strict)
 {
        struct flowi6 fl6 = {
                .flowi6_oif = oif,
@@ -975,7 +980,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        }
 
-       dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+       dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
        if (dst->error == 0)
                return (struct rt6_info *) dst;
 
@@ -1647,7 +1652,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
 }
 
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-                              int oif, struct flowi6 *fl6, int flags)
+                              int oif, struct flowi6 *fl6,
+                              const struct sk_buff *skb, int flags)
 {
        struct fib6_node *fn, *saved_fn;
        struct rt6_info *rt, *rt_cache;
@@ -1669,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 redo_rt6_select:
        rt = rt6_select(net, fn, oif, strict);
        if (rt->rt6i_nsiblings)
-               rt = rt6_multipath_select(rt, fl6, oif, strict);
+               rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
        if (rt == net->ipv6.ip6_null_entry) {
                fn = fib6_backtrack(fn, &fl6->saddr);
                if (fn)
@@ -1768,28 +1774,35 @@ uncached_rt_out:
 }
 EXPORT_SYMBOL_GPL(ip6_pol_route);
 
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
-                                           struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+                                           struct fib6_table *table,
+                                           struct flowi6 *fl6,
+                                           const struct sk_buff *skb,
+                                           int flags)
 {
-       return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
 }
 
 struct dst_entry *ip6_route_input_lookup(struct net *net,
                                         struct net_device *dev,
-                                        struct flowi6 *fl6, int flags)
+                                        struct flowi6 *fl6,
+                                        const struct sk_buff *skb,
+                                        int flags)
 {
        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
                flags |= RT6_LOOKUP_F_IFACE;
 
-       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+       return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
 }
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
-                                 struct flow_keys *keys)
+                                 struct flow_keys *keys,
+                                 struct flow_keys *flkeys)
 {
        const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
        const struct ipv6hdr *key_iph = outer_iph;
+       struct flow_keys *_flkeys = flkeys;
        const struct ipv6hdr *inner_iph;
        const struct icmp6hdr *icmph;
        struct ipv6hdr _inner_iph;
@@ -1811,26 +1824,76 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
                goto out;
 
        key_iph = inner_iph;
+       _flkeys = NULL;
 out:
-       memset(keys, 0, sizeof(*keys));
-       keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-       keys->addrs.v6addrs.src = key_iph->saddr;
-       keys->addrs.v6addrs.dst = key_iph->daddr;
-       keys->tags.flow_label = ip6_flowinfo(key_iph);
-       keys->basic.ip_proto = key_iph->nexthdr;
+       if (_flkeys) {
+               keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+               keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+               keys->tags.flow_label = _flkeys->tags.flow_label;
+               keys->basic.ip_proto = _flkeys->basic.ip_proto;
+       } else {
+               keys->addrs.v6addrs.src = key_iph->saddr;
+               keys->addrs.v6addrs.dst = key_iph->daddr;
+               keys->tags.flow_label = ip6_flowinfo(key_iph);
+               keys->basic.ip_proto = key_iph->nexthdr;
+       }
 }
 
 /* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+                      const struct sk_buff *skb, struct flow_keys *flkeys)
 {
        struct flow_keys hash_keys;
+       u32 mhash;
 
-       if (skb) {
-               ip6_multipath_l3_keys(skb, &hash_keys);
-               return flow_hash_from_keys(&hash_keys) >> 1;
+       switch (net->ipv6.sysctl.multipath_hash_policy) {
+       case 0:
+               memset(&hash_keys, 0, sizeof(hash_keys));
+               hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+               if (skb) {
+                       ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+               } else {
+                       hash_keys.addrs.v6addrs.src = fl6->saddr;
+                       hash_keys.addrs.v6addrs.dst = fl6->daddr;
+                       hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+                       hash_keys.basic.ip_proto = fl6->flowi6_proto;
+               }
+               break;
+       case 1:
+               if (skb) {
+                       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+                       struct flow_keys keys;
+
+                       /* short-circuit if we already have L4 hash present */
+                       if (skb->l4_hash)
+                               return skb_get_hash_raw(skb) >> 1;
+
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+
+                       if (!flkeys) {
+                               skb_flow_dissect_flow_keys(skb, &keys, flag);
+                               flkeys = &keys;
+                       }
+                       hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+                       hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+                       hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+                       hash_keys.ports.src = flkeys->ports.src;
+                       hash_keys.ports.dst = flkeys->ports.dst;
+                       hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+               } else {
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+                       hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+                       hash_keys.addrs.v6addrs.src = fl6->saddr;
+                       hash_keys.addrs.v6addrs.dst = fl6->daddr;
+                       hash_keys.ports.src = fl6->fl6_sport;
+                       hash_keys.ports.dst = fl6->fl6_dport;
+                       hash_keys.basic.ip_proto = fl6->flowi6_proto;
+               }
+               break;
        }
+       mhash = flow_hash_from_keys(&hash_keys);
 
-       return get_hash_from_flowi6(fl6) >> 1;
+       return mhash >> 1;
 }
 
 void ip6_route_input(struct sk_buff *skb)
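
rt6_multipath_hash() now honors the per-netns fib_multipath_hash_policy
sysctl: policy 0 hashes the L3 fields (addresses, flow label, next
header) while policy 1 hashes the L4 five-tuple, reusing pre-dissected
flkeys when the caller already has them. A simplified sketch of the
resulting key selection (not the exact kernel code):

    /* simplified sketch of the policy split, fl6-only path */
    memset(&hash_keys, 0, sizeof(hash_keys));
    hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
    if (policy == 0) {              /* L3: addrs + flow label + proto */
            hash_keys.addrs.v6addrs.src = fl6->saddr;
            hash_keys.addrs.v6addrs.dst = fl6->daddr;
            hash_keys.tags.flow_label   = (__force u32)fl6->flowlabel;
            hash_keys.basic.ip_proto    = fl6->flowi6_proto;
    } else {                        /* L4: addrs + ports + proto */
            hash_keys.addrs.v6addrs.src = fl6->saddr;
            hash_keys.addrs.v6addrs.dst = fl6->daddr;
            hash_keys.ports.src         = fl6->fl6_sport;
            hash_keys.ports.dst         = fl6->fl6_dport;
            hash_keys.basic.ip_proto    = fl6->flowi6_proto;
    }
    mhash = flow_hash_from_keys(&hash_keys) >> 1;  /* as in rt6_multipath_hash() */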
@@ -1847,20 +1910,29 @@ void ip6_route_input(struct sk_buff *skb)
                .flowi6_mark = skb->mark,
                .flowi6_proto = iph->nexthdr,
        };
+       struct flow_keys *flkeys = NULL, _flkeys;
 
        tun_info = skb_tunnel_info(skb);
        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+       if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+               flkeys = &_flkeys;
+
        if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
-               fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+               fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
        skb_dst_drop(skb);
-       skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+       skb_dst_set(skb,
+                   ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
 }
 
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
-                                            struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+                                            struct fib6_table *table,
+                                            struct flowi6 *fl6,
+                                            const struct sk_buff *skb,
+                                            int flags)
 {
-       return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
 }
 
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1888,7 +1960,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
        else if (sk)
                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 
-       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+       return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
 }
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
@@ -2137,6 +2209,7 @@ struct ip6rd_flowi {
 static struct rt6_info *__ip6_route_redirect(struct net *net,
                                             struct fib6_table *table,
                                             struct flowi6 *fl6,
+                                            const struct sk_buff *skb,
                                             int flags)
 {
        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2210,8 +2283,9 @@ out:
 };
 
 static struct dst_entry *ip6_route_redirect(struct net *net,
-                                       const struct flowi6 *fl6,
-                                       const struct in6_addr *gateway)
+                                           const struct flowi6 *fl6,
+                                           const struct sk_buff *skb,
+                                           const struct in6_addr *gateway)
 {
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct ip6rd_flowi rdfl;
@@ -2219,7 +2293,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
        rdfl.fl6 = *fl6;
        rdfl.gateway = *gateway;
 
-       return fib6_rule_lookup(net, &rdfl.fl6,
+       return fib6_rule_lookup(net, &rdfl.fl6, skb,
                                flags, __ip6_route_redirect);
 }
 
@@ -2239,7 +2313,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
        fl6.flowlabel = ip6_flowinfo(iph);
        fl6.flowi6_uid = uid;
 
-       dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+       dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
        rt6_do_redirect(dst, NULL, skb);
        dst_release(dst);
 }
@@ -2261,7 +2335,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
        fl6.saddr = iph->daddr;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
 
-       dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+       dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
        rt6_do_redirect(dst, NULL, skb);
        dst_release(dst);
 }
@@ -2463,7 +2537,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
                flags |= RT6_LOOKUP_F_HAS_SADDR;
 
        flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
-       rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+       rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
 
        /* if table lookup failed, fall back to full lookup */
        if (rt == net->ipv6.ip6_null_entry) {
@@ -2526,7 +2600,7 @@ static int ip6_route_check_nh(struct net *net,
        }
 
        if (!grt)
-               grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+               grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
 
        if (!grt)
                goto out;
@@ -2671,14 +2745,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
                if (err)
                        goto out;
                rt->dst.lwtstate = lwtstate_get(lwtstate);
-               if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
-                       rt->dst.lwtstate->orig_output = rt->dst.output;
-                       rt->dst.output = lwtunnel_output;
-               }
-               if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
-                       rt->dst.lwtstate->orig_input = rt->dst.input;
-                       rt->dst.input = lwtunnel_input;
-               }
+               lwtunnel_set_redirect(&rt->dst);
        }
 
        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
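
lwtunnel_set_redirect() folds the two removed if-blocks into a single
helper; judging from the code it replaces, it presumably amounts to:

    static inline void lwtunnel_set_redirect(struct dst_entry *dst)
    {
            if (lwtunnel_output_redirect(dst->lwtstate)) {
                    dst->lwtstate->orig_output = dst->output;
                    dst->output = lwtunnel_output;
            }
            if (lwtunnel_input_redirect(dst->lwtstate)) {
                    dst->lwtstate->orig_input = dst->input;
                    dst->input = lwtunnel_input;
            }
    }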
@@ -4598,7 +4665,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                if (!ipv6_addr_any(&fl6.saddr))
                        flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-               dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+               dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
 
                rcu_read_unlock();
        } else {
@@ -4979,6 +5046,7 @@ static void __net_exit ip6_route_net_exit_late(struct net *net)
 static struct pernet_operations ip6_route_net_ops = {
        .init = ip6_route_net_init,
        .exit = ip6_route_net_exit,
+       .async = true,
 };
 
 static int __net_init ipv6_inetpeer_init(struct net *net)
@@ -5004,11 +5072,13 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net)
 static struct pernet_operations ipv6_inetpeer_ops = {
        .init   =       ipv6_inetpeer_init,
        .exit   =       ipv6_inetpeer_exit,
+       .async  =       true,
 };
 
 static struct pernet_operations ip6_route_net_late_ops = {
        .init = ip6_route_net_init_late,
        .exit = ip6_route_net_exit_late,
+       .async = true,
 };
 
 static struct notifier_block ip6_route_dev_notifier = {
index 7f5621d..c3f13c3 100644 (file)
@@ -395,6 +395,7 @@ static void __net_exit seg6_net_exit(struct net *net)
 static struct pernet_operations ip6_segments_ops = {
        .init = seg6_net_init,
        .exit = seg6_net_exit,
+       .async = true,
 };
 
 static const struct genl_ops seg6_genl_ops[] = {
index ba3767e..4572232 100644 (file)
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
                fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
 
        if (!tbl_id) {
-               dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+               dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
        } else {
                struct fib6_table *table;
 
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
                if (!table)
                        goto out;
 
-               rt = ip6_pol_route(net, table, 0, &fl6, flags);
+               rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
                dst = &rt->dst;
        }
 
index 3873d38..182db07 100644 (file)
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
 #ifdef CONFIG_IPV6_SIT_6RD
        struct ip_tunnel *t = netdev_priv(dev);
 
-       if (t->dev == sitn->fb_tunnel_dev) {
+       if (dev == sitn->fb_tunnel_dev) {
                ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
                t->ip6rd.relay_prefix = 0;
                t->ip6rd.prefixlen = 16;
@@ -1878,6 +1878,7 @@ static struct pernet_operations sit_net_ops = {
        .exit_batch = sit_exit_batch_net,
        .id   = &sit_net_id,
        .size = sizeof(struct sit_net),
+       .async = true,
 };
 
 static void __exit sit_cleanup(void)
index a789a8a..966c42a 100644 (file)
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
+#include <net/netevent.h>
 #ifdef CONFIG_NETLABEL
 #include <net/calipso.h>
 #endif
 
+static int zero;
 static int one = 1;
 static int auto_flowlabels_min;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+                                         void __user *buffer, size_t *lenp,
+                                         loff_t *ppos)
+{
+       struct net *net;
+       int ret;
+
+       net = container_of(table->data, struct net,
+                          ipv6.sysctl.multipath_hash_policy);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (write && ret == 0)
+               call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+       return ret;
+}
 
 static struct ctl_table ipv6_table_template[] = {
        {
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
+       {
+               .procname       = "fib_multipath_hash_policy",
+               .data           = &init_net.ipv6.sysctl.multipath_hash_policy,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_rt6_multipath_hash_policy,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
        { }
 };
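
The new fib_multipath_hash_policy entry is clamped to the [0, 1] range
via extra1/extra2, and a successful write raises
NETEVENT_IPV6_MPATH_HASH_UPDATE so interested parties (for example,
switch drivers offloading ECMP) can resync; from userspace,
"sysctl -w net.ipv6.fib_multipath_hash_policy=1" would select L4
hashing. A sketch of a hypothetical listener:

    static int example_netevent_cb(struct notifier_block *nb,
                                   unsigned long event, void *ptr)
    {
            if (event == NETEVENT_IPV6_MPATH_HASH_UPDATE) {
                    struct net *net = ptr;

                    /* re-program hardware ECMP hashing for this netns */
            }
            return NOTIFY_DONE;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_netevent_cb,
    };

    /* registered somewhere in driver init:
     * register_netevent_notifier(&example_nb);
     */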
 
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
        ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
        ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
        ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+       ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy;
 
        ipv6_route_table = ipv6_route_sysctl_init(net);
        if (!ipv6_route_table)
@@ -251,6 +278,7 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net)
 static struct pernet_operations ipv6_sysctl_net_ops = {
        .init = ipv6_sysctl_net_init,
        .exit = ipv6_sysctl_net_exit,
+       .async = true,
 };
 
 static struct ctl_table_header *ip6_header;
index 412139f..5425d7b 100644 (file)
@@ -1451,6 +1451,7 @@ process:
 
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                struct request_sock *req = inet_reqsk(sk);
+               bool req_stolen = false;
                struct sock *nsk;
 
                sk = req->rsk_listener;
@@ -1470,10 +1471,20 @@ process:
                        th = (const struct tcphdr *)skb->data;
                        hdr = ipv6_hdr(skb);
                        tcp_v6_fill_cb(skb, hdr, th);
-                       nsk = tcp_check_req(sk, skb, req, false);
+                       nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
                }
                if (!nsk) {
                        reqsk_put(req);
+                       if (req_stolen) {
+                               /* Another cpu got exclusive access to req
+                                * and created a full blown socket.
+                                * Try to feed this packet to this socket
+                                * instead of discarding it.
+                                */
+                               tcp_v6_restore_cb(skb);
+                               sock_put(sk);
+                               goto lookup;
+                       }
                        goto discard_and_relse;
                }
                if (nsk == sk) {
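
tcp_check_req() grew a req_stolen out-parameter for the race documented
in the comment above: when another CPU wins the request socket and
promotes it to a full socket, the packet is re-looked-up instead of
dropped. The caller-side contract, in simplified sketch form (the real
code also restores the control block and drops the listener reference
before retrying):

    bool req_stolen = false;
    struct sock *nsk = tcp_check_req(sk, skb, req, false, &req_stolen);

    if (!nsk) {
            reqsk_put(req);
            if (req_stolen)
                    goto lookup;    /* another CPU owns the socket now */
            goto discard_and_relse;
    }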
@@ -1996,6 +2007,7 @@ static struct pernet_operations tcpv6_net_ops = {
        .init       = tcpv6_net_init,
        .exit       = tcpv6_net_exit,
        .exit_batch = tcpv6_net_exit_batch,
+       .async      = true,
 };
 
 int __init tcpv6_init(void)
index 14ae32b..f383978 100644 (file)
@@ -123,6 +123,7 @@ static void __net_exit udplite6_proc_exit_net(struct net *net)
 static struct pernet_operations udplite6_net_ops = {
        .init = udplite6_proc_init_net,
        .exit = udplite6_proc_exit_net,
+       .async = true,
 };
 
 int __init udplite6_proc_init(void)
index 09fb44e..88cd0c9 100644 (file)
@@ -395,6 +395,7 @@ static void __net_exit xfrm6_net_exit(struct net *net)
 static struct pernet_operations xfrm6_net_ops = {
        .init   = xfrm6_net_init,
        .exit   = xfrm6_net_exit,
+       .async  = true,
 };
 
 int __init xfrm6_init(void)
index b15075a..16f4347 100644 (file)
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
 {
        xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
 }
-
index f85f0d7..a967361 100644 (file)
@@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
        .exit   = xfrm6_tunnel_net_exit,
        .id     = &xfrm6_tunnel_net_id,
        .size   = sizeof(struct xfrm6_tunnel_net),
+       .async  = true,
 };
 
 static int __init xfrm6_tunnel_init(void)
index 1e8cc7b..81ce15f 100644 (file)
@@ -989,14 +989,13 @@ done:
 }
 
 static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
-                            int *len, int peer)
+                            int peer)
 {
        struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
        struct sock *sk = sock->sk;
        struct iucv_sock *iucv = iucv_sk(sk);
 
        addr->sa_family = AF_IUCV;
-       *len = sizeof(struct sockaddr_iucv);
 
        if (peer) {
                memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
@@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
        memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
        memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
 
-       return 0;
+       return sizeof(struct sockaddr_iucv);
 }
 
 /**
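These getname() hunks follow a tree-wide conversion: handlers now return
the address length on success (or a negative errno) instead of writing
it through an int * out-parameter. The new contract in sketch form, with
a hypothetical protocol:

    static int example_getname(struct socket *sock, struct sockaddr *uaddr,
                               int peer)
    {
            struct sockaddr_storage *addr = (struct sockaddr_storage *)uaddr;

            if (peer /* && peer address unknown */)
                    return -ENOTCONN;

            /* fill *addr ... */
            return sizeof(*addr);   /* length replaces the old *len out-arg */
    }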
index 9d5649e..2c1c8b3 100644 (file)
@@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net)
 static struct pernet_operations kcm_net_ops = {
        .init = kcm_proc_init_net,
        .exit = kcm_proc_exit_net,
+       .async = true,
 };
 
 int __init kcm_proc_init(void)
index f297d53..a6cd071 100644 (file)
@@ -1417,6 +1417,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
         */
        if (csk->sk_user_data) {
                write_unlock_bh(&csk->sk_callback_lock);
+               strp_stop(&psock->strp);
                strp_done(&psock->strp);
                kmem_cache_free(kcm_psockp, psock);
                return -EALREADY;
@@ -2014,6 +2015,7 @@ static struct pernet_operations kcm_net_ops = {
        .exit = kcm_exit_net,
        .id   = &kcm_net_id,
        .size = sizeof(struct kcm_net),
+       .async = true,
 };
 
 static int __init kcm_init(void)
index 7e2e718..3ac08ab 100644 (file)
@@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = {
        .exit = pfkey_net_exit,
        .id   = &pfkey_net_id,
        .size = sizeof(struct netns_pfkey),
+       .async = true,
 };
 
 static void __exit ipsec_pfkey_exit(void)
index ff61124..4614585 100644 (file)
@@ -349,7 +349,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
 }
 
 static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
-                          int *uaddr_len, int peer)
+                          int peer)
 {
        struct sock *sk         = sock->sk;
        struct inet_sock *inet  = inet_sk(sk);
@@ -370,8 +370,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
                lsa->l2tp_conn_id = lsk->conn_id;
                lsa->l2tp_addr.s_addr = addr;
        }
-       *uaddr_len = sizeof(*lsa);
-       return 0;
+       return sizeof(*lsa);
 }
 
 static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
index 1923446..efea58b 100644 (file)
@@ -421,7 +421,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
 }
 
 static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
-                           int *uaddr_len, int peer)
+                           int peer)
 {
        struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
        struct sock *sk = sock->sk;
@@ -449,8 +449,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
        }
        if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
                lsa->l2tp_scope_id = sk->sk_bound_dev_if;
-       *uaddr_len = sizeof(*lsa);
-       return 0;
+       return sizeof(*lsa);
 }
 
 static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
index 59f246d..0c4f49a 100644 (file)
@@ -870,7 +870,7 @@ err:
 /* getname() support.
  */
 static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
-                           int *usockaddr_len, int peer)
+                           int peer)
 {
        int len = 0;
        int error = 0;
@@ -969,8 +969,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
                memcpy(uaddr, &sp, len);
        }
 
-       *usockaddr_len = len;
-       error = 0;
+       error = len;
 
        sock_put(sk);
 end:
@@ -1771,6 +1770,7 @@ static struct pernet_operations pppol2tp_net_ops = {
        .init = pppol2tp_init_net,
        .exit = pppol2tp_exit_net,
        .id   = &pppol2tp_net_id,
+       .async = true,
 };
 
 /*****************************************************************************
index c38d16f..01dcc08 100644 (file)
@@ -971,7 +971,7 @@ release:
  *     Return the address information of a socket.
  */
 static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
-                         int *uaddrlen, int peer)
+                         int peer)
 {
        struct sockaddr_llc sllc;
        struct sock *sk = sock->sk;
@@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
        lock_sock(sk);
        if (sock_flag(sk, SOCK_ZAPPED))
                goto out;
-       *uaddrlen = sizeof(sllc);
        if (peer) {
                rc = -ENOTCONN;
                if (sk->sk_state != TCP_ESTABLISHED)
@@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
                               IFHWADDRLEN);
                }
        }
-       rc = 0;
        sllc.sllc_family = AF_LLC;
        memcpy(uaddr, &sllc, sizeof(sllc));
+       rc = sizeof(sllc);
 out:
        release_sock(sk);
        return rc;
index a8b1616..e83c19d 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -297,16 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 
        if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
                if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+                       struct tid_ampdu_rx *tid_rx;
+
                        ht_dbg_ratelimited(sta->sdata,
                                           "updated AddBA Req from %pM on tid %u\n",
                                           sta->sta.addr, tid);
                        /* We have no API to update the timeout value in the
-                        * driver so reject the timeout update.
+                        * driver so reject the timeout update if the timeout
+                        * changed. If it did not change, i.e., no real update,
+                        * just reply with success.
                         */
-                       status = WLAN_STATUS_REQUEST_DECLINED;
-                       ieee80211_send_addba_resp(sta->sdata, sta->sta.addr,
-                                                 tid, dialog_token, status,
-                                                 1, buf_size, timeout);
+                       rcu_read_lock();
+                       tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+                       if (tid_rx && tid_rx->timeout == timeout)
+                               status = WLAN_STATUS_SUCCESS;
+                       else
+                               status = WLAN_STATUS_REQUEST_DECLINED;
+                       rcu_read_unlock();
                        goto end;
                }
 
index 46028e1..fd68f6f 100644 (file)
@@ -2685,6 +2685,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 
        ieee80211_recalc_ps(local);
        ieee80211_recalc_ps_vif(sdata);
+       ieee80211_check_fast_rx_iface(sdata);
 
        return 0;
 }
@@ -2892,7 +2893,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
        }
        if (beacon->probe_resp_len) {
                new_beacon->probe_resp_len = beacon->probe_resp_len;
-               beacon->probe_resp = pos;
+               new_beacon->probe_resp = pos;
                memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
                pos += beacon->probe_resp_len;
        }
index 1f466d1..a75653a 100644 (file)
@@ -212,6 +212,7 @@ static const char *hw_flag_names[] = {
        FLAG(REPORTS_LOW_ACK),
        FLAG(SUPPORTS_TX_FRAG),
        FLAG(SUPPORTS_TDLS_BUFFER_STA),
+       FLAG(DEAUTH_NEED_MGD_TX_PREP),
 #undef FLAG
 };
 
index 444ea8d..4105081 100644 (file)
@@ -160,12 +160,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
                       sta->cparams.ecn ? "yes" : "no");
        p += scnprintf(p,
                       bufsz+buf-p,
-                      "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
+                      "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
 
        for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
                txqi = to_txq_info(sta->sta.txq[i]);
                p += scnprintf(p, bufsz+buf-p,
-                              "%d %d %u %u %u %u %u %u %u %u %u\n",
+                              "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
                               txqi->txq.tid,
                               txqi->txq.ac,
                               txqi->tin.backlog_bytes,
@@ -176,7 +176,11 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
                               txqi->tin.overlimit,
                               txqi->tin.collisions,
                               txqi->tin.tx_bytes,
-                              txqi->tin.tx_packets);
+                              txqi->tin.tx_packets,
+                              txqi->flags,
+                              txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
+                              txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
+                              txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
        }
 
        rcu_read_unlock();
index 2690002..ae9c33c 100644 (file)
@@ -1467,7 +1467,7 @@ struct ieee802_11_elems {
        const struct ieee80211_timeout_interval_ie *timeout_int;
        const u8 *opmode_notif;
        const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
-       const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
+       struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
        const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie;
 
        /* length of them, respectively */
index 5fe01f8..d13ba06 100644 (file)
@@ -1324,8 +1324,7 @@ static void ieee80211_iface_work(struct work_struct *work)
                        mutex_lock(&local->sta_mtx);
                        sta = sta_info_get_bss(sdata, mgmt->sa);
                        if (sta) {
-                               u16 tid = *ieee80211_get_qos_ctl(hdr) &
-                                               IEEE80211_QOS_CTL_TID_MASK;
+                               u16 tid = ieee80211_get_tid(hdr);
 
                                __ieee80211_stop_rx_ba_session(
                                        sta, tid, WLAN_BACK_RECIPIENT,
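
Here and in several hunks below, the open-coded TID extraction is
replaced by ieee80211_get_tid(); judging from the expressions it
replaces, the helper presumably reduces to:

    static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
    {
            return *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
    }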
index 73ac607..6a381cb 100644 (file)
@@ -1255,13 +1255,12 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
 }
 
 static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
-                              struct ieee80211_mgmt *mgmt, size_t len)
+                              struct ieee80211_mgmt *mgmt, size_t len,
+                              struct ieee802_11_elems *elems)
 {
        struct ieee80211_mgmt *mgmt_fwd;
        struct sk_buff *skb;
        struct ieee80211_local *local = sdata->local;
-       u8 *pos = mgmt->u.action.u.chan_switch.variable;
-       size_t offset_ttl;
 
        skb = dev_alloc_skb(local->tx_headroom + len);
        if (!skb)
@@ -1269,13 +1268,9 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
        skb_reserve(skb, local->tx_headroom);
        mgmt_fwd = skb_put(skb, len);
 
-       /* offset_ttl is based on whether the secondary channel
-        * offset is available or not. Subtract 1 from the mesh TTL
-        * and disable the initiator flag before forwarding.
-        */
-       offset_ttl = (len < 42) ? 7 : 10;
-       *(pos + offset_ttl) -= 1;
-       *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
+       elems->mesh_chansw_params_ie->mesh_ttl--;
+       elems->mesh_chansw_params_ie->mesh_flags &=
+               ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
 
        memcpy(mgmt_fwd, mgmt, len);
        eth_broadcast_addr(mgmt_fwd->da);
@@ -1323,7 +1318,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
 
        /* forward or re-broadcast the CSA frame */
        if (fwd_csa) {
-               if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0)
+               if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0)
                        mcsa_dbg(sdata, "Failed to forward the CSA frame");
        }
 }
index 408649b..37e1727 100644 (file)
@@ -35,7 +35,7 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
        da = ieee80211_get_DA(hdr);
        sa = ieee80211_get_SA(hdr);
        if (ieee80211_is_data_qos(hdr->frame_control))
-               tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+               tid = ieee80211_get_tid(hdr);
        else
                tid = 0;
 
index 39b660b..0024eff 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2008,9 +2009,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
                ieee80211_flush_queues(local, sdata, true);
 
        /* deauthenticate/disassociate now */
-       if (tx || frame_buf)
+       if (tx || frame_buf) {
+               struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+               /*
+                * In multi-channel scenarios, guarantee that the virtual
+                * interface is granted immediate airtime to transmit the
+                * deauthentication frame by calling mgd_prepare_tx, if the
+                * driver requested it.
+                */
+               if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
+                   !ifmgd->have_beacon)
+                       drv_mgd_prepare_tx(sdata->local, sdata);
+
                ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
                                               reason, tx, frame_buf);
+       }
 
        /* flush out frame - make sure the deauth was actually sent */
        if (tx)
@@ -2151,7 +2165,7 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
                                           u16 tx_time)
 {
        struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-       u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+       u16 tid = ieee80211_get_tid(hdr);
        int ac = ieee80211_ac_from_tid(tid);
        struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
        unsigned long now = jiffies;
index 4a5bdad..fb586b6 100644 (file)
@@ -669,7 +669,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
        if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
                return;
 
-       tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+       tid = ieee80211_get_tid(hdr);
        if (likely(sta->ampdu_mlme.tid_tx[tid]))
                return;
 
index fd58061..de7d107 100644 (file)
@@ -439,6 +439,10 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                        flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
                if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
                        flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+               if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
+                       flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
+               if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
+                       flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
                put_unaligned_le16(flags, pos);
                pos += 2;
                if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
@@ -1185,7 +1189,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 
        ack_policy = *ieee80211_get_qos_ctl(hdr) &
                     IEEE80211_QOS_CTL_ACK_POLICY_MASK;
-       tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+       tid = ieee80211_get_tid(hdr);
 
        tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
        if (!tid_agg_rx) {
@@ -1524,9 +1528,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
                   ieee80211_has_pm(hdr->frame_control) &&
                   (ieee80211_is_data_qos(hdr->frame_control) ||
                    ieee80211_is_qos_nullfunc(hdr->frame_control))) {
-               u8 tid;
-
-               tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+               u8 tid = ieee80211_get_tid(hdr);
 
                ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid);
        }
@@ -2351,39 +2353,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 }
 
 static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
 {
        struct net_device *dev = rx->sdata->dev;
        struct sk_buff *skb = rx->skb;
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
        __le16 fc = hdr->frame_control;
        struct sk_buff_head frame_list;
-       struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
        struct ethhdr ethhdr;
        const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
 
-       if (unlikely(!ieee80211_is_data(fc)))
-               return RX_CONTINUE;
-
-       if (unlikely(!ieee80211_is_data_present(fc)))
-               return RX_DROP_MONITOR;
-
-       if (!(status->rx_flags & IEEE80211_RX_AMSDU))
-               return RX_CONTINUE;
-
        if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
-               switch (rx->sdata->vif.type) {
-               case NL80211_IFTYPE_AP_VLAN:
-                       if (!rx->sdata->u.vlan.sta)
-                               return RX_DROP_UNUSABLE;
-                       break;
-               case NL80211_IFTYPE_STATION:
-                       if (!rx->sdata->u.mgd.use_4addr)
-                               return RX_DROP_UNUSABLE;
-                       break;
-               default:
-                       return RX_DROP_UNUSABLE;
-               }
                check_da = NULL;
                check_sa = NULL;
        } else switch (rx->sdata->vif.type) {
@@ -2403,15 +2383,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
                        break;
        }
 
-       if (is_multicast_ether_addr(hdr->addr1))
-               return RX_DROP_UNUSABLE;
-
        skb->dev = dev;
        __skb_queue_head_init(&frame_list);
 
        if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
                                          rx->sdata->vif.addr,
-                                         rx->sdata->vif.type))
+                                         rx->sdata->vif.type,
+                                         data_offset))
                return RX_DROP_UNUSABLE;
 
        ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2433,6 +2411,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
        return RX_QUEUED;
 }
 
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+       struct sk_buff *skb = rx->skb;
+       struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+       __le16 fc = hdr->frame_control;
+
+       if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+               return RX_CONTINUE;
+
+       if (unlikely(!ieee80211_is_data(fc)))
+               return RX_CONTINUE;
+
+       if (unlikely(!ieee80211_is_data_present(fc)))
+               return RX_DROP_MONITOR;
+
+       if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+               switch (rx->sdata->vif.type) {
+               case NL80211_IFTYPE_AP_VLAN:
+                       if (!rx->sdata->u.vlan.sta)
+                               return RX_DROP_UNUSABLE;
+                       break;
+               case NL80211_IFTYPE_STATION:
+                       if (!rx->sdata->u.mgd.use_4addr)
+                               return RX_DROP_UNUSABLE;
+                       break;
+               default:
+                       return RX_DROP_UNUSABLE;
+               }
+       }
+
+       if (is_multicast_ether_addr(hdr->addr1))
+               return RX_DROP_UNUSABLE;
+
+       return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
 #ifdef CONFIG_MAC80211_MESH
 static ieee80211_rx_result
 ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
@@ -2848,6 +2864,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
                case WLAN_HT_ACTION_SMPS: {
                        struct ieee80211_supported_band *sband;
                        enum ieee80211_smps_mode smps_mode;
+                       struct sta_opmode_info sta_opmode = {};
 
                        /* convert to HT capability */
                        switch (mgmt->u.action.u.ht_smps.smps_control) {
@@ -2868,17 +2885,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
                        if (rx->sta->sta.smps_mode == smps_mode)
                                goto handled;
                        rx->sta->sta.smps_mode = smps_mode;
+                       sta_opmode.smps_mode = smps_mode;
+                       sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
 
                        sband = rx->local->hw.wiphy->bands[status->band];
 
                        rate_control_rate_update(local, sband, rx->sta,
                                                 IEEE80211_RC_SMPS_CHANGED);
+                       cfg80211_sta_opmode_change_notify(sdata->dev,
+                                                         rx->sta->addr,
+                                                         &sta_opmode,
+                                                         GFP_KERNEL);
                        goto handled;
                }
                case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
                        struct ieee80211_supported_band *sband;
                        u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
                        enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
+                       struct sta_opmode_info sta_opmode = {};
 
                        /* If it doesn't support 40 MHz it can't change ... */
                        if (!(rx->sta->sta.ht_cap.cap &
@@ -2899,9 +2923,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 
                        rx->sta->sta.bandwidth = new_bw;
                        sband = rx->local->hw.wiphy->bands[status->band];
+                       sta_opmode.bw = new_bw;
+                       sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
 
                        rate_control_rate_update(local, sband, rx->sta,
                                                 IEEE80211_RC_BW_CHANGED);
+                       cfg80211_sta_opmode_change_notify(sdata->dev,
+                                                         rx->sta->addr,
+                                                         &sta_opmode,
+                                                         GFP_KERNEL);
                        goto handled;
                }
                default:
@@ -3731,15 +3761,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_STATION:
-               /* 4-addr is harder to deal with, later maybe */
-               if (sdata->u.mgd.use_4addr)
-                       goto clear;
-               /* software powersave is a huge mess, avoid all of it */
-               if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
-                       goto clear;
-               if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
-                   !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
-                       goto clear;
                if (sta->sta.tdls) {
                        fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
                        fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3751,6 +3772,23 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
                        fastrx.expected_ds_bits =
                                cpu_to_le16(IEEE80211_FCTL_FROMDS);
                }
+
+               if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+                       fastrx.expected_ds_bits |=
+                               cpu_to_le16(IEEE80211_FCTL_TODS);
+                       fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+                       fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+               }
+
+               if (!sdata->u.mgd.powersave)
+                       break;
+
+               /* software powersave is a huge mess, avoid all of it */
+               if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+                       goto clear;
+               if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+                   !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+                       goto clear;
                break;
        case NL80211_IFTYPE_AP_VLAN:
        case NL80211_IFTYPE_AP:
@@ -3767,6 +3805,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
                        !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
                        (sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
                         !sdata->u.vlan.sta);
+
+               if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+                   sdata->u.vlan.sta) {
+                       fastrx.expected_ds_bits |=
+                               cpu_to_le16(IEEE80211_FCTL_FROMDS);
+                       fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+                       fastrx.internal_forward = 0;
+               }
+
                break;
        default:
                goto clear;
@@ -3865,7 +3912,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
        struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
        struct sta_info *sta = rx->sta;
        int orig_len = skb->len;
-       int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+       int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+       int snap_offs = hdrlen;
        struct {
                u8 snap[sizeof(rfc1042_header)];
                __be16 proto;
@@ -3896,10 +3944,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
            (status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
                return false;
 
-       /* we don't deal with A-MSDU deaggregation here */
-       if (status->rx_flags & IEEE80211_RX_AMSDU)
-               return false;
-
        if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
                return false;
 
@@ -3931,21 +3975,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
                snap_offs += IEEE80211_CCMP_HDR_LEN;
        }
 
-       if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
-               goto drop;
-       payload = (void *)(skb->data + snap_offs);
+       if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+               if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+                       goto drop;
 
-       if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
-               return false;
+               payload = (void *)(skb->data + snap_offs);
 
-       /* Don't handle these here since they require special code.
-        * Accept AARP and IPX even though they should come with a
-        * bridge-tunnel header - but if we get them this way then
-        * there's little point in discarding them.
-        */
-       if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
-                    payload->proto == fast_rx->control_port_protocol))
-               return false;
+               if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+                       return false;
+
+               /* Don't handle these here since they require special code.
+                * Accept AARP and IPX even though they should come with a
+                * bridge-tunnel header - but if we get them this way then
+                * there's little point in discarding them.
+                */
+               if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+                            payload->proto == fast_rx->control_port_protocol))
+                       return false;
+       }
 
        /* after this point, don't punt to the slowpath! */
 
@@ -3959,12 +4006,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
        }
 
        /* statistics part of ieee80211_rx_h_sta_process() */
-       stats->last_rx = jiffies;
-       stats->last_rate = sta_stats_encode_rate(status);
-
-       stats->fragments++;
-       stats->packets++;
-
        if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
                stats->last_signal = status->signal;
                if (!fast_rx->uses_rss)
@@ -3993,6 +4034,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
        if (rx->key && !ieee80211_has_protected(hdr->frame_control))
                goto drop;
 
+       if (status->rx_flags & IEEE80211_RX_AMSDU) {
+               if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+                   RX_QUEUED)
+                       goto drop;
+
+               return true;
+       }
+
+       stats->last_rx = jiffies;
+       stats->last_rate = sta_stats_encode_rate(status);
+
+       stats->fragments++;
+       stats->packets++;
+
        /* do the header conversion - first grab the addresses */
        ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
        ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
index ee01817..0293348 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2008, Intel Corporation
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -27,7 +28,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
                                 u32 sta_flags, u8 *bssid,
                                 struct ieee80211_csa_ie *csa_ie)
 {
-       enum nl80211_band new_band;
+       enum nl80211_band new_band = current_band;
        int new_freq;
        u8 new_chan_no;
        struct ieee80211_channel *new_chan;
@@ -55,15 +56,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
                                elems->ext_chansw_ie->new_operating_class,
                                &new_band)) {
                        sdata_info(sdata,
-                                  "cannot understand ECSA IE operating class %d, disconnecting\n",
+                                  "cannot understand ECSA IE operating class, %d, ignoring\n",
                                   elems->ext_chansw_ie->new_operating_class);
-                       return -EINVAL;
                }
                new_chan_no = elems->ext_chansw_ie->new_ch_num;
                csa_ie->count = elems->ext_chansw_ie->count;
                csa_ie->mode = elems->ext_chansw_ie->mode;
        } else if (elems->ch_switch_ie) {
-               new_band = current_band;
                new_chan_no = elems->ch_switch_ie->new_ch_num;
                csa_ie->count = elems->ch_switch_ie->count;
                csa_ie->mode = elems->ch_switch_ie->mode;
index 0c5627f..655c3d8 100644 (file)
@@ -314,7 +314,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
        if (ieee80211_hw_check(hw, USES_RSS)) {
                sta->pcpu_rx_stats =
-                       alloc_percpu(struct ieee80211_sta_rx_stats);
+                       alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
                if (!sta->pcpu_rx_stats)
                        goto free;
        }
@@ -433,6 +433,7 @@ free_txq:
        if (sta->sta.txq[0])
                kfree(to_txq_info(sta->sta.txq[0]));
 free:
+       free_percpu(sta->pcpu_rx_stats);
 #ifdef CONFIG_MAC80211_MESH
        kfree(sta->mesh);
 #endif
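
The two sta_info.c hunks above belong together: sta_info_alloc() takes a gfp_t, so the per-CPU RX stats allocation must honor the caller's context rather than assume GFP_KERNEL, and the shared free: error path must release the area. The unconditional free_percpu() is safe because it ignores a NULL pointer. A minimal sketch of the pattern, with hypothetical names:

        struct foo_rx_stats __percpu *stats;

        /* honor the caller's allocation context (may be GFP_ATOMIC) */
        stats = alloc_percpu_gfp(struct foo_rx_stats, gfp);
        if (!stats)
                return -ENOMEM;

        /* ... on any later failure ... */
        free_percpu(stats);     /* NULL-safe, so a shared error path may
                                 * call it without checking */
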
@@ -2287,6 +2288,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
                sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
                sinfo->expected_throughput = thr;
        }
+
+       if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
+           sta->status_stats.ack_signal_filled) {
+               sinfo->ack_signal = sta->status_stats.last_ack_signal;
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
+       }
 }
 
 u32 sta_get_expected_throughput(struct sta_info *sta)
index cd53619..f64eb86 100644 (file)
@@ -548,6 +548,8 @@ struct sta_info {
                u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
                u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
                unsigned long last_ack;
+               s8 last_ack_signal;
+               bool ack_signal_filled;
        } status_stats;
 
        /* Updated from TX path only, no locking requirements */
index da7427a..743e89c 100644 (file)
@@ -187,9 +187,16 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
        struct ieee80211_mgmt *mgmt = (void *) skb->data;
        struct ieee80211_local *local = sta->local;
        struct ieee80211_sub_if_data *sdata = sta->sdata;
+       struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
 
-       if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
+       if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
                sta->status_stats.last_ack = jiffies;
+               if (txinfo->status.is_valid_ack_signal) {
+                       sta->status_stats.last_ack_signal =
+                                        (s8)txinfo->status.ack_signal;
+                       sta->status_stats.ack_signal_filled = true;
+               }
+       }
 
        if (ieee80211_is_data_qos(mgmt->frame_control)) {
                struct ieee80211_hdr *hdr = (void *) skb->data;
@@ -487,6 +494,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
                            ieee80211_is_qos_nullfunc(hdr->frame_control))
                                cfg80211_probe_status(sdata->dev, hdr->addr1,
                                                      cookie, acked,
+                                                     info->status.ack_signal,
+                                                     info->status.is_valid_ack_signal,
                                                      GFP_ATOMIC);
                        else
                                cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
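
The two extra arguments passed to cfg80211_probe_status() imply the cfg80211 side of this series grew ACK-signal reporting. Judging only from this call site, the updated prototype presumably reads roughly:

        void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
                                   u64 cookie, bool acked, s32 ack_signal,
                                   bool is_valid_ack_signal, gfp_t gfp);

so drivers that report a valid ACK RSSI can now propagate it to userspace alongside the acked flag.
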
index 25904af..7643178 100644 (file)
@@ -797,7 +797,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 {
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-       u8 *qc;
        int tid;
 
        /*
@@ -844,9 +843,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
                return TX_CONTINUE;
 
        /* include per-STA, per-TID sequence counter */
-
-       qc = ieee80211_get_qos_ctl(hdr);
-       tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+       tid = ieee80211_get_tid(hdr);
        tx->sta->tx_stats.msdu[tid]++;
 
        hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -1158,7 +1155,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
        struct ieee80211_hdr *hdr;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
        int tid;
-       u8 *qc;
 
        memset(tx, 0, sizeof(*tx));
        tx->skb = skb;
@@ -1198,8 +1194,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
            !ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
                struct tid_ampdu_tx *tid_tx;
 
-               qc = ieee80211_get_qos_ctl(hdr);
-               tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+               tid = ieee80211_get_tid(hdr);
 
                tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
                if (tid_tx) {
@@ -1921,7 +1916,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+       struct ieee80211_hdr *hdr;
        int headroom;
        bool may_encrypt;
 
index b9276ac..5714dee 100644 (file)
@@ -447,6 +447,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
                                  enum nl80211_band band)
 {
        enum ieee80211_sta_rx_bandwidth new_bw;
+       struct sta_opmode_info sta_opmode = {};
        u32 changed = 0;
        u8 nss;
 
@@ -460,7 +461,9 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 
        if (sta->sta.rx_nss != nss) {
                sta->sta.rx_nss = nss;
+               sta_opmode.rx_nss = nss;
                changed |= IEEE80211_RC_NSS_CHANGED;
+               sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
        }
 
        switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
@@ -481,9 +484,15 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
        new_bw = ieee80211_sta_cur_vht_bw(sta);
        if (new_bw != sta->sta.bandwidth) {
                sta->sta.bandwidth = new_bw;
+               sta_opmode.bw = new_bw;
                changed |= IEEE80211_RC_BW_CHANGED;
+               sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
        }
 
+       if (sta_opmode.changed)
+               cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr,
+                                                 &sta_opmode, GFP_KERNEL);
+
        return changed;
 }
 
index 785056c..58d0b25 100644 (file)
@@ -340,7 +340,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
        a4_included = ieee80211_has_a4(hdr->frame_control);
 
        if (ieee80211_is_data_qos(hdr->frame_control))
-               qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+               qos_tid = ieee80211_get_tid(hdr);
        else
                qos_tid = 0;
 
@@ -601,8 +601,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
        aad[23] = 0;
 
        if (ieee80211_is_data_qos(hdr->frame_control))
-               qos_tid = *ieee80211_get_qos_ctl(hdr) &
-                       IEEE80211_QOS_CTL_TID_MASK;
+               qos_tid = ieee80211_get_tid(hdr);
        else
                qos_tid = 0;
 
@@ -867,8 +866,7 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx)
                return RX_DROP_UNUSABLE;
 
        if (ieee80211_is_data_qos(hdr->frame_control))
-               qos_tid = *ieee80211_get_qos_ctl(hdr) &
-                               IEEE80211_QOS_CTL_TID_MASK;
+               qos_tid = ieee80211_get_tid(hdr);
        else
                qos_tid = 0;
 
index 0f6b817..d72cc78 100644 (file)
@@ -629,6 +629,7 @@ static void __net_exit netfilter_net_exit(struct net *net)
 static struct pernet_operations netfilter_net_ops = {
        .init = netfilter_net_init,
        .exit = netfilter_net_exit,
+       .async = true,
 };
 
 int __init netfilter_init(void)
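
This is the first of several one-line .async = true conversions in the netfilter files below. The flag marks a pernet_operations instance as safe to run outside the global serialization of network-namespace setup and teardown, letting namespace creation proceed in parallel. The registration itself is unchanged; a minimal sketch with hypothetical names:

        static int __net_init example_net_init(struct net *net)
        {
                return 0;               /* per-namespace setup */
        }

        static void __net_exit example_net_exit(struct net *net)
        {
                /* per-namespace teardown */
        }

        static struct pernet_operations example_net_ops = {
                .init  = example_net_init,
                .exit  = example_net_exit,
                .async = true,          /* opt in to unserialized init/exit */
        };

Registration via register_pernet_subsys()/register_pernet_device() stays the same; the flag only asserts that init/exit need no serialization against other namespaces' setup.
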
index d625179..6a340c9 100644 (file)
@@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblc_ops = {
        .init = __ip_vs_lblc_init,
        .exit = __ip_vs_lblc_exit,
+       .async = true,
 };
 
 static int __init ip_vs_lblc_init(void)
index 84c57b6..0627881 100644 (file)
@@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblcr_ops = {
        .init = __ip_vs_lblcr_init,
        .exit = __ip_vs_lblcr_exit,
+       .async = true,
 };
 
 static int __init ip_vs_lblcr_init(void)
index c2c1b16..1ba3da5 100644 (file)
@@ -577,6 +577,7 @@ static void __net_exit nf_log_net_exit(struct net *net)
 static struct pernet_operations nf_log_net_ops = {
        .init = nf_log_net_init,
        .exit = nf_log_net_exit,
+       .async = true,
 };
 
 int __init netfilter_log_init(void)
index fbce552..7d7466d 100644 (file)
@@ -41,7 +41,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
                                 const struct nf_conn *ct,
                                 u16 *rover)
 {
-       unsigned int range_size, min, i;
+       unsigned int range_size, min, max, i;
        __be16 *portptr;
        u_int16_t off;
 
@@ -71,7 +71,10 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
                }
        } else {
                min = ntohs(range->min_proto.all);
-               range_size = ntohs(range->max_proto.all) - min + 1;
+               max = ntohs(range->max_proto.all);
+               if (unlikely(max < min))
+                       swap(max, min);
+               range_size = max - min + 1;
        }
 
        if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
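
Without the new check, a userspace-supplied NAT range with max < min makes the unsigned max - min + 1 wrap to an enormous range_size. A worked example with illustrative values:

        u16 min = 2000, max = 1000;     /* inverted range from userspace */
        unsigned int broken, fixed;

        broken = (unsigned int)max - min + 1;   /* wraps to 4294966297 */

        if (unlikely(max < min))
                swap(max, min);                 /* kernel swap() macro */

        fixed = max - min + 1;                  /* 1001 */

Swapping rather than rejecting presumably keeps rulesets that emit inverted ranges loading instead of failing outright.
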
index 92139a0..64b875e 100644 (file)
@@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = {
        .exit           = synproxy_net_exit,
        .id             = &synproxy_net_id,
        .size           = sizeof(struct synproxy_net),
+       .async          = true,
 };
 
 static int __init synproxy_core_init(void)
index 2f685ee..d9deebe 100644 (file)
@@ -434,36 +434,35 @@ int xt_check_match(struct xt_mtchk_param *par,
                 * ebt_among is exempt from centralized matchsize checking
                 * because it uses a dynamic-size data set.
                 */
-               pr_err("%s_tables: %s.%u match: invalid size "
-                      "%u (kernel) != (user) %u\n",
-                      xt_prefix[par->family], par->match->name,
-                      par->match->revision,
-                      XT_ALIGN(par->match->matchsize), size);
+               pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n",
+                                  xt_prefix[par->family], par->match->name,
+                                  par->match->revision,
+                                  XT_ALIGN(par->match->matchsize), size);
                return -EINVAL;
        }
        if (par->match->table != NULL &&
            strcmp(par->match->table, par->table) != 0) {
-               pr_err("%s_tables: %s match: only valid in %s table, not %s\n",
-                      xt_prefix[par->family], par->match->name,
-                      par->match->table, par->table);
+               pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n",
+                                   xt_prefix[par->family], par->match->name,
+                                   par->match->table, par->table);
                return -EINVAL;
        }
        if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
                char used[64], allow[64];
 
-               pr_err("%s_tables: %s match: used from hooks %s, but only "
-                      "valid from %s\n",
-                      xt_prefix[par->family], par->match->name,
-                      textify_hooks(used, sizeof(used), par->hook_mask,
-                                    par->family),
-                      textify_hooks(allow, sizeof(allow), par->match->hooks,
-                                    par->family));
+               pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n",
+                                   xt_prefix[par->family], par->match->name,
+                                   textify_hooks(used, sizeof(used),
+                                                 par->hook_mask, par->family),
+                                   textify_hooks(allow, sizeof(allow),
+                                                 par->match->hooks,
+                                                 par->family));
                return -EINVAL;
        }
        if (par->match->proto && (par->match->proto != proto || inv_proto)) {
-               pr_err("%s_tables: %s match: only valid for protocol %u\n",
-                      xt_prefix[par->family], par->match->name,
-                      par->match->proto);
+               pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n",
+                                   xt_prefix[par->family], par->match->name,
+                                   par->match->proto);
                return -EINVAL;
        }
        if (par->match->checkentry != NULL) {
@@ -814,36 +813,35 @@ int xt_check_target(struct xt_tgchk_param *par,
        int ret;
 
        if (XT_ALIGN(par->target->targetsize) != size) {
-               pr_err("%s_tables: %s.%u target: invalid size "
-                      "%u (kernel) != (user) %u\n",
-                      xt_prefix[par->family], par->target->name,
-                      par->target->revision,
-                      XT_ALIGN(par->target->targetsize), size);
+               pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n",
+                                  xt_prefix[par->family], par->target->name,
+                                  par->target->revision,
+                                  XT_ALIGN(par->target->targetsize), size);
                return -EINVAL;
        }
        if (par->target->table != NULL &&
            strcmp(par->target->table, par->table) != 0) {
-               pr_err("%s_tables: %s target: only valid in %s table, not %s\n",
-                      xt_prefix[par->family], par->target->name,
-                      par->target->table, par->table);
+               pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n",
+                                   xt_prefix[par->family], par->target->name,
+                                   par->target->table, par->table);
                return -EINVAL;
        }
        if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
                char used[64], allow[64];
 
-               pr_err("%s_tables: %s target: used from hooks %s, but only "
-                      "usable from %s\n",
-                      xt_prefix[par->family], par->target->name,
-                      textify_hooks(used, sizeof(used), par->hook_mask,
-                                    par->family),
-                      textify_hooks(allow, sizeof(allow), par->target->hooks,
-                                    par->family));
+               pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n",
+                                   xt_prefix[par->family], par->target->name,
+                                   textify_hooks(used, sizeof(used),
+                                                 par->hook_mask, par->family),
+                                   textify_hooks(allow, sizeof(allow),
+                                                 par->target->hooks,
+                                                 par->family));
                return -EINVAL;
        }
        if (par->target->proto && (par->target->proto != proto || inv_proto)) {
-               pr_err("%s_tables: %s target: only valid for protocol %u\n",
-                      xt_prefix[par->family], par->target->name,
-                      par->target->proto);
+               pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n",
+                                   xt_prefix[par->family], par->target->name,
+                                   par->target->proto);
                return -EINVAL;
        }
        if (par->target->checkentry != NULL) {
@@ -1004,10 +1002,6 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
        if (sz < sizeof(*info))
                return NULL;
 
-       /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-       if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
-               return NULL;
-
        /* __GFP_NORETRY is not fully supported by kvmalloc but it should
         * work reasonably well if sz is too large and bail out rather
         * than shoot all processes down before realizing there is nothing
@@ -1765,6 +1759,7 @@ static void __net_exit xt_net_exit(struct net *net)
 static struct pernet_operations xt_net_ops = {
        .init = xt_net_init,
        .exit = xt_net_exit,
+       .async = true,
 };
 
 static int __init xt_init(void)
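
The sweep from pr_err()/pr_info() to the _ratelimited variants in x_tables.c and the xt_* modules below shares one motivation: these messages fire during rule validation, which a (namespace-)privileged user can trigger in a tight loop, so unthrottled printk becomes a log-flooding vector. The shape of the pattern, in a hypothetical check routine:

        #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

        #include <linux/printk.h>

        static int example_check(unsigned int ksize, unsigned int usize)
        {
                if (ksize != usize) {
                        /* bounded to a burst per ratelimit window, no
                         * matter how often userspace retries the load */
                        pr_info_ratelimited("invalid size %u (kernel) != (user) %u\n",
                                            ksize, usize);
                        return -EINVAL;
                }
                return 0;
        }

Downgrading pr_err to pr_info in the same sweep reflects that a malformed ruleset is an operator error, not a kernel problem.
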
index c502419..f368ee6 100644 (file)
@@ -120,8 +120,8 @@ static int audit_tg_check(const struct xt_tgchk_param *par)
        const struct xt_audit_info *info = par->targinfo;
 
        if (info->type > XT_AUDIT_TYPE_MAX) {
-               pr_info("Audit type out of range (valid range: 0..%hhu)\n",
-                       XT_AUDIT_TYPE_MAX);
+               pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n",
+                                   XT_AUDIT_TYPE_MAX);
                return -ERANGE;
        }
 
index 0f642ef..9f4151e 100644 (file)
@@ -36,13 +36,13 @@ static int checksum_tg_check(const struct xt_tgchk_param *par)
        const struct xt_CHECKSUM_info *einfo = par->targinfo;
 
        if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
-               pr_info("unsupported CHECKSUM operation %x\n", einfo->operation);
+               pr_info_ratelimited("unsupported CHECKSUM operation %x\n",
+                                   einfo->operation);
                return -EINVAL;
        }
-       if (!einfo->operation) {
-               pr_info("no CHECKSUM operation enabled\n");
+       if (!einfo->operation)
                return -EINVAL;
-       }
+
        return 0;
 }
 
index da56c06..f3f1caa 100644 (file)
@@ -91,8 +91,8 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 
        if (strcmp(par->table, "mangle") != 0 &&
            strcmp(par->table, "security") != 0) {
-               pr_info("target only valid in the \'mangle\' "
-                       "or \'security\' tables, not \'%s\'.\n", par->table);
+               pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+                                   par->table);
                return -EINVAL;
        }
 
@@ -102,14 +102,14 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
                break;
 
        default:
-               pr_info("invalid mode: %hu\n", info->mode);
+               pr_info_ratelimited("invalid mode: %hu\n", info->mode);
                return -EINVAL;
        }
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0)
-               pr_info("cannot load conntrack support for proto=%u\n",
-                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
        return ret;
 }
 
index 5a152e2..8790190 100644 (file)
@@ -82,15 +82,14 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 
        proto = xt_ct_find_proto(par);
        if (!proto) {
-               pr_info("You must specify a L4 protocol, and not use "
-                       "inversions on it.\n");
+               pr_info_ratelimited("You must specify a L4 protocol and not use inversions on it\n");
                return -ENOENT;
        }
 
        helper = nf_conntrack_helper_try_module_get(helper_name, par->family,
                                                    proto);
        if (helper == NULL) {
-               pr_info("No such helper \"%s\"\n", helper_name);
+               pr_info_ratelimited("No such helper \"%s\"\n", helper_name);
                return -ENOENT;
        }
 
@@ -124,6 +123,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
        const struct nf_conntrack_l4proto *l4proto;
        struct ctnl_timeout *timeout;
        struct nf_conn_timeout *timeout_ext;
+       const char *errmsg = NULL;
        int ret = 0;
        u8 proto;
 
@@ -131,29 +131,29 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
        timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
        if (timeout_find_get == NULL) {
                ret = -ENOENT;
-               pr_info("Timeout policy base is empty\n");
+               errmsg = "Timeout policy base is empty";
                goto out;
        }
 
        proto = xt_ct_find_proto(par);
        if (!proto) {
                ret = -EINVAL;
-               pr_info("You must specify a L4 protocol, and not use "
-                       "inversions on it.\n");
+               errmsg = "You must specify a L4 protocol and not use inversions on it";
                goto out;
        }
 
        timeout = timeout_find_get(par->net, timeout_name);
        if (timeout == NULL) {
                ret = -ENOENT;
-               pr_info("No such timeout policy \"%s\"\n", timeout_name);
+               pr_info_ratelimited("No such timeout policy \"%s\"\n",
+                                   timeout_name);
                goto out;
        }
 
        if (timeout->l3num != par->family) {
                ret = -EINVAL;
-               pr_info("Timeout policy `%s' can only be used by L3 protocol "
-                       "number %d\n", timeout_name, timeout->l3num);
+               pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
+                                   timeout_name, 3, timeout->l3num);
                goto err_put_timeout;
        }
        /* Make sure the timeout policy matches any existing protocol tracker,
@@ -162,9 +162,8 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
        l4proto = __nf_ct_l4proto_find(par->family, proto);
        if (timeout->l4proto->l4proto != l4proto->l4proto) {
                ret = -EINVAL;
-               pr_info("Timeout policy `%s' can only be used by L4 protocol "
-                       "number %d\n",
-                       timeout_name, timeout->l4proto->l4proto);
+               pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
+                                   timeout_name, 4, timeout->l4proto->l4proto);
                goto err_put_timeout;
        }
        timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
@@ -180,6 +179,8 @@ err_put_timeout:
        __xt_ct_tg_timeout_put(timeout);
 out:
        rcu_read_unlock();
+       if (errmsg)
+               pr_info_ratelimited("%s\n", errmsg);
        return ret;
 #else
        return -EOPNOTSUPP;
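
xt_ct_set_timeout() also introduces the deferred-errmsg idiom that recurs below in xt_HMARK, xt_addrtype and xt_policy: failure sites record a string and branch to a single exit, where the message is printed once after locks are released. A minimal sketch with hypothetical condition names:

        static int example_checkentry(bool no_proto, bool bad_combo)
        {
                const char *errmsg = NULL;
                int ret = 0;

                rcu_read_lock();
                if (no_proto) {
                        ret = -EINVAL;
                        errmsg = "You must specify an L4 protocol";
                        goto out;
                }
                if (bad_combo) {
                        ret = -EINVAL;
                        errmsg = "these options cannot be combined";
                }
        out:
                rcu_read_unlock();
                if (errmsg)
                        pr_info_ratelimited("%s\n", errmsg);
                return ret;
        }

Keeping one print site also keeps the ratelimit state in one place instead of one per message.
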
index 3f83d38..098ed85 100644 (file)
@@ -66,10 +66,8 @@ static int dscp_tg_check(const struct xt_tgchk_param *par)
 {
        const struct xt_DSCP_info *info = par->targinfo;
 
-       if (info->dscp > XT_DSCP_MAX) {
-               pr_info("dscp %x out of range\n", info->dscp);
+       if (info->dscp > XT_DSCP_MAX)
                return -EDOM;
-       }
        return 0;
 }
 
index 1535e87..4653b07 100644 (file)
@@ -105,10 +105,8 @@ static int ttl_tg_check(const struct xt_tgchk_param *par)
 {
        const struct ipt_TTL_info *info = par->targinfo;
 
-       if (info->mode > IPT_TTL_MAXMODE) {
-               pr_info("TTL: invalid or unknown mode %u\n", info->mode);
+       if (info->mode > IPT_TTL_MAXMODE)
                return -EINVAL;
-       }
        if (info->mode != IPT_TTL_SET && info->ttl == 0)
                return -EINVAL;
        return 0;
@@ -118,15 +116,10 @@ static int hl_tg6_check(const struct xt_tgchk_param *par)
 {
        const struct ip6t_HL_info *info = par->targinfo;
 
-       if (info->mode > IP6T_HL_MAXMODE) {
-               pr_info("invalid or unknown mode %u\n", info->mode);
+       if (info->mode > IP6T_HL_MAXMODE)
                return -EINVAL;
-       }
-       if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
-               pr_info("increment/decrement does not "
-                       "make sense with value 0\n");
+       if (info->mode != IP6T_HL_SET && info->hop_limit == 0)
                return -EINVAL;
-       }
        return 0;
 }
 
index 60e6dbe..9c75f41 100644 (file)
@@ -9,6 +9,8 @@
  * the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
@@ -312,29 +314,30 @@ hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
 static int hmark_tg_check(const struct xt_tgchk_param *par)
 {
        const struct xt_hmark_info *info = par->targinfo;
+       const char *errmsg = "proto mask must be zero with L3 mode";
 
-       if (!info->hmodulus) {
-               pr_info("xt_HMARK: hash modulus can't be zero\n");
+       if (!info->hmodulus)
                return -EINVAL;
-       }
+
        if (info->proto_mask &&
-           (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) {
-               pr_info("xt_HMARK: proto mask must be zero with L3 mode\n");
-               return -EINVAL;
-       }
+           (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
+               goto err;
+
        if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
            (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
-                            XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) {
-               pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n");
+                            XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
                return -EINVAL;
-       }
+
        if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
            (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
                             XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
-               pr_info("xt_HMARK: spi-set and port-set can't be combined\n");
-               return -EINVAL;
+               errmsg = "spi-set and port-set can't be combined";
+               goto err;
        }
        return 0;
+err:
+       pr_info_ratelimited("%s\n", errmsg);
+       return -EINVAL;
 }
 
 static struct xt_target hmark_tg_reg[] __read_mostly = {
index 6c2482b..1ac6600 100644 (file)
@@ -146,11 +146,11 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
        timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
        info->timer->refcnt = 1;
 
+       INIT_WORK(&info->timer->work, idletimer_tg_work);
+
        mod_timer(&info->timer->timer,
                  msecs_to_jiffies(info->timeout * 1000) + jiffies);
 
-       INIT_WORK(&info->timer->work, idletimer_tg_work);
-
        return 0;
 
 out_free_attr:
@@ -191,7 +191,10 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
                pr_debug("timeout value is zero\n");
                return -EINVAL;
        }
-
+       if (info->timeout >= INT_MAX / 1000) {
+               pr_debug("timeout value is too big\n");
+               return -EINVAL;
+       }
        if (info->label[0] == '\0' ||
            strnlen(info->label,
                    MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
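
The new upper bound exists because info->timeout is in seconds and is multiplied by 1000 before msecs_to_jiffies() sees it, so a large u32 timeout silently overflows. A sketch of the arithmetic being guarded against:

        u32 timeout = 2200000;          /* ~25 days, fine as seconds */
        u32 msecs = timeout * 1000;     /* 2200000000: already above
                                         * INT_MAX; bigger timeouts wrap
                                         * the 32-bit product outright */

        if (timeout >= INT_MAX / 1000)  /* conservative: keeps the
                                         * product strictly below INT_MAX */
                return -EINVAL;

The INIT_WORK() reordering in the first hunk closes a small race: the work item must be initialized before mod_timer() can arm a timer whose expiry handler schedules that work.
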
index 1dcad89..1984644 100644 (file)
@@ -111,10 +111,8 @@ static int led_tg_check(const struct xt_tgchk_param *par)
        struct xt_led_info_internal *ledinternal;
        int err;
 
-       if (ledinfo->id[0] == '\0') {
-               pr_info("No 'id' parameter given.\n");
+       if (ledinfo->id[0] == '\0')
                return -EINVAL;
-       }
 
        mutex_lock(&xt_led_mutex);
 
@@ -138,13 +136,14 @@ static int led_tg_check(const struct xt_tgchk_param *par)
 
        err = led_trigger_register(&ledinternal->netfilter_led_trigger);
        if (err) {
-               pr_err("Trigger name is already in use.\n");
+               pr_info_ratelimited("Trigger name is already in use.\n");
                goto exit_alloc;
        }
 
-       /* See if we need to set up a timer */
-       if (ledinfo->delay > 0)
-               timer_setup(&ledinternal->timer, led_timeout_callback, 0);
+       /* Since the ledinternal timer can be shared between multiple targets,
+        * always set it up, even if the current target does not need it
+        */
+       timer_setup(&ledinternal->timer, led_timeout_callback, 0);
 
        list_add_tail(&ledinternal->list, &xt_led_triggers);
 
@@ -181,8 +180,7 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par)
 
        list_del(&ledinternal->list);
 
-       if (ledinfo->delay > 0)
-               del_timer_sync(&ledinternal->timer);
+       del_timer_sync(&ledinternal->timer);
 
        led_trigger_unregister(&ledinternal->netfilter_led_trigger);
 
index a360b99..a9aca80 100644 (file)
@@ -8,6 +8,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
@@ -67,13 +69,13 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
        init_hashrandom(&jhash_initval);
 
        if (info->queues_total == 0) {
-               pr_err("NFQUEUE: number of total queues is 0\n");
+               pr_info_ratelimited("number of total queues is 0\n");
                return -EINVAL;
        }
        maxid = info->queues_total - 1 + info->queuenum;
        if (maxid > 0xffff) {
-               pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
-                      info->queues_total, maxid);
+               pr_info_ratelimited("number of queues (%u) out of range (got %u)\n",
+                                   info->queues_total, maxid);
                return -ERANGE;
        }
        if (par->target->revision == 2 && info->flags > 1)
index 9faf5e0..4ad5fe2 100644 (file)
@@ -60,18 +60,20 @@ static int checkentry_lsm(struct xt_secmark_target_info *info)
                                       &info->secid);
        if (err) {
                if (err == -EINVAL)
-                       pr_info("invalid security context \'%s\'\n", info->secctx);
+                       pr_info_ratelimited("invalid security context \'%s\'\n",
+                                           info->secctx);
                return err;
        }
 
        if (!info->secid) {
-               pr_info("unable to map security context \'%s\'\n", info->secctx);
+               pr_info_ratelimited("unable to map security context \'%s\'\n",
+                                   info->secctx);
                return -ENOENT;
        }
 
        err = security_secmark_relabel_packet(info->secid);
        if (err) {
-               pr_info("unable to obtain relabeling permission\n");
+               pr_info_ratelimited("unable to obtain relabeling permission\n");
                return err;
        }
 
@@ -86,14 +88,14 @@ static int secmark_tg_check(const struct xt_tgchk_param *par)
 
        if (strcmp(par->table, "mangle") != 0 &&
            strcmp(par->table, "security") != 0) {
-               pr_info("target only valid in the \'mangle\' "
-                       "or \'security\' tables, not \'%s\'.\n", par->table);
+               pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+                                   par->table);
                return -EINVAL;
        }
 
        if (mode && mode != info->mode) {
-               pr_info("mode already set to %hu cannot mix with "
-                       "rules for mode %hu\n", mode, info->mode);
+               pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n",
+                                   mode, info->mode);
                return -EINVAL;
        }
 
@@ -101,7 +103,7 @@ static int secmark_tg_check(const struct xt_tgchk_param *par)
        case SECMARK_MODE_SEL:
                break;
        default:
-               pr_info("invalid mode: %hu\n", info->mode);
+               pr_info_ratelimited("invalid mode: %hu\n", info->mode);
                return -EINVAL;
        }
 
index 99bb8e4..98efb20 100644 (file)
@@ -273,8 +273,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
            (par->hook_mask & ~((1 << NF_INET_FORWARD) |
                           (1 << NF_INET_LOCAL_OUT) |
                           (1 << NF_INET_POST_ROUTING))) != 0) {
-               pr_info("path-MTU clamping only supported in "
-                       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+               pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
                return -EINVAL;
        }
        if (par->nft_compat)
@@ -283,7 +282,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
        xt_ematch_foreach(ematch, e)
                if (find_syn_match(ematch))
                        return 0;
-       pr_info("Only works on TCP SYN packets\n");
+       pr_info_ratelimited("Only works on TCP SYN packets\n");
        return -EINVAL;
 }
 
@@ -298,8 +297,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
            (par->hook_mask & ~((1 << NF_INET_FORWARD) |
                           (1 << NF_INET_LOCAL_OUT) |
                           (1 << NF_INET_POST_ROUTING))) != 0) {
-               pr_info("path-MTU clamping only supported in "
-                       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+               pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
                return -EINVAL;
        }
        if (par->nft_compat)
@@ -308,7 +306,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
        xt_ematch_foreach(ematch, e)
                if (find_syn_match(ematch))
                        return 0;
-       pr_info("Only works on TCP SYN packets\n");
+       pr_info_ratelimited("Only works on TCP SYN packets\n");
        return -EINVAL;
 }
 #endif
index 17d7705..8c89323 100644 (file)
@@ -540,8 +540,7 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
            !(i->invflags & IP6T_INV_PROTO))
                return 0;
 
-       pr_info("Can be used only in combination with "
-               "either -p tcp or -p udp\n");
+       pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
        return -EINVAL;
 }
 #endif
@@ -559,8 +558,7 @@ static int tproxy_tg4_check(const struct xt_tgchk_param *par)
            && !(i->invflags & IPT_INV_PROTO))
                return 0;
 
-       pr_info("Can be used only in combination with "
-               "either -p tcp or -p udp\n");
+       pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
        return -EINVAL;
 }
 
index 911a7c0..89e281b 100644 (file)
@@ -164,48 +164,47 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 
 static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 {
+       const char *errmsg = "both incoming and outgoing interface limitation cannot be selected";
        struct xt_addrtype_info_v1 *info = par->matchinfo;
 
        if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
-           info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
-               pr_info("both incoming and outgoing "
-                       "interface limitation cannot be selected\n");
-               return -EINVAL;
-       }
+           info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
+               goto err;
 
        if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
            (1 << NF_INET_LOCAL_IN)) &&
            info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
-               pr_info("output interface limitation "
-                       "not valid in PREROUTING and INPUT\n");
-               return -EINVAL;
+               errmsg = "output interface limitation not valid in PREROUTING and INPUT";
+               goto err;
        }
 
        if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
            (1 << NF_INET_LOCAL_OUT)) &&
            info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
-               pr_info("input interface limitation "
-                       "not valid in POSTROUTING and OUTPUT\n");
-               return -EINVAL;
+               errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT";
+               goto err;
        }
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        if (par->family == NFPROTO_IPV6) {
                if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
-                       pr_err("ipv6 BLACKHOLE matching not supported\n");
-                       return -EINVAL;
+                       errmsg = "ipv6 BLACKHOLE matching not supported";
+                       goto err;
                }
                if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
-                       pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
-                       return -EINVAL;
+                       errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported";
+                       goto err;
                }
                if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
-                       pr_err("ipv6 does not support BROADCAST matching\n");
-                       return -EINVAL;
+                       errmsg = "ipv6 does not support BROADCAST matching";
+                       goto err;
                }
        }
 #endif
        return 0;
+err:
+       pr_info_ratelimited("%s\n", errmsg);
+       return -EINVAL;
 }
 
 static struct xt_match addrtype_mt_reg[] __read_mostly = {
index 06b090d..a2cf8a6 100644 (file)
@@ -7,6 +7,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/skbuff.h>
@@ -34,7 +36,7 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
        program.filter = insns;
 
        if (bpf_prog_create(ret, &program)) {
-               pr_info("bpf: check failed: parse error\n");
+               pr_info_ratelimited("check failed: parse error\n");
                return -EINVAL;
        }
 
index 891f4e7..7df2dec 100644 (file)
@@ -12,6 +12,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/skbuff.h>
 #include <linux/module.h>
 #include <linux/netfilter/x_tables.h>
@@ -48,7 +50,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
        }
 
        if (info->has_path && info->has_classid) {
-               pr_info("xt_cgroup: both path and classid specified\n");
+               pr_info_ratelimited("path and classid specified\n");
                return -EINVAL;
        }
 
@@ -56,8 +58,8 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
        if (info->has_path) {
                cgrp = cgroup_get_from_path(info->path);
                if (IS_ERR(cgrp)) {
-                       pr_info("xt_cgroup: invalid path, errno=%ld\n",
-                               PTR_ERR(cgrp));
+                       pr_info_ratelimited("invalid path, errno=%ld\n",
+                                           PTR_ERR(cgrp));
                        return -EINVAL;
                }
                info->priv = cgrp;
index 57ef175..0068688 100644 (file)
@@ -135,14 +135,12 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
        struct xt_cluster_match_info *info = par->matchinfo;
 
        if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
-               pr_info("you have exceeded the maximum "
-                       "number of cluster nodes (%u > %u)\n",
-                       info->total_nodes, XT_CLUSTER_NODES_MAX);
+               pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n",
+                                   info->total_nodes, XT_CLUSTER_NODES_MAX);
                return -EINVAL;
        }
        if (info->node_mask >= (1ULL << info->total_nodes)) {
-               pr_info("this node mask cannot be "
-                       "higher than the total number of nodes\n");
+               pr_info_ratelimited("node mask cannot exceed total number of nodes\n");
                return -EDOM;
        }
        return 0;
index cad0b7b..93cb018 100644 (file)
@@ -112,8 +112,8 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0)
-               pr_info("cannot load conntrack support for proto=%u\n",
-                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
 
        /*
         * This filter cannot function correctly unless connection tracking
index 2337287..4fa4efd 100644 (file)
@@ -57,14 +57,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
        int ret;
 
        if (info->options & ~options) {
-               pr_err("Unknown options in mask %x\n", info->options);
+               pr_info_ratelimited("Unknown options in mask %x\n",
+                                   info->options);
                return -EINVAL;
        }
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0) {
-               pr_info("cannot load conntrack support for proto=%u\n",
-                                                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
                return ret;
        }
 
index ec377cc..809639c 100644 (file)
@@ -79,8 +79,8 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0)
-               pr_info("cannot load conntrack support for proto=%u\n",
-                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
        return ret;
 }
 
@@ -109,8 +109,8 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0)
-               pr_info("cannot load conntrack support for proto=%u\n",
-                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
        return ret;
 }
 
index 39cf1d0..df80fe7 100644 (file)
@@ -272,8 +272,8 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0)
-               pr_info("cannot load conntrack support for proto=%u\n",
-                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
        return ret;
 }
 
index 236ac80..a4c2b86 100644 (file)
@@ -46,10 +46,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
 {
        const struct xt_dscp_info *info = par->matchinfo;
 
-       if (info->dscp > XT_DSCP_MAX) {
-               pr_info("dscp %x out of range\n", info->dscp);
+       if (info->dscp > XT_DSCP_MAX)
                return -EDOM;
-       }
 
        return 0;
 }
index 3c831a8..c7ad4af 100644 (file)
@@ -97,7 +97,7 @@ static int ecn_mt_check4(const struct xt_mtchk_param *par)
 
        if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
            (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
-               pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+               pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
                return -EINVAL;
        }
 
@@ -139,7 +139,7 @@ static int ecn_mt_check6(const struct xt_mtchk_param *par)
 
        if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
            (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) {
-               pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+               pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
                return -EINVAL;
        }
 
index ca68474..db2fe09 100644 (file)
@@ -523,7 +523,8 @@ static u64 user2rate(u64 user)
        if (user != 0) {
                return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
        } else {
-               pr_warn("invalid rate from userspace: %llu\n", user);
+               pr_info_ratelimited("invalid rate from userspace: %llu\n",
+                                   user);
                return 0;
        }
 }
@@ -774,7 +775,7 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
                if (!dh->rateinfo.prev_window &&
                    (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
                        spin_unlock(&dh->lock);
-                       rcu_read_unlock_bh();
+                       local_bh_enable();
                        return !(cfg->mode & XT_HASHLIMIT_INVERT);
                } else {
                        goto overlimit;
@@ -865,33 +866,34 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
        }
 
        if (cfg->mode & ~XT_HASHLIMIT_ALL) {
-               pr_info("Unknown mode mask %X, kernel too old?\n",
-                                               cfg->mode);
+               pr_info_ratelimited("Unknown mode mask %X, kernel too old?\n",
+                                   cfg->mode);
                return -EINVAL;
        }
 
        /* Check for overflow. */
        if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
                if (cfg->avg == 0 || cfg->avg > U32_MAX) {
-                       pr_info("hashlimit invalid rate\n");
+                       pr_info_ratelimited("invalid rate\n");
                        return -ERANGE;
                }
 
                if (cfg->interval == 0) {
-                       pr_info("hashlimit invalid interval\n");
+                       pr_info_ratelimited("invalid interval\n");
                        return -EINVAL;
                }
        } else if (cfg->mode & XT_HASHLIMIT_BYTES) {
                if (user2credits_byte(cfg->avg) == 0) {
-                       pr_info("overflow, rate too high: %llu\n", cfg->avg);
+                       pr_info_ratelimited("overflow, rate too high: %llu\n",
+                                           cfg->avg);
                        return -EINVAL;
                }
        } else if (cfg->burst == 0 ||
-                   user2credits(cfg->avg * cfg->burst, revision) <
-                   user2credits(cfg->avg, revision)) {
-                       pr_info("overflow, try lower: %llu/%llu\n",
-                               cfg->avg, cfg->burst);
-                       return -ERANGE;
+                  user2credits(cfg->avg * cfg->burst, revision) <
+                  user2credits(cfg->avg, revision)) {
+               pr_info_ratelimited("overflow, try lower: %llu/%llu\n",
+                                   cfg->avg, cfg->burst);
+               return -ERANGE;
        }
 
        mutex_lock(&hashlimit_mutex);
@@ -1343,6 +1345,7 @@ static struct pernet_operations hashlimit_net_ops = {
        .exit   = hashlimit_net_exit,
        .id     = &hashlimit_net_id,
        .size   = sizeof(struct hashlimit_net),
+       .async  = true,
 };
 
 static int __init hashlimit_mt_init(void)
index 38a7815..fd077ae 100644 (file)
@@ -61,8 +61,8 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0) {
-               pr_info("cannot load conntrack support for proto=%u\n",
-                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
                return ret;
        }
        info->name[sizeof(info->name) - 1] = '\0';
index 7ca64a5..57f1df5 100644 (file)
@@ -72,7 +72,7 @@ static int comp_mt_check(const struct xt_mtchk_param *par)
 
        /* Must specify no unknown invflags */
        if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) {
-               pr_err("unknown flags %X\n", compinfo->invflags);
+               pr_info_ratelimited("unknown flags %X\n", compinfo->invflags);
                return -EINVAL;
        }
        return 0;
index 42540d2..1d950a6 100644 (file)
@@ -158,7 +158,8 @@ static int ipvs_mt_check(const struct xt_mtchk_param *par)
            && par->family != NFPROTO_IPV6
 #endif
                ) {
-               pr_info("protocol family %u not supported\n", par->family);
+               pr_info_ratelimited("protocol family %u not supported\n",
+                                   par->family);
                return -EINVAL;
        }
 
index 8aee572..c43482b 100644 (file)
@@ -216,7 +216,7 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par)
        /* Check for invalid flags */
        if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION |
                            XT_L2TP_TYPE)) {
-               pr_info("unknown flags: %x\n", info->flags);
+               pr_info_ratelimited("unknown flags: %x\n", info->flags);
                return -EINVAL;
        }
 
@@ -225,7 +225,8 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par)
            (!(info->flags & XT_L2TP_SID)) &&
            ((!(info->flags & XT_L2TP_TYPE)) ||
             (info->type != XT_L2TP_TYPE_CONTROL))) {
-               pr_info("invalid flags combination: %x\n", info->flags);
+               pr_info_ratelimited("invalid flags combination: %x\n",
+                                   info->flags);
                return -EINVAL;
        }
 
@@ -234,19 +235,22 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par)
         */
        if (info->flags & XT_L2TP_VERSION) {
                if ((info->version < 2) || (info->version > 3)) {
-                       pr_info("wrong L2TP version: %u\n", info->version);
+                       pr_info_ratelimited("wrong L2TP version: %u\n",
+                                           info->version);
                        return -EINVAL;
                }
 
                if (info->version == 2) {
                        if ((info->flags & XT_L2TP_TID) &&
                            (info->tid > 0xffff)) {
-                               pr_info("v2 tid > 0xffff: %u\n", info->tid);
+                               pr_info_ratelimited("v2 tid > 0xffff: %u\n",
+                                                   info->tid);
                                return -EINVAL;
                        }
                        if ((info->flags & XT_L2TP_SID) &&
                            (info->sid > 0xffff)) {
-                               pr_info("v2 sid > 0xffff: %u\n", info->sid);
+                               pr_info_ratelimited("v2 sid > 0xffff: %u\n",
+                                                   info->sid);
                                return -EINVAL;
                        }
                }
@@ -268,13 +272,13 @@ static int l2tp_mt_check4(const struct xt_mtchk_param *par)
 
        if ((ip->proto != IPPROTO_UDP) &&
            (ip->proto != IPPROTO_L2TP)) {
-               pr_info("missing protocol rule (udp|l2tpip)\n");
+               pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
                return -EINVAL;
        }
 
        if ((ip->proto == IPPROTO_L2TP) &&
            (info->version == 2)) {
-               pr_info("v2 doesn't support IP mode\n");
+               pr_info_ratelimited("v2 doesn't support IP mode\n");
                return -EINVAL;
        }
 
@@ -295,13 +299,13 @@ static int l2tp_mt_check6(const struct xt_mtchk_param *par)
 
        if ((ip->proto != IPPROTO_UDP) &&
            (ip->proto != IPPROTO_L2TP)) {
-               pr_info("missing protocol rule (udp|l2tpip)\n");
+               pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
                return -EINVAL;
        }
 
        if ((ip->proto == IPPROTO_L2TP) &&
            (info->version == 2)) {
-               pr_info("v2 doesn't support IP mode\n");
+               pr_info_ratelimited("v2 doesn't support IP mode\n");
                return -EINVAL;
        }
 
index 61403b7..55d18cd 100644 (file)
@@ -106,8 +106,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
        /* Check for overflow. */
        if (r->burst == 0
            || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-               pr_info("Overflow, try lower: %u/%u\n",
-                       r->avg, r->burst);
+               pr_info_ratelimited("Overflow, try lower: %u/%u\n",
+                                   r->avg, r->burst);
                return -ERANGE;
        }
 
index 0fd14d1..bdb689c 100644 (file)
@@ -8,6 +8,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
@@ -19,8 +21,7 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
        const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
        if (mr->rangesize != 1) {
-               pr_info("%s: multiple ranges no longer supported\n",
-                       par->target->name);
+               pr_info_ratelimited("multiple ranges no longer supported\n");
                return -EINVAL;
        }
        return nf_ct_netns_get(par->net, par->family);
index 6f92d25..c8674de 100644 (file)
@@ -6,6 +6,8 @@
  * it under the terms of the GNU General Public License version 2 (or any
  * later at your option) as published by the Free Software Foundation.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
@@ -39,8 +41,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
 
        nfacct = nfnl_acct_find_get(par->net, info->name);
        if (nfacct == NULL) {
-               pr_info("xt_nfacct: accounting object with name `%s' "
-                       "does not exists\n", info->name);
+               pr_info_ratelimited("accounting object `%s' does not exists\n",
+                                   info->name);
                return -ENOENT;
        }
        info->nfacct = nfacct;
index bb33598..9d6d67b 100644 (file)
@@ -107,9 +107,7 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
             info->invert & XT_PHYSDEV_OP_BRIDGED) &&
            par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
            (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
-               pr_info("using --physdev-out and --physdev-is-out are only "
-                       "supported in the FORWARD and POSTROUTING chains with "
-                       "bridged traffic.\n");
+               pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n");
                if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
                        return -EINVAL;
        }
index 5639fb0..13f8ccf 100644 (file)
@@ -132,26 +132,29 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
 static int policy_mt_check(const struct xt_mtchk_param *par)
 {
        const struct xt_policy_info *info = par->matchinfo;
+       const char *errmsg = "neither incoming nor outgoing policy selected";
+
+       if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT)))
+               goto err;
 
-       if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
-               pr_info("neither incoming nor outgoing policy selected\n");
-               return -EINVAL;
-       }
        if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
            (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
-               pr_info("output policy not valid in PREROUTING and INPUT\n");
-               return -EINVAL;
+               errmsg = "output policy not valid in PREROUTING and INPUT";
+               goto err;
        }
        if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
            (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
-               pr_info("input policy not valid in POSTROUTING and OUTPUT\n");
-               return -EINVAL;
+               errmsg = "input policy not valid in POSTROUTING and OUTPUT";
+               goto err;
        }
        if (info->len > XT_POLICY_MAX_ELEM) {
-               pr_info("too many policy elements\n");
-               return -EINVAL;
+               errmsg = "too many policy elements";
+               goto err;
        }
        return 0;
+err:
+       pr_info_ratelimited("%s\n", errmsg);
+       return -EINVAL;
 }
 
 static struct xt_match policy_mt_reg[] __read_mostly = {
index 245fa35..19efdb7 100644 (file)
@@ -342,8 +342,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
        net_get_random_once(&hash_rnd, sizeof(hash_rnd));
 
        if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
-               pr_info("Unsupported user space flags (%08x)\n",
-                       info->check_set);
+               pr_info_ratelimited("Unsupported userspace flags (%08x)\n",
+                                   info->check_set);
                return -EINVAL;
        }
        if (hweight8(info->check_set &
@@ -357,8 +357,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
        if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
                return -EINVAL;
        if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) {
-               pr_info("hitcount (%u) is larger than allowed maximum (%u)\n",
-                       info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
+               pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n",
+                                   info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
                return -EINVAL;
        }
        if (info->name[0] == '\0' ||
@@ -587,7 +587,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
                add = true;
                break;
        default:
-               pr_info("Need \"+ip\", \"-ip\" or \"/\"\n");
+               pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n");
                return -EINVAL;
        }
 
@@ -601,10 +601,8 @@ recent_mt_proc_write(struct file *file, const char __user *input,
                succ   = in4_pton(c, size, (void *)&addr, '\n', NULL);
        }
 
-       if (!succ) {
-               pr_info("illegal address written to procfs\n");
+       if (!succ)
                return -EINVAL;
-       }
 
        spin_lock_bh(&recent_lock);
        e = recent_entry_lookup(t, &addr, family, 0);
@@ -689,6 +687,7 @@ static struct pernet_operations recent_net_ops = {
        .exit   = recent_net_exit,
        .id     = &recent_net_id,
        .size   = sizeof(struct recent_net),
+       .async  = true,
 };
 
 static struct xt_match recent_mt_reg[] __read_mostly = {
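
The .async = true markings that recur through the rest of this section belong to the 4.16-era pernet conversion: operations flagged async may be set up and torn down without serializing on the global net_mutex, which is safe only when their init/exit touch nothing outside their own struct net. A sketch against that era's struct pernet_operations (the field was later removed once all users were converted); the foo_* names are hypothetical:

static struct pernet_operations foo_net_ops = {
	.init  = foo_net_init,		/* touches only its own struct net */
	.exit  = foo_net_exit,
	.id    = &foo_net_id,
	.size  = sizeof(struct foo_net),
	.async = true,			/* may run in parallel with other pernet ops */
};
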
index 16b6b11..6f4c521 100644 (file)
@@ -92,12 +92,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
        index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
 
        if (index == IPSET_INVALID_ID) {
-               pr_warn("Cannot find set identified by id %u to match\n",
-                       info->match_set.index);
+               pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+                                   info->match_set.index);
                return -ENOENT;
        }
        if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
-               pr_warn("Protocol error: set match dimension is over the limit!\n");
+               pr_info_ratelimited("set match dimension is over the limit!\n");
                ip_set_nfnl_put(par->net, info->match_set.index);
                return -ERANGE;
        }
@@ -143,12 +143,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
        index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
 
        if (index == IPSET_INVALID_ID) {
-               pr_warn("Cannot find set identified by id %u to match\n",
-                       info->match_set.index);
+               pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+                                   info->match_set.index);
                return -ENOENT;
        }
        if (info->match_set.dim > IPSET_DIM_MAX) {
-               pr_warn("Protocol error: set match dimension is over the limit!\n");
+               pr_info_ratelimited("set match dimension is over the limit!\n");
                ip_set_nfnl_put(par->net, info->match_set.index);
                return -ERANGE;
        }
@@ -241,8 +241,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
        if (info->add_set.index != IPSET_INVALID_ID) {
                index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
                if (index == IPSET_INVALID_ID) {
-                       pr_warn("Cannot find add_set index %u as target\n",
-                               info->add_set.index);
+                       pr_info_ratelimited("Cannot find add_set index %u as target\n",
+                                           info->add_set.index);
                        return -ENOENT;
                }
        }
@@ -250,8 +250,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
        if (info->del_set.index != IPSET_INVALID_ID) {
                index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
                if (index == IPSET_INVALID_ID) {
-                       pr_warn("Cannot find del_set index %u as target\n",
-                               info->del_set.index);
+                       pr_info_ratelimited("Cannot find del_set index %u as target\n",
+                                           info->del_set.index);
                        if (info->add_set.index != IPSET_INVALID_ID)
                                ip_set_nfnl_put(par->net, info->add_set.index);
                        return -ENOENT;
@@ -259,7 +259,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
        }
        if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
            info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
-               pr_warn("Protocol error: SET target dimension is over the limit!\n");
+               pr_info_ratelimited("SET target dimension over the limit!\n");
                if (info->add_set.index != IPSET_INVALID_ID)
                        ip_set_nfnl_put(par->net, info->add_set.index);
                if (info->del_set.index != IPSET_INVALID_ID)
@@ -316,8 +316,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
        if (info->add_set.index != IPSET_INVALID_ID) {
                index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
                if (index == IPSET_INVALID_ID) {
-                       pr_warn("Cannot find add_set index %u as target\n",
-                               info->add_set.index);
+                       pr_info_ratelimited("Cannot find add_set index %u as target\n",
+                                           info->add_set.index);
                        return -ENOENT;
                }
        }
@@ -325,8 +325,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
        if (info->del_set.index != IPSET_INVALID_ID) {
                index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
                if (index == IPSET_INVALID_ID) {
-                       pr_warn("Cannot find del_set index %u as target\n",
-                               info->del_set.index);
+                       pr_info_ratelimited("Cannot find del_set index %u as target\n",
+                                           info->del_set.index);
                        if (info->add_set.index != IPSET_INVALID_ID)
                                ip_set_nfnl_put(par->net, info->add_set.index);
                        return -ENOENT;
@@ -334,7 +334,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
        }
        if (info->add_set.dim > IPSET_DIM_MAX ||
            info->del_set.dim > IPSET_DIM_MAX) {
-               pr_warn("Protocol error: SET target dimension is over the limit!\n");
+               pr_info_ratelimited("SET target dimension over the limit!\n");
                if (info->add_set.index != IPSET_INVALID_ID)
                        ip_set_nfnl_put(par->net, info->add_set.index);
                if (info->del_set.index != IPSET_INVALID_ID)
@@ -444,8 +444,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
                index = ip_set_nfnl_get_byindex(par->net,
                                                info->add_set.index);
                if (index == IPSET_INVALID_ID) {
-                       pr_warn("Cannot find add_set index %u as target\n",
-                               info->add_set.index);
+                       pr_info_ratelimited("Cannot find add_set index %u as target\n",
+                                           info->add_set.index);
                        return -ENOENT;
                }
        }
@@ -454,8 +454,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
                index = ip_set_nfnl_get_byindex(par->net,
                                                info->del_set.index);
                if (index == IPSET_INVALID_ID) {
-                       pr_warn("Cannot find del_set index %u as target\n",
-                               info->del_set.index);
+                       pr_info_ratelimited("Cannot find del_set index %u as target\n",
+                                           info->del_set.index);
                        if (info->add_set.index != IPSET_INVALID_ID)
                                ip_set_nfnl_put(par->net,
                                                info->add_set.index);
@@ -465,7 +465,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
 
        if (info->map_set.index != IPSET_INVALID_ID) {
                if (strncmp(par->table, "mangle", 7)) {
-                       pr_warn("--map-set only usable from mangle table\n");
+                       pr_info_ratelimited("--map-set only usable from mangle table\n");
                        return -EINVAL;
                }
                if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
@@ -473,14 +473,14 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
                     !(par->hook_mask & (1 << NF_INET_FORWARD |
                                         1 << NF_INET_LOCAL_OUT |
                                         1 << NF_INET_POST_ROUTING))) {
-                       pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
+                       pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
                        return -EINVAL;
                }
                index = ip_set_nfnl_get_byindex(par->net,
                                                info->map_set.index);
                if (index == IPSET_INVALID_ID) {
-                       pr_warn("Cannot find map_set index %u as target\n",
-                               info->map_set.index);
+                       pr_info_ratelimited("Cannot find map_set index %u as target\n",
+                                           info->map_set.index);
                        if (info->add_set.index != IPSET_INVALID_ID)
                                ip_set_nfnl_put(par->net,
                                                info->add_set.index);
@@ -494,7 +494,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
        if (info->add_set.dim > IPSET_DIM_MAX ||
            info->del_set.dim > IPSET_DIM_MAX ||
            info->map_set.dim > IPSET_DIM_MAX) {
-               pr_warn("Protocol error: SET target dimension is over the limit!\n");
+               pr_info_ratelimited("SET target dimension over the limit!\n");
                if (info->add_set.index != IPSET_INVALID_ID)
                        ip_set_nfnl_put(par->net, info->add_set.index);
                if (info->del_set.index != IPSET_INVALID_ID)
index 575d215..2ac7f67 100644 (file)
@@ -171,7 +171,8 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par)
                return err;
 
        if (info->flags & ~XT_SOCKET_FLAGS_V1) {
-               pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
+               pr_info_ratelimited("unknown flags 0x%x\n",
+                                   info->flags & ~XT_SOCKET_FLAGS_V1);
                return -EINVAL;
        }
        return 0;
@@ -187,7 +188,8 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par)
                return err;
 
        if (info->flags & ~XT_SOCKET_FLAGS_V2) {
-               pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
+               pr_info_ratelimited("unknown flags 0x%x\n",
+                                   info->flags & ~XT_SOCKET_FLAGS_V2);
                return -EINVAL;
        }
        return 0;
@@ -203,8 +205,8 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par)
        if (err)
                return err;
        if (info->flags & ~XT_SOCKET_FLAGS_V3) {
-               pr_info("unknown flags 0x%x\n",
-                       info->flags & ~XT_SOCKET_FLAGS_V3);
+               pr_info_ratelimited("unknown flags 0x%x\n",
+                                   info->flags & ~XT_SOCKET_FLAGS_V3);
                return -EINVAL;
        }
        return 0;
index 5fbd791..0b41c0b 100644 (file)
@@ -44,8 +44,8 @@ static int state_mt_check(const struct xt_mtchk_param *par)
 
        ret = nf_ct_netns_get(par->net, par->family);
        if (ret < 0)
-               pr_info("cannot load conntrack support for proto=%u\n",
-                       par->family);
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
        return ret;
 }
 
index 1b01eec..0160f50 100644 (file)
@@ -235,13 +235,13 @@ static int time_mt_check(const struct xt_mtchk_param *par)
 
        if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
            info->daytime_stop > XT_TIME_MAX_DAYTIME) {
-               pr_info("invalid argument - start or "
-                       "stop time greater than 23:59:59\n");
+               pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n");
                return -EDOM;
        }
 
        if (info->flags & ~XT_TIME_ALL_FLAGS) {
-               pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS);
+               pr_info_ratelimited("unknown flags 0x%x\n",
+                                   info->flags & ~XT_TIME_ALL_FLAGS);
                return -EINVAL;
        }
 
index 2ad445c..5d10dcf 100644 (file)
@@ -253,6 +253,7 @@ static struct pernet_operations netlink_tap_net_ops = {
        .exit = netlink_tap_exit_net,
        .id   = &netlink_tap_net_id,
        .size = sizeof(struct netlink_tap_net),
+       .async = true,
 };
 
 static bool netlink_filter_tap(const struct sk_buff *skb)
@@ -1105,7 +1106,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 }
 
 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
-                          int *addr_len, int peer)
+                          int peer)
 {
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
@@ -1113,7 +1114,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 
        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;
-       *addr_len = sizeof(*nladdr);
 
        if (peer) {
                nladdr->nl_pid = nlk->dst_portid;
@@ -1124,7 +1124,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
                netlink_unlock_table();
        }
-       return 0;
+       return sizeof(*nladdr);
 }
 
 static int netlink_ioctl(struct socket *sock, unsigned int cmd,
@@ -2308,7 +2308,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
        if (cb->start) {
                ret = cb->start(cb);
                if (ret)
-                       goto error_unlock;
+                       goto error_put;
        }
 
        nlk->cb_running = true;
@@ -2328,6 +2328,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
         */
        return -EINTR;
 
+error_put:
+       module_put(control->module);
 error_unlock:
        sock_put(sk);
        mutex_unlock(nlk->cb_mutex);
@@ -2724,6 +2726,7 @@ static void __init netlink_add_usersock_entry(void)
 static struct pernet_operations __net_initdata netlink_net_ops = {
        .init = netlink_net_init,
        .exit = netlink_net_exit,
+       .async = true,
 };
 
 static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
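
The netlink_getname() hunks above are part of a tree-wide change to proto_ops->getname(): instead of filling a caller-provided *addr_len out-parameter, the op now returns the address length on success (negative errno on failure), removing the case where a 0 return leaves the length uninitialized. New-style implementations follow this shape (AF_FOO and its helpers are hypothetical):

static int foo_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sockaddr_foo *addr = (struct sockaddr_foo *)uaddr;

	memset(addr, 0, sizeof(*addr));
	addr->sfoo_family = AF_FOO;
	foo_fill_address(sock, addr, peer);	/* local or peer address */

	return sizeof(*addr);	/* was: *uaddr_len = sizeof(*addr); return 0; */
}

In-kernel callers such as the rds_tcp_tc_info() hunk further down adapt by dropping their length variable and using the return value.
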
index 6f02499..a6f63a5 100644 (file)
@@ -1035,6 +1035,7 @@ static void __net_exit genl_pernet_exit(struct net *net)
 static struct pernet_operations genl_pernet_ops = {
        .init = genl_pernet_init,
        .exit = genl_pernet_exit,
+       .async = true,
 };
 
 static int __init genl_init(void)
index 9ba30c6..35bb680 100644 (file)
@@ -829,11 +829,12 @@ out_release:
 }
 
 static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
-       int *uaddr_len, int peer)
+       int peer)
 {
        struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
        struct sock *sk = sock->sk;
        struct nr_sock *nr = nr_sk(sk);
+       int uaddr_len;
 
        memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
 
@@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
                sax->fsa_ax25.sax25_call   = nr->user_addr;
                memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
                sax->fsa_digipeater[0]     = nr->dest_addr;
-               *uaddr_len = sizeof(struct full_sockaddr_ax25);
+               uaddr_len = sizeof(struct full_sockaddr_ax25);
        } else {
                sax->fsa_ax25.sax25_family = AF_NETROM;
                sax->fsa_ax25.sax25_ndigis = 0;
                sax->fsa_ax25.sax25_call   = nr->source_addr;
-               *uaddr_len = sizeof(struct sockaddr_ax25);
+               uaddr_len = sizeof(struct sockaddr_ax25);
        }
        release_sock(sk);
 
-       return 0;
+       return uaddr_len;
 }
 
 int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
index 367d8c0..2ceefa1 100644 (file)
@@ -149,6 +149,10 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
 
        pr_debug("uri: %s, len: %zu\n", uri, uri_len);
 
+       /* sdreq->tlv_len is u8: uri_len plus 3 bytes of header and 1 NUL terminator */
+       if (WARN_ON_ONCE(uri_len > U8_MAX - 4))
+               return NULL;
+
        sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
        if (sdreq == NULL)
                return NULL;
index 3760400..ea0c0c6 100644 (file)
@@ -497,7 +497,7 @@ error:
 }
 
 static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
-                            int *len, int peer)
+                            int peer)
 {
        struct sock *sk = sock->sk;
        struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
                 llcp_sock->dsap, llcp_sock->ssap);
 
        memset(llcp_addr, 0, sizeof(*llcp_addr));
-       *len = sizeof(struct sockaddr_nfc_llcp);
 
        lock_sock(sk);
        if (!llcp_sock->dev) {
@@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
               llcp_addr->service_name_len);
        release_sock(sk);
 
-       return 0;
+       return sizeof(struct sockaddr_nfc_llcp);
 }
 
 static inline __poll_t llcp_accept_poll(struct sock *parent)
index c0b83dc..f018eaf 100644 (file)
@@ -61,7 +61,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 };
 
 static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
-       [NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+       [NFC_SDP_ATTR_URI] = { .type = NLA_STRING,
+                              .len = U8_MAX - 4 },
        [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
 };
 
index e0f3f4a..2c5a6fe 100644 (file)
@@ -3409,7 +3409,7 @@ out:
 }
 
 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
-                              int *uaddr_len, int peer)
+                              int peer)
 {
        struct net_device *dev;
        struct sock *sk = sock->sk;
@@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
        if (dev)
                strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
        rcu_read_unlock();
-       *uaddr_len = sizeof(*uaddr);
 
-       return 0;
+       return sizeof(*uaddr);
 }
 
 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
-                         int *uaddr_len, int peer)
+                         int peer)
 {
        struct net_device *dev;
        struct sock *sk = sock->sk;
@@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                sll->sll_halen = 0;
        }
        rcu_read_unlock();
-       *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 
-       return 0;
+       return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 }
 
 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
@@ -4559,6 +4557,7 @@ static void __net_exit packet_net_exit(struct net *net)
 static struct pernet_operations packet_net_ops = {
        .init = packet_net_init,
        .exit = packet_net_exit,
+       .async = true,
 };
 
 
index 7778751..9454e83 100644 (file)
@@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = {
        .exit = phonet_exit_net,
        .id   = &phonet_net_id,
        .size = sizeof(struct phonet_net),
+       .async = true,
 };
 
 /* Initialize Phonet devices list */
index fffcd69..f9b40e6 100644 (file)
@@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 }
 
 static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
-                               int *sockaddr_len, int peer)
+                               int peer)
 {
        struct sock *sk = sock->sk;
        struct pn_sock *pn = pn_sk(sk);
@@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
                pn_sockaddr_set_object((struct sockaddr_pn *)addr,
                                        pn->sobject);
 
-       *sockaddr_len = sizeof(struct sockaddr_pn);
-       return 0;
+       return sizeof(struct sockaddr_pn);
 }
 
 static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
index 5fb3929..b33e5ae 100644 (file)
@@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
 }
 
 static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
-                       int *len, int peer)
+                       int peer)
 {
        struct qrtr_sock *ipc = qrtr_sk(sock->sk);
        struct sockaddr_qrtr qaddr;
@@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
        }
        release_sock(sk);
 
-       *len = sizeof(qaddr);
        qaddr.sq_family = AF_QIPCRTR;
 
        memcpy(saddr, &qaddr, sizeof(qaddr));
 
-       return 0;
+       return sizeof(qaddr);
 }
 
 static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
index 744c637..f712610 100644 (file)
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
        rds_send_drop_to(rs, NULL);
        rds_rdma_drop_keys(rs);
        rds_notify_queue_get(rs, NULL);
+       __skb_queue_purge(&rs->rs_zcookie_queue);
 
        spin_lock_bh(&rds_sock_lock);
        list_del_init(&rs->rs_item);
@@ -110,7 +111,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
 }
 
 static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
-                      int *uaddr_len, int peer)
+                      int peer)
 {
        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
        struct rds_sock *rs = rds_sk_to_rs(sock->sk);
@@ -131,8 +132,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
 
        sin->sin_family = AF_INET;
 
-       *uaddr_len = sizeof(*sin);
-       return 0;
+       return sizeof(*sin);
 }
 
 /*
@@ -145,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
  *  -  to signal that a previously congested destination may have become
  *     uncongested
  *  -  A notification has been queued to the socket (this can be a congestion
- *     update, or a RDMA completion).
+ *     update, or an RDS completion, or a MSG_ZEROCOPY completion).
  *
  * EPOLLOUT is asserted if there is room on the send queue. This does not mean
  * however, that the next sendmsg() call will succeed. If the application tries
@@ -179,10 +179,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
                spin_unlock(&rs->rs_lock);
        }
        if (!list_empty(&rs->rs_recv_queue) ||
-           !list_empty(&rs->rs_notify_queue))
+           !list_empty(&rs->rs_notify_queue) ||
+           !skb_queue_empty(&rs->rs_zcookie_queue))
                mask |= (EPOLLIN | EPOLLRDNORM);
        if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
                mask |= (EPOLLOUT | EPOLLWRNORM);
+       if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+               mask |= EPOLLERR;
        read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
        /* clear state any time we wake a seen-congested socket */
@@ -512,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
        INIT_LIST_HEAD(&rs->rs_recv_queue);
        INIT_LIST_HEAD(&rs->rs_notify_queue);
        INIT_LIST_HEAD(&rs->rs_cong_list);
+       skb_queue_head_init(&rs->rs_zcookie_queue);
        spin_lock_init(&rs->rs_rdma_lock);
        rs->rs_rdma_keys = RB_ROOT;
        rs->rs_rx_traces = 0;
index 94e190f..2da3176 100644 (file)
@@ -224,7 +224,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
        if (rds_destroy_pending(conn))
                ret = -ENETDOWN;
        else
-               ret = trans->conn_alloc(conn, gfp);
+               ret = trans->conn_alloc(conn, GFP_ATOMIC);
        if (ret) {
                rcu_read_unlock();
                kfree(conn->c_path);
index 4318cc9..116cf87 100644 (file)
@@ -33,6 +33,9 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>
 
 #include "rds.h"
 
@@ -53,20 +56,84 @@ void rds_message_addref(struct rds_message *rm)
 }
 EXPORT_SYMBOL_GPL(rds_message_addref);
 
+static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
+{
+       struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
+       int ncookies = ck->num;
+
+       if (ncookies == RDS_MAX_ZCOOKIES)
+               return false;
+       ck->cookies[ncookies] = cookie;
+       ck->num = ++ncookies;
+       return true;
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+                                    struct rds_znotifier *znotif)
+{
+       struct sk_buff *skb, *tail;
+       unsigned long flags;
+       struct sk_buff_head *q;
+       u32 cookie = znotif->z_cookie;
+       struct rds_zcopy_cookies *ck;
+
+       q = &rs->rs_zcookie_queue;
+       spin_lock_irqsave(&q->lock, flags);
+       tail = skb_peek_tail(q);
+
+       if (tail && skb_zcookie_add(tail, cookie)) {
+               spin_unlock_irqrestore(&q->lock, flags);
+               mm_unaccount_pinned_pages(&znotif->z_mmp);
+               consume_skb(rds_skb_from_znotifier(znotif));
+               /* caller invokes rds_wake_sk_sleep() */
+               return;
+       }
+
+       skb = rds_skb_from_znotifier(znotif);
+       ck = (struct rds_zcopy_cookies *)skb->cb;
+       memset(ck, 0, sizeof(*ck));
+       WARN_ON(!skb_zcookie_add(skb, cookie));
+
+       __skb_queue_tail(q, skb);
+
+       spin_unlock_irqrestore(&q->lock, flags);
+       /* caller invokes rds_wake_sk_sleep() */
+
+       mm_unaccount_pinned_pages(&znotif->z_mmp);
+}
+
 /*
  * This relies on dma_map_sg() not touching sg[].page during merging.
  */
 static void rds_message_purge(struct rds_message *rm)
 {
-       unsigned long i;
+       unsigned long i, flags;
+       bool zcopy = false;
 
        if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
                return;
 
+       spin_lock_irqsave(&rm->m_rs_lock, flags);
+       if (rm->m_rs) {
+               struct rds_sock *rs = rm->m_rs;
+
+               if (rm->data.op_mmp_znotifier) {
+                       zcopy = true;
+                       rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+                       rds_wake_sk_sleep(rs);
+                       rm->data.op_mmp_znotifier = NULL;
+               }
+               sock_put(rds_rs_to_sk(rs));
+               rm->m_rs = NULL;
+       }
+       spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
        for (i = 0; i < rm->data.op_nents; i++) {
-               rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
                /* XXX will have to put_page for page refs */
-               __free_page(sg_page(&rm->data.op_sg[i]));
+               if (!zcopy)
+                       __free_page(sg_page(&rm->data.op_sg[i]));
+               else
+                       put_page(sg_page(&rm->data.op_sg[i]));
        }
        rm->data.op_nents = 0;
 
@@ -266,12 +333,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
        return rm;
 }
 
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+                              bool zcopy)
 {
        unsigned long to_copy, nbytes;
        unsigned long sg_off;
        struct scatterlist *sg;
        int ret = 0;
+       int length = iov_iter_count(from);
 
        rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
 
@@ -281,6 +350,55 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
        sg = rm->data.op_sg;
        sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
+       if (zcopy) {
+               int total_copied = 0;
+               struct sk_buff *skb;
+
+               skb = alloc_skb(0, GFP_KERNEL);
+               if (!skb)
+                       return -ENOMEM;
+               BUILD_BUG_ON(sizeof(skb->cb) <
+                            max_t(int, sizeof(struct rds_znotifier),
+                                  sizeof(struct rds_zcopy_cookies)));
+               rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
+               if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+                                           length)) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               while (iov_iter_count(from)) {
+                       struct page *pages;
+                       size_t start;
+                       ssize_t copied;
+
+                       copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+                                                   1, &start);
+                       if (copied < 0) {
+                               struct mmpin *mmp;
+                               int i;
+
+                               for (i = 0; i < rm->data.op_nents; i++)
+                                       put_page(sg_page(&rm->data.op_sg[i]));
+                               mmp = &rm->data.op_mmp_znotifier->z_mmp;
+                               mm_unaccount_pinned_pages(mmp);
+                               ret = -EFAULT;
+                               goto err;
+                       }
+                       total_copied += copied;
+                       iov_iter_advance(from, copied);
+                       length -= copied;
+                       sg_set_page(sg, pages, copied, start);
+                       rm->data.op_nents++;
+                       sg++;
+               }
+               WARN_ON_ONCE(length != 0);
+               return ret;
+err:
+               consume_skb(skb);
+               rm->data.op_mmp_znotifier = NULL;
+               return ret;
+       } /* zcopy */
+
        while (iov_iter_count(from)) {
                if (!sg_page(sg)) {
                        ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
index 7301b9b..33b1635 100644 (file)
@@ -356,6 +356,19 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 #define RDS_MSG_PAGEVEC                7
 #define RDS_MSG_FLUSH          8
 
+struct rds_znotifier {
+       struct list_head        z_list;
+       struct mmpin            z_mmp;
+       u32                     z_cookie;
+};
+
+#define        RDS_ZCOPY_SKB(__skb)    ((struct rds_znotifier *)&((__skb)->cb[0]))
+
+static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z)
+{
+       return container_of((void *)z, struct sk_buff, cb);
+}
+
 struct rds_message {
        refcount_t              m_refcount;
        struct list_head        m_sock_item;
@@ -436,6 +449,7 @@ struct rds_message {
                        unsigned int            op_count;
                        unsigned int            op_dmasg;
                        unsigned int            op_dmaoff;
+                       struct rds_znotifier    *op_mmp_znotifier;
                        struct scatterlist      *op_sg;
                } data;
        };
@@ -589,6 +603,8 @@ struct rds_sock {
        /* Socket receive path trace points*/
        u8                      rs_rx_traces;
        u8                      rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+       struct sk_buff_head     rs_zcookie_queue;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -771,7 +787,8 @@ rds_conn_connecting(struct rds_connection *conn)
 /* message.c */
 struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
 struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+                              bool zcopy);
 struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
 void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
                                 __be16 dport, u64 seq);
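
rds.h stashes the zerocopy notifier in the skb control block and recovers the skb with container_of(); the BUILD_BUG_ON() in rds_message_copy_from_user() is what keeps the overlay inside the 48-byte cb[]. The generic pattern, with struct foo_state standing in for rds_znotifier/rds_zcopy_cookies:

#include <linux/skbuff.h>

struct foo_state {			/* per-skb scratch state */
	u32 cookie;
};

static inline struct foo_state *foo_state(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct foo_state) > sizeof(skb->cb));
	return (struct foo_state *)skb->cb;
}

static inline struct sk_buff *foo_state_to_skb(struct foo_state *st)
{
	/* inverse mapping: cb[] is embedded in the skb */
	return container_of((void *)st, struct sk_buff, cb);
}
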
index b25bcfe..d507477 100644 (file)
@@ -577,6 +577,32 @@ out:
        return ret;
 }
 
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+       struct sk_buff *skb;
+       struct sk_buff_head *q = &rs->rs_zcookie_queue;
+       struct rds_zcopy_cookies *done;
+
+       if (!msg->msg_control)
+               return false;
+
+       if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+           msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+               return false;
+
+       skb = skb_dequeue(q);
+       if (!skb)
+               return false;
+       done = (struct rds_zcopy_cookies *)skb->cb;
+       if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+                    done)) {
+               skb_queue_head(q, skb);
+               return false;
+       }
+       consume_skb(skb);
+       return true;
+}
+
 int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                int msg_flags)
 {
@@ -594,6 +620,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
        if (msg_flags & MSG_OOB)
                goto out;
+       if (msg_flags & MSG_ERRQUEUE)
+               return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR);
 
        while (1) {
                /* If there are pending notifications, do those - and nothing else */
@@ -609,7 +637,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
                if (!rds_next_incoming(rs, &inc)) {
                        if (nonblock) {
-                               ret = -EAGAIN;
+                               bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+                               ret = reaped ? 0 : -EAGAIN;
                                break;
                        }
 
@@ -658,6 +688,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                        ret = -EFAULT;
                        goto out;
                }
+               rds_recvmsg_zcookie(rs, msg);
 
                rds_stats_inc(s_recv_delivered);
 
index b1b0022..acad042 100644 (file)
@@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status)
                                rm->rdma.op_notifier = NULL;
                        }
                        was_on_sock = 1;
-                       rm->m_rs = NULL;
                }
                spin_unlock(&rs->rs_lock);
 
@@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
                 */
                if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
                        spin_unlock_irqrestore(&cp->cp_lock, flags);
-                       spin_lock_irqsave(&rm->m_rs_lock, flags);
-                       rm->m_rs = NULL;
-                       spin_unlock_irqrestore(&rm->m_rs_lock, flags);
                        continue;
                }
                list_del_init(&rm->m_conn_item);
@@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
                __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
                spin_unlock(&rs->rs_lock);
 
-               rm->m_rs = NULL;
                spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
                rds_message_put(rm);
@@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
                __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
                spin_unlock(&rs->rs_lock);
 
-               rm->m_rs = NULL;
                spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
                rds_message_put(rm);
@@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
                list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
                set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
                rds_message_addref(rm);
+               sock_hold(rds_rs_to_sk(rs));
                rm->m_rs = rs;
 
                /* The code ordering is a little weird, but we're
@@ -880,12 +875,13 @@ out:
  * rds_message is getting to be quite complicated, and we'd like to allocate
  * it all in one go. This figures out how big it needs to be up front.
  */
-static int rds_rm_size(struct msghdr *msg, int data_len)
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
 {
        struct cmsghdr *cmsg;
        int size = 0;
        int cmsg_groups = 0;
        int retval;
+       bool zcopy_cookie = false;
 
        for_each_cmsghdr(cmsg, msg) {
                if (!CMSG_OK(msg, cmsg))
@@ -904,6 +900,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 
                        break;
 
+               case RDS_CMSG_ZCOPY_COOKIE:
+                       zcopy_cookie = true;
+                       /* fall through */
+
                case RDS_CMSG_RDMA_DEST:
                case RDS_CMSG_RDMA_MAP:
                        cmsg_groups |= 2;
@@ -924,7 +924,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 
        }
 
-       size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
+       if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
+               return -EINVAL;
+
+       size += num_sgs * sizeof(struct scatterlist);
 
        /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
        if (cmsg_groups == 3)
@@ -933,6 +936,19 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
        return size;
 }
 
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+                         struct cmsghdr *cmsg)
+{
+       u32 *cookie;
+
+       if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+           !rm->data.op_mmp_znotifier)
+               return -EINVAL;
+       cookie = CMSG_DATA(cmsg);
+       rm->data.op_mmp_znotifier->z_cookie = *cookie;
+       return 0;
+}
+
 static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
                         struct msghdr *msg, int *allocated_mr)
 {
@@ -975,6 +991,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
                        ret = rds_cmsg_atomic(rs, rm, cmsg);
                        break;
 
+               case RDS_CMSG_ZCOPY_COOKIE:
+                       ret = rds_cmsg_zcopy(rs, rm, cmsg);
+                       break;
+
                default:
                        return -EINVAL;
                }
@@ -1045,10 +1065,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
        long timeo = sock_sndtimeo(sk, nonblock);
        struct rds_conn_path *cpath;
        size_t total_payload_len = payload_len, rdma_payload_len = 0;
+       bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
+                     sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
+       int num_sgs = ceil(payload_len, PAGE_SIZE);
 
        /* Mirror Linux UDP's mirroring of BSD error message compatibility */
        /* XXX: Perhaps MSG_MORE someday */
-       if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+       if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
                ret = -EOPNOTSUPP;
                goto out;
        }
@@ -1092,8 +1115,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
                goto out;
        }
 
+       if (zcopy) {
+               if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
+                       ret = -EOPNOTSUPP;
+                       goto out;
+               }
+               num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
+       }
        /* size of rm including all sgs */
-       ret = rds_rm_size(msg, payload_len);
+       ret = rds_rm_size(msg, num_sgs);
        if (ret < 0)
                goto out;
 
@@ -1105,12 +1135,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 
        /* Attach data to the rm */
        if (payload_len) {
-               rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+               rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
                if (!rm->data.op_sg) {
                        ret = -ENOMEM;
                        goto out;
                }
-               ret = rds_message_copy_from_user(rm, &msg->msg_iter);
+               ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
                if (ret)
                        goto out;
        }
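
Taken together, the rds changes implement MSG_ZEROCOPY for RDS-over-TCP: rds_sendmsg() pins the user pages (iov_iter_get_pages) instead of copying, tags the message with a caller-chosen 32-bit cookie, and rds_message_purge() later queues that cookie on rs_zcookie_queue for the application to reap through recvmsg() control data. A rough userspace sketch, assuming the uapi this series adds to linux/rds.h (RDS_CMSG_ZCOPY_COOKIE, RDS_CMSG_ZCOPY_COMPLETION, struct rds_zcopy_cookies):

#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <linux/rds.h>		/* SOL_RDS and the cmsg types added here */

static ssize_t rds_send_zcopy(int fd, struct msghdr *msg, uint32_t cookie)
{
	char control[CMSG_SPACE(sizeof(cookie))] = { 0 };
	struct cmsghdr *cmsg;
	int one = 1;

	/* rds_sendmsg() requires both SOCK_ZEROCOPY and MSG_ZEROCOPY */
	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));

	msg->msg_control = control;
	msg->msg_controllen = sizeof(control);
	cmsg = CMSG_FIRSTHDR(msg);
	cmsg->cmsg_level = SOL_RDS;
	cmsg->cmsg_type  = RDS_CMSG_ZCOPY_COOKIE;	/* tag for later completion */
	cmsg->cmsg_len   = CMSG_LEN(sizeof(cookie));
	memcpy(CMSG_DATA(cmsg), &cookie, sizeof(cookie));

	return sendmsg(fd, msg, MSG_ZEROCOPY);
}

Completions arrive as RDS_CMSG_ZCOPY_COMPLETION control data on a later recvmsg(), batched up to RDS_MAX_ZCOOKIES per struct rds_zcopy_cookies as rds_rm_zerocopy_callback() shows; per the rds_recvmsg() hunk above, a nonblocking recvmsg() that finds no data can still return 0 after reaping cookies.
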
index 44c4652..08230a1 100644 (file)
@@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
        struct rds_tcp_connection *tc;
        unsigned long flags;
        struct sockaddr_in sin;
-       int sinlen;
        struct socket *sock;
 
        spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
@@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 
                sock = tc->t_sock;
                if (sock) {
-                       sock->ops->getname(sock, (struct sockaddr *)&sin,
-                                          &sinlen, 0);
+                       sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
                        tsinfo.local_addr = sin.sin_addr.s_addr;
                        tsinfo.local_port = sin.sin_port;
-                       sock->ops->getname(sock, (struct sockaddr *)&sin,
-                                          &sinlen, 1);
+                       sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
                        tsinfo.peer_addr = sin.sin_addr.s_addr;
                        tsinfo.peer_port = sin.sin_port;
                }
index 083bd25..5170373 100644 (file)
@@ -938,7 +938,7 @@ out_release:
 }
 
 static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
-       int *uaddr_len, int peer)
+       int peer)
 {
        struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
        struct sock *sk = sock->sk;
@@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
                        srose->srose_digis[n] = rose->source_digis[n];
        }
 
-       *uaddr_len = sizeof(struct full_sockaddr_rose);
-       return 0;
+       return sizeof(struct full_sockaddr_rose);
 }
 
 int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
index 42410e9..cf73dc0 100644 (file)
@@ -445,7 +445,7 @@ send_fragmentable:
                                        (char *)&opt, sizeof(opt));
                if (ret == 0) {
                        ret = kernel_sendmsg(conn->params.local->socket, &msg,
-                                            iov, 1, iov[0].iov_len);
+                                            iov, 2, len);
 
                        opt = IPV6_PMTUDISC_DO;
                        kernel_setsockopt(conn->params.local->socket,
index cc21e8d..9d45d8b 100644 (file)
@@ -517,9 +517,10 @@ try_again:
                        ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
                                       sizeof(unsigned int), &id32);
                } else {
+                       unsigned long idl = call->user_call_ID;
+
                        ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
-                                      sizeof(unsigned long),
-                                      &call->user_call_ID);
+                                      sizeof(unsigned long), &idl);
                }
                if (ret < 0)
                        goto error_unlock_call;
index f24a6ae..a01169f 100644 (file)
@@ -658,6 +658,18 @@ config NET_EMATCH_IPSET
          To compile this code as a module, choose M here: the
          module will be called em_ipset.
 
+config NET_EMATCH_IPT
+       tristate "IPtables Matches"
+       depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+       ---help---
+         Say Y here to be able to classify packets based on iptables
+         matches.
+         The currently supported match is "policy", which allows packet
+         classification based on the IPsec policy that was used during
+         decapsulation.
+
+         To compile this code as a module, choose M here: the
+         module will be called em_ipt.
+
 config NET_CLS_ACT
        bool "Actions"
        select NET_CLS
index 5b63544..8811d38 100644 (file)
@@ -75,3 +75,4 @@ obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
 obj-$(CONFIG_NET_EMATCH_TEXT)  += em_text.o
 obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o
 obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT)   += em_ipt.o
index eba6682..1f65d6a 100644 (file)
@@ -202,7 +202,8 @@ nla_put_failure:
 
 int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
                       struct netlink_callback *cb, int type,
-                      const struct tc_action_ops *ops)
+                      const struct tc_action_ops *ops,
+                      struct netlink_ext_ack *extack)
 {
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
@@ -211,7 +212,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
        } else if (type == RTM_GETACTION) {
                return tcf_dump_walker(idrinfo, skb, cb);
        } else {
-               WARN(1, "tcf_generic_walker: unknown action %d\n", type);
+               WARN(1, "tcf_generic_walker: unknown command %d\n", type);
+               NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
                return -EINVAL;
        }
 }
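
The act_api churn in this file threads struct netlink_ext_ack through the action ops (init, lookup, walk) so that validation failures hand userspace a human-readable string alongside the errno. The reporting pattern, in an illustrative init callback matching the new signature:

static int foo_act_init(struct net *net, struct nlattr *nla,
			struct nlattr *est, struct tc_action **a,
			int ovr, int bind, struct netlink_ext_ack *extack)
{
	if (!nla) {
		/* the string travels back in the netlink ack with the errno */
		NL_SET_ERR_MSG(extack, "Missing required TC action attributes");
		return -EINVAL;
	}
	return 0;	/* a real init would parse nla and set up *a */
}
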
@@ -605,7 +607,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind)
+                                   char *name, int ovr, int bind,
+                                   struct netlink_ext_ack *extack)
 {
        struct tc_action *a;
        struct tc_action_ops *a_o;
@@ -616,31 +619,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        int err;
 
        if (name == NULL) {
-               err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+               err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
                if (err < 0)
                        goto err_out;
                err = -EINVAL;
                kind = tb[TCA_ACT_KIND];
-               if (kind == NULL)
+               if (!kind) {
+                       NL_SET_ERR_MSG(extack, "TC action kind must be specified");
                        goto err_out;
-               if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+               }
+               if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+                       NL_SET_ERR_MSG(extack, "TC action name too long");
                        goto err_out;
+               }
                if (tb[TCA_ACT_COOKIE]) {
                        int cklen = nla_len(tb[TCA_ACT_COOKIE]);
 
-                       if (cklen > TC_COOKIE_MAX_SIZE)
+                       if (cklen > TC_COOKIE_MAX_SIZE) {
+                               NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
                                goto err_out;
+                       }
 
                        cookie = nla_memdup_cookie(tb);
                        if (!cookie) {
+                               NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
                                err = -ENOMEM;
                                goto err_out;
                        }
                }
        } else {
-               err = -EINVAL;
-               if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+               if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+                       NL_SET_ERR_MSG(extack, "TC action name too long");
+                       err = -EINVAL;
                        goto err_out;
+               }
        }
 
        a_o = tc_lookup_action_n(act_name);
@@ -663,15 +675,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                        goto err_mod;
                }
 #endif
+               NL_SET_ERR_MSG(extack, "Failed to load TC action module");
                err = -ENOENT;
                goto err_out;
        }
 
        /* backward compatibility for policer */
        if (name == NULL)
-               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
+               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
+                               extack);
        else
-               err = a_o->init(net, nla, est, &a, ovr, bind);
+               err = a_o->init(net, nla, est, &a, ovr, bind, extack);
        if (err < 0)
                goto err_mod;
 
@@ -697,6 +711,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 
                        list_add_tail(&a->list, &actions);
                        tcf_action_destroy(&actions, bind);
+                       NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
                        return ERR_PTR(err);
                }
        }
@@ -726,19 +741,20 @@ static void cleanup_a(struct list_head *actions, int ovr)
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
                    struct nlattr *est, char *name, int ovr, int bind,
-                   struct list_head *actions)
+                   struct list_head *actions, struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *act;
        int err;
        int i;
 
-       err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+       err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
        if (err < 0)
                return err;
 
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-               act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+               act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+                                       extack);
                if (IS_ERR(act)) {
                        err = PTR_ERR(act);
                        goto err;
@@ -822,7 +838,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
        t->tca__pad2 = 0;
 
        nest = nla_nest_start(skb, TCA_ACT_TAB);
-       if (nest == NULL)
+       if (!nest)
                goto out_nlmsg_trim;
 
        if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -840,7 +856,8 @@ out_nlmsg_trim:
 
 static int
 tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-              struct list_head *actions, int event)
+              struct list_head *actions, int event,
+              struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
 
@@ -849,6 +866,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
                return -ENOBUFS;
        if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
                         0, 0) <= 0) {
+               NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while getting TC action");
                kfree_skb(skb);
                return -EINVAL;
        }
@@ -857,7 +875,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 }
 
 static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
-                                         struct nlmsghdr *n, u32 portid)
+                                         struct nlmsghdr *n, u32 portid,
+                                         struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_ACT_MAX + 1];
        const struct tc_action_ops *ops;
@@ -865,22 +884,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
        int index;
        int err;
 
-       err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+       err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
        if (err < 0)
                goto err_out;
 
        err = -EINVAL;
        if (tb[TCA_ACT_INDEX] == NULL ||
-           nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+           nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
+               NL_SET_ERR_MSG(extack, "Invalid TC action index value");
                goto err_out;
+       }
        index = nla_get_u32(tb[TCA_ACT_INDEX]);
 
        err = -EINVAL;
        ops = tc_lookup_action(tb[TCA_ACT_KIND]);
-       if (!ops) /* could happen in batch of actions */
+       if (!ops) { /* could happen in batch of actions */
+               NL_SET_ERR_MSG(extack, "Specified TC action not found");
                goto err_out;
+       }
        err = -ENOENT;
-       if (ops->lookup(net, &a, index) == 0)
+       if (ops->lookup(net, &a, index, extack) == 0)
                goto err_mod;
 
        module_put(ops->owner);
@@ -893,7 +916,8 @@ err_out:
 }
 
 static int tca_action_flush(struct net *net, struct nlattr *nla,
-                           struct nlmsghdr *n, u32 portid)
+                           struct nlmsghdr *n, u32 portid,
+                           struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
        unsigned char *b;
@@ -907,39 +931,45 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
        int err = -ENOMEM;
 
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-       if (!skb) {
-               pr_debug("tca_action_flush: failed skb alloc\n");
+       if (!skb)
                return err;
-       }
 
        b = skb_tail_pointer(skb);
 
-       err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+       err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
        if (err < 0)
                goto err_out;
 
        err = -EINVAL;
        kind = tb[TCA_ACT_KIND];
        ops = tc_lookup_action(kind);
-       if (!ops) /*some idjot trying to flush unknown action */
+       if (!ops) { /*some idjot trying to flush unknown action */
+               NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
                goto err_out;
+       }
 
        nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
                        sizeof(*t), 0);
-       if (!nlh)
+       if (!nlh) {
+               NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
                goto out_module_put;
+       }
        t = nlmsg_data(nlh);
        t->tca_family = AF_UNSPEC;
        t->tca__pad1 = 0;
        t->tca__pad2 = 0;
 
        nest = nla_nest_start(skb, TCA_ACT_TAB);
-       if (nest == NULL)
+       if (!nest) {
+               NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
                goto out_module_put;
+       }
 
-       err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
-       if (err <= 0)
+       err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
+       if (err <= 0) {
+               nla_nest_cancel(skb, nest);
                goto out_module_put;
+       }
 
        nla_nest_end(skb, nest);
 
@@ -950,6 +980,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
                             n->nlmsg_flags & NLM_F_ECHO);
        if (err > 0)
                return 0;
+       if (err < 0)
+               NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
 
        return err;
 
@@ -962,7 +994,7 @@ err_out:
 
 static int
 tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-              u32 portid)
+              u32 portid, struct netlink_ext_ack *extack)
 {
        int ret;
        struct sk_buff *skb;
@@ -973,6 +1005,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
        if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
                         0, 1) <= 0) {
+               NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
                kfree_skb(skb);
                return -EINVAL;
        }
@@ -980,6 +1013,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
        /* now do the delete */
        ret = tcf_action_destroy(actions, 0);
        if (ret < 0) {
+               NL_SET_ERR_MSG(extack, "Failed to delete TC action");
                kfree_skb(skb);
                return ret;
        }
@@ -993,26 +1027,27 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 static int
 tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
-             u32 portid, int event)
+             u32 portid, int event, struct netlink_ext_ack *extack)
 {
        int i, ret;
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *act;
        LIST_HEAD(actions);
 
-       ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+       ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
        if (ret < 0)
                return ret;
 
        if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
-               if (tb[1] != NULL)
-                       return tca_action_flush(net, tb[1], n, portid);
-               else
-                       return -EINVAL;
+               if (tb[1])
+                       return tca_action_flush(net, tb[1], n, portid, extack);
+
+               NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
+               return -EINVAL;
        }
 
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-               act = tcf_action_get_1(net, tb[i], n, portid);
+               act = tcf_action_get_1(net, tb[i], n, portid, extack);
                if (IS_ERR(act)) {
                        ret = PTR_ERR(act);
                        goto err;
@@ -1022,9 +1057,9 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
        }
 
        if (event == RTM_GETACTION)
-               ret = tcf_get_notify(net, portid, n, &actions, event);
+               ret = tcf_get_notify(net, portid, n, &actions, event, extack);
        else { /* delete */
-               ret = tcf_del_notify(net, n, &actions, portid);
+               ret = tcf_del_notify(net, n, &actions, portid, extack);
                if (ret)
                        goto err;
                return ret;
@@ -1037,7 +1072,7 @@ err:
 
 static int
 tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-              u32 portid)
+              u32 portid, struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
        int err = 0;
@@ -1048,6 +1083,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
        if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
                         RTM_NEWACTION, 0, 0) <= 0) {
+               NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while deleting TC action");
                kfree_skb(skb);
                return -EINVAL;
        }
@@ -1060,16 +1096,18 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-                         struct nlmsghdr *n, u32 portid, int ovr)
+                         struct nlmsghdr *n, u32 portid, int ovr,
+                         struct netlink_ext_ack *extack)
 {
        int ret = 0;
        LIST_HEAD(actions);
 
-       ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+       ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+                             extack);
        if (ret)
                return ret;
 
-       return tcf_add_notify(net, n, &actions, portid);
+       return tcf_add_notify(net, n, &actions, portid, extack);
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
@@ -1097,7 +1135,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
                return ret;
 
        if (tca[TCA_ACT_TAB] == NULL) {
-               pr_notice("tc_ctl_action: received NO action attribs\n");
+               NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
                return -EINVAL;
        }
 
@@ -1113,17 +1151,18 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
                if (n->nlmsg_flags & NLM_F_REPLACE)
                        ovr = 1;
 replay:
-               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+                                    extack);
                if (ret == -EAGAIN)
                        goto replay;
                break;
        case RTM_DELACTION:
                ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-                                   portid, RTM_DELACTION);
+                                   portid, RTM_DELACTION, extack);
                break;
        case RTM_GETACTION:
                ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-                                   portid, RTM_GETACTION);
+                                   portid, RTM_GETACTION, extack);
                break;
        default:
                BUG();
@@ -1218,7 +1257,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
        if (nest == NULL)
                goto out_module_put;
 
-       ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
+       ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
        if (ret < 0)
                goto out_module_put;
 
@@ -1454,6 +1493,7 @@ static struct pernet_operations tcf_action_net_ops = {
        .exit = tcf_action_net_exit,
        .id = &tcf_action_net_id,
        .size = sizeof(struct tcf_action_net),
+       .async = true,
 };
 
 static int __init tc_action_init(void)
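
The ".async = true" line that recurs in every pernet_operations below
marks the operations as safe to run without the global net_mutex
serialization while namespaces are created and torn down (part of the
net_mutex removal work in this cycle; the flag was dropped again once
all operations were converted). Sketched once for all of them, with
hypothetical names:

	#include <net/net_namespace.h>	/* pernet_operations, net_generic */

	struct foo_net { int dummy; };
	static unsigned int foo_net_id;

	static int __net_init foo_net_init(struct net *net)  { return 0; }
	static void __net_exit foo_net_exit(struct net *net) { }

	static struct pernet_operations foo_net_ops = {
		.init  = foo_net_init,
		.exit  = foo_net_exit,
		.id    = &foo_net_id,
		.size  = sizeof(struct foo_net),
		.async = true,	/* no ordering dependency on other pernet ops */
	};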
index b3f2c15..da72e0c 100644 (file)
@@ -272,7 +272,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act,
-                       int replace, int bind)
+                       int replace, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
        struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -367,14 +367,16 @@ static void tcf_bpf_cleanup(struct tc_action *act)
 
 static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
                          struct netlink_callback *cb, int type,
-                         const struct tc_action_ops *ops)
+                         const struct tc_action_ops *ops,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
@@ -411,6 +413,7 @@ static struct pernet_operations bpf_net_ops = {
        .exit_batch = bpf_exit_net,
        .id   = &bpf_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init bpf_init_module(void)
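
The same three-callback change repeats for every action module below
(connmark, csum, gact, ife, ipt/xt, mirred, nat, pedit, police, sample,
simple, skbedit, skbmod, tunnel_key, vlan). Collected in one place, the
struct tc_action_ops callbacks look like this after the series:

	int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est,
		    struct tc_action **a, int ovr, int bind,
		    struct netlink_ext_ack *extack);
	int (*walk)(struct net *net, struct sk_buff *skb,
		    struct netlink_callback *cb, int type,
		    const struct tc_action_ops *ops,
		    struct netlink_ext_ack *extack);
	int (*lookup)(struct net *net, struct tc_action **a, u32 index,
		      struct netlink_ext_ack *extack);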
index 2b15ba8..371e5e4 100644 (file)
@@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                             struct nlattr *est, struct tc_action **a,
-                            int ovr, int bind)
+                            int ovr, int bind,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
        struct nlattr *tb[TCA_CONNMARK_MAX + 1];
@@ -176,14 +177,16 @@ nla_put_failure:
 
 static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
                               struct netlink_callback *cb, int type,
-                              const struct tc_action_ops *ops)
+                              const struct tc_action_ops *ops,
+                              struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
+                              struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
@@ -219,6 +222,7 @@ static struct pernet_operations connmark_net_ops = {
        .exit_batch = connmark_exit_net,
        .id   = &connmark_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init connmark_init_module(void)
index b7ba9b0..1fb1f1f 100644 (file)
@@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a, int ovr,
-                        int bind)
+                        int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
        struct tcf_csum_params *params_old, *params_new;
@@ -631,14 +631,16 @@ static void tcf_csum_cleanup(struct tc_action *a)
 
 static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
                           struct netlink_callback *cb, int type,
-                          const struct tc_action_ops *ops)
+                          const struct tc_action_ops *ops,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
 
@@ -675,6 +677,7 @@ static struct pernet_operations csum_net_ops = {
        .exit_batch = csum_exit_net,
        .id   = &csum_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_DESCRIPTION("Checksum updating actions");
index b56986d..7456325 100644 (file)
@@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind)
+                        int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
        struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -201,14 +201,16 @@ nla_put_failure:
 
 static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
                           struct netlink_callback *cb, int type,
-                          const struct tc_action_ops *ops)
+                          const struct tc_action_ops *ops,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
 
@@ -245,6 +247,7 @@ static struct pernet_operations gact_net_ops = {
        .exit_batch = gact_exit_net,
        .id   = &gact_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
index 5954e99..555b1ca 100644 (file)
@@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a,
-                       int ovr, int bind)
+                       int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
        struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -824,14 +824,16 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
 
 static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
                          struct netlink_callback *cb, int type,
-                         const struct tc_action_ops *ops)
+                         const struct tc_action_ops *ops,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
 
@@ -868,6 +870,7 @@ static struct pernet_operations ife_net_ops = {
        .exit_batch = ife_exit_net,
        .id   = &ife_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init ife_init_module(void)
index 06e380a..1086671 100644 (file)
@@ -193,7 +193,7 @@ err1:
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a, int ovr,
-                       int bind)
+                       int bind, struct netlink_ext_ack *extack)
 {
        return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
                              bind);
@@ -201,7 +201,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a, int ovr,
-                      int bind)
+                      int bind, struct netlink_ext_ack *extack)
 {
        return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
                              bind);
@@ -303,14 +303,16 @@ nla_put_failure:
 
 static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
                          struct netlink_callback *cb, int type,
-                         const struct tc_action_ops *ops)
+                         const struct tc_action_ops *ops,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
@@ -347,18 +349,21 @@ static struct pernet_operations ipt_net_ops = {
        .exit_batch = ipt_exit_net,
        .id   = &ipt_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
                         struct netlink_callback *cb, int type,
-                        const struct tc_action_ops *ops)
+                        const struct tc_action_ops *ops,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, xt_net_id);
 
@@ -395,6 +400,7 @@ static struct pernet_operations xt_net_ops = {
        .exit_batch = xt_exit_net,
        .id   = &xt_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
index e6ff88f..64c8657 100644 (file)
@@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a, int ovr,
-                          int bind)
+                          int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
        struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        bool exists = false;
        int ret;
 
-       if (nla == NULL)
+       if (!nla) {
+               NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
                return -EINVAL;
-       ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
+       }
+       ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
        if (ret < 0)
                return ret;
-       if (tb[TCA_MIRRED_PARMS] == NULL)
+       if (!tb[TCA_MIRRED_PARMS]) {
+               NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
                return -EINVAL;
+       }
        parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
        exists = tcf_idr_check(tn, parm->index, a, bind);
@@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        default:
                if (exists)
                        tcf_idr_release(*a, bind);
+               NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
                return -EINVAL;
        }
        if (parm->ifindex) {
@@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        }
 
        if (!exists) {
-               if (dev == NULL)
+               if (!dev) {
+                       NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
                        return -EINVAL;
+               }
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_mirred_ops, bind, true);
                if (ret)
@@ -265,14 +272,16 @@ nla_put_failure:
 
 static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
                             struct netlink_callback *cb, int type,
-                            const struct tc_action_ops *ops)
+                            const struct tc_action_ops *ops,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
@@ -344,6 +353,7 @@ static struct pernet_operations mirred_net_ops = {
        .exit_batch = mirred_exit_net,
        .id   = &mirred_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002)");
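
act_mirred uses NL_SET_ERR_MSG_MOD() rather than plain NL_SET_ERR_MSG():
the _MOD variant prefixes the message with the module name, so userspace
sees e.g. "act_mirred: Missing required mirred parameters". Its
definition in include/linux/netlink.h is essentially:

	#define NL_SET_ERR_MSG_MOD(extack, msg) \
		NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)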
index 98c6a4b..b1bc757 100644 (file)
@@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 };
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
-                       struct tc_action **a, int ovr, int bind)
+                       struct tc_action **a, int ovr, int bind,
+                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
        struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -277,14 +278,16 @@ nla_put_failure:
 
 static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
                          struct netlink_callback *cb, int type,
-                         const struct tc_action_ops *ops)
+                         const struct tc_action_ops *ops,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
 
@@ -320,6 +323,7 @@ static struct pernet_operations nat_net_ops = {
        .exit_batch = nat_exit_net,
        .id   = &nat_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_DESCRIPTION("Stateless NAT actions");
index 349beaf..5e8cc8f 100644 (file)
@@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         int ovr, int bind)
+                         int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
        struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -419,14 +419,16 @@ nla_put_failure:
 
 static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
                            struct netlink_callback *cb, int type,
-                           const struct tc_action_ops *ops)
+                           const struct tc_action_ops *ops,
+                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
@@ -463,6 +465,7 @@ static struct pernet_operations pedit_net_ops = {
        .exit_batch = pedit_exit_net,
        .id   = &pedit_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
index 95d3c90..51fe4fe 100644 (file)
@@ -58,11 +58,12 @@ static struct tc_action_ops act_police_ops;
 
 static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
                                 struct netlink_callback *cb, int type,
-                                const struct tc_action_ops *ops)
+                                const struct tc_action_ops *ops,
+                                struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, police_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -74,7 +75,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_act_police_init(struct net *net, struct nlattr *nla,
                               struct nlattr *est, struct tc_action **a,
-                              int ovr, int bind)
+                              int ovr, int bind,
+                              struct netlink_ext_ack *extack)
 {
        int ret = 0, err;
        struct nlattr *tb[TCA_POLICE_MAX + 1];
@@ -304,7 +306,8 @@ nla_put_failure:
        return -1;
 }
 
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, police_net_id);
 
@@ -344,6 +347,7 @@ static struct pernet_operations police_net_ops = {
        .exit_batch = police_exit_net,
        .id   = &police_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init police_init_module(void)
index 1ba0df2..238dfd2 100644 (file)
@@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a, int ovr,
-                          int bind)
+                          int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
        struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -202,14 +202,16 @@ nla_put_failure:
 
 static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
                             struct netlink_callback *cb, int type,
-                            const struct tc_action_ops *ops)
+                            const struct tc_action_ops *ops,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
 
@@ -246,6 +248,7 @@ static struct pernet_operations sample_net_ops = {
        .exit_batch = sample_exit_net,
        .id   = &sample_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init sample_init_module(void)
index 425eac1..91816d7 100644 (file)
@@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind)
+                        int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
        struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -170,14 +170,16 @@ nla_put_failure:
 
 static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
                           struct netlink_callback *cb, int type,
-                          const struct tc_action_ops *ops)
+                          const struct tc_action_ops *ops,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
 
@@ -214,6 +216,7 @@ static struct pernet_operations simp_net_ops = {
        .exit_batch = simp_exit_net,
        .id   = &simp_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2005)");
index 5a3f691..7971510 100644 (file)
@@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                            struct nlattr *est, struct tc_action **a,
-                           int ovr, int bind)
+                           int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
        struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
@@ -208,14 +208,16 @@ nla_put_failure:
 
 static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
                              struct netlink_callback *cb, int type,
-                             const struct tc_action_ops *ops)
+                             const struct tc_action_ops *ops,
+                             struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
+                             struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
@@ -251,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = {
        .exit_batch = skbedit_exit_net,
        .id   = &skbedit_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
index fa97526..febec75 100644 (file)
@@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind)
+                          int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
        struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -232,14 +232,16 @@ nla_put_failure:
 
 static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
                             struct netlink_callback *cb, int type,
-                            const struct tc_action_ops *ops)
+                            const struct tc_action_ops *ops,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
@@ -276,6 +278,7 @@ static struct pernet_operations skbmod_net_ops = {
        .exit_batch = skbmod_exit_net,
        .id   = &skbmod_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
index 0e23aac..9169b7e 100644 (file)
@@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind)
+                          int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
        struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -291,14 +291,16 @@ nla_put_failure:
 
 static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
                             struct netlink_callback *cb, int type,
-                            const struct tc_action_ops *ops)
+                            const struct tc_action_ops *ops,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
@@ -335,6 +337,7 @@ static struct pernet_operations tunnel_key_net_ops = {
        .exit_batch = tunnel_key_exit_net,
        .id   = &tunnel_key_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init tunnel_key_init_module(void)
index e1a1b3f..c2ee7fd 100644 (file)
@@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind)
+                        int ovr, int bind, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
        struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -267,14 +267,16 @@ nla_put_failure:
 
 static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
                           struct netlink_callback *cb, int type,
-                          const struct tc_action_ops *ops)
+                          const struct tc_action_ops *ops,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-       return tcf_generic_walker(tn, skb, cb, type, ops);
+       return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
@@ -311,6 +313,7 @@ static struct pernet_operations vlan_net_ops = {
        .exit_batch = vlan_exit_net,
        .id   = &vlan_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init vlan_init_module(void)
index 2bc1bc2..19f9f42 100644 (file)
@@ -376,17 +376,12 @@ struct tcf_net {
 static unsigned int tcf_net_id;
 
 static int tcf_block_insert(struct tcf_block *block, struct net *net,
-                           u32 block_index, struct netlink_ext_ack *extack)
+                           struct netlink_ext_ack *extack)
 {
        struct tcf_net *tn = net_generic(net, tcf_net_id);
-       int err;
 
-       err = idr_alloc_u32(&tn->idr, block, &block_index, block_index,
-                           GFP_KERNEL);
-       if (err)
-               return err;
-       block->index = block_index;
-       return 0;
+       return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+                            GFP_KERNEL);
 }
 
 static void tcf_block_remove(struct tcf_block *block, struct net *net)
@@ -397,6 +392,7 @@ static void tcf_block_remove(struct tcf_block *block, struct net *net)
 }
 
 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
+                                         u32 block_index,
                                          struct netlink_ext_ack *extack)
 {
        struct tcf_block *block;
@@ -419,10 +415,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
                err = -ENOMEM;
                goto err_chain_create;
        }
-       block->net = qdisc_net(q);
        block->refcnt = 1;
        block->net = net;
-       block->q = q;
+       block->index = block_index;
+
+       /* Don't store q pointer for blocks which are shared */
+       if (!tcf_block_shared(block))
+               block->q = q;
        return block;
 
 err_chain_create:
@@ -518,13 +517,12 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
        }
 
        if (!block) {
-               block = tcf_block_create(net, q, extack);
+               block = tcf_block_create(net, q, ei->block_index, extack);
                if (IS_ERR(block))
                        return PTR_ERR(block);
                created = true;
-               if (ei->block_index) {
-                       err = tcf_block_insert(block, net,
-                                              ei->block_index, extack);
+               if (tcf_block_shared(block)) {
+                       err = tcf_block_insert(block, net, extack);
                        if (err)
                                goto err_block_insert;
                }
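
Both hunks above hinge on tcf_block_shared(): a shared block is created
with a nonzero, user-supplied block index, while a per-qdisc block keeps
index 0, so the helper (defined alongside this code in cls_api.c) is
essentially an index test:

	static bool tcf_block_shared(struct tcf_block *block)
	{
		return block->index;	/* nonzero index <=> shareable */
	}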
@@ -1399,13 +1397,18 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
                    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
                        continue;
                if (!tcf_chain_dump(chain, q, parent, skb, cb,
-                                   index_start, &index))
+                                   index_start, &index)) {
+                       err = -EMSGSIZE;
                        break;
+               }
        }
 
        cb->args[0] = index;
 
 out:
+       /* If we made no progress, the error (EMSGSIZE) is real */
+       if (skb->len == 0 && err)
+               return err;
        return skb->len;
 }
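
The skb->len check encodes the usual netlink dump contract: a positive
return means "buffer full, resume on the next callback", and an error
code is only meaningful when nothing at all was written. A minimal
sketch of the contract, with a hypothetical fill helper:

	static int foo_dump(struct sk_buff *skb, struct netlink_callback *cb)
	{
		int err = foo_fill_entries(skb, cb); /* may return -EMSGSIZE */

		if (skb->len == 0 && err)
			return err;	/* no progress: the error is real */
		return skb->len;	/* progress: netlink calls back for more */
	}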
 
@@ -1434,7 +1437,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                if (exts->police && tb[exts->police]) {
                        act = tcf_action_init_1(net, tp, tb[exts->police],
                                                rate_tlv, "police", ovr,
-                                               TCA_ACT_BIND);
+                                               TCA_ACT_BIND, extack);
                        if (IS_ERR(act))
                                return PTR_ERR(act);
 
@@ -1447,7 +1450,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
                        err = tcf_action_init(net, tp, tb[exts->action],
                                              rate_tlv, NULL, ovr, TCA_ACT_BIND,
-                                             &actions);
+                                             &actions, extack);
                        if (err)
                                return err;
                        list_for_each_entry(act, &actions, list)
@@ -1615,6 +1618,7 @@ static struct pernet_operations tcf_net_ops = {
        .exit = tcf_net_exit,
        .id   = &tcf_net_id,
        .size = sizeof(struct tcf_net),
+       .async = true,
 };
 
 static int __init tc_filter_init(void)
index 6c7601a..ed8b6a2 100644 (file)
@@ -96,7 +96,7 @@ struct tc_u_hnode {
 
 struct tc_u_common {
        struct tc_u_hnode __rcu *hlist;
-       struct tcf_block        *block;
+       void                    *ptr;
        int                     refcnt;
        struct idr              handle_idr;
        struct hlist_node       hnode;
@@ -330,9 +330,25 @@ static struct hlist_head *tc_u_common_hash;
 #define U32_HASH_SHIFT 10
 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
 
+static void *tc_u_common_ptr(const struct tcf_proto *tp)
+{
+       struct tcf_block *block = tp->chain->block;
+
+       /* Block sharing is currently supported only
+        * for classless qdiscs. In that case we use block
+        * for tc_u_common identification. In case the
+        * block is not shared, block->q is a valid pointer
+        * and we can use that. That works for classful qdiscs.
+        */
+       if (tcf_block_shared(block))
+               return block;
+       else
+               return block->q;
+}
+
 static unsigned int tc_u_hash(const struct tcf_proto *tp)
 {
-       return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
+       return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT);
 }
 
 static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
@@ -342,7 +358,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
 
        h = tc_u_hash(tp);
        hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
-               if (tc->block == tp->chain->block)
+               if (tc->ptr == tc_u_common_ptr(tp))
                        return tc;
        }
        return NULL;
@@ -371,7 +387,7 @@ static int u32_init(struct tcf_proto *tp)
                        kfree(root_ht);
                        return -ENOBUFS;
                }
-               tp_c->block = tp->chain->block;
+               tp_c->ptr = tc_u_common_ptr(tp);
                INIT_HLIST_NODE(&tp_c->hnode);
                idr_init(&tp_c->handle_idr);
 
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
new file mode 100644 (file)
index 0000000..a5f34e9
--- /dev/null
@@ -0,0 +1,257 @@
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+
+struct em_ipt_match {
+       const struct xt_match *match;
+       u32 hook;
+       u8 match_data[0] __aligned(8);
+};
+
+struct em_ipt_xt_match {
+       char *match_name;
+       int (*validate_match_data)(struct nlattr **tb, u8 mrev);
+};
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+       [TCA_EM_IPT_MATCH_NAME]         = { .type = NLA_STRING,
+                                           .len = XT_EXTENSION_MAXNAMELEN },
+       [TCA_EM_IPT_MATCH_REVISION]     = { .type = NLA_U8 },
+       [TCA_EM_IPT_HOOK]               = { .type = NLA_U32 },
+       [TCA_EM_IPT_NFPROTO]            = { .type = NLA_U8 },
+       [TCA_EM_IPT_MATCH_DATA]         = { .type = NLA_UNSPEC },
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+       struct xt_mtchk_param mtpar = {};
+       union {
+               struct ipt_entry e4;
+               struct ip6t_entry e6;
+       } e = {};
+
+       mtpar.net       = net;
+       mtpar.table     = "filter";
+       mtpar.hook_mask = 1 << im->hook;
+       mtpar.family    = im->match->family;
+       mtpar.match     = im->match;
+       mtpar.entryinfo = &e;
+       mtpar.matchinfo = (void *)im->match_data;
+       return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+       if (mrev != 0) {
+               pr_err("only policy match revision 0 supported");
+               return -EINVAL;
+       }
+
+       if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
+               pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
+       {
+               .match_name = "policy",
+               .validate_match_data = policy_validate_match_data
+       },
+       {}
+};
+
+static struct xt_match *get_xt_match(struct nlattr **tb)
+{
+       const struct em_ipt_xt_match *m;
+       struct nlattr *mname_attr;
+       u8 nfproto, mrev = 0;
+       int ret;
+
+       mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
+       for (m = em_ipt_xt_matches; m->match_name; m++) {
+               if (!nla_strcmp(mname_attr, m->match_name))
+                       break;
+       }
+
+       if (!m->match_name) {
+               pr_err("Unsupported xt match");
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (tb[TCA_EM_IPT_MATCH_REVISION])
+               mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+       ret = m->validate_match_data(tb, mrev);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+       return xt_request_find_match(nfproto, m->match_name, mrev);
+}
+
+static int em_ipt_change(struct net *net, void *data, int data_len,
+                        struct tcf_ematch *em)
+{
+       struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+       struct em_ipt_match *im = NULL;
+       struct xt_match *match;
+       int mdata_len, ret;
+
+       ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+                       NULL);
+       if (ret < 0)
+               return ret;
+
+       if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+           !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
+               return -EINVAL;
+
+       match = get_xt_match(tb);
+       if (IS_ERR(match)) {
+               pr_err("unable to load match\n");
+               return PTR_ERR(match);
+       }
+
+       mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+       im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+       if (!im) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       im->match = match;
+       im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+       nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+       ret = check_match(net, im, mdata_len);
+       if (ret)
+               goto err;
+
+       em->datalen = sizeof(*im) + mdata_len;
+       em->data = (unsigned long)im;
+       return 0;
+
+err:
+       kfree(im);
+       module_put(match->me);
+       return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+       struct em_ipt_match *im = (void *)em->data;
+
+       if (!im)
+               return;
+
+       if (im->match->destroy) {
+               struct xt_mtdtor_param par = {
+                       .net = em->net,
+                       .match = im->match,
+                       .matchinfo = im->match_data,
+                       .family = im->match->family
+               };
+               im->match->destroy(&par);
+       }
+       module_put(im->match->me);
+       kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+                       struct tcf_pkt_info *info)
+{
+       const struct em_ipt_match *im = (const void *)em->data;
+       struct xt_action_param acpar = {};
+       struct net_device *indev = NULL;
+       struct nf_hook_state state;
+       int ret;
+
+       rcu_read_lock();
+
+       if (skb->skb_iif)
+               indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+       nf_hook_state_init(&state, im->hook, im->match->family,
+                          indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
+
+       acpar.match = im->match;
+       acpar.matchinfo = im->match_data;
+       acpar.state = &state;
+
+       ret = im->match->match(skb, &acpar);
+
+       rcu_read_unlock();
+       return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+       struct em_ipt_match *im = (void *)em->data;
+
+       if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+               return -EMSGSIZE;
+       if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+               return -EMSGSIZE;
+       if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+               return -EMSGSIZE;
+       if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+               return -EMSGSIZE;
+       if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+                   im->match->usersize ?: im->match->matchsize,
+                   im->match_data) < 0)
+               return -EMSGSIZE;
+
+       return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+       .kind     = TCF_EM_IPT,
+       .change   = em_ipt_change,
+       .destroy  = em_ipt_destroy,
+       .match    = em_ipt_match,
+       .dump     = em_ipt_dump,
+       .owner    = THIS_MODULE,
+       .link     = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+       return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+       tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);
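
Additional xt matches can be exposed through this ematch by extending
em_ipt_xt_matches[] with a name and a validator. A sketch with a
hypothetical second entry (the "conntrack" wiring below is illustrative
only, not part of this patch):

	static int conntrack_validate_match_data(struct nlattr **tb, u8 mrev)
	{
		/* hypothetical: accept only revision 0 */
		return mrev == 0 ? 0 : -EINVAL;
	}

	static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
		{
			.match_name = "policy",
			.validate_match_data = policy_validate_match_data
		},
		{
			.match_name = "conntrack",
			.validate_match_data = conntrack_validate_match_data
		},
		{}
	};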
index d512f49..68f9d94 100644 (file)
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                               unsigned int len)
 {
+       bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                 * If child was empty even before update then backlog
                 * counter is screwed and we skip notification because
                 * parent class is already passive.
+                *
+                * If the original child was offloaded then it is allowed
+                * to appear empty, so the parent is notified anyway.
                 */
-               notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+               notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+                                                      !qdisc_is_offloaded);
                /* TODO: perform the search on a per txq basis */
                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
@@ -2128,6 +2133,7 @@ static void __net_exit psched_net_exit(struct net *net)
 static struct pernet_operations psched_net_ops = {
        .init = psched_net_init,
        .exit = psched_net_exit,
+       .async = true,
 };
 
 static int __init pktsched_init(void)
index efbf51f..222e53d 100644 (file)
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
        sch->q.qlen = 0;
 }
 
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
 {
-       struct prio_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_prio_qopt_offload opt = {
                .handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;
 
-       if (enable) {
+       if (qopt) {
                opt.command = TC_PRIO_REPLACE;
-               opt.replace_params.bands = q->bands;
-               memcpy(&opt.replace_params.priomap, q->prio2band,
+               opt.replace_params.bands = qopt->bands;
+               memcpy(&opt.replace_params.priomap, qopt->priomap,
                       TC_PRIO_MAX + 1);
                opt.replace_params.qstats = &sch->qstats;
        } else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
        struct prio_sched_data *q = qdisc_priv(sch);
 
        tcf_block_put(q->block);
-       prio_offload(sch, false);
+       prio_offload(sch, NULL);
        for (prio = 0; prio < q->bands; prio++)
                qdisc_destroy(q->queues[prio]);
 }
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
                }
        }
 
+       prio_offload(sch, qopt);
        sch_tree_lock(sch);
        q->bands = qopt->bands;
        memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
        }
 
        sch_tree_unlock(sch);
-       prio_offload(sch, true);
        return 0;
 }
 
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                      struct Qdisc **old, struct netlink_ext_ack *extack)
 {
        struct prio_sched_data *q = qdisc_priv(sch);
+       struct tc_prio_qopt_offload graft_offload;
+       struct net_device *dev = qdisc_dev(sch);
        unsigned long band = arg - 1;
+       bool any_qdisc_is_offloaded;
+       int err;
 
        if (new == NULL)
                new = &noop_qdisc;
 
        *old = qdisc_replace(sch, new, &q->queues[band]);
+
+       if (!tc_can_offload(dev))
+               return 0;
+
+       graft_offload.handle = sch->handle;
+       graft_offload.parent = sch->parent;
+       graft_offload.graft_params.band = band;
+       graft_offload.graft_params.child_handle = new->handle;
+       graft_offload.command = TC_PRIO_GRAFT;
+
+       err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+                                           &graft_offload);
+
+       /* Don't report an error if the graft is part of a destroy operation. */
+       if (err && new != &noop_qdisc) {
+               /* Don't report an error if neither the parent nor the old
+                * nor the new child is offloaded.
+                */
+               any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+               any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+               if (*old)
+                       any_qdisc_is_offloaded |= (*old)->flags &
+                                                  TCQ_F_OFFLOADED;
+
+               if (any_qdisc_is_offloaded)
+                       NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+       }
+
        return 0;
 }
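
On the driver side the new TC_PRIO_GRAFT command arrives through the
same ndo_setup_tc() path as the existing prio offload commands. A sketch
of the dispatch in a hypothetical driver (only the enum values and the
opt fields come from this series):

	static int foo_setup_tc_prio(struct net_device *dev,
				     struct tc_prio_qopt_offload *opt)
	{
		switch (opt->command) {
		case TC_PRIO_REPLACE:
			/* program opt->replace_params.bands / .priomap */
			return 0;
		case TC_PRIO_DESTROY:
			return 0;
		case TC_PRIO_GRAFT:
			/* opt->graft_params.band and .child_handle name the
			 * slot; an error here triggers the extack message
			 * above when an offloaded qdisc is involved.
			 */
			return -EOPNOTSUPP;
		default:
			return -EOPNOTSUPP;
		}
	}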
 
index 6776582..e845e45 100644 (file)
@@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
          offload.o stream_sched.o stream_sched_prio.o \
          stream_sched_rr.o stream_interleave.o
 
+sctp_diag-y := diag.o
+
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
 sctp-$(CONFIG_PROC_FS) += proc.o
 sctp-$(CONFIG_SYSCTL) += sysctl.o
index 291c97b..8f6c2e8 100644 (file)
@@ -81,6 +81,12 @@ const char *sctp_cname(const union sctp_subtype cid)
        case SCTP_CID_RECONF:
                return "RECONF";
 
+       case SCTP_CID_I_DATA:
+               return "I_DATA";
+
+       case SCTP_CID_I_FWD_TSN:
+               return "I_FWD_TSN";
+
        default:
                break;
        }
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
new file mode 100644 (file)
index 0000000..078f01a
--- /dev/null
@@ -0,0 +1,557 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions implement sctp diag support.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ *    email address(es):
+ *    lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ *    Xin Long <lucien.xin@gmail.com>
+ */
+
+#include <linux/module.h>
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+#include <net/sctp/sctp.h>
+
+static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+                              void *info);
+
+/* define some functions to make asoc/ep fill look clean */
+static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
+                                       struct sock *sk,
+                                       struct sctp_association *asoc)
+{
+       union sctp_addr laddr, paddr;
+       struct dst_entry *dst;
+       struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
+
+       laddr = list_entry(asoc->base.bind_addr.address_list.next,
+                          struct sctp_sockaddr_entry, list)->a;
+       paddr = asoc->peer.primary_path->ipaddr;
+       dst = asoc->peer.primary_path->dst;
+
+       r->idiag_family = sk->sk_family;
+       r->id.idiag_sport = htons(asoc->base.bind_addr.port);
+       r->id.idiag_dport = htons(asoc->peer.port);
+       r->id.idiag_if = dst ? dst->dev->ifindex : 0;
+       sock_diag_save_cookie(sk, r->id.idiag_cookie);
+
+#if IS_ENABLED(CONFIG_IPV6)
+       if (sk->sk_family == AF_INET6) {
+               *(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr;
+               *(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr;
+       } else
+#endif
+       {
+               memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+               memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+
+               r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr;
+               r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr;
+       }
+
+       r->idiag_state = asoc->state;
+       if (timer_pending(t3_rtx)) {
+               r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+               r->idiag_retrans = asoc->rtx_data_chunks;
+               r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+       } else {
+               r->idiag_timer = 0;
+               r->idiag_retrans = 0;
+               r->idiag_expires = 0;
+       }
+}
+
+static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
+                                        struct list_head *address_list)
+{
+       struct sctp_sockaddr_entry *laddr;
+       int addrlen = sizeof(struct sockaddr_storage);
+       int addrcnt = 0;
+       struct nlattr *attr;
+       void *info = NULL;
+
+       list_for_each_entry_rcu(laddr, address_list, list)
+               addrcnt++;
+
+       attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt);
+       if (!attr)
+               return -EMSGSIZE;
+
+       info = nla_data(attr);
+       list_for_each_entry_rcu(laddr, address_list, list) {
+               memcpy(info, &laddr->a, sizeof(laddr->a));
+               memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
+               info += addrlen;
+       }
+
+       return 0;
+}
+
+static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
+                                       struct sctp_association *asoc)
+{
+       int addrlen = sizeof(struct sockaddr_storage);
+       struct sctp_transport *from;
+       struct nlattr *attr;
+       void *info = NULL;
+
+       attr = nla_reserve(skb, INET_DIAG_PEERS,
+                          addrlen * asoc->peer.transport_count);
+       if (!attr)
+               return -EMSGSIZE;
+
+       info = nla_data(attr);
+       list_for_each_entry(from, &asoc->peer.transport_addr_list,
+                           transports) {
+               memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
+               memset(info + sizeof(from->ipaddr), 0,
+                      addrlen - sizeof(from->ipaddr));
+               info += addrlen;
+       }
+
+       return 0;
+}
+
+/* sctp asoc/ep fill */
+static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
+                              struct sk_buff *skb,
+                              const struct inet_diag_req_v2 *req,
+                              struct user_namespace *user_ns,
+                              int portid, u32 seq, u16 nlmsg_flags,
+                              const struct nlmsghdr *unlh,
+                              bool net_admin)
+{
+       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+       struct list_head *addr_list;
+       struct inet_diag_msg *r;
+       struct nlmsghdr  *nlh;
+       int ext = req->idiag_ext;
+       struct sctp_infox infox;
+       void *info = NULL;
+
+       nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
+                       nlmsg_flags);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       r = nlmsg_data(nlh);
+       BUG_ON(!sk_fullsock(sk));
+
+       if (asoc) {
+               inet_diag_msg_sctpasoc_fill(r, sk, asoc);
+       } else {
+               inet_diag_msg_common_fill(r, sk);
+               r->idiag_state = sk->sk_state;
+               r->idiag_timer = 0;
+               r->idiag_retrans = 0;
+       }
+
+       if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
+               goto errout;
+
+       if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
+               u32 mem[SK_MEMINFO_VARS];
+               int amt;
+
+               if (asoc && asoc->ep->sndbuf_policy)
+                       amt = asoc->sndbuf_used;
+               else
+                       amt = sk_wmem_alloc_get(sk);
+               mem[SK_MEMINFO_WMEM_ALLOC] = amt;
+               if (asoc && asoc->ep->rcvbuf_policy)
+                       amt = atomic_read(&asoc->rmem_alloc);
+               else
+                       amt = sk_rmem_alloc_get(sk);
+               mem[SK_MEMINFO_RMEM_ALLOC] = amt;
+               mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+               mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+               mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+               mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+               mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+               mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+               mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+
+               if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
+                       goto errout;
+       }
+
+       if (ext & (1 << (INET_DIAG_INFO - 1))) {
+               struct nlattr *attr;
+
+               attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
+                                        sizeof(struct sctp_info),
+                                        INET_DIAG_PAD);
+               if (!attr)
+                       goto errout;
+
+               info = nla_data(attr);
+       }
+       infox.sctpinfo = (struct sctp_info *)info;
+       infox.asoc = asoc;
+       sctp_diag_get_info(sk, r, &infox);
+
+       addr_list = asoc ? &asoc->base.bind_addr.address_list
+                        : &ep->base.bind_addr.address_list;
+       if (inet_diag_msg_sctpladdrs_fill(skb, addr_list))
+               goto errout;
+
+       if (asoc && (ext & (1 << (INET_DIAG_CONG - 1))))
+               if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0)
+                       goto errout;
+
+       if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc))
+               goto errout;
+
+       nlmsg_end(skb, nlh);
+       return 0;
+
+errout:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+/* callback and param */
+struct sctp_comm_param {
+       struct sk_buff *skb;
+       struct netlink_callback *cb;
+       const struct inet_diag_req_v2 *r;
+       const struct nlmsghdr *nlh;
+       bool net_admin;
+};
+
+static size_t inet_assoc_attr_size(struct sctp_association *asoc)
+{
+       int addrlen = sizeof(struct sockaddr_storage);
+       int addrcnt = 0;
+       struct sctp_sockaddr_entry *laddr;
+
+       list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list,
+                               list)
+               addrcnt++;
+
+       return    nla_total_size(sizeof(struct sctp_info))
+               + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+               + nla_total_size(1) /* INET_DIAG_TOS */
+               + nla_total_size(1) /* INET_DIAG_TCLASS */
+               + nla_total_size(4) /* INET_DIAG_MARK */
+               + nla_total_size(addrlen * asoc->peer.transport_count)
+               + nla_total_size(addrlen * addrcnt)
+               + nla_total_size(sizeof(struct inet_diag_meminfo))
+               + nla_total_size(sizeof(struct inet_diag_msg))
+               + 64;
+}
+
+static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
+{
+       struct sctp_association *assoc = tsp->asoc;
+       struct sock *sk = tsp->asoc->base.sk;
+       struct sctp_comm_param *commp = p;
+       struct sk_buff *in_skb = commp->skb;
+       const struct inet_diag_req_v2 *req = commp->r;
+       const struct nlmsghdr *nlh = commp->nlh;
+       struct net *net = sock_net(in_skb->sk);
+       struct sk_buff *rep;
+       int err;
+
+       err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
+       if (err)
+               goto out;
+
+       err = -ENOMEM;
+       rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL);
+       if (!rep)
+               goto out;
+
+       lock_sock(sk);
+       if (sk != assoc->base.sk) {
+               release_sock(sk);
+               sk = assoc->base.sk;
+               lock_sock(sk);
+       }
+       err = inet_sctp_diag_fill(sk, assoc, rep, req,
+                                 sk_user_ns(NETLINK_CB(in_skb).sk),
+                                 NETLINK_CB(in_skb).portid,
+                                 nlh->nlmsg_seq, 0, nlh,
+                                 commp->net_admin);
+       release_sock(sk);
+       if (err < 0) {
+               WARN_ON(err == -EMSGSIZE);
+               kfree_skb(rep);
+               goto out;
+       }
+
+       err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
+                             MSG_DONTWAIT);
+       if (err > 0)
+               err = 0;
+out:
+       return err;
+}
+
+static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+{
+       struct sctp_endpoint *ep = tsp->asoc->ep;
+       struct sctp_comm_param *commp = p;
+       struct sock *sk = ep->base.sk;
+       struct sk_buff *skb = commp->skb;
+       struct netlink_callback *cb = commp->cb;
+       const struct inet_diag_req_v2 *r = commp->r;
+       struct sctp_association *assoc;
+       int err = 0;
+
+       lock_sock(sk);
+       list_for_each_entry(assoc, &ep->asocs, asocs) {
+               if (cb->args[4] < cb->args[1])
+                       goto next;
+
+               if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) &&
+                   r->id.idiag_sport)
+                       goto next;
+               if (r->id.idiag_dport != htons(assoc->peer.port) &&
+                   r->id.idiag_dport)
+                       goto next;
+
+               if (!cb->args[3] &&
+                   inet_sctp_diag_fill(sk, NULL, skb, r,
+                                       sk_user_ns(NETLINK_CB(cb->skb).sk),
+                                       NETLINK_CB(cb->skb).portid,
+                                       cb->nlh->nlmsg_seq,
+                                       NLM_F_MULTI, cb->nlh,
+                                       commp->net_admin) < 0) {
+                       err = 1;
+                       goto release;
+               }
+               cb->args[3] = 1;
+
+               if (inet_sctp_diag_fill(sk, assoc, skb, r,
+                                       sk_user_ns(NETLINK_CB(cb->skb).sk),
+                                       NETLINK_CB(cb->skb).portid,
+                                       cb->nlh->nlmsg_seq, 0, cb->nlh,
+                                       commp->net_admin) < 0) {
+                       err = 1;
+                       goto release;
+               }
+next:
+               cb->args[4]++;
+       }
+       cb->args[1] = 0;
+       cb->args[3] = 0;
+       cb->args[4] = 0;
+release:
+       release_sock(sk);
+       return err;
+}
+
+static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
+{
+       struct sctp_endpoint *ep = tsp->asoc->ep;
+       struct sctp_comm_param *commp = p;
+       struct sock *sk = ep->base.sk;
+       const struct inet_diag_req_v2 *r = commp->r;
+       struct sctp_association *assoc =
+               list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+       /* visit the ep only once: accept only its first asoc in this walk */
+       if (tsp->asoc != assoc)
+               return 0;
+
+       if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+               return 0;
+
+       return 1;
+}
+
+static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
+{
+       struct sctp_comm_param *commp = p;
+       struct sock *sk = ep->base.sk;
+       struct sk_buff *skb = commp->skb;
+       struct netlink_callback *cb = commp->cb;
+       const struct inet_diag_req_v2 *r = commp->r;
+       struct net *net = sock_net(skb->sk);
+       struct inet_sock *inet = inet_sk(sk);
+       int err = 0;
+
+       if (!net_eq(sock_net(sk), net))
+               goto out;
+
+       if (cb->args[4] < cb->args[1])
+               goto next;
+
+       if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
+               goto next;
+
+       if (r->sdiag_family != AF_UNSPEC &&
+           sk->sk_family != r->sdiag_family)
+               goto next;
+
+       if (r->id.idiag_sport != inet->inet_sport &&
+           r->id.idiag_sport)
+               goto next;
+
+       if (r->id.idiag_dport != inet->inet_dport &&
+           r->id.idiag_dport)
+               goto next;
+
+       if (inet_sctp_diag_fill(sk, NULL, skb, r,
+                               sk_user_ns(NETLINK_CB(cb->skb).sk),
+                               NETLINK_CB(cb->skb).portid,
+                               cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                               cb->nlh, commp->net_admin) < 0) {
+               err = 2;
+               goto out;
+       }
+next:
+       cb->args[4]++;
+out:
+       return err;
+}
+
+/* define the functions for sctp_diag_handler */
+static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+                              void *info)
+{
+       struct sctp_infox *infox = (struct sctp_infox *)info;
+
+       if (infox->asoc) {
+               r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
+               r->idiag_wqueue = infox->asoc->sndbuf_used;
+       } else {
+               r->idiag_rqueue = sk->sk_ack_backlog;
+               r->idiag_wqueue = sk->sk_max_ack_backlog;
+       }
+       if (infox->sctpinfo)
+               sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
+}
+
+static int sctp_diag_dump_one(struct sk_buff *in_skb,
+                             const struct nlmsghdr *nlh,
+                             const struct inet_diag_req_v2 *req)
+{
+       struct net *net = sock_net(in_skb->sk);
+       union sctp_addr laddr, paddr;
+       struct sctp_comm_param commp = {
+               .skb = in_skb,
+               .r = req,
+               .nlh = nlh,
+               .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
+       };
+
+       if (req->sdiag_family == AF_INET) {
+               laddr.v4.sin_port = req->id.idiag_sport;
+               laddr.v4.sin_addr.s_addr = req->id.idiag_src[0];
+               laddr.v4.sin_family = AF_INET;
+
+               paddr.v4.sin_port = req->id.idiag_dport;
+               paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0];
+               paddr.v4.sin_family = AF_INET;
+       } else {
+               laddr.v6.sin6_port = req->id.idiag_sport;
+               memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
+                      sizeof(laddr.v6.sin6_addr));
+               laddr.v6.sin6_family = AF_INET6;
+
+               paddr.v6.sin6_port = req->id.idiag_dport;
+               memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
+                      sizeof(paddr.v6.sin6_addr));
+               paddr.v6.sin6_family = AF_INET6;
+       }
+
+       return sctp_transport_lookup_process(sctp_tsp_dump_one,
+                                            net, &laddr, &paddr, &commp);
+}
+
+static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+                          const struct inet_diag_req_v2 *r, struct nlattr *bc)
+{
+       u32 idiag_states = r->idiag_states;
+       struct net *net = sock_net(skb->sk);
+       struct sctp_comm_param commp = {
+               .skb = skb,
+               .cb = cb,
+               .r = r,
+               .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
+       };
+       int pos = cb->args[2];
+
+       /* eps hashtable dumps
+        * args:
+        * 0 : set once the listening socks have been traversed
+        * 1 : to record the sock pos where the last traversal stopped
+        * 4 : temporary cursor used while traversing the list
+        */
+       if (cb->args[0] == 0) {
+               if (!(idiag_states & TCPF_LISTEN))
+                       goto skip;
+               if (sctp_for_each_endpoint(sctp_ep_dump, &commp))
+                       goto done;
+skip:
+               cb->args[0] = 1;
+               cb->args[1] = 0;
+               cb->args[4] = 0;
+       }
+
+       /* asocs by transport hashtable dump
+        * args:
+        * 1 : to record the assoc pos where the last traversal stopped
+        * 2 : to record the transport pos of the current traversal
+        * 3 : set once the ep info of the current asoc has been dumped
+        * 4 : temporary cursor used while traversing the list
+        * 5 : to save the sk obtained while traversing the tsp list
+        */
+       if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
+               goto done;
+
+       sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
+                               net, &pos, &commp);
+       cb->args[2] = pos;
+
+done:
+       cb->args[1] = cb->args[4];
+       cb->args[4] = 0;
+}
+
+static const struct inet_diag_handler sctp_diag_handler = {
+       .dump            = sctp_diag_dump,
+       .dump_one        = sctp_diag_dump_one,
+       .idiag_get_info  = sctp_diag_get_info,
+       .idiag_type      = IPPROTO_SCTP,
+       .idiag_info_size = sizeof(struct sctp_info),
+};
+
+static int __init sctp_diag_init(void)
+{
+       return inet_diag_register(&sctp_diag_handler);
+}
+
+static void __exit sctp_diag_exit(void)
+{
+       inet_diag_unregister(&sctp_diag_handler);
+}
+
+module_init(sctp_diag_init);
+module_exit(sctp_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);
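
Everything this file exposes is reached over the generic sock_diag netlink channel. A minimal userspace sketch of a dump request (assuming the uapi headers named below; error handling and full reply parsing elided, since a real consumer loops until NLMSG_DONE):

#include <linux/inet_diag.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct {
                struct nlmsghdr nlh;
                struct inet_diag_req_v2 req;
        } msg = {
                .nlh = {
                        .nlmsg_len   = sizeof(msg),
                        .nlmsg_type  = SOCK_DIAG_BY_FAMILY,
                        .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
                },
                .req = {
                        .sdiag_family   = AF_INET,
                        .sdiag_protocol = IPPROTO_SCTP, /* routed to sctp_diag_handler */
                        .idiag_states   = ~0U,          /* listening eps and all asocs */
                },
        };
        char buf[8192];
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

        send(fd, &msg, sizeof(msg), 0);
        /* each reply carries an inet_diag_msg plus the nlattrs filled in by
         * inet_sctp_diag_fill(): INET_DIAG_LOCALS, INET_DIAG_PEERS,
         * INET_DIAG_INFO and friends
         */
        recv(fd, buf, sizeof(buf), 0);
        close(fd);
        return 0;
}

This is the same interface newer iproute2 releases build their ss SCTP support on.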
index 141c9c4..0247cc4 100644
@@ -897,15 +897,12 @@ int sctp_hash_transport(struct sctp_transport *t)
        rhl_for_each_entry_rcu(transport, tmp, list, node)
                if (transport->asoc->ep == t->asoc->ep) {
                        rcu_read_unlock();
-                       err = -EEXIST;
-                       goto out;
+                       return -EEXIST;
                }
        rcu_read_unlock();
 
        err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
                                  &t->node, sctp_hash_params);
-
-out:
        if (err)
                pr_err_once("insert transport fail, errno %d\n", err);
 
index e35d4f7..0d873c5 100644
@@ -952,16 +952,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
 
 /* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
 static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
-                       int *uaddr_len, int peer)
+                       int peer)
 {
        int rc;
 
-       rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+       rc = inet6_getname(sock, uaddr, peer);
 
-       if (rc != 0)
+       if (rc < 0)
                return rc;
 
-       *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+       rc = sctp_v6_addr_to_user(sctp_sk(sock->sk),
                                          (union sctp_addr *)uaddr);
 
        return rc;
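
This hunk adapts SCTP to the tree-wide change of the getname() socket op: on success the address length is now the return value rather than being stored through a uaddr_len pointer, which is why rc != 0 becomes rc < 0 and why the result of sctp_v6_addr_to_user() is returned directly. A caller under the new convention looks roughly like this (illustrative fragment, not from this patch):

        struct sockaddr_storage addr;
        int len;

        len = sock->ops->getname(sock, (struct sockaddr *)&addr, 1 /* peer */);
        if (len < 0)
                return len;     /* error */
        /* the first len bytes of addr now hold the peer address */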
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
deleted file mode 100644
index a72a7d9..0000000
+++ /dev/null
@@ -1,526 +0,0 @@
index bf271f8..7fa7603 100644
@@ -1606,396 +1606,214 @@ static int sctp_error(struct sock *sk, int flags, int err)
 static int sctp_msghdr_parse(const struct msghdr *msg,
                             struct sctp_cmsgs *cmsgs);
 
-static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
+                             struct sctp_sndrcvinfo *srinfo,
+                             const struct msghdr *msg, size_t msg_len)
 {
-       struct net *net = sock_net(sk);
-       struct sctp_sock *sp;
-       struct sctp_endpoint *ep;
-       struct sctp_association *new_asoc = NULL, *asoc = NULL;
-       struct sctp_transport *transport, *chunk_tp;
-       struct sctp_chunk *chunk;
-       union sctp_addr to;
-       struct sockaddr *msg_name = NULL;
-       struct sctp_sndrcvinfo default_sinfo;
-       struct sctp_sndrcvinfo *sinfo;
-       struct sctp_initmsg *sinit;
-       sctp_assoc_t associd = 0;
-       struct sctp_cmsgs cmsgs = { NULL };
-       enum sctp_scope scope;
-       bool fill_sinfo_ttl = false, wait_connect = false;
-       struct sctp_datamsg *datamsg;
-       int msg_flags = msg->msg_flags;
-       __u16 sinfo_flags = 0;
-       long timeo;
+       __u16 sflags;
        int err;
 
-       err = 0;
-       sp = sctp_sk(sk);
-       ep = sp->ep;
-
-       pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
-                msg, msg_len, ep);
+       if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP))
+               return -EPIPE;
 
-       /* We cannot send a message over a TCP-style listening socket. */
-       if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
-               err = -EPIPE;
-               goto out_nounlock;
-       }
+       if (msg_len > sk->sk_sndbuf)
+               return -EMSGSIZE;
 
-       /* Parse out the SCTP CMSGs.  */
-       err = sctp_msghdr_parse(msg, &cmsgs);
+       memset(cmsgs, 0, sizeof(*cmsgs));
+       err = sctp_msghdr_parse(msg, cmsgs);
        if (err) {
                pr_debug("%s: msghdr parse err:%x\n", __func__, err);
-               goto out_nounlock;
+               return err;
        }
 
-       /* Fetch the destination address for this packet.  This
-        * address only selects the association--it is not necessarily
-        * the address we will send to.
-        * For a peeled-off socket, msg_name is ignored.
-        */
-       if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
-               int msg_namelen = msg->msg_namelen;
-
-               err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
-                                      msg_namelen);
-               if (err)
-                       return err;
+       memset(srinfo, 0, sizeof(*srinfo));
+       if (cmsgs->srinfo) {
+               srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream;
+               srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags;
+               srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid;
+               srinfo->sinfo_context = cmsgs->srinfo->sinfo_context;
+               srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id;
+               srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive;
+       }
 
-               if (msg_namelen > sizeof(to))
-                       msg_namelen = sizeof(to);
-               memcpy(&to, msg->msg_name, msg_namelen);
-               msg_name = msg->msg_name;
+       if (cmsgs->sinfo) {
+               srinfo->sinfo_stream = cmsgs->sinfo->snd_sid;
+               srinfo->sinfo_flags = cmsgs->sinfo->snd_flags;
+               srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid;
+               srinfo->sinfo_context = cmsgs->sinfo->snd_context;
+               srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
        }
 
-       sinit = cmsgs.init;
-       if (cmsgs.sinfo != NULL) {
-               memset(&default_sinfo, 0, sizeof(default_sinfo));
-               default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
-               default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
-               default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
-               default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
-               default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
+       sflags = srinfo->sinfo_flags;
+       if (!sflags && msg_len)
+               return 0;
 
-               sinfo = &default_sinfo;
-               fill_sinfo_ttl = true;
-       } else {
-               sinfo = cmsgs.srinfo;
-       }
-       /* Did the user specify SNDINFO/SNDRCVINFO? */
-       if (sinfo) {
-               sinfo_flags = sinfo->sinfo_flags;
-               associd = sinfo->sinfo_assoc_id;
-       }
+       if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT)))
+               return -EINVAL;
 
-       pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
-                msg_len, sinfo_flags);
+       if (((sflags & SCTP_EOF) && msg_len > 0) ||
+           (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0))
+               return -EINVAL;
 
-       /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
-       if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
-               err = -EINVAL;
-               goto out_nounlock;
-       }
+       if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name)
+               return -EINVAL;
 
-       /* If SCTP_EOF is set, no data can be sent. Disallow sending zero
-        * length messages when SCTP_EOF|SCTP_ABORT is not set.
-        * If SCTP_ABORT is set, the message length could be non zero with
-        * the msg_iov set to the user abort reason.
-        */
-       if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
-           (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
-               err = -EINVAL;
-               goto out_nounlock;
-       }
+       return 0;
+}
 
-       /* If SCTP_ADDR_OVER is set, there must be an address
-        * specified in msg_name.
-        */
-       if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
-               err = -EINVAL;
-               goto out_nounlock;
-       }
+static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
+                                struct sctp_cmsgs *cmsgs,
+                                union sctp_addr *daddr,
+                                struct sctp_transport **tp)
+{
+       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+       struct net *net = sock_net(sk);
+       struct sctp_association *asoc;
+       enum sctp_scope scope;
+       int err = -EINVAL;
 
-       transport = NULL;
+       *tp = NULL;
 
-       pr_debug("%s: about to look up association\n", __func__);
+       if (sflags & (SCTP_EOF | SCTP_ABORT))
+               return -EINVAL;
 
-       lock_sock(sk);
+       if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) ||
+                                   sctp_sstate(sk, CLOSING)))
+               return -EADDRNOTAVAIL;
 
-       /* If a msg_name has been specified, assume this is to be used.  */
-       if (msg_name) {
-               /* Look for a matching association on the endpoint. */
-               asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+       if (sctp_endpoint_is_peeled_off(ep, daddr))
+               return -EADDRNOTAVAIL;
 
-               /* If we could not find a matching association on the
-                * endpoint, make sure that it is not a TCP-style
-                * socket that already has an association or there is
-                * no peeled-off association on another socket.
-                */
-               if (!asoc &&
-                   ((sctp_style(sk, TCP) &&
-                     (sctp_sstate(sk, ESTABLISHED) ||
-                      sctp_sstate(sk, CLOSING))) ||
-                    sctp_endpoint_is_peeled_off(ep, &to))) {
-                       err = -EADDRNOTAVAIL;
-                       goto out_unlock;
-               }
+       if (!ep->base.bind_addr.port) {
+               if (sctp_autobind(sk))
+                       return -EAGAIN;
        } else {
-               asoc = sctp_id2assoc(sk, associd);
-               if (!asoc) {
-                       err = -EPIPE;
-                       goto out_unlock;
-               }
+               if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+                   !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+                       return -EACCES;
        }
 
-       if (asoc) {
-               pr_debug("%s: just looked up association:%p\n", __func__, asoc);
+       scope = sctp_scope(daddr);
 
-               /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
-                * socket that has an association in CLOSED state. This can
-                * happen when an accepted socket has an association that is
-                * already CLOSED.
-                */
-               if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
-                       err = -EPIPE;
-                       goto out_unlock;
-               }
+       asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+       if (!asoc)
+               return -ENOMEM;
 
-               if (sinfo_flags & SCTP_EOF) {
-                       pr_debug("%s: shutting down association:%p\n",
-                                __func__, asoc);
+       if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
+               err = -ENOMEM;
+               goto free;
+       }
 
-                       sctp_primitive_SHUTDOWN(net, asoc, NULL);
-                       err = 0;
-                       goto out_unlock;
+       if (cmsgs->init) {
+               struct sctp_initmsg *init = cmsgs->init;
+
+               if (init->sinit_num_ostreams) {
+                       __u16 outcnt = init->sinit_num_ostreams;
+
+                       asoc->c.sinit_num_ostreams = outcnt;
+                       /* outcnt has been changed, need to re-init stream */
+                       err = sctp_stream_init(&asoc->stream, outcnt, 0,
+                                              GFP_KERNEL);
+                       if (err)
+                               goto free;
                }
-               if (sinfo_flags & SCTP_ABORT) {
 
-                       chunk = sctp_make_abort_user(asoc, msg, msg_len);
-                       if (!chunk) {
-                               err = -ENOMEM;
-                               goto out_unlock;
-                       }
+               if (init->sinit_max_instreams)
+                       asoc->c.sinit_max_instreams = init->sinit_max_instreams;
 
-                       pr_debug("%s: aborting association:%p\n",
-                                __func__, asoc);
+               if (init->sinit_max_attempts)
+                       asoc->max_init_attempts = init->sinit_max_attempts;
 
-                       sctp_primitive_ABORT(net, asoc, chunk);
-                       err = 0;
-                       goto out_unlock;
-               }
+               if (init->sinit_max_init_timeo)
+                       asoc->max_init_timeo =
+                               msecs_to_jiffies(init->sinit_max_init_timeo);
        }
 
-       /* Do we need to create the association?  */
-       if (!asoc) {
-               pr_debug("%s: there is no association yet\n", __func__);
+       *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+       if (!*tp) {
+               err = -ENOMEM;
+               goto free;
+       }
 
-               if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
-                       err = -EINVAL;
-                       goto out_unlock;
-               }
+       return 0;
 
-               /* Check for invalid stream against the stream counts,
-                * either the default or the user specified stream counts.
-                */
-               if (sinfo) {
-                       if (!sinit || !sinit->sinit_num_ostreams) {
-                               /* Check against the defaults. */
-                               if (sinfo->sinfo_stream >=
-                                   sp->initmsg.sinit_num_ostreams) {
-                                       err = -EINVAL;
-                                       goto out_unlock;
-                               }
-                       } else {
-                               /* Check against the requested.  */
-                               if (sinfo->sinfo_stream >=
-                                   sinit->sinit_num_ostreams) {
-                                       err = -EINVAL;
-                                       goto out_unlock;
-                               }
-                       }
-               }
+free:
+       sctp_association_free(asoc);
+       return err;
+}
 
-               /*
-                * API 3.1.2 bind() - UDP Style Syntax
-                * If a bind() or sctp_bindx() is not called prior to a
-                * sendmsg() call that initiates a new association, the
-                * system picks an ephemeral port and will choose an address
-                * set equivalent to binding with a wildcard address.
-                */
-               if (!ep->base.bind_addr.port) {
-                       if (sctp_autobind(sk)) {
-                               err = -EAGAIN;
-                               goto out_unlock;
-                       }
-               } else {
-                       /*
-                        * If an unprivileged user inherits a one-to-many
-                        * style socket with open associations on a privileged
-                        * port, it MAY be permitted to accept new associations,
-                        * but it SHOULD NOT be permitted to open new
-                        * associations.
-                        */
-                       if (ep->base.bind_addr.port < inet_prot_sock(net) &&
-                           !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
-                               err = -EACCES;
-                               goto out_unlock;
-                       }
-               }
+static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
+                                    __u16 sflags, struct msghdr *msg,
+                                    size_t msg_len)
+{
+       struct sock *sk = asoc->base.sk;
+       struct net *net = sock_net(sk);
 
-               scope = sctp_scope(&to);
-               new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-               if (!new_asoc) {
-                       err = -ENOMEM;
-                       goto out_unlock;
-               }
-               asoc = new_asoc;
-               err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
-               if (err < 0) {
-                       err = -ENOMEM;
-                       goto out_free;
-               }
+       if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
+               return -EPIPE;
 
-               /* If the SCTP_INIT ancillary data is specified, set all
-                * the association init values accordingly.
-                */
-               if (sinit) {
-                       if (sinit->sinit_num_ostreams) {
-                               __u16 outcnt = sinit->sinit_num_ostreams;
-
-                               asoc->c.sinit_num_ostreams = outcnt;
-                               /* outcnt has been changed, so re-init stream */
-                               err = sctp_stream_init(&asoc->stream, outcnt, 0,
-                                                      GFP_KERNEL);
-                               if (err)
-                                       goto out_free;
-                       }
-                       if (sinit->sinit_max_instreams) {
-                               asoc->c.sinit_max_instreams =
-                                       sinit->sinit_max_instreams;
-                       }
-                       if (sinit->sinit_max_attempts) {
-                               asoc->max_init_attempts
-                                       = sinit->sinit_max_attempts;
-                       }
-                       if (sinit->sinit_max_init_timeo) {
-                               asoc->max_init_timeo =
-                                msecs_to_jiffies(sinit->sinit_max_init_timeo);
-                       }
-               }
+       if (sflags & SCTP_EOF) {
+               pr_debug("%s: shutting down association:%p\n", __func__, asoc);
+               sctp_primitive_SHUTDOWN(net, asoc, NULL);
 
-               /* Prime the peer's transport structures.  */
-               transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
-               if (!transport) {
-                       err = -ENOMEM;
-                       goto out_free;
-               }
+               return 0;
        }
 
-       /* ASSERT: we have a valid association at this point.  */
-       pr_debug("%s: we have a valid association\n", __func__);
+       if (sflags & SCTP_ABORT) {
+               struct sctp_chunk *chunk;
 
-       if (!sinfo) {
-               /* If the user didn't specify SNDINFO/SNDRCVINFO, make up
-                * one with some defaults.
-                */
-               memset(&default_sinfo, 0, sizeof(default_sinfo));
-               default_sinfo.sinfo_stream = asoc->default_stream;
-               default_sinfo.sinfo_flags = asoc->default_flags;
-               default_sinfo.sinfo_ppid = asoc->default_ppid;
-               default_sinfo.sinfo_context = asoc->default_context;
-               default_sinfo.sinfo_timetolive = asoc->default_timetolive;
-               default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
-
-               sinfo = &default_sinfo;
-       } else if (fill_sinfo_ttl) {
-               /* In case SNDINFO was specified, we still need to fill
-                * it with a default ttl from the assoc here.
-                */
-               sinfo->sinfo_timetolive = asoc->default_timetolive;
-       }
+               chunk = sctp_make_abort_user(asoc, msg, msg_len);
+               if (!chunk)
+                       return -ENOMEM;
 
-       /* API 7.1.7, the sndbuf size per association bounds the
-        * maximum size of data that can be sent in a single send call.
-        */
-       if (msg_len > sk->sk_sndbuf) {
-               err = -EMSGSIZE;
-               goto out_free;
+               pr_debug("%s: aborting association:%p\n", __func__, asoc);
+               sctp_primitive_ABORT(net, asoc, chunk);
+
+               return 0;
        }
 
-       if (asoc->pmtu_pending)
-               sctp_assoc_pending_pmtu(asoc);
+       return 1;
+}
 
-       /* If fragmentation is disabled and the message length exceeds the
-        * association fragmentation point, return EMSGSIZE.  The I-D
-        * does not specify what this error is, but this looks like
-        * a great fit.
-        */
-       if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
-               err = -EMSGSIZE;
-               goto out_free;
-       }
+static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+                               struct msghdr *msg, size_t msg_len,
+                               struct sctp_transport *transport,
+                               struct sctp_sndrcvinfo *sinfo)
+{
+       struct sock *sk = asoc->base.sk;
+       struct net *net = sock_net(sk);
+       struct sctp_datamsg *datamsg;
+       bool wait_connect = false;
+       struct sctp_chunk *chunk;
+       long timeo;
+       int err;
 
-       /* Check for invalid stream. */
        if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
                err = -EINVAL;
-               goto out_free;
+               goto err;
        }
 
-       /* Allocate sctp_stream_out_ext if not already done */
        if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
                err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
                if (err)
-                       goto out_free;
+                       goto err;
        }
 
-       if (sctp_wspace(asoc) < msg_len)
-               sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
-
-       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-       if (!sctp_wspace(asoc)) {
-               /* sk can be changed by peel off when waiting for buf. */
-               err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
-               if (err) {
-                       if (err == -ESRCH) {
-                               /* asoc is already dead. */
-                               new_asoc = NULL;
-                               err = -EPIPE;
-                       }
-                       goto out_free;
-               }
+       if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+               err = -EMSGSIZE;
+               goto err;
        }
 
-       /* If an address is passed with the sendto/sendmsg call, it is used
-        * to override the primary destination address in the TCP model, or
-        * when SCTP_ADDR_OVER flag is set in the UDP model.
-        */
-       if ((sctp_style(sk, TCP) && msg_name) ||
-           (sinfo_flags & SCTP_ADDR_OVER)) {
-               chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
-               if (!chunk_tp) {
-                       err = -EINVAL;
-                       goto out_free;
-               }
-       } else
-               chunk_tp = NULL;
-
-       /* Auto-connect, if we aren't connected already. */
        if (sctp_state(asoc, CLOSED)) {
                err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
-               if (err < 0)
-                       goto out_free;
+               if (err)
+                       goto err;
 
-               /* If stream interleave is enabled, wait_connect has to be
-                * done earlier than data enqueue, as it needs to make data
-                * or idata according to asoc->intl_enable which is set
-                * after connection is done.
-                */
-               if (sctp_sk(asoc->base.sk)->strm_interleave) {
+               if (sctp_sk(sk)->strm_interleave) {
                        timeo = sock_sndtimeo(sk, 0);
                        err = sctp_wait_for_connect(asoc, &timeo);
                        if (err)
-                               goto out_unlock;
+                               goto err;
                } else {
                        wait_connect = true;
                }
@@ -2003,73 +1821,165 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
                pr_debug("%s: we associated primitively\n", __func__);
        }
 
-       /* Break the message into multiple chunks of maximum size. */
+       if (asoc->pmtu_pending)
+               sctp_assoc_pending_pmtu(asoc);
+
+       if (sctp_wspace(asoc) < msg_len)
+               sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
+       if (!sctp_wspace(asoc)) {
+               timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+               err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+               if (err)
+                       goto err;
+       }
+
        datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
        if (IS_ERR(datamsg)) {
                err = PTR_ERR(datamsg);
-               goto out_free;
+               goto err;
        }
+
        asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
 
-       /* Now send the (possibly) fragmented message. */
        list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
                sctp_chunk_hold(chunk);
-
-               /* Do accounting for the write space.  */
                sctp_set_owner_w(chunk);
-
-               chunk->transport = chunk_tp;
+               chunk->transport = transport;
        }
 
-       /* Send it to the lower layers.  Note:  all chunks
-        * must either fail or succeed.   The lower layer
-        * works that way today.  Keep it that way or this
-        * breaks.
-        */
        err = sctp_primitive_SEND(net, asoc, datamsg);
-       /* Did the lower layer accept the chunk? */
        if (err) {
                sctp_datamsg_free(datamsg);
-               goto out_free;
+               goto err;
        }
 
        pr_debug("%s: we sent primitively\n", __func__);
 
        sctp_datamsg_put(datamsg);
-       err = msg_len;
 
        if (unlikely(wait_connect)) {
-               timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
+               timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
                sctp_wait_for_connect(asoc, &timeo);
        }
 
-       /* If we are already past ASSOCIATE, the lower
-        * layers are responsible for association cleanup.
-        */
-       goto out_unlock;
+       err = msg_len;
 
-out_free:
-       if (new_asoc)
-               sctp_association_free(asoc);
-out_unlock:
-       release_sock(sk);
+err:
+       return err;
+}
 
-out_nounlock:
-       return sctp_error(sk, msg_flags, err);
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+                                              const struct msghdr *msg,
+                                              struct sctp_cmsgs *cmsgs)
+{
+       union sctp_addr *daddr = NULL;
+       int err;
 
-#if 0
-do_sock_err:
-       if (msg_len)
-               err = msg_len;
-       else
-               err = sock_error(sk);
-       goto out;
+       if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+               int len = msg->msg_namelen;
 
-do_interrupted:
-       if (msg_len)
-               err = msg_len;
-       goto out;
-#endif /* 0 */
+               if (len > sizeof(*daddr))
+                       len = sizeof(*daddr);
+
+               daddr = (union sctp_addr *)msg->msg_name;
+
+               err = sctp_verify_addr(sk, daddr, len);
+               if (err)
+                       return ERR_PTR(err);
+       }
+
+       return daddr;
+}
+
+static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
+                                     struct sctp_sndrcvinfo *sinfo,
+                                     struct sctp_cmsgs *cmsgs)
+{
+       if (!cmsgs->srinfo && !cmsgs->sinfo) {
+               sinfo->sinfo_stream = asoc->default_stream;
+               sinfo->sinfo_ppid = asoc->default_ppid;
+               sinfo->sinfo_context = asoc->default_context;
+               sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+       }
+
+       if (!cmsgs->srinfo)
+               sinfo->sinfo_timetolive = asoc->default_timetolive;
+}
+
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+{
+       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+       struct sctp_transport *transport = NULL;
+       struct sctp_sndrcvinfo _sinfo, *sinfo;
+       struct sctp_association *asoc;
+       struct sctp_cmsgs cmsgs;
+       union sctp_addr *daddr;
+       bool new = false;
+       __u16 sflags;
+       int err;
+
+       /* Parse and get snd_info */
+       err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
+       if (err)
+               goto out;
+
+       sinfo  = &_sinfo;
+       sflags = sinfo->sinfo_flags;
+
+       /* Get daddr from msg */
+       daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
+       if (IS_ERR(daddr)) {
+               err = PTR_ERR(daddr);
+               goto out;
+       }
+
+       lock_sock(sk);
+
+       /* Get and check or create asoc */
+       if (daddr) {
+               asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+               if (asoc) {
+                       err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+                                                       msg_len);
+                       if (err <= 0)
+                               goto out_unlock;
+               } else {
+                       err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
+                                                   &transport);
+                       if (err)
+                               goto out_unlock;
+
+                       asoc = transport->asoc;
+                       new = true;
+               }
+
+               if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
+                       transport = NULL;
+       } else {
+               asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
+               if (!asoc) {
+                       err = -EPIPE;
+                       goto out_unlock;
+               }
+
+               err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
+               if (err <= 0)
+                       goto out_unlock;
+       }
+
+       /* Update snd_info with the asoc */
+       sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+       /* Send msg to the asoc */
+       err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
+       if (err < 0 && err != -ESRCH && new)
+               sctp_association_free(asoc);
+
+out_unlock:
+       release_sock(sk);
+out:
+       return sctp_error(sk, msg->msg_flags, err);
 }
 
 /* This is an extended version of skb_pull() that removes the data from the
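
For context, the refactored sctp_sendmsg() above delegates parsing to sctp_sendmsg_parse() and consumes SCTP_SNDINFO ancillary data before resolving the association. A minimal user-space sketch of driving this path, assuming the standard RFC 6458 lksctp API (the helper name sctp_send_one is illustrative, not part of this patch):

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <netinet/sctp.h>

/* Send one message on stream `sid` with flags such as SCTP_EOF
 * (graceful shutdown) or SCTP_ABORT, both of which the new
 * sctp_sendmsg_check_sflags() handles before any data is queued.
 */
static ssize_t sctp_send_one(int sd, const void *buf, size_t len,
			     uint16_t sid, uint16_t flags)
{
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	char cbuf[CMSG_SPACE(sizeof(struct sctp_sndinfo))] = { 0 };
	struct sctp_sndinfo snd = { .snd_sid = sid, .snd_flags = flags };
	struct msghdr mh = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mh);

	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_SNDINFO;
	cmsg->cmsg_len = CMSG_LEN(sizeof(snd));
	memcpy(CMSG_DATA(cmsg), &snd, sizeof(snd));
	return sendmsg(sd, &mh, 0);
}

A stream id >= the association's outcnt gets -EINVAL from sctp_sendmsg_to_asoc(); SCTP_EOF and SCTP_ABORT return early from sctp_sendmsg_check_sflags() without queuing any data.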
index cedf672..f799043 100644 (file)
@@ -6,7 +6,7 @@
  *
  * This file is part of the SCTP kernel implementation
  *
- * These functions manipulate sctp tsn mapping array.
+ * This file contains sctp stream manipulation primitives and helpers.
  *
  * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
index 8c7cf8f..d3764c1 100644 (file)
@@ -3,7 +3,8 @@
  *
  * This file is part of the SCTP kernel implementation
  *
- * These functions manipulate sctp stream queue/scheduling.
+ * These functions implement sctp stream message interleaving, mostly
+ * covering I-DATA and I-FORWARD-TSN chunk processing.
  *
  * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
@@ -954,12 +955,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
        __u32 freed = 0;
        __u16 needed;
 
-       if (chunk) {
-               needed = ntohs(chunk->chunk_hdr->length);
-               needed -= sizeof(struct sctp_idata_chunk);
-       } else {
-               needed = SCTP_DEFAULT_MAXWINDOW;
-       }
+       needed = ntohs(chunk->chunk_hdr->length) -
+                sizeof(struct sctp_idata_chunk);
 
        if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
                freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed);
@@ -971,9 +968,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
                                                       needed);
        }
 
-       if (chunk && freed >= needed)
-               if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
-                       sctp_intl_start_pd(ulpq, gfp);
+       if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
+               sctp_intl_start_pd(ulpq, gfp);
 
        sk_mem_reclaim(asoc->base.sk);
 }
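
Worked example for the simplified renege path (illustrative numbers, assuming a 20-byte struct sctp_idata_chunk): an I-DATA chunk whose chunk_hdr->length is htons(1220) needs 1200 bytes of receive space, so sctp_ulpq_renege_list() reclaims events from the lobby and the reassembly queues until freed >= 1200, and only then is delivery attempted, falling back to partial delivery if sctp_ulpevent_idata() fails. The NULL-chunk branch could be dropped evidently because callers of sctp_renege_events() now always pass a real chunk.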
index da1a5cd..26684e0 100644 (file)
@@ -7,7 +7,6 @@
  *  applicable with RoCE-cards only
  *
  *  Initial restrictions:
- *    - non-blocking connect postponed
  *    - IPv6 support postponed
  *    - support for alternate links postponed
  *    - partial support for non-blocking sockets only
@@ -24,7 +23,6 @@
 
 #include <linux/module.h>
 #include <linux/socket.h>
-#include <linux/inetdevice.h>
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
@@ -273,47 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 }
 
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
-                        __be32 *subnet, u8 *prefix_len)
-{
-       struct dst_entry *dst = sk_dst_get(clcsock->sk);
-       struct in_device *in_dev;
-       struct sockaddr_in addr;
-       int rc = -ENOENT;
-       int len;
-
-       if (!dst) {
-               rc = -ENOTCONN;
-               goto out;
-       }
-       if (!dst->dev) {
-               rc = -ENODEV;
-               goto out_rel;
-       }
-
-       /* get address to which the internal TCP socket is bound */
-       kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
-       /* analyze IPv4 specific data of net_device belonging to TCP socket */
-       rcu_read_lock();
-       in_dev = __in_dev_get_rcu(dst->dev);
-       for_ifa(in_dev) {
-               if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
-                       continue;
-               *prefix_len = inet_mask_len(ifa->ifa_mask);
-               *subnet = ifa->ifa_address & ifa->ifa_mask;
-               rc = 0;
-               break;
-       } endfor_ifa(in_dev);
-       rcu_read_unlock();
-
-out_rel:
-       dst_release(dst);
-out:
-       return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
@@ -333,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
                return rc;
        }
 
+       if (link->llc_confirm_rc)
+               return SMC_CLC_DECL_RMBE_EC;
+
        rc = smc_ib_modify_qp_rts(link);
        if (rc)
                return SMC_CLC_DECL_INTERR;
@@ -347,11 +308,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
        /* send CONFIRM LINK response over RoCE fabric */
        rc = smc_llc_send_confirm_link(link,
                                       link->smcibdev->mac[link->ibport - 1],
-                                      gid, SMC_LLC_RESP);
+                                      &link->smcibdev->gid[link->ibport - 1],
+                                      SMC_LLC_RESP);
        if (rc < 0)
                return SMC_CLC_DECL_TCL;
 
-       return rc;
+       /* receive ADD LINK request from server over RoCE fabric */
+       rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+                                                        SMC_LLC_WAIT_TIME);
+       if (rest <= 0) {
+               struct smc_clc_msg_decline dclc;
+
+               rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+                                     SMC_CLC_DECLINE);
+               return rc;
+       }
+
+       /* send add link reject message, only one link supported for now */
+       rc = smc_llc_send_add_link(link,
+                                  link->smcibdev->mac[link->ibport - 1],
+                                  &link->smcibdev->gid[link->ibport - 1],
+                                  SMC_LLC_RESP);
+       if (rc < 0)
+               return SMC_CLC_DECL_TCL;
+
+       link->state = SMC_LNK_ACTIVE;
+
+       return 0;
 }
 
 static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -373,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
        link->peer_mtu = clc->qp_mtu;
 }
 
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
-       spin_lock_bh(&smc_lgr_list.lock);
-       /* do not use this link group for new connections */
-       if (!list_empty(&lgr->list))
-               list_del_init(&lgr->list);
-       spin_unlock_bh(&smc_lgr_list.lock);
-}
-
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
-       struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
        struct smc_clc_msg_accept_confirm aclc;
        int local_contact = SMC_FIRST_CONTACT;
        struct smc_ib_device *smcibdev;
@@ -439,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
        srv_first_contact = aclc.hdr.flag;
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
-                                       ibport, &aclc.lcl, srv_first_contact);
+       local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+                                       srv_first_contact);
        if (local_contact < 0) {
                rc = local_contact;
                if (rc == -ENOMEM)
@@ -499,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
        if (local_contact == SMC_FIRST_CONTACT) {
                /* QP confirmation over RoCE fabric */
-               reason_code = smc_clnt_conf_first_link(
-                       smc, &smcibdev->gid[ibport - 1]);
+               reason_code = smc_clnt_conf_first_link(smc);
                if (reason_code < 0) {
                        rc = reason_code;
                        goto out_err_unlock;
@@ -559,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
                goto out_err;
        if (addr->sa_family != AF_INET)
                goto out_err;
-       smc->addr = addr;       /* needed for nonblocking connect */
 
        lock_sock(sk);
        switch (sk->sk_state) {
@@ -749,9 +720,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 
                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                      SMC_CLC_DECLINE);
+               return rc;
        }
 
-       return rc;
+       if (link->llc_confirm_resp_rc)
+               return SMC_CLC_DECL_RMBE_EC;
+
+       /* send ADD LINK request to client over the RoCE fabric */
+       rc = smc_llc_send_add_link(link,
+                                  link->smcibdev->mac[link->ibport - 1],
+                                  &link->smcibdev->gid[link->ibport - 1],
+                                  SMC_LLC_REQ);
+       if (rc < 0)
+               return SMC_CLC_DECL_TCL;
+
+       /* receive ADD LINK response from client over the RoCE fabric */
+       rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+                                                        SMC_LLC_WAIT_TIME);
+       if (rest <= 0) {
+               struct smc_clc_msg_decline dclc;
+
+               rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+                                     SMC_CLC_DECLINE);
+               return rc;
+       }
+
+       link->state = SMC_LNK_ACTIVE;
+
+       return 0;
 }
 
 /* setup for RDMA connection of server */
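
Taken together, the client and server halves above implement this single-link LLC bring-up (sketch of the message flow in this patch):

    server                          client
      CONFIRM LINK req   ------>
                         <------   CONFIRM LINK resp
      ADD LINK req       ------>
                         <------   ADD LINK resp (reject: NO_ALT_PATH)

Both ends then set link->state = SMC_LNK_ACTIVE; failed steps are declined with reason codes such as SMC_CLC_DECL_RMBE_EC or SMC_CLC_DECL_TCL.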
@@ -767,11 +763,10 @@ static void smc_listen_work(struct work_struct *work)
        struct sock *newsmcsk = &new_smc->sk;
        struct smc_clc_msg_proposal *pclc;
        struct smc_ib_device *smcibdev;
-       struct sockaddr_in peeraddr;
        u8 buf[SMC_CLC_MAX_LEN];
        struct smc_link *link;
        int reason_code = 0;
-       int rc = 0, len;
+       int rc = 0;
        __be32 subnet;
        u8 prefix_len;
        u8 ibport;
@@ -809,7 +804,7 @@ static void smc_listen_work(struct work_struct *work)
        }
 
        /* determine subnet and mask from internal TCP socket */
-       rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
+       rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
        if (rc) {
                reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
                goto decline_rdma;
@@ -823,13 +818,10 @@ static void smc_listen_work(struct work_struct *work)
                goto decline_rdma;
        }
 
-       /* get address of the peer connected to the internal TCP socket */
-       kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
-
        /* allocate connection / link group */
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
-                                       smcibdev, ibport, &pclc->lcl, 0);
+       local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+                                       0);
        if (local_contact < 0) {
                rc = local_contact;
                if (rc == -ENOMEM)
@@ -1075,7 +1067,7 @@ out:
 }
 
 static int smc_getname(struct socket *sock, struct sockaddr *addr,
-                      int *len, int peer)
+                      int peer)
 {
        struct smc_sock *smc;
 
@@ -1085,7 +1077,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
 
        smc = smc_sk(sock->sk);
 
-       return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+       return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
 }
 
 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
index 9518986..268cdf1 100644 (file)
@@ -172,7 +172,6 @@ struct smc_sock {                           /* smc sock container */
        struct sock             sk;
        struct socket           *clcsock;       /* internal tcp socket */
        struct smc_connection   conn;           /* smc connection */
-       struct sockaddr         *addr;          /* inet connect address */
        struct smc_sock         *listen_smc;    /* listen parent */
        struct work_struct      tcp_listen_work;/* handle tcp socket accepts */
        struct work_struct      smc_listen_work;/* prepare new accept socket */
@@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
 
 struct smc_clc_msg_local;
 
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
-                        u8 *prefix_len);
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact);
 struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
index 8ac5158..874c5a7 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <linux/in.h>
+#include <linux/inetdevice.h>
 #include <linux/if_ether.h>
 #include <linux/sched/signal.h>
 
@@ -22,6 +23,9 @@
 #include "smc_clc.h"
 #include "smc_ib.h"
 
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
 /* check if received message has a correct header length and contains valid
  * heading and trailing eyecatchers
  */
@@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
        return true;
 }
 
+/* determine subnet and mask of internal TCP socket */
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
+                            __be32 *subnet, u8 *prefix_len)
+{
+       struct dst_entry *dst = sk_dst_get(clcsock->sk);
+       struct in_device *in_dev;
+       struct sockaddr_in addr;
+       int rc = -ENOENT;
+
+       if (!dst) {
+               rc = -ENOTCONN;
+               goto out;
+       }
+       if (!dst->dev) {
+               rc = -ENODEV;
+               goto out_rel;
+       }
+
+       /* get address to which the internal TCP socket is bound */
+       kernel_getsockname(clcsock, (struct sockaddr *)&addr);
+       /* analyze IPv4 specific data of net_device belonging to TCP socket */
+       rcu_read_lock();
+       in_dev = __in_dev_get_rcu(dst->dev);
+       for_ifa(in_dev) {
+               if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
+                       continue;
+               *prefix_len = inet_mask_len(ifa->ifa_mask);
+               *subnet = ifa->ifa_address & ifa->ifa_mask;
+               rc = 0;
+               break;
+       } endfor_ifa(in_dev);
+       rcu_read_unlock();
+
+out_rel:
+       dst_release(dst);
+out:
+       return rc;
+}
+
 /* Wait for data on the tcp-socket, analyze received data
  * Returns:
  * 0 if success and it was not a decline that we received.
@@ -211,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 
        memset(&pclc_prfx, 0, sizeof(pclc_prfx));
        /* determine subnet and mask from internal TCP socket */
-       rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
-                                 &pclc_prfx.prefix_len);
+       rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+                                     &pclc_prfx.prefix_len);
        if (rc)
                return SMC_CLC_DECL_CNFERR; /* configuration error */
        pclc_prfx.ipv6_prefixes_cnt = 0;
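
For instance (illustrative addresses): with the internal clcsock bound to 192.168.1.5 on a device whose matching ifa carries mask 255.255.255.0, smc_clc_netinfo_by_tcpsk() returns *prefix_len = 24 and *subnet = 192.168.1.0, which land in pclc_prfx.outgoing_subnet and pclc_prfx.prefix_len of the proposal.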
index c145a0f..20e048b 100644 (file)
@@ -22,9 +22,6 @@
 #define SMC_CLC_CONFIRM                0x03
 #define SMC_CLC_DECLINE                0x04
 
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
 #define SMC_CLC_V1             0x1             /* SMC version                */
 #define CLC_WAIT_TIME          (6 * HZ)        /* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM       0x01010000  /* insufficient memory resources  */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
 #define SMC_CLC_DECL_INTERR    0x99990000  /* internal error                 */
 #define SMC_CLC_DECL_TCL       0x02040000  /* timeout w4 QP confirm          */
 #define SMC_CLC_DECL_SEND      0x07000000  /* sending problem                */
+#define SMC_CLC_DECL_RMBE_EC   0x08000000  /* peer has eyecatcher in RMBE    */
 
 struct smc_clc_msg_hdr {       /* header1 of clc messages */
        u8 eyecatcher[4];       /* eye catcher */
@@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
               ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
+                            u8 *prefix_len);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
                     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
index 2424c71..702ce5f 100644 (file)
@@ -144,7 +144,7 @@ free:
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
                          struct smc_ib_device *smcibdev, u8 ibport,
                          char *peer_systemid, unsigned short vlan_id)
 {
@@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
        }
        lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        lgr->sync_err = false;
-       lgr->daddr = peer_in_addr;
        memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
        lgr->vlan_id = vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
 
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        /* initialize link */
+       lnk->state = SMC_LNK_ACTIVATING;
        lnk->smcibdev = smcibdev;
        lnk->ibport = ibport;
        lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
                goto destroy_qp;
        init_completion(&lnk->llc_confirm);
        init_completion(&lnk->llc_confirm_resp);
+       init_completion(&lnk->llc_add);
+       init_completion(&lnk->llc_add_resp);
 
        smc->conn.lgr = lgr;
        rwlock_init(&lgr->conns_lock);
@@ -306,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
        kfree(lgr);
 }
 
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+       spin_lock_bh(&smc_lgr_list.lock);
+       /* do not use this link group for new connections */
+       if (!list_empty(&lgr->list))
+               list_del_init(&lgr->list);
+       spin_unlock_bh(&smc_lgr_list.lock);
+}
+
 /* terminate linkgroup abnormally */
 void smc_lgr_terminate(struct smc_link_group *lgr)
 {
@@ -313,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
        struct smc_sock *smc;
        struct rb_node *node;
 
-       spin_lock_bh(&smc_lgr_list.lock);
-       if (list_empty(&lgr->list)) {
-               /* termination already triggered */
-               spin_unlock_bh(&smc_lgr_list.lock);
-               return;
-       }
-       /* do not use this link group for new connections */
-       list_del_init(&lgr->list);
-       spin_unlock_bh(&smc_lgr_list.lock);
+       smc_lgr_forget(lgr);
 
        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
@@ -400,7 +403,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 }
 
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact)
 {
@@ -457,7 +460,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
 
 create:
        if (local_contact == SMC_FIRST_CONTACT) {
-               rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+               rc = smc_lgr_create(smc, smcibdev, ibport,
                                    lcl->id_for_peer, vlan_id);
                if (rc)
                        goto out;
@@ -698,27 +701,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
        return -ENOSPC;
 }
 
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
-                           struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
 {
-       u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
-       struct smc_link_group *lgr = conn->lgr;
-       u32 rkey = ntohl(clc->rmb_rkey);
+       u64 dma_addr = be64_to_cpu(nw_vaddr);
+       u32 rkey = ntohl(nw_rkey);
        int i;
 
        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
                    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
                    test_bit(i, lgr->rtokens_used_mask)) {
-                       conn->rtoken_idx = i;
+                       /* already in list */
+                       return i;
+               }
+       }
+       i = smc_rmb_reserve_rtoken_idx(lgr);
+       if (i < 0)
+               return i;
+       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+       return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+       u32 rkey = ntohl(nw_rkey);
+       int i;
+
+       for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+               if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+                   test_bit(i, lgr->rtokens_used_mask)) {
+                       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+                       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+                       clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
-       conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+       return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+                           struct smc_clc_msg_accept_confirm *clc)
+{
+       conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+                                         clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
-       lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
-       lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
        return 0;
 }
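
A small kernel-side sketch (hypothetical caller, not part of this patch) of how the two new rtoken helpers compose:

static int rtoken_roundtrip(struct smc_link_group *lgr,
			    __be64 nw_vaddr, __be32 nw_rkey)
{
	int idx;

	/* idempotent: returns the existing slot when (rkey, dma_addr)
	 * is already known, otherwise reserves a new one; -ENOSPC when
	 * all SMC_RMBS_PER_LGR_MAX slots are in use
	 */
	idx = smc_rtoken_add(lgr, nw_vaddr, nw_rkey);
	if (idx < 0)
		return idx;

	/* 0 on success, -ENOENT if the rkey was never added */
	return smc_rtoken_delete(lgr, nw_rkey);
}

This is exactly the pairing the CONFIRM RKEY and DELETE RKEY handlers in smc_llc.c rely on.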
index fe691bf..07e2a39 100644 (file)
@@ -32,6 +32,12 @@ enum smc_lgr_role {          /* possible roles of a link group */
        SMC_SERV        /* server */
 };
 
+enum smc_link_state {                  /* possible states of a link */
+       SMC_LNK_INACTIVE,       /* link is inactive */
+       SMC_LNK_ACTIVATING,     /* link is being activated */
+       SMC_LNK_ACTIVE          /* link is active */
+};
+
 #define SMC_WR_BUF_SIZE                48      /* size of work request buffer */
 
 struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
        u8                      peer_mac[ETH_ALEN];     /* = gid[8:10||13:15] */
        u8                      peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
        u8                      link_id;        /* unique # within link group */
+
+       enum smc_link_state     state;          /* state of link */
        struct completion       llc_confirm;    /* wait for rx of conf link */
        struct completion       llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+       int                     llc_confirm_rc; /* rc from confirm link msg */
+       int                     llc_confirm_resp_rc; /* rc from conf_resp msg */
+       struct completion       llc_add;        /* wait for rx of add link */
+       struct completion       llc_add_resp;   /* wait for rx of add link rsp*/
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken {                         /* address/key of remote RMB */
 struct smc_link_group {
        struct list_head        list;
        enum smc_lgr_role       role;           /* client or server */
-       __be32                  daddr;          /* destination ip address */
        struct smc_link         lnk[SMC_LINKS_PER_LGR_MAX];     /* smc link */
        char                    peer_systemid[SMC_SYSTEMID_LEN];
                                                /* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 
 void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 int smc_buf_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
index 92fe4cc..54e8d6d 100644 (file)
@@ -4,9 +4,6 @@
  *
  *  Link Layer Control (LLC)
  *
- *  For now, we only support the necessary "confirm link" functionality
- *  which happens for the first RoCE link after successful CLC handshake.
- *
  *  Copyright IBM Corp. 2016
  *
  *  Author(s):  Klaus Wacker <Klaus.Wacker@de.ibm.com>
 #include "smc_clc.h"
 #include "smc_llc.h"
 
+#define SMC_LLC_DATA_LEN               40
+
+struct smc_llc_hdr {
+       struct smc_wr_rx_hdr common;
+       u8 length;      /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u8 reserved:4,
+          add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u8 add_link_rej_rsn:4,
+          reserved:4;
+#endif
+       u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC      0x03
+
+struct smc_llc_msg_confirm_link {      /* type 0x01 */
+       struct smc_llc_hdr hd;
+       u8 sender_mac[ETH_ALEN];
+       u8 sender_gid[SMC_GID_SIZE];
+       u8 sender_qp_num[3];
+       u8 link_num;
+       u8 link_uid[SMC_LGR_ID_SIZE];
+       u8 max_links;
+       u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ       0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH    1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS      2
+
+struct smc_llc_msg_add_link {          /* type 0x02 */
+       struct smc_llc_hdr hd;
+       u8 sender_mac[ETH_ALEN];
+       u8 reserved2[2];
+       u8 sender_gid[SMC_GID_SIZE];
+       u8 sender_qp_num[3];
+       u8 link_num;
+       u8 flags2;      /* QP mtu */
+       u8 initial_psn[3];
+       u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL      0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY  0x20
+
+struct smc_llc_msg_del_link {          /* type 0x04 */
+       struct smc_llc_hdr hd;
+       u8 link_num;
+       __be32 reason;
+       u8 reserved[35];
+} __packed;                    /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link {         /* type 0x07 */
+       struct smc_llc_hdr hd;
+       u8 user_data[16];
+       u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+       union {
+               u8 num_rkeys;   /* first rtoken byte of CONFIRM LINK msg */
+                               /* is actually the num of rtokens, first */
+                               /* rtoken is always for the current link */
+               u8 link_id;     /* link id of the rtoken */
+       };
+       __be32 rmb_key;
+       __be64 rmb_vaddr;
+} __packed;                    /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG  3
+
+struct smc_llc_msg_confirm_rkey {      /* type 0x06 */
+       struct smc_llc_hdr hd;
+       struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+       u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+       struct smc_llc_hdr hd;
+       u8 num_rkeys;
+       struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX   8
+#define SMC_LLC_FLAG_RKEY_NEG  0x20
+
+struct smc_llc_msg_delete_rkey {       /* type 0x09 */
+       struct smc_llc_hdr hd;
+       u8 num_rkeys;
+       u8 err_mask;
+       u8 reserved[2];
+       __be32 rkey[8];
+       u8 reserved2[4];
+};
+
+union smc_llc_msg {
+       struct smc_llc_msg_confirm_link confirm_link;
+       struct smc_llc_msg_add_link add_link;
+       struct smc_llc_msg_del_link delete_link;
+
+       struct smc_llc_msg_confirm_rkey confirm_rkey;
+       struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+       struct smc_llc_msg_delete_rkey delete_rkey;
+
+       struct smc_llc_msg_test_link test_link;
+       struct {
+               struct smc_llc_hdr hdr;
+               u8 data[SMC_LLC_DATA_LEN];
+       } raw;
+};
+
+#define SMC_LLC_FLAG_RESP              0x80
+
 /********************************** send *************************************/
 
 struct smc_llc_tx_pend {
@@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
        memset(confllc, 0, sizeof(*confllc));
        confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
        confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+       confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
        if (reqresp == SMC_LLC_RESP)
                confllc->hd.flags |= SMC_LLC_FLAG_RESP;
        memcpy(confllc->sender_mac, mac, ETH_ALEN);
@@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
        hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
        /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
        memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
-       confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+       confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+                         union ib_gid *gid,
+                         enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_add_link *addllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       addllc = (struct smc_llc_msg_add_link *)wr_buf;
+       memset(addllc, 0, sizeof(*addllc));
+       addllc->hd.common.type = SMC_LLC_ADD_LINK;
+       addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+       if (reqresp == SMC_LLC_RESP) {
+               addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+               /* always reject more links for now */
+               addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+               addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+       }
+       memcpy(addllc->sender_mac, mac, ETH_ALEN);
+       memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+                            enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_del_link *delllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       delllc = (struct smc_llc_msg_del_link *)wr_buf;
+       memset(delllc, 0, sizeof(*delllc));
+       delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+       delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+       if (reqresp == SMC_LLC_RESP)
+               delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+       /* DEL_LINK_ALL because only 1 link supported */
+       delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+       delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+       delllc->link_num = link->link_id;
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+                          enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_test_link *testllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       testllc = (struct smc_llc_msg_test_link *)wr_buf;
+       memset(testllc, 0, sizeof(*testllc));
+       testllc->hd.common.type = SMC_LLC_TEST_LINK;
+       testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+       if (reqresp == SMC_LLC_RESP)
+               testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+       memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       memcpy(wr_buf, llcbuf, llclen);
        /* send llc message */
        rc = smc_wr_tx_send(link, pend);
        return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
                                    struct smc_llc_msg_confirm_link *llc)
 {
        struct smc_link_group *lgr;
+       int conf_rc;
 
        lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       /* RMBE eyecatchers are not supported */
+       if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+               conf_rc = 0;
+       else
+               conf_rc = ENOTSUPP;
+
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-               if (lgr->role == SMC_SERV)
+               if (lgr->role == SMC_SERV &&
+                   link->state == SMC_LNK_ACTIVATING) {
+                       link->llc_confirm_resp_rc = conf_rc;
                        complete(&link->llc_confirm_resp);
+               }
        } else {
-               if (lgr->role == SMC_CLNT) {
+               if (lgr->role == SMC_CLNT &&
+                   link->state == SMC_LNK_ACTIVATING) {
+                       link->llc_confirm_rc = conf_rc;
                        link->link_id = llc->link_num;
                        complete(&link->llc_confirm);
                }
        }
 }
 
+static void smc_llc_rx_add_link(struct smc_link *link,
+                               struct smc_llc_msg_add_link *llc)
+{
+       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+                                                 lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               if (link->state == SMC_LNK_ACTIVATING)
+                       complete(&link->llc_add_resp);
+       } else {
+               if (link->state == SMC_LNK_ACTIVATING) {
+                       complete(&link->llc_add);
+                       return;
+               }
+
+               if (lgr->role == SMC_SERV) {
+                       smc_llc_send_add_link(link,
+                                       link->smcibdev->mac[link->ibport - 1],
+                                       &link->smcibdev->gid[link->ibport - 1],
+                                       SMC_LLC_REQ);
+
+               } else {
+                       smc_llc_send_add_link(link,
+                                       link->smcibdev->mac[link->ibport - 1],
+                                       &link->smcibdev->gid[link->ibport - 1],
+                                       SMC_LLC_RESP);
+               }
+       }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+                                  struct smc_llc_msg_del_link *llc)
+{
+       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+                                                 lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               if (lgr->role == SMC_SERV)
+                       smc_lgr_terminate(lgr);
+       } else {
+               if (lgr->role == SMC_SERV) {
+                       smc_lgr_forget(lgr);
+                       smc_llc_send_delete_link(link, SMC_LLC_REQ);
+               } else {
+                       smc_llc_send_delete_link(link, SMC_LLC_RESP);
+                       smc_lgr_terminate(lgr);
+               }
+       }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+                                struct smc_llc_msg_test_link *llc)
+{
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+       }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+                                   struct smc_llc_msg_confirm_rkey *llc)
+{
+       struct smc_link_group *lgr;
+       int rc;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               rc = smc_rtoken_add(lgr,
+                                   llc->rtoken[0].rmb_vaddr,
+                                   llc->rtoken[0].rmb_key);
+
+               /* ignore rtokens for other links, we have only one link */
+
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               if (rc < 0)
+                       llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+                                     struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               /* ignore rtokens for other links, we have only one link */
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+                                  struct smc_llc_msg_delete_rkey *llc)
+{
+       struct smc_link_group *lgr;
+       u8 err_mask = 0;
+       int i, max;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+               for (i = 0; i < max; i++) {
+                       if (smc_rtoken_delete(lgr, llc->rkey[i]))
+                               err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+               }
+
+               if (err_mask) {
+                       llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+                       llc->err_mask = err_mask;
+               }
+
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 {
        struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
                return; /* short message */
        if (llc->raw.hdr.length != sizeof(*llc))
                return; /* invalid message */
-       if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+       switch (llc->raw.hdr.common.type) {
+       case SMC_LLC_TEST_LINK:
+               smc_llc_rx_test_link(link, &llc->test_link);
+               break;
+       case SMC_LLC_CONFIRM_LINK:
                smc_llc_rx_confirm_link(link, &llc->confirm_link);
+               break;
+       case SMC_LLC_ADD_LINK:
+               smc_llc_rx_add_link(link, &llc->add_link);
+               break;
+       case SMC_LLC_DELETE_LINK:
+               smc_llc_rx_delete_link(link, &llc->delete_link);
+               break;
+       case SMC_LLC_CONFIRM_RKEY:
+               smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+               break;
+       case SMC_LLC_CONFIRM_RKEY_CONT:
+               smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+               break;
+       case SMC_LLC_DELETE_RKEY:
+               smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+               break;
+       }
 }
 
 /***************************** init, exit, misc ******************************/
@@ -139,6 +509,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
                .handler        = smc_llc_rx_handler,
                .type           = SMC_LLC_CONFIRM_LINK
        },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_TEST_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_ADD_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_DELETE_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_CONFIRM_RKEY
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_CONFIRM_RKEY_CONT
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_DELETE_RKEY
+       },
        {
                .handler        = NULL,
        }
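
One invariant worth spelling out: smc_llc_rx_handler() rejects any message whose length byte differs from sizeof(union smc_llc_msg), so every message struct above must pack to exactly 44 bytes on the wire. A compile-time check along these lines (illustrative, not in the patch) would pin that down:

static inline void smc_llc_assert_wire_size(void)
{
	/* 4-byte smc_llc_hdr + SMC_LLC_DATA_LEN (40) bytes of payload */
	BUILD_BUG_ON(sizeof(union smc_llc_msg) !=
		     sizeof(struct smc_llc_hdr) + SMC_LLC_DATA_LEN);
}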
index 51b27ce..e4a7d5e 100644 (file)
@@ -18,6 +18,7 @@
 #define SMC_LLC_FLAG_RESP              0x80
 
 #define SMC_LLC_WAIT_FIRST_TIME                (5 * HZ)
+#define SMC_LLC_WAIT_TIME              (2 * HZ)
 
 enum smc_llc_reqresp {
        SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
 
 enum smc_llc_msg_type {
        SMC_LLC_CONFIRM_LINK            = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN               40
-
-struct smc_llc_hdr {
-       struct smc_wr_rx_hdr common;
-       u8 length;      /* 44 */
-       u8 reserved;
-       u8 flags;
-};
-
-struct smc_llc_msg_confirm_link {      /* type 0x01 */
-       struct smc_llc_hdr hd;
-       u8 sender_mac[ETH_ALEN];
-       u8 sender_gid[SMC_GID_SIZE];
-       u8 sender_qp_num[3];
-       u8 link_num;
-       u8 link_uid[SMC_LGR_ID_SIZE];
-       u8 max_links;
-       u8 reserved[9];
-};
-
-union smc_llc_msg {
-       struct smc_llc_msg_confirm_link confirm_link;
-       struct {
-               struct smc_llc_hdr hdr;
-               u8 data[SMC_LLC_DATA_LEN];
-       } raw;
+       SMC_LLC_ADD_LINK                = 0x02,
+       SMC_LLC_DELETE_LINK             = 0x04,
+       SMC_LLC_CONFIRM_RKEY            = 0x06,
+       SMC_LLC_TEST_LINK               = 0x07,
+       SMC_LLC_CONFIRM_RKEY_CONT       = 0x08,
+       SMC_LLC_DELETE_RKEY             = 0x09,
 };
 
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
                              enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+                         enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+                            enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+                          enum smc_llc_reqresp reqresp);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
index a93c99b..d9a1ac2 100644 (file)
 #include <linux/ipv6_route.h>
 #include <linux/route.h>
 #include <linux/sockios.h>
-#include <linux/atalk.h>
 #include <net/busy_poll.h>
 #include <linux/errqueue.h>
 
@@ -234,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
        return __put_user(klen, ulen);
 }
 
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
@@ -991,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *     what to do with it - that's up to the protocol still.
  */
 
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
 {
        return &get_net(container_of(ns, struct net, ns))->ns;
 }
+EXPORT_SYMBOL_GPL(get_net_ns);
 
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
@@ -1573,8 +1573,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
                goto out_fd;
 
        if (upeer_sockaddr) {
-               if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
-                                         &len, 2) < 0) {
+               len = newsock->ops->getname(newsock,
+                                       (struct sockaddr *)&address, 2);
+               if (len < 0) {
                        err = -ECONNABORTED;
                        goto out_fd;
                }
@@ -1654,7 +1655,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 {
        struct socket *sock;
        struct sockaddr_storage address;
-       int len, err, fput_needed;
+       int err, fput_needed;
 
        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (!sock)
@@ -1664,10 +1665,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
        if (err)
                goto out_put;
 
-       err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
-       if (err)
+       err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+       if (err < 0)
                goto out_put;
-       err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+        /* "err" is actually length in this case */
+       err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
 
 out_put:
        fput_light(sock->file, fput_needed);
@@ -1685,7 +1687,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 {
        struct socket *sock;
        struct sockaddr_storage address;
-       int len, err, fput_needed;
+       int err, fput_needed;
 
        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (sock != NULL) {
@@ -1695,11 +1697,10 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
                        return err;
                }
 
-               err =
-                   sock->ops->getname(sock, (struct sockaddr *)&address, &len,
-                                      1);
-               if (!err)
-                       err = move_addr_to_user(&address, len, usockaddr,
+               err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+               if (err >= 0)
+                       /* "err" is actually length in this case */
+                       err = move_addr_to_user(&address, err, usockaddr,
                                                usockaddr_len);
                fput_light(sock->file, fput_needed);
        }
@@ -2288,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
        if (!sock)
                return err;
 
-       err = sock_error(sock->sk);
-       if (err) {
-               datagrams = err;
-               goto out_put;
+       if (likely(!(flags & MSG_ERRQUEUE))) {
+               err = sock_error(sock->sk);
+               if (err) {
+                       datagrams = err;
+                       goto out_put;
+               }
        }
 
        entry = mmsg;
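
The MSG_ERRQUEUE guard matters because error-queue reads are expected to
succeed precisely when a socket error is pending; without it, a queued
sk_err would make recvmmsg() bail out before the error queue could be
drained. A userspace fragment for illustration (socket setup omitted,
names illustrative):

    /* Drain e.g. TX timestamps from the error queue; with the fix this
     * works even while a socket error is pending.
     */
    struct mmsghdr msgs[8] = {};
    int n = recvmmsg(fd, msgs, 8, MSG_ERRQUEUE | MSG_DONTWAIT, NULL);
    if (n < 0 && errno != EAGAIN)
            perror("recvmmsg");
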
@@ -3166,17 +3169,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 }
 EXPORT_SYMBOL(kernel_connect);
 
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-                        int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
 {
-       return sock->ops->getname(sock, addr, addrlen, 0);
+       return sock->ops->getname(sock, addr, 0);
 }
 EXPORT_SYMBOL(kernel_getsockname);
 
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-                        int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
 {
-       return sock->ops->getname(sock, addr, addrlen, 1);
+       return sock->ops->getname(sock, addr, 1);
 }
 EXPORT_SYMBOL(kernel_getpeername);
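
All the getname-related hunks in this file implement one convention
change: proto_ops->getname() and the kernel_get{sock,peer}name() wrappers
no longer fill an int *len out-parameter; they return the address length
on success or a negative errno. The updated caller pattern, sketched with
illustrative names:

    struct sockaddr_storage addr;
    int len;

    len = kernel_getsockname(sock, (struct sockaddr *)&addr);
    if (len < 0)
            return len;     /* negative errno */
    /* on success, len is the filled-in address length */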
 
index 6e432ec..8063956 100644 (file)
@@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = {
  * negative errno is returned.
  */
 static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
-                       struct sockaddr *buf, int buflen)
+                       struct sockaddr *buf)
 {
        struct socket *sock;
        int err;
@@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
                goto out_release;
        }
 
-       err = kernel_getsockname(sock, buf, &buflen);
+       err = kernel_getsockname(sock, buf);
        if (err < 0) {
                dprintk("RPC:       getsockname failed (%d)\n", err);
                goto out_release;
@@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
        rcu_read_unlock();
 
        rpc_set_port(sap, 0);
-       err = rpc_sockname(net, sap, salen, buf, buflen);
+       err = rpc_sockname(net, sap, salen, buf);
        put_net(net);
        if (err != 0)
                /* Couldn't discover local address, return ANYADDR */
index 943f2a7..08cd951 100644 (file)
@@ -832,12 +832,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
        }
        set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 
-       err = kernel_getpeername(newsock, sin, &slen);
+       err = kernel_getpeername(newsock, sin);
        if (err < 0) {
                net_warn_ratelimited("%s: peername failed (err %d)!\n",
                                     serv->sv_name, -err);
                goto failed;            /* aborted connection or whatever */
        }
+       slen = err;
 
        /* Ideally, we would want to reject connections from unauthorized
         * hosts here, but when we get encryption, the IP of the host won't
@@ -866,7 +867,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
        if (IS_ERR(newsvsk))
                goto failed;
        svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
-       err = kernel_getsockname(newsock, sin, &slen);
+       err = kernel_getsockname(newsock, sin);
+       slen = err;
        if (unlikely(err < 0)) {
                dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
                slen = offsetof(struct sockaddr, sa_data);
@@ -1465,7 +1467,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
                err = PTR_ERR(svsk);
                goto out;
        }
-       if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+       salen = kernel_getsockname(svsk->sk_sock, sin);
+       if (salen >= 0)
                svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
        svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
        return svc_one_sock_name(svsk, name_return, len);
@@ -1539,10 +1542,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
        if (error < 0)
                goto bummer;
 
-       newlen = len;
-       error = kernel_getsockname(sock, newsin, &newlen);
+       error = kernel_getsockname(sock, newsin);
        if (error < 0)
                goto bummer;
+       newlen = error;
 
        if (protocol == IPPROTO_TCP) {
                if ((error = kernel_listen(sock, 64)) < 0)
index a6b8c1f..956e29c 100644 (file)
@@ -1794,10 +1794,9 @@ static void xs_sock_set_reuseport(struct socket *sock)
 static unsigned short xs_sock_getport(struct socket *sock)
 {
        struct sockaddr_storage buf;
-       int buflen;
        unsigned short port = 0;
 
-       if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+       if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
                goto out;
        switch (buf.ss_family) {
        case AF_INET6:
index 9aed6fe..f424539 100644 (file)
@@ -89,6 +89,7 @@ static void __net_exit sysctl_net_exit(struct net *net)
 static struct pernet_operations sysctl_pernet_ops = {
        .init = sysctl_net_init,
        .exit = sysctl_net_exit,
+       .async = true,
 };
 
 static struct ctl_table_header *net_header;
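
Setting .async = true opts these pernet operations into the ongoing
net_mutex removal: their init/exit callbacks may then run without
serializing against every other namespace's setup and teardown. The
registration shape, with hypothetical names:

    static struct pernet_operations example_pernet_ops = {
            .init  = example_net_init,
            .exit  = example_net_exit,
            .async = true,  /* may run without the global net_mutex */
    };

    /* registered as usual: register_pernet_subsys(&example_pernet_ops); */
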
index 37bb0bf..1edb719 100644 (file)
@@ -9,7 +9,7 @@ tipc-y  += addr.o bcast.o bearer.o \
           core.o link.o discover.o msg.o  \
           name_distr.o  subscr.o monitor.o name_table.o net.o  \
           netlink.o netlink_compat.o node.o socket.o eth_media.o \
-          server.o socket.o group.o
+          topsrv.o socket.o group.o
 
 tipc-$(CONFIG_TIPC_MEDIA_UDP)  += udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)   += ib_media.o
index c800147..f3d2e83 100644 (file)
@@ -813,7 +813,7 @@ err_out:
        return err;
 }
 
-int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
 {
        int err;
        char *name;
@@ -835,20 +835,27 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
 
        name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
-       rtnl_lock();
        bearer = tipc_bearer_find(net, name);
-       if (!bearer) {
-               rtnl_unlock();
+       if (!bearer)
                return -EINVAL;
-       }
 
        bearer_disable(net, bearer);
-       rtnl_unlock();
 
        return 0;
 }
 
-int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+{
+       int err;
+
+       rtnl_lock();
+       err = __tipc_nl_bearer_disable(skb, info);
+       rtnl_unlock();
+
+       return err;
+}
+
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 {
        int err;
        char *bearer;
@@ -890,15 +897,18 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
                        prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
        }
 
+       return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+}
+
+int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+{
+       int err;
+
        rtnl_lock();
-       err = tipc_enable_bearer(net, bearer, domain, prio, attrs);
-       if (err) {
-               rtnl_unlock();
-               return err;
-       }
+       err = __tipc_nl_bearer_enable(skb, info);
        rtnl_unlock();
 
-       return 0;
+       return err;
 }
 
 int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
@@ -944,13 +954,13 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
        return 0;
 }
 
-int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 {
-       int err;
-       char *name;
        struct tipc_bearer *b;
        struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
        struct net *net = sock_net(skb->sk);
+       char *name;
+       int err;
 
        if (!info->attrs[TIPC_NLA_BEARER])
                return -EINVAL;
@@ -965,35 +975,42 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
                return -EINVAL;
        name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
-       rtnl_lock();
        b = tipc_bearer_find(net, name);
-       if (!b) {
-               rtnl_unlock();
+       if (!b)
                return -EINVAL;
-       }
 
        if (attrs[TIPC_NLA_BEARER_PROP]) {
                struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
 
                err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP],
                                              props);
-               if (err) {
-                       rtnl_unlock();
+               if (err)
                        return err;
-               }
 
-               if (props[TIPC_NLA_PROP_TOL])
+               if (props[TIPC_NLA_PROP_TOL]) {
                        b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+                       tipc_node_apply_tolerance(net, b);
+               }
                if (props[TIPC_NLA_PROP_PRIO])
                        b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
                if (props[TIPC_NLA_PROP_WIN])
                        b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
        }
-       rtnl_unlock();
 
        return 0;
 }
 
+int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+{
+       int err;
+
+       rtnl_lock();
+       err = __tipc_nl_bearer_set(skb, info);
+       rtnl_unlock();
+
+       return err;
+}
+
 static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
                               struct tipc_media *media, int nlflags)
 {
@@ -1115,7 +1132,7 @@ err_out:
        return err;
 }
 
-int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
 {
        int err;
        char *name;
@@ -1133,22 +1150,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
                return -EINVAL;
        name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
 
-       rtnl_lock();
        m = tipc_media_find(name);
-       if (!m) {
-               rtnl_unlock();
+       if (!m)
                return -EINVAL;
-       }
 
        if (attrs[TIPC_NLA_MEDIA_PROP]) {
                struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
 
                err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP],
                                              props);
-               if (err) {
-                       rtnl_unlock();
+               if (err)
                        return err;
-               }
 
                if (props[TIPC_NLA_PROP_TOL])
                        m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
@@ -1157,7 +1169,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
                if (props[TIPC_NLA_PROP_WIN])
                        m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
        }
-       rtnl_unlock();
 
        return 0;
 }
+
+int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+{
+       int err;
+
+       rtnl_lock();
+       err = __tipc_nl_media_set(skb, info);
+       rtnl_unlock();
+
+       return err;
+}
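
The pattern repeated throughout this file (and in net.c below) is a plain
lock split: each __tipc_nl_* variant assumes the caller already holds
rtnl_lock, while the unprefixed genl handler takes and releases the lock
around it. This is what lets the compat layer, which now wraps transcode
and doit in a single rtnl_lock section, call the lock-free variants
without deadlocking. The skeleton, with hypothetical names:

    /* Locked variant: caller must hold rtnl_lock (e.g. the compat path). */
    int __example_doit(struct sk_buff *skb, struct genl_info *info)
    {
            ASSERT_RTNL();          /* documents the locking contract */
            /* ... the actual work, with no rtnl_lock/rtnl_unlock inside ... */
            return 0;
    }

    /* Direct netlink entry point: take the lock, delegate, release. */
    int example_doit(struct sk_buff *skb, struct genl_info *info)
    {
            int err;

            rtnl_lock();
            err = __example_doit(skb, info);
            rtnl_unlock();

            return err;
    }
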
index 42d6eee..a53613d 100644 (file)
@@ -188,15 +188,19 @@ extern struct tipc_media udp_media_info;
 #endif
 
 int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_media_set_priority(const char *name, u32 new_value);
 int tipc_media_set_window(const char *name, u32 new_value);
index 20b21af..ff8b071 100644 (file)
@@ -64,7 +64,7 @@ struct tipc_bearer;
 struct tipc_bc_base;
 struct tipc_link;
 struct tipc_name_table;
-struct tipc_server;
+struct tipc_topsrv;
 struct tipc_monitor;
 
 #define TIPC_MOD_VER "2.0.0"
@@ -112,7 +112,7 @@ struct tipc_net {
        struct list_head dist_queue;
 
        /* Topology subscription server */
-       struct tipc_server *topsrv;
+       struct tipc_topsrv *topsrv;
        atomic_t subscription_count;
 };
 
@@ -131,7 +131,7 @@ static inline struct list_head *tipc_nodes(struct net *net)
        return &tipc_net(net)->node_list;
 }
 
-static inline struct tipc_server *tipc_topsrv(struct net *net)
+static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
 {
        return tipc_net(net)->topsrv;
 }
index 122162a..03086cc 100644 (file)
@@ -37,7 +37,7 @@
 #include "addr.h"
 #include "group.h"
 #include "bcast.h"
-#include "server.h"
+#include "topsrv.h"
 #include "msg.h"
 #include "socket.h"
 #include "node.h"
index 2d6b2ae..3c23046 100644 (file)
@@ -2126,7 +2126,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
                             struct sk_buff_head *xmitq)
 {
        l->tolerance = tol;
-       tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
+       if (link_is_up(l))
+               tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
 }
 
 void tipc_link_set_prio(struct tipc_link *l, u32 prio,
index ed0457c..e01c9c6 100644 (file)
@@ -326,10 +326,10 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 
        /* Any subscriptions waiting for notification?  */
        list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
-               tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
-                                           TIPC_PUBLISHED, publ->ref,
-                                           publ->node, publ->scope,
-                                           created_subseq);
+               tipc_sub_report_overlap(s, publ->lower, publ->upper,
+                                       TIPC_PUBLISHED, publ->ref,
+                                       publ->node, publ->scope,
+                                       created_subseq);
        }
        return publ;
 }
@@ -397,10 +397,9 @@ found:
 
        /* Notify any waiting subscriptions */
        list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
-               tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
-                                           TIPC_WITHDRAWN, publ->ref,
-                                           publ->node, publ->scope,
-                                           removed_subseq);
+               tipc_sub_report_overlap(s, publ->lower, publ->upper,
+                                       TIPC_WITHDRAWN, publ->ref, publ->node,
+                                       publ->scope, removed_subseq);
        }
 
        return publ;
@@ -412,33 +411,37 @@ found:
  * sequence overlapping with the requested sequence
  */
 static void tipc_nameseq_subscribe(struct name_seq *nseq,
-                                  struct tipc_subscription *s,
-                                  bool status)
+                                  struct tipc_subscription *sub)
 {
        struct sub_seq *sseq = nseq->sseqs;
        struct tipc_name_seq ns;
+       struct tipc_subscr *s = &sub->evt.s;
+       bool no_status;
 
-       tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+       ns.type = tipc_sub_read(s, seq.type);
+       ns.lower = tipc_sub_read(s, seq.lower);
+       ns.upper = tipc_sub_read(s, seq.upper);
+       no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
 
-       tipc_subscrp_get(s);
-       list_add(&s->nameseq_list, &nseq->subscriptions);
+       tipc_sub_get(sub);
+       list_add(&sub->nameseq_list, &nseq->subscriptions);
 
-       if (!status || !sseq)
+       if (no_status || !sseq)
                return;
 
        while (sseq != &nseq->sseqs[nseq->first_free]) {
-               if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
+               if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) {
                        struct publication *crs;
                        struct name_info *info = sseq->info;
                        int must_report = 1;
 
                        list_for_each_entry(crs, &info->zone_list, zone_list) {
-                               tipc_subscrp_report_overlap(s, sseq->lower,
-                                                           sseq->upper,
-                                                           TIPC_PUBLISHED,
-                                                           crs->ref, crs->node,
-                                                           crs->scope,
-                                                           must_report);
+                               tipc_sub_report_overlap(sub, sseq->lower,
+                                                       sseq->upper,
+                                                       TIPC_PUBLISHED,
+                                                       crs->ref, crs->node,
+                                                       crs->scope,
+                                                       must_report);
                                must_report = 0;
                        }
                }
@@ -808,24 +811,27 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
 /**
  * tipc_nametbl_subscribe - add a subscription object to the name table
  */
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
+void tipc_nametbl_subscribe(struct tipc_subscription *sub)
 {
-       struct tipc_net *tn = net_generic(s->net, tipc_net_id);
-       u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+       struct tipc_net *tn = tipc_net(sub->net);
+       struct tipc_subscr *s = &sub->evt.s;
+       u32 type = tipc_sub_read(s, seq.type);
        int index = hash(type);
        struct name_seq *seq;
        struct tipc_name_seq ns;
 
        spin_lock_bh(&tn->nametbl_lock);
-       seq = nametbl_find_seq(s->net, type);
+       seq = nametbl_find_seq(sub->net, type);
        if (!seq)
                seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
        if (seq) {
                spin_lock_bh(&seq->lock);
-               tipc_nameseq_subscribe(seq, s, status);
+               tipc_nameseq_subscribe(seq, sub);
                spin_unlock_bh(&seq->lock);
        } else {
-               tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+               ns.type = tipc_sub_read(s, seq.type);
+               ns.lower = tipc_sub_read(s, seq.lower);
+               ns.upper = tipc_sub_read(s, seq.upper);
                pr_warn("Failed to create subscription for {%u,%u,%u}\n",
                        ns.type, ns.lower, ns.upper);
        }
@@ -835,18 +841,19 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
 /**
  * tipc_nametbl_unsubscribe - remove a subscription object from name table
  */
-void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 {
-       struct tipc_net *tn = net_generic(s->net, tipc_net_id);
+       struct tipc_subscr *s = &sub->evt.s;
+       struct tipc_net *tn = tipc_net(sub->net);
        struct name_seq *seq;
-       u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+       u32 type = tipc_sub_read(s, seq.type);
 
        spin_lock_bh(&tn->nametbl_lock);
-       seq = nametbl_find_seq(s->net, type);
+       seq = nametbl_find_seq(sub->net, type);
        if (seq != NULL) {
                spin_lock_bh(&seq->lock);
-               list_del_init(&s->nameseq_list);
-               tipc_subscrp_put(s);
+               list_del_init(&sub->nameseq_list);
+               tipc_sub_put(sub);
                if (!seq->first_free && list_empty(&seq->subscriptions)) {
                        hlist_del_init_rcu(&seq->ns_list);
                        kfree(seq->sseqs);
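
tipc_sub_read() replaces the old swap-flag plumbing: rather than passing
s->swap alongside every field, each field of the wire-format struct
tipc_subscr is read through a macro that derives the byte-swap decision
from the subscription itself. A plausible sketch of such a helper (the
real definition lives in subscr.h, outside this excerpt; the filter-mask
test is an assumption):

    /* Sketch only: read a possibly foreign-endian field of tipc_subscr. */
    #define tipc_sub_read(sub_, fld_)                                    \
            ({                                                           \
                    struct tipc_subscr *sub__ = (sub_);                  \
                    u32 val__ = (sub__)->fld_;                           \
                    int swap__ = !((sub__)->filter & TIPC_FILTER_MASK);  \
                    swap__ ? swab32(val__) : val__;                      \
            })
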
index f56e7cb..1765260 100644 (file)
@@ -120,7 +120,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
                                             u32 lower, u32 node, u32 ref,
                                             u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
+void tipc_nametbl_subscribe(struct tipc_subscription *s);
 void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
 void tipc_nametbl_stop(struct net *net);
index 719c592..1a2fde0 100644 (file)
@@ -200,7 +200,7 @@ out:
        return skb->len;
 }
 
-int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = sock_net(skb->sk);
        struct tipc_net *tn = net_generic(net, tipc_net_id);
@@ -241,10 +241,19 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
                if (!tipc_addr_node_valid(addr))
                        return -EINVAL;
 
-               rtnl_lock();
                tipc_net_start(net, addr);
-               rtnl_unlock();
        }
 
        return 0;
 }
+
+int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
+{
+       int err;
+
+       rtnl_lock();
+       err = __tipc_nl_net_set(skb, info);
+       rtnl_unlock();
+
+       return err;
+}
index c7c2549..c0306aa 100644 (file)
@@ -47,5 +47,6 @@ void tipc_net_stop(struct net *net);
 
 int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
 
 #endif
index e48f0b2..4492cda 100644 (file)
@@ -285,10 +285,6 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
        if (!trans_buf)
                return -ENOMEM;
 
-       err = (*cmd->transcode)(cmd, trans_buf, msg);
-       if (err)
-               goto trans_out;
-
        attrbuf = kmalloc((tipc_genl_family.maxattr + 1) *
                        sizeof(struct nlattr *), GFP_KERNEL);
        if (!attrbuf) {
@@ -296,27 +292,34 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
                goto trans_out;
        }
 
-       err = nla_parse(attrbuf, tipc_genl_family.maxattr,
-                       (const struct nlattr *)trans_buf->data,
-                       trans_buf->len, NULL, NULL);
-       if (err)
-               goto parse_out;
-
        doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!doit_buf) {
                err = -ENOMEM;
-               goto parse_out;
+               goto attrbuf_out;
        }
 
-       doit_buf->sk = msg->dst_sk;
-
        memset(&info, 0, sizeof(info));
        info.attrs = attrbuf;
 
+       rtnl_lock();
+       err = (*cmd->transcode)(cmd, trans_buf, msg);
+       if (err)
+               goto doit_out;
+
+       err = nla_parse(attrbuf, tipc_genl_family.maxattr,
+                       (const struct nlattr *)trans_buf->data,
+                       trans_buf->len, NULL, NULL);
+       if (err)
+               goto doit_out;
+
+       doit_buf->sk = msg->dst_sk;
+
        err = (*cmd->doit)(doit_buf, &info);
+doit_out:
+       rtnl_unlock();
 
        kfree_skb(doit_buf);
-parse_out:
+attrbuf_out:
        kfree(attrbuf);
 trans_out:
        kfree_skb(trans_buf);
@@ -722,13 +725,13 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
 
        media = tipc_media_find(lc->name);
        if (media) {
-               cmd->doit = &tipc_nl_media_set;
+               cmd->doit = &__tipc_nl_media_set;
                return tipc_nl_compat_media_set(skb, msg);
        }
 
        bearer = tipc_bearer_find(msg->net, lc->name);
        if (bearer) {
-               cmd->doit = &tipc_nl_bearer_set;
+               cmd->doit = &__tipc_nl_bearer_set;
                return tipc_nl_compat_bearer_set(skb, msg);
        }
 
@@ -1089,12 +1092,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
                return tipc_nl_compat_dumpit(&dump, msg);
        case TIPC_CMD_ENABLE_BEARER:
                msg->req_type = TIPC_TLV_BEARER_CONFIG;
-               doit.doit = tipc_nl_bearer_enable;
+               doit.doit = __tipc_nl_bearer_enable;
                doit.transcode = tipc_nl_compat_bearer_enable;
                return tipc_nl_compat_doit(&doit, msg);
        case TIPC_CMD_DISABLE_BEARER:
                msg->req_type = TIPC_TLV_BEARER_NAME;
-               doit.doit = tipc_nl_bearer_disable;
+               doit.doit = __tipc_nl_bearer_disable;
                doit.transcode = tipc_nl_compat_bearer_disable;
                return tipc_nl_compat_doit(&doit, msg);
        case TIPC_CMD_SHOW_LINK_STATS:
@@ -1148,12 +1151,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
                return tipc_nl_compat_dumpit(&dump, msg);
        case TIPC_CMD_SET_NODE_ADDR:
                msg->req_type = TIPC_TLV_NET_ADDR;
-               doit.doit = tipc_nl_net_set;
+               doit.doit = __tipc_nl_net_set;
                doit.transcode = tipc_nl_compat_net_set;
                return tipc_nl_compat_doit(&doit, msg);
        case TIPC_CMD_SET_NETID:
                msg->req_type = TIPC_TLV_UNSIGNED;
-               doit.doit = tipc_nl_net_set;
+               doit.doit = __tipc_nl_net_set;
                doit.transcode = tipc_nl_compat_net_set;
                return tipc_nl_compat_doit(&doit, msg);
        case TIPC_CMD_GET_NETID:
index 9036d87..389193d 100644 (file)
@@ -1618,6 +1618,30 @@ discard:
        kfree_skb(skb);
 }
 
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+{
+       struct tipc_net *tn = tipc_net(net);
+       int bearer_id = b->identity;
+       struct sk_buff_head xmitq;
+       struct tipc_link_entry *e;
+       struct tipc_node *n;
+
+       __skb_queue_head_init(&xmitq);
+
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(n, &tn->node_list, list) {
+               tipc_node_write_lock(n);
+               e = &n->links[bearer_id];
+               if (e->link)
+                       tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+               tipc_node_write_unlock(n);
+               tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+       }
+
+       rcu_read_unlock();
+}
+
 int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = sock_net(skb->sk);
index acd58d2..4ce5e3a 100644 (file)
@@ -65,6 +65,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
                          struct tipc_media_addr *maddr,
                          bool *respond, bool *dupl_addr);
 void tipc_node_delete_links(struct net *net, int bearer_id);
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
 int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
                           char *linkname, size_t len);
 int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
diff --git a/net/tipc/server.c b/net/tipc/server.c
deleted file mode 100644 (file)
index df0c563..0000000
+++ /dev/null
@@ -1,710 +0,0 @@
-/*
- * net/tipc/server.c: TIPC server infrastructure
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "server.h"
-#include "core.h"
-#include "socket.h"
-#include "addr.h"
-#include "msg.h"
-#include <net/sock.h>
-#include <linux/module.h>
-
-/* Number of messages to send before rescheduling */
-#define MAX_SEND_MSG_COUNT     25
-#define MAX_RECV_MSG_COUNT     25
-#define CF_CONNECTED           1
-#define CF_SERVER              2
-
-#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
-
-/**
- * struct tipc_conn - TIPC connection structure
- * @kref: reference counter to connection object
- * @conid: connection identifier
- * @sock: socket handler associated with connection
- * @flags: indicates connection state
- * @server: pointer to connected server
- * @rwork: receive work item
- * @usr_data: user-specified field
- * @rx_action: what to do when connection socket is active
- * @outqueue: pointer to first outbound message in queue
- * @outqueue_lock: control access to the outqueue
- * @outqueue: list of connection objects for its server
- * @swork: send work item
- */
-struct tipc_conn {
-       struct kref kref;
-       int conid;
-       struct socket *sock;
-       unsigned long flags;
-       struct tipc_server *server;
-       struct work_struct rwork;
-       int (*rx_action) (struct tipc_conn *con);
-       void *usr_data;
-       struct list_head outqueue;
-       spinlock_t outqueue_lock;
-       struct work_struct swork;
-};
-
-/* An entry waiting to be sent */
-struct outqueue_entry {
-       struct list_head list;
-       struct kvec iov;
-       struct sockaddr_tipc dest;
-};
-
-static void tipc_recv_work(struct work_struct *work);
-static void tipc_send_work(struct work_struct *work);
-static void tipc_clean_outqueues(struct tipc_conn *con);
-
-static void tipc_conn_kref_release(struct kref *kref)
-{
-       struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
-       struct tipc_server *s = con->server;
-       struct sockaddr_tipc *saddr = s->saddr;
-       struct socket *sock = con->sock;
-       struct sock *sk;
-
-       if (sock) {
-               sk = sock->sk;
-               if (test_bit(CF_SERVER, &con->flags)) {
-                       __module_get(sock->ops->owner);
-                       __module_get(sk->sk_prot_creator->owner);
-               }
-               saddr->scope = -TIPC_NODE_SCOPE;
-               kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
-               sock_release(sock);
-               con->sock = NULL;
-       }
-       spin_lock_bh(&s->idr_lock);
-       idr_remove(&s->conn_idr, con->conid);
-       s->idr_in_use--;
-       spin_unlock_bh(&s->idr_lock);
-       tipc_clean_outqueues(con);
-       kfree(con);
-}
-
-static void conn_put(struct tipc_conn *con)
-{
-       kref_put(&con->kref, tipc_conn_kref_release);
-}
-
-static void conn_get(struct tipc_conn *con)
-{
-       kref_get(&con->kref);
-}
-
-static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
-{
-       struct tipc_conn *con;
-
-       spin_lock_bh(&s->idr_lock);
-       con = idr_find(&s->conn_idr, conid);
-       if (con) {
-               if (!test_bit(CF_CONNECTED, &con->flags) ||
-                   !kref_get_unless_zero(&con->kref))
-                       con = NULL;
-       }
-       spin_unlock_bh(&s->idr_lock);
-       return con;
-}
-
-static void sock_data_ready(struct sock *sk)
-{
-       struct tipc_conn *con;
-
-       read_lock_bh(&sk->sk_callback_lock);
-       con = sock2con(sk);
-       if (con && test_bit(CF_CONNECTED, &con->flags)) {
-               conn_get(con);
-               if (!queue_work(con->server->rcv_wq, &con->rwork))
-                       conn_put(con);
-       }
-       read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void sock_write_space(struct sock *sk)
-{
-       struct tipc_conn *con;
-
-       read_lock_bh(&sk->sk_callback_lock);
-       con = sock2con(sk);
-       if (con && test_bit(CF_CONNECTED, &con->flags)) {
-               conn_get(con);
-               if (!queue_work(con->server->send_wq, &con->swork))
-                       conn_put(con);
-       }
-       read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
-{
-       struct sock *sk = sock->sk;
-
-       write_lock_bh(&sk->sk_callback_lock);
-
-       sk->sk_data_ready = sock_data_ready;
-       sk->sk_write_space = sock_write_space;
-       sk->sk_user_data = con;
-
-       con->sock = sock;
-
-       write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
-{
-       struct tipc_server *s = con->server;
-       struct sock *sk = con->sock->sk;
-       bool disconnect = false;
-
-       write_lock_bh(&sk->sk_callback_lock);
-       disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
-       if (disconnect) {
-               sk->sk_user_data = NULL;
-               if (con->conid)
-                       s->tipc_conn_release(con->conid, con->usr_data);
-       }
-       write_unlock_bh(&sk->sk_callback_lock);
-
-       /* Handle concurrent calls from sending and receiving threads */
-       if (!disconnect)
-               return;
-
-       /* Don't flush pending works, -just let them expire */
-       kernel_sock_shutdown(con->sock, SHUT_RDWR);
-       conn_put(con);
-}
-
-static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
-{
-       struct tipc_conn *con;
-       int ret;
-
-       con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
-       if (!con)
-               return ERR_PTR(-ENOMEM);
-
-       kref_init(&con->kref);
-       INIT_LIST_HEAD(&con->outqueue);
-       spin_lock_init(&con->outqueue_lock);
-       INIT_WORK(&con->swork, tipc_send_work);
-       INIT_WORK(&con->rwork, tipc_recv_work);
-
-       spin_lock_bh(&s->idr_lock);
-       ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
-       if (ret < 0) {
-               kfree(con);
-               spin_unlock_bh(&s->idr_lock);
-               return ERR_PTR(-ENOMEM);
-       }
-       con->conid = ret;
-       s->idr_in_use++;
-       spin_unlock_bh(&s->idr_lock);
-
-       set_bit(CF_CONNECTED, &con->flags);
-       con->server = s;
-
-       return con;
-}
-
-static int tipc_receive_from_sock(struct tipc_conn *con)
-{
-       struct tipc_server *s = con->server;
-       struct sock *sk = con->sock->sk;
-       struct sockaddr_tipc addr;
-       struct msghdr msg = {};
-       struct kvec iov;
-       void *buf;
-       int ret;
-
-       buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
-       if (!buf) {
-               ret = -ENOMEM;
-               goto out_close;
-       }
-
-       iov.iov_base = buf;
-       iov.iov_len = s->max_rcvbuf_size;
-       msg.msg_name = &addr;
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
-       ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
-       if (ret <= 0) {
-               kmem_cache_free(s->rcvbuf_cache, buf);
-               goto out_close;
-       }
-
-       read_lock_bh(&sk->sk_callback_lock);
-       if (test_bit(CF_CONNECTED, &con->flags))
-               ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
-                                          &addr, con->usr_data, buf, ret);
-       read_unlock_bh(&sk->sk_callback_lock);
-       kmem_cache_free(s->rcvbuf_cache, buf);
-       if (ret < 0)
-               tipc_conn_terminate(s, con->conid);
-       return ret;
-
-out_close:
-       if (ret != -EWOULDBLOCK)
-               tipc_close_conn(con);
-       else if (ret == 0)
-               /* Don't return success if we really got EOF */
-               ret = -EAGAIN;
-
-       return ret;
-}
-
-static int tipc_accept_from_sock(struct tipc_conn *con)
-{
-       struct tipc_server *s = con->server;
-       struct socket *sock = con->sock;
-       struct socket *newsock;
-       struct tipc_conn *newcon;
-       int ret;
-
-       ret = kernel_accept(sock, &newsock, O_NONBLOCK);
-       if (ret < 0)
-               return ret;
-
-       newcon = tipc_alloc_conn(con->server);
-       if (IS_ERR(newcon)) {
-               ret = PTR_ERR(newcon);
-               sock_release(newsock);
-               return ret;
-       }
-
-       newcon->rx_action = tipc_receive_from_sock;
-       tipc_register_callbacks(newsock, newcon);
-
-       /* Notify that new connection is incoming */
-       newcon->usr_data = s->tipc_conn_new(newcon->conid);
-       if (!newcon->usr_data) {
-               sock_release(newsock);
-               conn_put(newcon);
-               return -ENOMEM;
-       }
-
-       /* Wake up receive process in case of 'SYN+' message */
-       newsock->sk->sk_data_ready(newsock->sk);
-       return ret;
-}
-
-static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
-{
-       struct tipc_server *s = con->server;
-       struct socket *sock = NULL;
-       int ret;
-
-       ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
-       if (ret < 0)
-               return NULL;
-       ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
-                               (char *)&s->imp, sizeof(s->imp));
-       if (ret < 0)
-               goto create_err;
-       ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
-       if (ret < 0)
-               goto create_err;
-
-       switch (s->type) {
-       case SOCK_STREAM:
-       case SOCK_SEQPACKET:
-               con->rx_action = tipc_accept_from_sock;
-
-               ret = kernel_listen(sock, 0);
-               if (ret < 0)
-                       goto create_err;
-               break;
-       case SOCK_DGRAM:
-       case SOCK_RDM:
-               con->rx_action = tipc_receive_from_sock;
-               break;
-       default:
-               pr_err("Unknown socket type %d\n", s->type);
-               goto create_err;
-       }
-
-       /* As server's listening socket owner and creator is the same module,
-        * we have to decrease TIPC module reference count to guarantee that
-        * it remains zero after the server socket is created, otherwise,
-        * executing "rmmod" command is unable to make TIPC module deleted
-        * after TIPC module is inserted successfully.
-        *
-        * However, the reference count is ever increased twice in
-        * sock_create_kern(): one is to increase the reference count of owner
-        * of TIPC socket's proto_ops struct; another is to increment the
-        * reference count of owner of TIPC proto struct. Therefore, we must
-        * decrement the module reference count twice to ensure that it keeps
-        * zero after server's listening socket is created. Of course, we
-        * must bump the module reference count twice as well before the socket
-        * is closed.
-        */
-       module_put(sock->ops->owner);
-       module_put(sock->sk->sk_prot_creator->owner);
-       set_bit(CF_SERVER, &con->flags);
-
-       return sock;
-
-create_err:
-       kernel_sock_shutdown(sock, SHUT_RDWR);
-       sock_release(sock);
-       return NULL;
-}
-
-static int tipc_open_listening_sock(struct tipc_server *s)
-{
-       struct socket *sock;
-       struct tipc_conn *con;
-
-       con = tipc_alloc_conn(s);
-       if (IS_ERR(con))
-               return PTR_ERR(con);
-
-       sock = tipc_create_listen_sock(con);
-       if (!sock) {
-               idr_remove(&s->conn_idr, con->conid);
-               s->idr_in_use--;
-               kfree(con);
-               return -EINVAL;
-       }
-
-       tipc_register_callbacks(sock, con);
-       return 0;
-}
-
-static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
-{
-       struct outqueue_entry *entry;
-       void *buf;
-
-       entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
-       if (!entry)
-               return NULL;
-
-       buf = kmemdup(data, len, GFP_ATOMIC);
-       if (!buf) {
-               kfree(entry);
-               return NULL;
-       }
-
-       entry->iov.iov_base = buf;
-       entry->iov.iov_len = len;
-
-       return entry;
-}
-
-static void tipc_free_entry(struct outqueue_entry *e)
-{
-       kfree(e->iov.iov_base);
-       kfree(e);
-}
-
-static void tipc_clean_outqueues(struct tipc_conn *con)
-{
-       struct outqueue_entry *e, *safe;
-
-       spin_lock_bh(&con->outqueue_lock);
-       list_for_each_entry_safe(e, safe, &con->outqueue, list) {
-               list_del(&e->list);
-               tipc_free_entry(e);
-       }
-       spin_unlock_bh(&con->outqueue_lock);
-}
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-                     struct sockaddr_tipc *addr, void *data, size_t len)
-{
-       struct outqueue_entry *e;
-       struct tipc_conn *con;
-
-       con = tipc_conn_lookup(s, conid);
-       if (!con)
-               return -EINVAL;
-
-       if (!test_bit(CF_CONNECTED, &con->flags)) {
-               conn_put(con);
-               return 0;
-       }
-
-       e = tipc_alloc_entry(data, len);
-       if (!e) {
-               conn_put(con);
-               return -ENOMEM;
-       }
-
-       if (addr)
-               memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
-
-       spin_lock_bh(&con->outqueue_lock);
-       list_add_tail(&e->list, &con->outqueue);
-       spin_unlock_bh(&con->outqueue_lock);
-
-       if (!queue_work(s->send_wq, &con->swork))
-               conn_put(con);
-       return 0;
-}
-
-void tipc_conn_terminate(struct tipc_server *s, int conid)
-{
-       struct tipc_conn *con;
-
-       con = tipc_conn_lookup(s, conid);
-       if (con) {
-               tipc_close_conn(con);
-               conn_put(con);
-       }
-}
-
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
-                            u32 upper, u32 filter, int *conid)
-{
-       struct tipc_subscriber *scbr;
-       struct tipc_subscr sub;
-       struct tipc_server *s;
-       struct tipc_conn *con;
-
-       sub.seq.type = type;
-       sub.seq.lower = lower;
-       sub.seq.upper = upper;
-       sub.timeout = TIPC_WAIT_FOREVER;
-       sub.filter = filter;
-       *(u32 *)&sub.usr_handle = port;
-
-       con = tipc_alloc_conn(tipc_topsrv(net));
-       if (IS_ERR(con))
-               return false;
-
-       *conid = con->conid;
-       s = con->server;
-       scbr = s->tipc_conn_new(*conid);
-       if (!scbr) {
-               conn_put(con);
-               return false;
-       }
-
-       con->usr_data = scbr;
-       con->sock = NULL;
-       s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
-       return true;
-}
-
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
-{
-       struct tipc_conn *con;
-       struct tipc_server *srv;
-
-       con = tipc_conn_lookup(tipc_topsrv(net), conid);
-       if (!con)
-               return;
-
-       test_and_clear_bit(CF_CONNECTED, &con->flags);
-       srv = con->server;
-       if (con->conid)
-               srv->tipc_conn_release(con->conid, con->usr_data);
-       conn_put(con);
-       conn_put(con);
-}
-
-static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
-{
-       u32 port = *(u32 *)&evt->s.usr_handle;
-       u32 self = tipc_own_addr(net);
-       struct sk_buff_head evtq;
-       struct sk_buff *skb;
-
-       skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
-                             self, self, port, port, 0);
-       if (!skb)
-               return;
-       msg_set_dest_droppable(buf_msg(skb), true);
-       memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
-       skb_queue_head_init(&evtq);
-       __skb_queue_tail(&evtq, skb);
-       tipc_sk_rcv(net, &evtq);
-}
-
-static void tipc_send_to_sock(struct tipc_conn *con)
-{
-       struct tipc_server *s = con->server;
-       struct outqueue_entry *e;
-       struct tipc_event *evt;
-       struct msghdr msg;
-       int count = 0;
-       int ret;
-
-       spin_lock_bh(&con->outqueue_lock);
-       while (test_bit(CF_CONNECTED, &con->flags)) {
-               e = list_entry(con->outqueue.next, struct outqueue_entry, list);
-               if ((struct list_head *) e == &con->outqueue)
-                       break;
-
-               spin_unlock_bh(&con->outqueue_lock);
-
-               if (con->sock) {
-                       memset(&msg, 0, sizeof(msg));
-                       msg.msg_flags = MSG_DONTWAIT;
-                       if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
-                               msg.msg_name = &e->dest;
-                               msg.msg_namelen = sizeof(struct sockaddr_tipc);
-                       }
-                       ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
-                                            e->iov.iov_len);
-                       if (ret == -EWOULDBLOCK || ret == 0) {
-                               cond_resched();
-                               goto out;
-                       } else if (ret < 0) {
-                               goto send_err;
-                       }
-               } else {
-                       evt = e->iov.iov_base;
-                       tipc_send_kern_top_evt(s->net, evt);
-               }
-               /* Don't starve users filling buffers */
-               if (++count >= MAX_SEND_MSG_COUNT) {
-                       cond_resched();
-                       count = 0;
-               }
-
-               spin_lock_bh(&con->outqueue_lock);
-               list_del(&e->list);
-               tipc_free_entry(e);
-       }
-       spin_unlock_bh(&con->outqueue_lock);
-out:
-       return;
-
-send_err:
-       tipc_close_conn(con);
-}
-
-static void tipc_recv_work(struct work_struct *work)
-{
-       struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
-       int count = 0;
-
-       while (test_bit(CF_CONNECTED, &con->flags)) {
-               if (con->rx_action(con))
-                       break;
-
-               /* Don't flood Rx machine */
-               if (++count >= MAX_RECV_MSG_COUNT) {
-                       cond_resched();
-                       count = 0;
-               }
-       }
-       conn_put(con);
-}
-
-static void tipc_send_work(struct work_struct *work)
-{
-       struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
-
-       if (test_bit(CF_CONNECTED, &con->flags))
-               tipc_send_to_sock(con);
-
-       conn_put(con);
-}
-
-static void tipc_work_stop(struct tipc_server *s)
-{
-       destroy_workqueue(s->rcv_wq);
-       destroy_workqueue(s->send_wq);
-}
-
-static int tipc_work_start(struct tipc_server *s)
-{
-       s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
-       if (!s->rcv_wq) {
-               pr_err("can't start tipc receive workqueue\n");
-               return -ENOMEM;
-       }
-
-       s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
-       if (!s->send_wq) {
-               pr_err("can't start tipc send workqueue\n");
-               destroy_workqueue(s->rcv_wq);
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-int tipc_server_start(struct tipc_server *s)
-{
-       int ret;
-
-       spin_lock_init(&s->idr_lock);
-       idr_init(&s->conn_idr);
-       s->idr_in_use = 0;
-
-       s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
-                                           0, SLAB_HWCACHE_ALIGN, NULL);
-       if (!s->rcvbuf_cache)
-               return -ENOMEM;
-
-       ret = tipc_work_start(s);
-       if (ret < 0) {
-               kmem_cache_destroy(s->rcvbuf_cache);
-               return ret;
-       }
-       ret = tipc_open_listening_sock(s);
-       if (ret < 0) {
-               tipc_work_stop(s);
-               kmem_cache_destroy(s->rcvbuf_cache);
-               return ret;
-       }
-       return ret;
-}
-
-void tipc_server_stop(struct tipc_server *s)
-{
-       struct tipc_conn *con;
-       int id;
-
-       spin_lock_bh(&s->idr_lock);
-       for (id = 0; s->idr_in_use; id++) {
-               con = idr_find(&s->conn_idr, id);
-               if (con) {
-                       spin_unlock_bh(&s->idr_lock);
-                       tipc_close_conn(con);
-                       spin_lock_bh(&s->idr_lock);
-               }
-       }
-       spin_unlock_bh(&s->idr_lock);
-
-       tipc_work_stop(s);
-       kmem_cache_destroy(s->rcvbuf_cache);
-       idr_destroy(&s->conn_idr);
-}
diff --git a/net/tipc/server.h b/net/tipc/server.h
deleted file mode 100644 (file)
index 64df751..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * net/tipc/server.h: Include file for TIPC server code
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_SERVER_H
-#define _TIPC_SERVER_H
-
-#include <linux/idr.h>
-#include <linux/tipc.h>
-#include <net/net_namespace.h>
-
-#define TIPC_SERVER_NAME_LEN   32
-#define TIPC_SUB_CLUSTER_SCOPE  0x20
-#define TIPC_SUB_NODE_SCOPE     0x40
-#define TIPC_SUB_NO_STATUS      0x80
-
-/**
- * struct tipc_server - TIPC server structure
- * @conn_idr: identifier set of connection
- * @idr_lock: protect the connection identifier set
- * @idr_in_use: amount of allocated identifier entry
- * @net: network namspace instance
- * @rcvbuf_cache: memory cache of server receive buffer
- * @rcv_wq: receive workqueue
- * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
- * @name: server name
- * @imp: message importance
- * @type: socket type
- */
-struct tipc_server {
-       struct idr conn_idr;
-       spinlock_t idr_lock;
-       int idr_in_use;
-       struct net *net;
-       struct kmem_cache *rcvbuf_cache;
-       struct workqueue_struct *rcv_wq;
-       struct workqueue_struct *send_wq;
-       int max_rcvbuf_size;
-       void *(*tipc_conn_new)(int conid);
-       void (*tipc_conn_release)(int conid, void *usr_data);
-       int (*tipc_conn_recvmsg)(struct net *net, int conid,
-                                struct sockaddr_tipc *addr, void *usr_data,
-                                void *buf, size_t len);
-       struct sockaddr_tipc *saddr;
-       char name[TIPC_SERVER_NAME_LEN];
-       int imp;
-       int type;
-};
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-                     struct sockaddr_tipc *addr, void *data, size_t len);
-
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
-                            u32 upper, u32 filter, int *conid);
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
-
-/**
- * tipc_conn_terminate - terminate connection with server
- *
- * Note: Must call it in process context since it might sleep
- */
-void tipc_conn_terminate(struct tipc_server *s, int conid);
-int tipc_server_start(struct tipc_server *s);
-
-void tipc_server_stop(struct tipc_server *s);
-
-#endif
index b0323ec..f934771 100644 (file)
@@ -665,7 +665,7 @@ exit:
  *       a completely predictable manner).
  */
 static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
-                       int *uaddr_len, int peer)
+                       int peer)
 {
        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
        struct sock *sk = sock->sk;
@@ -684,13 +684,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
                addr->addr.id.node = tn->own_addr;
        }
 
-       *uaddr_len = sizeof(*addr);
        addr->addrtype = TIPC_ADDR_ID;
        addr->family = AF_TIPC;
        addr->scope = 0;
        addr->addr.name.domain = 0;
 
-       return 0;
+       return sizeof(*addr);
 }
 
 /**
index 68e2647..6925a98 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/subscr.c: TIPC network topology service
  *
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
  * All rights reserved.
  *
 #include "name_table.h"
 #include "subscr.h"
 
-/**
- * struct tipc_subscriber - TIPC network topology subscriber
- * @kref: reference counter to tipc_subscription object
- * @conid: connection identifier to server connecting to subscriber
- * @lock: control access to subscriber
- * @subscrp_list: list of subscription objects for this subscriber
- */
-struct tipc_subscriber {
-       struct kref kref;
-       int conid;
-       spinlock_t lock;
-       struct list_head subscrp_list;
-};
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
-{
-       return swap ? swab32(in) : in;
-}
-
-static void tipc_subscrp_send_event(struct tipc_subscription *sub,
-                                   u32 found_lower, u32 found_upper,
-                                   u32 event, u32 port_ref, u32 node)
+static void tipc_sub_send_event(struct tipc_subscription *sub,
+                               u32 found_lower, u32 found_upper,
+                               u32 event, u32 port, u32 node)
 {
-       struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
-       struct tipc_subscriber *subscriber = sub->subscriber;
-       struct kvec msg_sect;
+       struct tipc_event *evt = &sub->evt;
 
-       msg_sect.iov_base = (void *)&sub->evt;
-       msg_sect.iov_len = sizeof(struct tipc_event);
-       sub->evt.event = htohl(event, sub->swap);
-       sub->evt.found_lower = htohl(found_lower, sub->swap);
-       sub->evt.found_upper = htohl(found_upper, sub->swap);
-       sub->evt.port.ref = htohl(port_ref, sub->swap);
-       sub->evt.port.node = htohl(node, sub->swap);
-       tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL,
-                         msg_sect.iov_base, msg_sect.iov_len);
+       if (sub->inactive)
+               return;
+       tipc_evt_write(evt, event, event);
+       tipc_evt_write(evt, found_lower, found_lower);
+       tipc_evt_write(evt, found_upper, found_upper);
+       tipc_evt_write(evt, port.ref, port);
+       tipc_evt_write(evt, port.node, node);
+       tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
 }
 
 /**
- * tipc_subscrp_check_overlap - test for subscription overlap with the
+ * tipc_sub_check_overlap - test for subscription overlap with the
  * given values
  *
  * Returns 1 if there is overlap, otherwise 0.
  */
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
-                              u32 found_upper)
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+                          u32 found_upper)
 {
        if (found_lower < seq->lower)
                found_lower = seq->lower;
@@ -103,298 +72,98 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
        return 1;
 }
 
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
-{
-       return htohl(type, swap);
-}
-
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
-                             struct tipc_name_seq *out)
-{
-       out->type = htohl(in->type, swap);
-       out->lower = htohl(in->lower, swap);
-       out->upper = htohl(in->upper, swap);
-}
-
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
-                                u32 found_upper, u32 event, u32 port_ref,
-                                u32 node, u32 scope, int must)
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+                            u32 found_lower, u32 found_upper,
+                            u32 event, u32 port, u32 node,
+                            u32 scope, int must)
 {
-       u32 filter = htohl(sub->evt.s.filter, sub->swap);
+       struct tipc_subscr *s = &sub->evt.s;
+       u32 filter = tipc_sub_read(s, filter);
        struct tipc_name_seq seq;
 
-       tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
-       if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
+       seq.type = tipc_sub_read(s, seq.type);
+       seq.lower = tipc_sub_read(s, seq.lower);
+       seq.upper = tipc_sub_read(s, seq.upper);
+
+       if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
                return;
+
        if (!must && !(filter & TIPC_SUB_PORTS))
                return;
        if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
                return;
        if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
                return;
-
-       tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
-                               node);
+       spin_lock(&sub->lock);
+       tipc_sub_send_event(sub, found_lower, found_upper,
+                           event, port, node);
+       spin_unlock(&sub->lock);
 }
 
-static void tipc_subscrp_timeout(struct timer_list *t)
+static void tipc_sub_timeout(struct timer_list *t)
 {
        struct tipc_subscription *sub = from_timer(sub, t, timer);
-       struct tipc_subscriber *subscriber = sub->subscriber;
-
-       spin_lock_bh(&subscriber->lock);
-       tipc_nametbl_unsubscribe(sub);
-       list_del(&sub->subscrp_list);
-       spin_unlock_bh(&subscriber->lock);
-
-       /* Notify subscriber of timeout */
-       tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
-                               TIPC_SUBSCR_TIMEOUT, 0, 0);
-
-       tipc_subscrp_put(sub);
-}
-
-static void tipc_subscrb_kref_release(struct kref *kref)
-{
-       kfree(container_of(kref,struct tipc_subscriber, kref));
-}
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
-{
-       kref_put(&subscriber->kref, tipc_subscrb_kref_release);
-}
+       struct tipc_subscr *s = &sub->evt.s;
 
-static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
-{
-       kref_get(&subscriber->kref);
+       spin_lock(&sub->lock);
+       tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
+                           TIPC_SUBSCR_TIMEOUT, 0, 0);
+       sub->inactive = true;
+       spin_unlock(&sub->lock);
 }
 
-static void tipc_subscrp_kref_release(struct kref *kref)
+static void tipc_sub_kref_release(struct kref *kref)
 {
-       struct tipc_subscription *sub = container_of(kref,
-                                                    struct tipc_subscription,
-                                                    kref);
-       struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
-       struct tipc_subscriber *subscriber = sub->subscriber;
-
-       atomic_dec(&tn->subscription_count);
-       kfree(sub);
-       tipc_subscrb_put(subscriber);
+       kfree(container_of(kref, struct tipc_subscription, kref));
 }
 
-void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_sub_put(struct tipc_subscription *subscription)
 {
-       kref_put(&subscription->kref, tipc_subscrp_kref_release);
+       kref_put(&subscription->kref, tipc_sub_kref_release);
 }
 
-void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_sub_get(struct tipc_subscription *subscription)
 {
        kref_get(&subscription->kref);
 }
 
-/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
- * subscriptions for a given subscriber.
- */
-static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
-                                       struct tipc_subscr *s)
-{
-       struct list_head *subscription_list = &subscriber->subscrp_list;
-       struct tipc_subscription *sub, *temp;
-       u32 timeout;
-
-       spin_lock_bh(&subscriber->lock);
-       list_for_each_entry_safe(sub, temp, subscription_list,  subscrp_list) {
-               if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
-                       continue;
-
-               timeout = htohl(sub->evt.s.timeout, sub->swap);
-               if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
-                       tipc_nametbl_unsubscribe(sub);
-                       list_del(&sub->subscrp_list);
-                       tipc_subscrp_put(sub);
-               }
-
-               if (s)
-                       break;
-       }
-       spin_unlock_bh(&subscriber->lock);
-}
-
-static struct tipc_subscriber *tipc_subscrb_create(int conid)
-{
-       struct tipc_subscriber *subscriber;
-
-       subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
-       if (!subscriber) {
-               pr_warn("Subscriber rejected, no memory\n");
-               return NULL;
-       }
-       INIT_LIST_HEAD(&subscriber->subscrp_list);
-       kref_init(&subscriber->kref);
-       subscriber->conid = conid;
-       spin_lock_init(&subscriber->lock);
-
-       return subscriber;
-}
-
-static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
-{
-       tipc_subscrb_subscrp_delete(subscriber, NULL);
-       tipc_subscrb_put(subscriber);
-}
-
-static void tipc_subscrp_cancel(struct tipc_subscr *s,
-                               struct tipc_subscriber *subscriber)
-{
-       tipc_subscrb_get(subscriber);
-       tipc_subscrb_subscrp_delete(subscriber, s);
-       tipc_subscrb_put(subscriber);
-}
-
-static struct tipc_subscription *tipc_subscrp_create(struct net *net,
-                                                    struct tipc_subscr *s,
-                                                    int swap)
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+                                            struct tipc_subscr *s,
+                                            int conid)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       u32 filter = tipc_sub_read(s, filter);
        struct tipc_subscription *sub;
-       u32 filter = htohl(s->filter, swap);
+       u32 timeout;
 
-       /* Refuse subscription if global limit exceeded */
-       if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
-               pr_warn("Subscription rejected, limit reached (%u)\n",
-                       TIPC_MAX_SUBSCRIPTIONS);
+       if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
+           (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+               pr_warn("Subscription rejected, illegal request\n");
                return NULL;
        }
-
-       /* Allocate subscription object */
        sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
        if (!sub) {
                pr_warn("Subscription rejected, no memory\n");
                return NULL;
        }
-
-       /* Initialize subscription object */
        sub->net = net;
-       if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
-           (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
-               pr_warn("Subscription rejected, illegal request\n");
-               kfree(sub);
-               return NULL;
-       }
-
-       sub->swap = swap;
+       sub->conid = conid;
+       sub->inactive = false;
        memcpy(&sub->evt.s, s, sizeof(*s));
-       atomic_inc(&tn->subscription_count);
+       spin_lock_init(&sub->lock);
        kref_init(&sub->kref);
-       return sub;
-}
-
-static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
-                                 struct tipc_subscriber *subscriber, int swap,
-                                 bool status)
-{
-       struct tipc_subscription *sub = NULL;
-       u32 timeout;
-
-       sub = tipc_subscrp_create(net, s, swap);
-       if (!sub)
-               return -1;
-
-       spin_lock_bh(&subscriber->lock);
-       list_add(&sub->subscrp_list, &subscriber->subscrp_list);
-       sub->subscriber = subscriber;
-       tipc_nametbl_subscribe(sub, status);
-       tipc_subscrb_get(subscriber);
-       spin_unlock_bh(&subscriber->lock);
-
-       timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
-       timeout = htohl(sub->evt.s.timeout, swap);
-
+       tipc_nametbl_subscribe(sub);
+       timer_setup(&sub->timer, tipc_sub_timeout, 0);
+       timeout = tipc_sub_read(&sub->evt.s, timeout);
        if (timeout != TIPC_WAIT_FOREVER)
                mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
-       return 0;
-}
-
-/* Handle one termination request for the subscriber */
-static void tipc_subscrb_release_cb(int conid, void *usr_data)
-{
-       tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
-}
-
-/* Handle one request to create a new subscription for the subscriber */
-static int tipc_subscrb_rcv_cb(struct net *net, int conid,
-                              struct sockaddr_tipc *addr, void *usr_data,
-                              void *buf, size_t len)
-{
-       struct tipc_subscriber *subscriber = usr_data;
-       struct tipc_subscr *s = (struct tipc_subscr *)buf;
-       bool status;
-       int swap;
-
-       /* Determine subscriber's endianness */
-       swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
-                             TIPC_SUB_CANCEL));
-
-       /* Detect & process a subscription cancellation request */
-       if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
-               s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
-               tipc_subscrp_cancel(s, subscriber);
-               return 0;
-       }
-       status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
-       return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
-}
-
-/* Handle one request to establish a new subscriber */
-static void *tipc_subscrb_connect_cb(int conid)
-{
-       return (void *)tipc_subscrb_create(conid);
-}
-
-int tipc_topsrv_start(struct net *net)
-{
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       const char name[] = "topology_server";
-       struct tipc_server *topsrv;
-       struct sockaddr_tipc *saddr;
-
-       saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
-       if (!saddr)
-               return -ENOMEM;
-       saddr->family                   = AF_TIPC;
-       saddr->addrtype                 = TIPC_ADDR_NAMESEQ;
-       saddr->addr.nameseq.type        = TIPC_TOP_SRV;
-       saddr->addr.nameseq.lower       = TIPC_TOP_SRV;
-       saddr->addr.nameseq.upper       = TIPC_TOP_SRV;
-       saddr->scope                    = TIPC_NODE_SCOPE;
-
-       topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC);
-       if (!topsrv) {
-               kfree(saddr);
-               return -ENOMEM;
-       }
-       topsrv->net                     = net;
-       topsrv->saddr                   = saddr;
-       topsrv->imp                     = TIPC_CRITICAL_IMPORTANCE;
-       topsrv->type                    = SOCK_SEQPACKET;
-       topsrv->max_rcvbuf_size         = sizeof(struct tipc_subscr);
-       topsrv->tipc_conn_recvmsg       = tipc_subscrb_rcv_cb;
-       topsrv->tipc_conn_new           = tipc_subscrb_connect_cb;
-       topsrv->tipc_conn_release       = tipc_subscrb_release_cb;
-
-       strncpy(topsrv->name, name, strlen(name) + 1);
-       tn->topsrv = topsrv;
-       atomic_set(&tn->subscription_count, 0);
-
-       return tipc_server_start(topsrv);
+       return sub;
 }
 
-void tipc_topsrv_stop(struct net *net)
+void tipc_sub_unsubscribe(struct tipc_subscription *sub)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       struct tipc_server *topsrv = tn->topsrv;
-
-       tipc_server_stop(topsrv);
-       kfree(topsrv->saddr);
-       kfree(topsrv);
+       tipc_nametbl_unsubscribe(sub);
+       if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
+               del_timer_sync(&sub->timer);
+       list_del(&sub->sub_list);
+       tipc_sub_put(sub);
 }
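
The per-subscription spinlock plus the inactive flag form a small "deliver once, then go quiet" pattern: tipc_sub_timeout() queues its final TIPC_SUBSCR_TIMEOUT event and sets inactive under sub->lock, and tipc_sub_send_event() rechecks the flag under the same lock, so nothing can be queued after the timeout event. A condensed, hypothetical sketch of the pattern (struct and function names are illustrative):

	struct demo_sub {
		spinlock_t lock;	/* serializes event delivery vs. timeout */
		bool inactive;		/* set once, after the final event */
	};

	static void demo_send_event(struct demo_sub *s)
	{
		/* caller holds s->lock */
		if (s->inactive)
			return;
		/* queue the event towards the connection here */
	}

	static void demo_timeout(struct demo_sub *s)
	{
		spin_lock(&s->lock);
		demo_send_event(s);	/* final event */
		s->inactive = true;	/* anything later is dropped */
		spin_unlock(&s->lock);
	}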
index f3edca7..8b2d22b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/subscr.h: Include file for TIPC network topology service
  *
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2012-2013, Wind River Systems
  * All rights reserved.
  *
 #ifndef _TIPC_SUBSCR_H
 #define _TIPC_SUBSCR_H
 
-#include "server.h"
+#include "topsrv.h"
 
-#define TIPC_MAX_SUBSCRIPTIONS 65535
-#define TIPC_MAX_PUBLICATIONS  65535
+#define TIPC_MAX_SUBSCR         65535
+#define TIPC_MAX_PUBLICATIONS   65535
 
 struct tipc_subscription;
-struct tipc_subscriber;
+struct tipc_conn;
 
 /**
  * struct tipc_subscription - TIPC network topology subscription object
- * @subscriber: pointer to its subscriber
- * @seq: name sequence associated with subscription
+ * @kref: reference count for this subscription
- * @net: point to network namespace
+ * @net: network namespace associated with subscription
  * @timer: timer governing subscription duration (optional)
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @swap: indicates if subscriber uses opposite endianness in its messages
+ * @sub_list: adjacent subscriptions in subscriber's subscription list
  * @evt: template for events generated by subscription
+ * @conid: connection identifier of topology server connection
+ * @inactive: true if this subscription is inactive
+ * @lock: serializes up/down and timer events
  */
 struct tipc_subscription {
        struct kref kref;
-       struct tipc_subscriber *subscriber;
        struct net *net;
        struct timer_list timer;
        struct list_head nameseq_list;
-       struct list_head subscrp_list;
-       int swap;
+       struct list_head sub_list;
        struct tipc_event evt;
+       int conid;
+       bool inactive;
+       spinlock_t lock; /* serialize up/down and timer events */
 };
 
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
-                              u32 found_upper);
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
-                                u32 found_lower, u32 found_upper, u32 event,
-                                u32 port_ref, u32 node, u32 scope, int must);
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
-                             struct tipc_name_seq *out);
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+                                            struct tipc_subscr *s,
+                                            int conid);
+void tipc_sub_unsubscribe(struct tipc_subscription *sub);
+
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+                          u32 found_upper);
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+                            u32 found_lower, u32 found_upper,
+                            u32 event, u32 port, u32 node,
+                            u32 scope, int must);
 int tipc_topsrv_start(struct net *net);
 void tipc_topsrv_stop(struct net *net);
 
-void tipc_subscrp_put(struct tipc_subscription *subscription);
-void tipc_subscrp_get(struct tipc_subscription *subscription);
+void tipc_sub_put(struct tipc_subscription *subscription);
+void tipc_sub_get(struct tipc_subscription *subscription);
+
+#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
+
+/* tipc_sub_read - return field_ of struct sub_ in host endian format
+ */
+#define tipc_sub_read(sub_, field_)                                    \
+       ({                                                              \
+               struct tipc_subscr *sub__ = sub_;                       \
+               u32 val__ = (sub__)->field_;                            \
+               int swap_ = !((sub__)->filter & TIPC_FILTER_MASK);      \
+               (swap_ ? swab32(val__) : val__);                        \
+       })
+
+/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
+ */
+#define tipc_evt_write(evt_, field_, val_)                             \
+       ({                                                              \
+               struct tipc_event *evt__ = evt_;                        \
+               u32 val__ = val_;                                       \
+               int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK));  \
+               (evt__)->field_ = swap_ ? swab32(val__) : val__;        \
+       })
 
 #endif
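
The two macros above rely on a convention rather than an explicit endianness field: every well-formed same-endian request has at least one bit of TIPC_FILTER_MASK set in filter, so a masked filter of zero means the subscriber uses the opposite byte order and every 32-bit field must be swab32()'d. A hypothetical consumer, mirroring how tipc_sub_report_overlap() reads the name sequence in host order:

	static void demo_read_seq(struct tipc_subscr *s, struct tipc_name_seq *out)
	{
		/* host byte order regardless of the subscriber's endianness */
		out->type  = tipc_sub_read(s, seq.type);
		out->lower = tipc_sub_read(s, seq.lower);
		out->upper = tipc_sub_read(s, seq.upper);
	}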
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
new file mode 100644 (file)
index 0000000..c8e34ef
--- /dev/null
@@ -0,0 +1,703 @@
+/*
+ * net/tipc/topsrv.c: TIPC topology server infrastructure
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017-2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "subscr.h"
+#include "topsrv.h"
+#include "core.h"
+#include "socket.h"
+#include "addr.h"
+#include "msg.h"
+#include <net/sock.h>
+#include <linux/module.h>
+
+/* Number of messages to send before rescheduling */
+#define MAX_SEND_MSG_COUNT     25
+#define MAX_RECV_MSG_COUNT     25
+#define CF_CONNECTED           1
+#define CF_SERVER              2
+
+#define TIPC_SERVER_NAME_LEN   32
+
+/**
+ * struct tipc_topsrv - TIPC topology server structure
+ * @conn_idr: identifier set of connections
+ * @idr_lock: protects the connection identifier set
+ * @idr_in_use: number of allocated identifier entries
+ * @net: network namespace instance
+ * @awork: accept work item
+ * @rcv_wq: receive workqueue
+ * @send_wq: send workqueue
+ * @max_rcvbuf_size: maximum permitted receive message length
+ * @listener: topology server listener socket
+ * @name: server name
+ */
+struct tipc_topsrv {
+       struct idr conn_idr;
+       spinlock_t idr_lock; /* for idr list */
+       int idr_in_use;
+       struct net *net;
+       struct work_struct awork;
+       struct workqueue_struct *rcv_wq;
+       struct workqueue_struct *send_wq;
+       int max_rcvbuf_size;
+       struct socket *listener;
+       char name[TIPC_SERVER_NAME_LEN];
+};
+
+/**
+ * struct tipc_conn - TIPC connection structure
+ * @kref: reference counter to connection object
+ * @conid: connection identifier
+ * @sock: socket handler associated with connection
+ * @flags: indicates connection state
+ * @server: pointer to connected server
+ * @sub_list: list of all pertaining subscriptions
+ * @sub_lock: lock protecting the subscription list
+ * @rwork: receive work item
+ * @outqueue: pointer to first outbound message in queue
+ * @outqueue_lock: control access to the outqueue
+ * @swork: send work item
+ */
+struct tipc_conn {
+       struct kref kref;
+       int conid;
+       struct socket *sock;
+       unsigned long flags;
+       struct tipc_topsrv *server;
+       struct list_head sub_list;
+       spinlock_t sub_lock; /* for subscription list */
+       struct work_struct rwork;
+       struct list_head outqueue;
+       spinlock_t outqueue_lock; /* for outqueue */
+       struct work_struct swork;
+};
+
+/* An entry waiting to be sent */
+struct outqueue_entry {
+       bool inactive;
+       struct tipc_event evt;
+       struct list_head list;
+};
+
+static void tipc_conn_recv_work(struct work_struct *work);
+static void tipc_conn_send_work(struct work_struct *work);
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
+
+static bool connected(struct tipc_conn *con)
+{
+       return con && test_bit(CF_CONNECTED, &con->flags);
+}
+
+static void tipc_conn_kref_release(struct kref *kref)
+{
+       struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+       struct tipc_topsrv *s = con->server;
+       struct outqueue_entry *e, *safe;
+
+       spin_lock_bh(&s->idr_lock);
+       idr_remove(&s->conn_idr, con->conid);
+       s->idr_in_use--;
+       spin_unlock_bh(&s->idr_lock);
+       if (con->sock)
+               sock_release(con->sock);
+
+       spin_lock_bh(&con->outqueue_lock);
+       list_for_each_entry_safe(e, safe, &con->outqueue, list) {
+               list_del(&e->list);
+               kfree(e);
+       }
+       spin_unlock_bh(&con->outqueue_lock);
+       kfree(con);
+}
+
+static void conn_put(struct tipc_conn *con)
+{
+       kref_put(&con->kref, tipc_conn_kref_release);
+}
+
+static void conn_get(struct tipc_conn *con)
+{
+       kref_get(&con->kref);
+}
+
+static void tipc_conn_close(struct tipc_conn *con)
+{
+       struct sock *sk = con->sock->sk;
+       bool disconnect = false;
+
+       write_lock_bh(&sk->sk_callback_lock);
+       disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+
+       if (disconnect) {
+               sk->sk_user_data = NULL;
+               tipc_conn_delete_sub(con, NULL);
+       }
+       write_unlock_bh(&sk->sk_callback_lock);
+
+       /* Handle concurrent calls from sending and receiving threads */
+       if (!disconnect)
+               return;
+
+       /* Don't flush pending works, just let them expire */
+       kernel_sock_shutdown(con->sock, SHUT_RDWR);
+
+       conn_put(con);
+}
+
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+{
+       struct tipc_conn *con;
+       int ret;
+
+       con = kzalloc(sizeof(*con), GFP_ATOMIC);
+       if (!con)
+               return ERR_PTR(-ENOMEM);
+
+       kref_init(&con->kref);
+       INIT_LIST_HEAD(&con->outqueue);
+       INIT_LIST_HEAD(&con->sub_list);
+       spin_lock_init(&con->outqueue_lock);
+       spin_lock_init(&con->sub_lock);
+       INIT_WORK(&con->swork, tipc_conn_send_work);
+       INIT_WORK(&con->rwork, tipc_conn_recv_work);
+
+       spin_lock_bh(&s->idr_lock);
+       ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
+       if (ret < 0) {
+               kfree(con);
+               spin_unlock_bh(&s->idr_lock);
+               return ERR_PTR(-ENOMEM);
+       }
+       con->conid = ret;
+       s->idr_in_use++;
+       spin_unlock_bh(&s->idr_lock);
+
+       set_bit(CF_CONNECTED, &con->flags);
+       con->server = s;
+
+       return con;
+}
+
+static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
+{
+       struct tipc_conn *con;
+
+       spin_lock_bh(&s->idr_lock);
+       con = idr_find(&s->conn_idr, conid);
+       if (!connected(con) || !kref_get_unless_zero(&con->kref))
+               con = NULL;
+       spin_unlock_bh(&s->idr_lock);
+       return con;
+}
+
+/* tipc_conn_delete_sub - delete a specific subscription, or all
+ * subscriptions, for a given subscriber
+ */
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
+{
+       struct tipc_net *tn = tipc_net(con->server->net);
+       struct list_head *sub_list = &con->sub_list;
+       struct tipc_subscription *sub, *tmp;
+
+       spin_lock_bh(&con->sub_lock);
+       list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
+               if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
+                       tipc_sub_unsubscribe(sub);
+                       atomic_dec(&tn->subscription_count);
+               } else if (s) {
+                       break;
+               }
+       }
+       spin_unlock_bh(&con->sub_lock);
+}
+
+static void tipc_conn_send_to_sock(struct tipc_conn *con)
+{
+       struct list_head *queue = &con->outqueue;
+       struct tipc_topsrv *srv = con->server;
+       struct outqueue_entry *e;
+       struct tipc_event *evt;
+       struct msghdr msg;
+       struct kvec iov;
+       int count = 0;
+       int ret;
+
+       spin_lock_bh(&con->outqueue_lock);
+
+       while (!list_empty(queue)) {
+               e = list_first_entry(queue, struct outqueue_entry, list);
+               evt = &e->evt;
+               spin_unlock_bh(&con->outqueue_lock);
+
+               if (e->inactive)
+                       tipc_conn_delete_sub(con, &evt->s);
+
+               memset(&msg, 0, sizeof(msg));
+               msg.msg_flags = MSG_DONTWAIT;
+               iov.iov_base = evt;
+               iov.iov_len = sizeof(*evt);
+               msg.msg_name = NULL;
+
+               if (con->sock) {
+                       ret = kernel_sendmsg(con->sock, &msg, &iov,
+                                            1, sizeof(*evt));
+                       if (ret == -EWOULDBLOCK || ret == 0) {
+                               cond_resched();
+                               return;
+                       } else if (ret < 0) {
+                               return tipc_conn_close(con);
+                       }
+               } else {
+                       tipc_topsrv_kern_evt(srv->net, evt);
+               }
+
+               /* Don't starve users filling buffers */
+               if (++count >= MAX_SEND_MSG_COUNT) {
+                       cond_resched();
+                       count = 0;
+               }
+               spin_lock_bh(&con->outqueue_lock);
+               list_del(&e->list);
+               kfree(e);
+       }
+       spin_unlock_bh(&con->outqueue_lock);
+}
+
+static void tipc_conn_send_work(struct work_struct *work)
+{
+       struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+
+       if (connected(con))
+               tipc_conn_send_to_sock(con);
+
+       conn_put(con);
+}
+
+/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+                          u32 event, struct tipc_event *evt)
+{
+       struct tipc_topsrv *srv = tipc_topsrv(net);
+       struct outqueue_entry *e;
+       struct tipc_conn *con;
+
+       con = tipc_conn_lookup(srv, conid);
+       if (!con)
+               return;
+
+       if (!connected(con))
+               goto err;
+
+       e = kmalloc(sizeof(*e), GFP_ATOMIC);
+       if (!e)
+               goto err;
+       e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
+       memcpy(&e->evt, evt, sizeof(*evt));
+       spin_lock_bh(&con->outqueue_lock);
+       list_add_tail(&e->list, &con->outqueue);
+       spin_unlock_bh(&con->outqueue_lock);
+
+       if (queue_work(srv->send_wq, &con->swork))
+               return;
+err:
+       conn_put(con);
+}
+
+/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+static void tipc_conn_write_space(struct sock *sk)
+{
+       struct tipc_conn *con;
+
+       read_lock_bh(&sk->sk_callback_lock);
+       con = sk->sk_user_data;
+       if (connected(con)) {
+               conn_get(con);
+               if (!queue_work(con->server->send_wq, &con->swork))
+                       conn_put(con);
+       }
+       read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
+                            struct tipc_conn *con,
+                            struct tipc_subscr *s)
+{
+       struct tipc_net *tn = tipc_net(srv->net);
+       struct tipc_subscription *sub;
+
+       if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+               tipc_conn_delete_sub(con, s);
+               return 0;
+       }
+       if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+               pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+               return -1;
+       }
+       sub = tipc_sub_subscribe(srv->net, s, con->conid);
+       if (!sub)
+               return -1;
+       atomic_inc(&tn->subscription_count);
+       spin_lock_bh(&con->sub_lock);
+       list_add(&sub->sub_list, &con->sub_list);
+       spin_unlock_bh(&con->sub_lock);
+       return 0;
+}
+
+static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
+{
+       struct tipc_topsrv *srv = con->server;
+       struct sock *sk = con->sock->sk;
+       struct msghdr msg = {};
+       struct tipc_subscr s;
+       struct kvec iov;
+       int ret;
+
+       iov.iov_base = &s;
+       iov.iov_len = sizeof(s);
+       msg.msg_name = NULL;
+       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+       ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
+       if (ret == -EWOULDBLOCK)
+               return -EWOULDBLOCK;
+       if (ret > 0) {
+               read_lock_bh(&sk->sk_callback_lock);
+               ret = tipc_conn_rcv_sub(srv, con, &s);
+               read_unlock_bh(&sk->sk_callback_lock);
+       }
+       if (ret < 0)
+               tipc_conn_close(con);
+
+       return ret;
+}
+
+static void tipc_conn_recv_work(struct work_struct *work)
+{
+       struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
+       int count = 0;
+
+       while (connected(con)) {
+               if (tipc_conn_rcv_from_sock(con))
+                       break;
+
+               /* Don't flood Rx machine */
+               if (++count >= MAX_RECV_MSG_COUNT) {
+                       cond_resched();
+                       count = 0;
+               }
+       }
+       conn_put(con);
+}
+
+/* tipc_conn_data_ready - interrupt callback indicating the socket has data
+ * The queued work is launched into tipc_conn_recv_work()->tipc_conn_rcv_from_sock()
+ */
+static void tipc_conn_data_ready(struct sock *sk)
+{
+       struct tipc_conn *con;
+
+       read_lock_bh(&sk->sk_callback_lock);
+       con = sk->sk_user_data;
+       if (connected(con)) {
+               conn_get(con);
+               if (!queue_work(con->server->rcv_wq, &con->rwork))
+                       conn_put(con);
+       }
+       read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void tipc_topsrv_accept(struct work_struct *work)
+{
+       struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
+       struct socket *lsock = srv->listener;
+       struct socket *newsock;
+       struct tipc_conn *con;
+       struct sock *newsk;
+       int ret;
+
+       while (1) {
+               ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+               if (ret < 0)
+                       return;
+               con = tipc_conn_alloc(srv);
+               if (IS_ERR(con)) {
+                       ret = PTR_ERR(con);
+                       sock_release(newsock);
+                       return;
+               }
+               /* Register callbacks */
+               newsk = newsock->sk;
+               write_lock_bh(&newsk->sk_callback_lock);
+               newsk->sk_data_ready = tipc_conn_data_ready;
+               newsk->sk_write_space = tipc_conn_write_space;
+               newsk->sk_user_data = con;
+               con->sock = newsock;
+               write_unlock_bh(&newsk->sk_callback_lock);
+
+               /* Wake up receive process in case of 'SYN+' message */
+               newsk->sk_data_ready(newsk);
+       }
+}
+
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
+ * The queued job is launched into tipc_topsrv_accept()
+ */
+static void tipc_topsrv_listener_data_ready(struct sock *sk)
+{
+       struct tipc_topsrv *srv;
+
+       read_lock_bh(&sk->sk_callback_lock);
+       srv = sk->sk_user_data;
+       if (srv->listener)
+               queue_work(srv->rcv_wq, &srv->awork);
+       read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
+{
+       int imp = TIPC_CRITICAL_IMPORTANCE;
+       struct socket *lsock = NULL;
+       struct sockaddr_tipc saddr;
+       struct sock *sk;
+       int rc;
+
+       rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
+       if (rc < 0)
+               return rc;
+
+       srv->listener = lsock;
+       sk = lsock->sk;
+       write_lock_bh(&sk->sk_callback_lock);
+       sk->sk_data_ready = tipc_topsrv_listener_data_ready;
+       sk->sk_user_data = srv;
+       write_unlock_bh(&sk->sk_callback_lock);
+
+       rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
+                              (char *)&imp, sizeof(imp));
+       if (rc < 0)
+               goto err;
+
+       saddr.family                    = AF_TIPC;
+       saddr.addrtype                  = TIPC_ADDR_NAMESEQ;
+       saddr.addr.nameseq.type         = TIPC_TOP_SRV;
+       saddr.addr.nameseq.lower        = TIPC_TOP_SRV;
+       saddr.addr.nameseq.upper        = TIPC_TOP_SRV;
+       saddr.scope                     = TIPC_NODE_SCOPE;
+
+       rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+       if (rc < 0)
+               goto err;
+       rc = kernel_listen(lsock, 0);
+       if (rc < 0)
+               goto err;
+
+       /* The listening socket is owned and created by the TIPC module
+        * itself, so we must drop the module reference count it takes;
+        * otherwise the count can never return to zero and "rmmod" is
+        * unable to remove the module once it has been inserted.
+        *
+        * sock_create_kern() increments the reference count twice: once
+        * for the owner of the socket's proto_ops struct and once for
+        * the owner of its proto struct. We therefore decrement the
+        * module reference count twice here, and must bump it twice
+        * again before the socket is closed.
+        */
+       module_put(lsock->ops->owner);
+       module_put(sk->sk_prot_creator->owner);
+
+       return 0;
+err:
+       sock_release(lsock);
+       return -EINVAL;
+}
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+                            u32 upper, u32 filter, int *conid)
+{
+       struct tipc_subscr sub;
+       struct tipc_conn *con;
+       int rc;
+
+       sub.seq.type = type;
+       sub.seq.lower = lower;
+       sub.seq.upper = upper;
+       sub.timeout = TIPC_WAIT_FOREVER;
+       sub.filter = filter;
+       *(u32 *)&sub.usr_handle = port;
+
+       con = tipc_conn_alloc(tipc_topsrv(net));
+       if (IS_ERR(con))
+               return false;
+
+       *conid = con->conid;
+       con->sock = NULL;
+       rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+       if (rc >= 0)
+               return true;
+       conn_put(con);
+       return false;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+       struct tipc_conn *con;
+
+       con = tipc_conn_lookup(tipc_topsrv(net), conid);
+       if (!con)
+               return;
+
+       test_and_clear_bit(CF_CONNECTED, &con->flags);
+       tipc_conn_delete_sub(con, NULL);
+       conn_put(con);          /* drop the reference taken by tipc_conn_lookup() */
+       conn_put(con);          /* drop the base reference from tipc_conn_alloc() */
+}
+
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
+{
+       u32 port = *(u32 *)&evt->s.usr_handle;
+       u32 self = tipc_own_addr(net);
+       struct sk_buff_head evtq;
+       struct sk_buff *skb;
+
+       skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+                             self, self, port, port, 0);
+       if (!skb)
+               return;
+       msg_set_dest_droppable(buf_msg(skb), true);
+       memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+       skb_queue_head_init(&evtq);
+       __skb_queue_tail(&evtq, skb);
+       tipc_sk_rcv(net, &evtq);
+}
+
+static int tipc_topsrv_work_start(struct tipc_topsrv *s)
+{
+       s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
+       if (!s->rcv_wq) {
+               pr_err("can't start tipc receive workqueue\n");
+               return -ENOMEM;
+       }
+
+       s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
+       if (!s->send_wq) {
+               pr_err("can't start tipc send workqueue\n");
+               destroy_workqueue(s->rcv_wq);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
+{
+       destroy_workqueue(s->rcv_wq);
+       destroy_workqueue(s->send_wq);
+}
+
+int tipc_topsrv_start(struct net *net)
+{
+       struct tipc_net *tn = tipc_net(net);
+       const char name[] = "topology_server";
+       struct tipc_topsrv *srv;
+       int ret;
+
+       srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
+       if (!srv)
+               return -ENOMEM;
+
+       srv->net = net;
+       srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
+       INIT_WORK(&srv->awork, tipc_topsrv_accept);
+
+       strncpy(srv->name, name, strlen(name) + 1);
+       tn->topsrv = srv;
+       atomic_set(&tn->subscription_count, 0);
+
+       spin_lock_init(&srv->idr_lock);
+       idr_init(&srv->conn_idr);
+       srv->idr_in_use = 0;
+
+       ret = tipc_topsrv_work_start(srv);
+       if (ret < 0)
+               return ret;
+
+       ret = tipc_topsrv_create_listener(srv);
+       if (ret < 0)
+               tipc_topsrv_work_stop(srv);
+
+       return ret;
+}
+
+void tipc_topsrv_stop(struct net *net)
+{
+       struct tipc_topsrv *srv = tipc_topsrv(net);
+       struct socket *lsock = srv->listener;
+       struct tipc_conn *con;
+       int id;
+
+       spin_lock_bh(&srv->idr_lock);
+       for (id = 0; srv->idr_in_use; id++) {
+               con = idr_find(&srv->conn_idr, id);
+               if (con) {
+                       spin_unlock_bh(&srv->idr_lock);
+                       tipc_conn_close(con);
+                       spin_lock_bh(&srv->idr_lock);
+               }
+       }
+       __module_get(lsock->ops->owner);
+       __module_get(lsock->sk->sk_prot_creator->owner);
+       srv->listener = NULL;
+       spin_unlock_bh(&srv->idr_lock);
+       sock_release(lsock);
+       tipc_topsrv_work_stop(srv);
+       idr_destroy(&srv->conn_idr);
+       kfree(srv);
+}
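
The file's socket callbacks all follow one handoff pattern: the callback runs in softirq context and may not sleep, so it only pins the connection and defers the real work to an ordered workqueue; the extra reference is owned by the work item, which drops it when it finishes. A condensed sketch of the pattern with hypothetical names (illustrative only, not part of the patch):

	struct demo_conn {
		struct kref kref;
		unsigned long flags;		/* CF_CONNECTED etc. */
		struct work_struct work;
		struct workqueue_struct *wq;
	};

	static void demo_release(struct kref *kref)
	{
		kfree(container_of(kref, struct demo_conn, kref));
	}

	static void demo_data_ready(struct sock *sk)
	{
		struct demo_conn *c;

		read_lock_bh(&sk->sk_callback_lock);	/* stabilizes sk_user_data */
		c = sk->sk_user_data;
		if (c && test_bit(CF_CONNECTED, &c->flags)) {
			kref_get(&c->kref);		/* reference for the work item */
			if (!queue_work(c->wq, &c->work))
				kref_put(&c->kref, demo_release); /* already queued */
		}
		read_unlock_bh(&sk->sk_callback_lock);
	}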
diff --git a/net/tipc/topsrv.h b/net/tipc/topsrv.h
new file mode 100644 (file)
index 0000000..c7ea712
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * net/tipc/topsrv.h: Include file for TIPC topology server code
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_SERVER_H
+#define _TIPC_SERVER_H
+
+#include "core.h"
+
+#define TIPC_SERVER_NAME_LEN   32
+#define TIPC_SUB_CLUSTER_SCOPE  0x20
+#define TIPC_SUB_NODE_SCOPE     0x40
+#define TIPC_SUB_NO_STATUS      0x80
+
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+                          u32 event, struct tipc_event *evt);
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+                            u32 upper, u32 filter, int *conid);
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
+
+#endif
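
A hedged sketch of how an in-kernel user consumes the subscription API declared above (this mirrors how TIPC's group layer uses the topology server; the wrapper name is illustrative):

	/* Track publications of name instance <type, instance> for local
	 * port 'portid'; *conid identifies the subscription for teardown.
	 */
	static bool demo_track_name(struct net *net, u32 portid, u32 type,
				    u32 instance, int *conid)
	{
		return tipc_topsrv_kern_subscr(net, portid, type, instance,
					       instance, TIPC_SUB_PORTS, conid);
	}

Teardown is the matching tipc_topsrv_kern_unsubscr(net, *conid), which drops both the lookup reference and the connection's base reference, since kernel-side connections have no socket to close.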
index b0d5fce..e9b4b53 100644 (file)
@@ -308,8 +308,11 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
                        goto out;
                }
                lock_sock(sk);
-               memcpy(crypto_info_aes_gcm_128->iv, ctx->iv,
+               memcpy(crypto_info_aes_gcm_128->iv,
+                      ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
                       TLS_CIPHER_AES_GCM_128_IV_SIZE);
+               memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq,
+                      TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
                release_sock(sk);
                if (copy_to_user(optval,
                                 crypto_info_aes_gcm_128,
@@ -375,7 +378,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
        rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
        if (rc) {
                rc = -EFAULT;
-               goto out;
+               goto err_crypto_info;
        }
 
        /* check version */
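
The getsockopt fix above matters because, for TLS_CIPHER_AES_GCM_128, the kernel stores the 4-byte salt and the 8-byte explicit IV contiguously in ctx->iv; copying from offset 0 returned the salt plus only the first half of the real IV, and rec_seq was never copied at all. A hypothetical helper illustrating the layout (the size macros are from <linux/tls.h>):

	static void demo_split_key_material(const u8 *ctx_iv, u8 *salt, u8 *iv)
	{
		/* ctx_iv = salt (4 bytes) || explicit IV (8 bytes) */
		memcpy(salt, ctx_iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
		memcpy(iv, ctx_iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
		       TLS_CIPHER_AES_GCM_128_IV_SIZE);
	}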
index d545e1d..bc2970a 100644 (file)
@@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
 static int unix_socketpair(struct socket *, struct socket *);
 static int unix_accept(struct socket *, struct socket *, int, bool);
-static int unix_getname(struct socket *, struct sockaddr *, int *, int);
+static int unix_getname(struct socket *, struct sockaddr *, int);
 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 static __poll_t unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
@@ -1453,7 +1453,7 @@ out:
 }
 
 
-static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 {
        struct sock *sk = sock->sk;
        struct unix_sock *u;
@@ -1476,12 +1476,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
        if (!u->addr) {
                sunaddr->sun_family = AF_UNIX;
                sunaddr->sun_path[0] = 0;
-               *uaddr_len = sizeof(short);
+               err = sizeof(short);
        } else {
                struct unix_address *addr = u->addr;
 
-               *uaddr_len = addr->len;
-               memcpy(sunaddr, addr->name, *uaddr_len);
+               err = addr->len;
+               memcpy(sunaddr, addr->name, addr->len);
        }
        unix_state_unlock(sk);
        sock_put(sk);
@@ -1825,7 +1825,7 @@ out:
 }
 
 /* We use paged skbs for stream sockets, and limit occupancy to 32768
- * bytes, and a minimun of a full page.
+ * bytes, and a minimum of a full page.
  */
 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
 
@@ -2913,6 +2913,7 @@ static void __net_exit unix_net_exit(struct net *net)
 static struct pernet_operations unix_net_ops = {
        .init = unix_net_init,
        .exit = unix_net_exit,
+       .async = true,
 };
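
The new .async flag is part of the 4.16-era pernet conversion: operations marked async may be run by setup_net()/cleanup_net() without serializing against other async pernet operations on the global net_mutex, which speeds up namespace creation and teardown. A minimal sketch of a registration using the flag (hypothetical subsystem):

	static int __net_init demo_net_init(struct net *net)
	{
		return 0;	/* per-namespace setup would go here */
	}

	static void __net_exit demo_net_exit(struct net *net)
	{
		/* per-namespace teardown */
	}

	static struct pernet_operations demo_net_ops = {
		.init  = demo_net_init,
		.exit  = demo_net_exit,
		.async = true,	/* safe to run without net_mutex ordering */
	};

	/* registered from module init with register_pernet_subsys(&demo_net_ops) */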
 
 static int __init af_unix_init(void)
index e0fc84d..aac9b8f 100644 (file)
@@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 }
 
 static int vsock_getname(struct socket *sock,
-                        struct sockaddr *addr, int *addr_len, int peer)
+                        struct sockaddr *addr, int peer)
 {
        int err;
        struct sock *sk;
@@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock,
         */
        BUILD_BUG_ON(sizeof(*vm_addr) > 128);
        memcpy(addr, vm_addr, sizeof(*vm_addr));
-       *addr_len = sizeof(*vm_addr);
+       err = sizeof(*vm_addr);
 
 out:
        release_sock(sk);
index a6f3cac..670aa22 100644 (file)
@@ -1340,6 +1340,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg80211_pernet_ops = {
        .exit = cfg80211_pernet_exit,
+       .async = true,
 };
 
 static int __init cfg80211_init(void)
index 51aa556..b12da6e 100644 (file)
@@ -170,9 +170,28 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
                enum nl80211_bss_scan_width scan_width;
                struct ieee80211_supported_band *sband =
                                rdev->wiphy.bands[setup->chandef.chan->band];
-               scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
-               setup->basic_rates = ieee80211_mandatory_rates(sband,
-                                                              scan_width);
+
+               if (setup->chandef.chan->band == NL80211_BAND_2GHZ) {
+                       int i;
+
+                       /*
+                        * Older versions selected the mandatory rates for
+                        * 2.4 GHz as well, but were broken in that only
+                        * 1 Mbps was regarded as a mandatory rate. Keep
+                        * using just 1 Mbps as the default basic rate for
+                        * mesh to be interoperable with older versions.
+                        */
+                       for (i = 0; i < sband->n_bitrates; i++) {
+                               /* bitrate is in 100 kbps units: 10 == 1 Mbps */
+                               if (sband->bitrates[i].bitrate == 10) {
+                                       setup->basic_rates = BIT(i);
+                                       break;
+                               }
+                       }
+               } else {
+                       scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
+                       setup->basic_rates = ieee80211_mandatory_rates(sband,
+                                                                      scan_width);
+               }
        }
 
        err = cfg80211_chandef_dfs_required(&rdev->wiphy,
index 9c0dcc8..a910150 100644 (file)
@@ -421,6 +421,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
        [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
        [NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
+       [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -3923,9 +3924,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
                        return false;
                return true;
        case NL80211_CMD_CONNECT:
-               /* SAE not supported yet */
-               if (auth_type == NL80211_AUTHTYPE_SAE)
+               if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
+                   auth_type == NL80211_AUTHTYPE_SAE)
                        return false;
+
                /* FILS with SK PFS or PK not supported yet */
                if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
                    auth_type == NL80211_AUTHTYPE_FILS_PK)
@@ -4487,6 +4489,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
        PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
        PUT_SINFO_U64(BEACON_RX, rx_beacon);
        PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
+       PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
 
 #undef PUT_SINFO
 #undef PUT_SINFO_U64
@@ -5848,7 +5851,6 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
        return genlmsg_reply(msg, info);
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
  out:
        nlmsg_free(msg);
        return -ENOBUFS;
@@ -6329,7 +6331,6 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
 nla_put_failure_rcu:
        rcu_read_unlock();
 nla_put_failure:
-       genlmsg_cancel(msg, hdr);
 put_failure:
        nlmsg_free(msg);
        return -EMSGSIZE;
@@ -6718,8 +6719,17 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
 
        *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
 
-       if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-           !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+       if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+            !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
+           ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
+            !wiphy_ext_feature_isset(wiphy,
+                                     NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
+           ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
+            !wiphy_ext_feature_isset(wiphy,
+                                     NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
+           ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
+            !wiphy_ext_feature_isset(wiphy,
+                                     NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
                return -EOPNOTSUPP;
 
        if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
@@ -9155,6 +9165,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
                return -EINVAL;
        }
 
+       if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
+               if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+                       GENL_SET_ERR_MSG(info,
+                                        "external auth requires connection ownership");
+                       return -EINVAL;
+               }
+               connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
+       }
+
        wdev_lock(dev->ieee80211_ptr);
 
        err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -12463,6 +12482,41 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
        return ret;
 }
 
+static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
+{
+       struct cfg80211_registered_device *rdev = info->user_ptr[0];
+       struct net_device *dev = info->user_ptr[1];
+       struct cfg80211_external_auth_params params;
+
+       if (!rdev->ops->external_auth)
+               return -EOPNOTSUPP;
+
+       if (!info->attrs[NL80211_ATTR_SSID])
+               return -EINVAL;
+
+       if (!info->attrs[NL80211_ATTR_BSSID])
+               return -EINVAL;
+
+       if (!info->attrs[NL80211_ATTR_STATUS_CODE])
+               return -EINVAL;
+
+       memset(&params, 0, sizeof(params));
+
+       params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+       if (params.ssid.ssid_len == 0 ||
+           params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+               return -EINVAL;
+       memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
+              params.ssid.ssid_len);
+
+       memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
+              ETH_ALEN);
+
+       params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
+
+       return rdev_external_auth(rdev, dev, &params);
+}
+
 #define NL80211_FLAG_NEED_WIPHY                0x01
 #define NL80211_FLAG_NEED_NETDEV       0x02
 #define NL80211_FLAG_NEED_RTNL         0x04
@@ -13358,6 +13412,14 @@ static const struct genl_ops nl80211_ops[] = {
                .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
                                  NL80211_FLAG_NEED_RTNL,
        },
+       {
+               .cmd = NL80211_CMD_EXTERNAL_AUTH,
+               .doit = nl80211_external_auth,
+               .policy = nl80211_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+                                 NL80211_FLAG_NEED_RTNL,
+       },
 
 };
 
@@ -13672,7 +13734,6 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
        return;
 
 nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -13720,7 +13781,6 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -13808,7 +13868,6 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -13884,7 +13943,6 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -13924,7 +13982,6 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -13954,7 +14011,6 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -13991,7 +14047,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14024,7 +14079,6 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14065,7 +14119,6 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
@@ -14104,7 +14157,6 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14159,7 +14211,6 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
        return;
 
 nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14205,7 +14256,6 @@ static void nl80211_send_remain_on_chan_event(
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14319,7 +14369,6 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_conn_failed);
@@ -14356,7 +14405,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
        return true;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
        return true;
 }
@@ -14440,7 +14488,6 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
        return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
        return -ENOBUFS;
 }
@@ -14484,7 +14531,6 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
@@ -14693,7 +14739,6 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14751,7 +14796,6 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14804,7 +14848,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
@@ -14886,12 +14929,67 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+                                      struct sta_opmode_info *sta_opmode,
+                                      gfp_t gfp)
+{
+       struct sk_buff *msg;
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
+       struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+       void *hdr;
+
+       if (WARN_ON(!mac))
+               return;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+       if (!msg)
+               return;
+
+       hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
+       if (!hdr) {
+               nlmsg_free(msg);
+               return;
+       }
+
+       if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+               goto nla_put_failure;
+
+       if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
+               goto nla_put_failure;
+
+       if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
+           nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
+               goto nla_put_failure;
+
+       if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
+           nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+               goto nla_put_failure;
+
+       if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
+           nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+
+       genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+                               NL80211_MCGRP_MLME, gfp);
+
+       return;
+
+nla_put_failure:
+       nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);
+
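A minimal driver-side sketch (hypothetical function and variable names) of
how this notifier might be invoked when a peer station changes its SMPS
mode, using only the sta_opmode_info fields visible above:

	/* hypothetical call site in a driver */
	static void drv_report_smps_change(struct net_device *dev,
					   const u8 *peer, u8 new_smps_mode)
	{
		struct sta_opmode_info sta_opmode = {};

		sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
		sta_opmode.smps_mode = new_smps_mode;
		cfg80211_sta_opmode_change_notify(dev, peer, &sta_opmode,
						  GFP_KERNEL);
	}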
 void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
-                          u64 cookie, bool acked, gfp_t gfp)
+                          u64 cookie, bool acked, s32 ack_signal,
+                          bool is_valid_ack_signal, gfp_t gfp)
 {
        struct wireless_dev *wdev = dev->ieee80211_ptr;
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -14916,7 +15014,9 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
            nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
            nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
                              NL80211_ATTR_PAD) ||
-           (acked && nla_put_flag(msg, NL80211_ATTR_ACK)))
+           (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
+           (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
+                                               ack_signal)))
                goto nla_put_failure;
 
        genlmsg_end(msg, hdr);
@@ -14926,7 +15026,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_probe_status);
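A hedged sketch of the extended call, based only on the new signature above
(variable names are placeholders): a driver that knows the RSSI of the
received ACK passes it along; one that does not passes false, in which case
the two new arguments are ignored:

	/* the peer ACKed our probe and the ACK's signal strength is known */
	cfg80211_probe_status(dev, peer_addr, cookie, true,
			      ack_rssi_dbm, true, GFP_ATOMIC);

	/* no usable ACK signal available */
	cfg80211_probe_status(dev, peer_addr, cookie, acked,
			      0, false, GFP_ATOMIC);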
@@ -14971,8 +15070,6 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 
  nla_put_failure:
        spin_unlock_bh(&rdev->beacon_registrations_lock);
-       if (hdr)
-               genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_report_obss_beacon);
@@ -15188,7 +15285,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
        return;
 
  nla_put_failure:
-       genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_tdls_oper_request);
@@ -15333,8 +15429,6 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
        return;
 
  nla_put_failure:
-       if (hdr)
-               genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
@@ -15369,6 +15463,47 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
        nlmsg_free(msg);
 }
 
+int cfg80211_external_auth_request(struct net_device *dev,
+                                  struct cfg80211_external_auth_params *params,
+                                  gfp_t gfp)
+{
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
+       struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+       struct sk_buff *msg;
+       void *hdr;
+
+       if (!wdev->conn_owner_nlportid)
+               return -EINVAL;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+       if (!msg)
+               return -ENOMEM;
+
+       hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH);
+       if (!hdr)
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+           nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+           nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
+           nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+                       params->action) ||
+           nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
+           nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len,
+                   params->ssid.ssid))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+                       wdev->conn_owner_nlportid);
+       return 0;
+
+ nla_put_failure:
+       nlmsg_free(msg);
+       return -ENOBUFS;
+}
+EXPORT_SYMBOL(cfg80211_external_auth_request);
+
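A sketch of the driver side of this round trip; the function name and the
NL80211_EXTERNAL_AUTH_START and WLAN_AKM_SUITE_SAE constants are assumptions
not shown in these hunks. The driver asks the connection owner (e.g.
wpa_supplicant) to run SAE authentication on its behalf; the owner later
answers with NL80211_CMD_EXTERNAL_AUTH carrying the status code, which lands
in nl80211_external_auth() above:

	static int drv_offload_sae_auth(struct net_device *dev,
					const u8 *bssid,
					const struct cfg80211_ssid *ssid)
	{
		struct cfg80211_external_auth_params params = {};

		params.action = NL80211_EXTERNAL_AUTH_START; /* assumed enum */
		params.key_mgmt_suite = WLAN_AKM_SUITE_SAE;  /* assumed constant */
		params.ssid = *ssid;
		memcpy(params.bssid, bssid, ETH_ALEN);

		/* unicasts NL80211_CMD_EXTERNAL_AUTH to the connection owner */
		return cfg80211_external_auth_request(dev, &params, GFP_KERNEL);
	}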
 /* initialisation/exit functions */
 
 int __init nl80211_init(void)
index 0c06240..84f23ae 100644 (file)
@@ -1190,4 +1190,19 @@ static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
        trace_rdev_return_int(&rdev->wiphy, ret);
        return ret;
 }
+
+static inline int
+rdev_external_auth(struct cfg80211_registered_device *rdev,
+                  struct net_device *dev,
+                  struct cfg80211_external_auth_params *params)
+{
+       int ret = -EOPNOTSUPP;
+
+       trace_rdev_external_auth(&rdev->wiphy, dev, params);
+       if (rdev->ops->external_auth)
+               ret = rdev->ops->external_auth(&rdev->wiphy, dev, params);
+       trace_rdev_return_int(&rdev->wiphy, ret);
+       return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
index fdb3646..701cfd7 100644 (file)
@@ -1032,6 +1032,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
        wdev->current_bss = NULL;
        wdev->ssid_len = 0;
        wdev->conn_owner_nlportid = 0;
+       kzfree(wdev->connect_keys);
+       wdev->connect_keys = NULL;
 
        nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
 
index bcfedd3..5152938 100644 (file)
@@ -2319,6 +2319,29 @@ TRACE_EVENT(rdev_del_pmk,
                  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
 );
 
+TRACE_EVENT(rdev_external_auth,
+           TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+                    struct cfg80211_external_auth_params *params),
+           TP_ARGS(wiphy, netdev, params),
+           TP_STRUCT__entry(WIPHY_ENTRY
+                            NETDEV_ENTRY
+                            MAC_ENTRY(bssid)
+                            __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1)
+                            __field(u16, status)
+           ),
+           TP_fast_assign(WIPHY_ASSIGN;
+                          NETDEV_ASSIGN;
+                          MAC_ASSIGN(bssid, params->bssid);
+                          memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
+                          memcpy(__entry->ssid, params->ssid.ssid,
+                                 params->ssid.ssid_len);
+                          __entry->status = params->status;
+           ),
+           TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+                     ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+                     __entry->bssid, __entry->ssid, __entry->status)
+);
+
 /*************************************************************
  *          cfg80211 exported functions traces              *
  *************************************************************/
index c691606..d112e9a 100644 (file)
@@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
 EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
 
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
-                                 const u8 *addr, enum nl80211_iftype iftype)
+                                 const u8 *addr, enum nl80211_iftype iftype,
+                                 u8 data_offset)
 {
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
        struct {
@@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
        if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
                return -1;
 
-       hdrlen = ieee80211_hdrlen(hdr->frame_control);
+       hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
        if (skb->len < hdrlen + 8)
                return -1;
 
index 9efbfc7..bc70644 100644 (file)
@@ -390,6 +390,7 @@ static void __net_exit wext_pernet_exit(struct net *net)
 static struct pernet_operations wext_pernet_ops = {
        .init = wext_pernet_init,
        .exit = wext_pernet_exit,
+       .async = true,
 };
 
 static int __init wireless_nlevent_init(void)
index 562cc11..d49aa79 100644 (file)
@@ -896,7 +896,7 @@ out:
 }
 
 static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
-                      int *uaddr_len, int peer)
+                      int peer)
 {
        struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
        struct sock *sk = sock->sk;
@@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
                sx25->sx25_addr = x25->source_addr;
 
        sx25->sx25_family = AF_X25;
-       *uaddr_len = sizeof(*sx25);
+       rc = sizeof(*sx25);
 
 out:
        return rc;
index db0b131..9c214ec 100644 (file)
@@ -335,8 +335,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
                }
        }
 
-       pr_debug("invalid PLP frame %02X %02X %02X\n",
-              frame[0], frame[1], frame[2]);
+       pr_debug("invalid PLP frame %3ph\n", frame);
 
        return X25_ILLEGAL;
 }
index 7a23078..77d9d1a 100644 (file)
@@ -2982,6 +2982,7 @@ static void __net_exit xfrm_net_exit(struct net *net)
 static struct pernet_operations __net_initdata xfrm_net_ops = {
        .init = xfrm_net_init,
        .exit = xfrm_net_exit,
+       .async = true,
 };
 
 void __init xfrm_init(void)
index ec3fc8d..2c2a587 100644 (file)
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
 hostprogs-y += xdp_monitor
 hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
+hostprogs-y += cpustat
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
 xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
 xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -144,6 +146,7 @@ always += xdp_monitor_kern.o
 always += xdp_rxq_info_kern.o
 always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
+always += cpustat_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
 HOSTLOADLIBES_xdp_monitor += -lelf
 HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644 (file)
index 0000000..68c84da
--- /dev/null
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU count, cstate count and pstate count are based on the
+ * 96boards HiKey with its eight (octa) CA53 CPUs.
+ *
+ * Every CPU has three idle states (cstates):
+ *   WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU has five operating points (pstates):
+ *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumptions; other platforms need
+ * to adjust these definitions.
+ */
+#define MAX_CPU                        8
+#define MAX_PSTATE_ENTRIES     5
+#define MAX_CSTATE_ENTRIES     3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * The my_map structure records the current cstate and pstate index
+ * and timestamp (Idx, Ts); when a new event comes in we update the
+ * pair to the new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one such array recording state index and timestamp,
+ * kept separately for cstate and pstate:
+ *
+ * +--------------------------+
+ * | cstate timestamp         |
+ * +--------------------------+
+ * | cstate index             |
+ * +--------------------------+
+ * | pstate timestamp         |
+ * +--------------------------+
+ * | pstate index             |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME    0
+#define MAP_OFF_CSTATE_IDX     1
+#define MAP_OFF_PSTATE_TIME    2
+#define MAP_OFF_PSTATE_IDX     3
+#define MAP_OFF_NUM            4
+
+struct bpf_map_def SEC("maps") my_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have the same format, so one common structure is defined for both.
+ */
+struct cpu_args {
+       u64 pad;
+       u32 state;
+       u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+       u32 i;
+
+       for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+               if (frequency == cpu_opps[i])
+                       return i;
+       }
+
+       return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+       u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+       u32 key, cpu, pstate_idx;
+       u64 *val;
+
+       if (ctx->cpu_id >= MAX_CPU)
+               return 0;
+
+       cpu = ctx->cpu_id;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+       cts = bpf_map_lookup_elem(&my_map, &key);
+       if (!cts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+       cstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!cstate)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+       pts = bpf_map_lookup_elem(&my_map, &key);
+       if (!pts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+       pstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!pstate)
+               return 0;
+
+       prev_state = *cstate;
+       *cstate = ctx->state;
+
+       if (!*cts) {
+               *cts = bpf_ktime_get_ns();
+               return 0;
+       }
+
+       cur_ts = bpf_ktime_get_ns();
+       delta = cur_ts - *cts;
+       *cts = cur_ts;
+
+       /*
+        * When state is not (u32)-1, the cpu is entering an idle
+        * state; in this case we need to record the interval for
+        * the pstate that is ending.
+        *
+        *                 OPP2
+        *            +---------------------+
+        *     OPP1   |                     |
+        *   ---------+                     |
+        *                                  |  Idle state
+        *                                  +---------------
+        *
+        *            |<- pstate duration ->|
+        *            ^                     ^
+        *           pts                  cur_ts
+        */
+       if (ctx->state != (u32)-1) {
+
+               /* only record pstate once the first cpu_frequency event has set pts */
+               if (!*pts)
+                       return 0;
+
+               delta = cur_ts - *pts;
+
+               pstate_idx = find_cpu_pstate_idx(*pstate);
+               if (pstate_idx >= MAX_PSTATE_ENTRIES)
+                       return 0;
+
+               key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+               val = bpf_map_lookup_elem(&pstate_duration, &key);
+               if (val)
+                       __sync_fetch_and_add((long *)val, delta);
+
+       /*
+        * When state equals (u32)-1, the cpu has just exited one
+        * specific idle state; in this case we need to record the
+        * interval for the cstate that is ending.
+        *
+        *       OPP2
+        *   -----------+
+        *              |                          OPP1
+        *              |                     +-----------
+        *              |     Idle state      |
+        *              +---------------------+
+        *
+        *              |<- cstate duration ->|
+        *              ^                     ^
+        *             cts                  cur_ts
+        */
+       } else {
+
+               key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+               val = bpf_map_lookup_elem(&cstate_duration, &key);
+               if (val)
+                       __sync_fetch_and_add((long *)val, delta);
+       }
+
+       /* Update timestamp for pstate as new start time */
+       if (*pts)
+               *pts = cur_ts;
+
+       return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+       u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+       u32 key, cpu, pstate_idx;
+       u64 *val;
+
+       cpu = ctx->cpu_id;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+       pts = bpf_map_lookup_elem(&my_map, &key);
+       if (!pts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+       pstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!pstate)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+       cstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!cstate)
+               return 0;
+
+       prev_state = *pstate;
+       *pstate = ctx->state;
+
+       if (!*pts) {
+               *pts = bpf_ktime_get_ns();
+               return 0;
+       }
+
+       cur_ts = bpf_ktime_get_ns();
+       delta = cur_ts - *pts;
+       *pts = cur_ts;
+
+       /* When the CPU is in idle, bail out and skip the pstate statistics */
+       if (*cstate != (u32)(-1))
+               return 0;
+
+       /*
+        * The cpu changes to a different OPP (in the diagram below,
+        * from OPP3 to OPP1); record the interval for the previous
+        * frequency OPP3 and update the timestamp as the start time
+        * for the new frequency OPP1.
+        *
+        *                 OPP3
+        *            +---------------------+
+        *     OPP2   |                     |
+        *   ---------+                     |
+        *                                  |    OPP1
+        *                                  +---------------
+        *
+        *            |<- pstate duration ->|
+        *            ^                     ^
+        *           pts                  cur_ts
+        */
+       pstate_idx = find_cpu_pstate_idx(*pstate);
+       if (pstate_idx >= MAX_PSTATE_ENTRIES)
+               return 0;
+
+       key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+       val = bpf_map_lookup_elem(&pstate_duration, &key);
+       if (val)
+               __sync_fetch_and_add((long *)val, delta);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644 (file)
index 0000000..2b4cd1a
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU                        8
+#define MAX_PSTATE_ENTRIES     5
+#define MAX_CSTATE_ENTRIES     3
+#define MAX_STARS              40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ    "208000"
+#define CPUFREQ_HIGHEST_FREQ   "12000000"
+
+struct cpu_stat_data {
+       unsigned long cstate[MAX_CSTATE_ENTRIES];
+       unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+       int i, j;
+       char state_str[sizeof("cstate-9")];
+       struct cpu_stat_data *data;
+
+       /* Clear screen */
+       printf("\033[2J");
+
+       /* Header */
+       printf("\nCPU states statistics:\n");
+       printf("%-10s ", "state(ms)");
+
+       for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+               sprintf(state_str, "cstate-%d", i);
+               printf("%-11s ", state_str);
+       }
+
+       for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+               sprintf(state_str, "pstate-%d", i);
+               printf("%-11s ", state_str);
+       }
+
+       printf("\n");
+
+       for (j = 0; j < MAX_CPU; j++) {
+               data = &stat_data[j];
+
+               printf("CPU-%-6d ", j);
+               for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+                       printf("%-11ld ", data->cstate[i] / 1000000);
+
+               for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+                       printf("%-11ld ", data->pstate[i] / 1000000);
+
+               printf("\n");
+       }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+       unsigned long key, value;
+       int c, i;
+
+       for (c = 0; c < MAX_CPU; c++) {
+               for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+                       key = c * MAX_CSTATE_ENTRIES + i;
+                       bpf_map_lookup_elem(cstate_fd, &key, &value);
+                       stat_data[c].cstate[i] = value;
+               }
+
+               for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+                       key = c * MAX_PSTATE_ENTRIES + i;
+                       bpf_map_lookup_elem(pstate_fd, &key, &value);
+                       stat_data[c].pstate[i] = value;
+               }
+       }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It pins the running task to each cpu in turn so that every cpu is
+ * woken up once to handle scheduling; as a result all of them produce
+ * the ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+       int rcpu, i, ret;
+       cpu_set_t cpumask;
+       cpu_set_t original_cpumask;
+
+       ret = sysconf(_SC_NPROCESSORS_CONF);
+       if (ret < 0)
+               return -1;
+
+       rcpu = sched_getcpu();
+       if (rcpu < 0)
+               return -1;
+
+       /* Keep track of the CPUs we will run on */
+       sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+       for (i = 0; i < ret; i++) {
+
+               /* Pointless to wake up ourselves */
+               if (i == rcpu)
+                       continue;
+
+               /* Pointless to wake CPUs we will not run on */
+               if (!CPU_ISSET(i, &original_cpumask))
+                       continue;
+
+               CPU_ZERO(&cpumask);
+               CPU_SET(i, &cpumask);
+
+               sched_setaffinity(0, sizeof(cpumask), &cpumask);
+       }
+
+       /* Enable all the CPUs of the original mask */
+       sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+       return 0;
+}
+
+/*
+ * It's possible for the frequency not to change for a long time, so no
+ * ftrace event 'trace_cpu_frequency' arrives for a long period; this
+ * introduces a big deviation into the pstate statistics.
+ *
+ * To solve this issue, the code below forces 'scaling_max_freq' down to
+ * 208MHz to trigger the ftrace event 'trace_cpu_frequency', and then
+ * restores it to the maximum frequency value of 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+       int len, fd;
+
+       fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+       if (fd < 0) {
+               printf("failed to open scaling_max_freq, errno=%d\n", errno);
+               return fd;
+       }
+
+       len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+       if (len < 0) {
+               printf("failed to open scaling_max_freq, errno=%d\n", errno);
+               goto err;
+       }
+
+       len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+       if (len < 0) {
+               printf("failed to open scaling_max_freq, errno=%d\n", errno);
+               goto err;
+       }
+
+err:
+       close(fd);
+       return len;
+}
+
+static void int_exit(int sig)
+{
+       cpu_stat_inject_cpu_idle_event();
+       cpu_stat_inject_cpu_frequency_event();
+       cpu_stat_update(map_fd[1], map_fd[2]);
+       cpu_stat_print();
+       exit(0);
+}
+
+int main(int argc, char **argv)
+{
+       char filename[256];
+       int ret;
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       ret = cpu_stat_inject_cpu_idle_event();
+       if (ret < 0)
+               return 1;
+
+       ret = cpu_stat_inject_cpu_frequency_event();
+       if (ret < 0)
+               return 1;
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       while (1) {
+               cpu_stat_update(map_fd[1], map_fd[2]);
+               cpu_stat_print();
+               sleep(5);
+       }
+
+       return 0;
+}
index efdc16d..9a8db7b 100644 (file)
@@ -52,7 +52,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
        key.tunnel_tos = 0;
        key.tunnel_ttl = 64;
 
-       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -92,7 +93,8 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
        key.tunnel_label = 0xabcde;
 
        ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
-                                    BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+                                    BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+                                    BPF_F_SEQ_NUMBER);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
index 8ee0371..9f61742 100755 (executable)
@@ -61,6 +61,7 @@ cleanup_and_exit()
 
        [ -n "$msg" ] && echo "ERROR: $msg"
 
+       test_cgrp2_sock -d ${CGRP_MNT}/sockopts
        ip li del cgrp2_sock
        umount ${CGRP_MNT}
 
index fc4e64d..0f396a8 100755 (executable)
@@ -28,6 +28,9 @@ function attach_bpf {
 }
 
 function cleanup {
+       if [ -d /tmp/cgroupv2/foo ]; then
+               test_cgrp2_sock -d /tmp/cgroupv2/foo
+       fi
        ip link del veth0b
        ip netns delete at_ns0
        umount /tmp/cgroupv2
index 43ce049..c265863 100755 (executable)
@@ -23,7 +23,8 @@ function config_device {
 function add_gre_tunnel {
        # in namespace
        ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+        ip link add dev $DEV_NS type $TYPE seq key 2 \
+               local 172.16.1.100 remote 172.16.1.200
        ip netns exec at_ns0 ip link set dev $DEV_NS up
        ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
 
@@ -43,7 +44,7 @@ function add_ip6gretap_tunnel {
 
        # in namespace
        ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+               ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
                local ::11 remote ::22
 
        ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
index d54e91e..b701b5c 100644 (file)
@@ -20,6 +20,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
+#include <sys/resource.h>
 
 #include "bpf_load.h"
 #include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
 
 int main(int argc, char **argv)
 {
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        const char *optstr = "SN";
        char filename[256];
        int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
                return 1;
        }
 
+       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+               perror("setrlimit(RLIMIT_MEMLOCK)");
+               return 1;
+       }
+
        ifindex_in = strtoul(argv[optind], NULL, 0);
        ifindex_out = strtoul(argv[optind + 1], NULL, 0);
        printf("input: %d output: %d\n", ifindex_in, ifindex_out);
index 73f1da4..9bf2881 100644 (file)
@@ -2,7 +2,7 @@
 hostprogs-y := sockmap
 
 # Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
index 7c25c0c..95a54a8 100644 (file)
@@ -566,6 +566,7 @@ run:
        else
                fprintf(stderr, "unknown test\n");
 out:
+       bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
        close(s1);
        close(s2);
        close(p1);
index 6f9e4ce..9bb0a7f 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/cred.h>
 #include <linux/key-type.h>
 #include <linux/digsig.h>
+#include <linux/vmalloc.h>
 #include <crypto/public_key.h>
 #include <keys/system_keyring.h>
 
index 929e149..fa728f6 100644 (file)
 #include <keys/big_key-type.h>
 #include <crypto/aead.h>
 
+struct big_key_buf {
+       unsigned int            nr_pages;
+       void                    *virt;
+       struct scatterlist      *sg;
+       struct page             *pages[];
+};
+
 /*
  * Layout of key payload words.
  */
@@ -91,10 +98,9 @@ static DEFINE_MUTEX(big_key_aead_lock);
 /*
  * Encrypt/decrypt big_key data
  */
-static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
+static int big_key_crypt(enum big_key_op op, struct big_key_buf *buf, size_t datalen, u8 *key)
 {
        int ret;
-       struct scatterlist sgio;
        struct aead_request *aead_req;
        /* We always use a zero nonce. The reason we can get away with this is
         * because we're using a different randomly generated key for every
@@ -109,8 +115,7 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
                return -ENOMEM;
 
        memset(zero_nonce, 0, sizeof(zero_nonce));
-       sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0));
-       aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce);
+       aead_request_set_crypt(aead_req, buf->sg, buf->sg, datalen, zero_nonce);
        aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
        aead_request_set_ad(aead_req, 0);
 
@@ -129,22 +134,82 @@ error:
        return ret;
 }
 
+/*
+ * Free up the buffer.
+ */
+static void big_key_free_buffer(struct big_key_buf *buf)
+{
+       unsigned int i;
+
+       if (buf->virt) {
+               memset(buf->virt, 0, buf->nr_pages * PAGE_SIZE);
+               vunmap(buf->virt);
+       }
+
+       for (i = 0; i < buf->nr_pages; i++)
+               if (buf->pages[i])
+                       __free_page(buf->pages[i]);
+
+       kfree(buf);
+}
+
+/*
+ * Allocate a buffer consisting of a set of pages with a virtual mapping
+ * applied over them.
+ */
+static void *big_key_alloc_buffer(size_t len)
+{
+       struct big_key_buf *buf;
+       unsigned int npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       unsigned int i, l;
+
+       buf = kzalloc(sizeof(struct big_key_buf) +
+                     sizeof(struct page) * npg +
+                     sizeof(struct scatterlist) * npg,
+                     GFP_KERNEL);
+       if (!buf)
+               return NULL;
+
+       buf->nr_pages = npg;
+       buf->sg = (void *)(buf->pages + npg);
+       sg_init_table(buf->sg, npg);
+
+       for (i = 0; i < buf->nr_pages; i++) {
+               buf->pages[i] = alloc_page(GFP_KERNEL);
+               if (!buf->pages[i])
+                       goto nomem;
+
+               l = min_t(size_t, len, PAGE_SIZE);
+               sg_set_page(&buf->sg[i], buf->pages[i], l, 0);
+               len -= l;
+       }
+
+       buf->virt = vmap(buf->pages, buf->nr_pages, VM_MAP, PAGE_KERNEL);
+       if (!buf->virt)
+               goto nomem;
+
+       return buf;
+
+nomem:
+       big_key_free_buffer(buf);
+       return NULL;
+}
+
 /*
  * Preparse a big key
  */
 int big_key_preparse(struct key_preparsed_payload *prep)
 {
+       struct big_key_buf *buf;
        struct path *path = (struct path *)&prep->payload.data[big_key_path];
        struct file *file;
        u8 *enckey;
-       u8 *data = NULL;
        ssize_t written;
-       size_t datalen = prep->datalen;
+       size_t datalen = prep->datalen, enclen = datalen + ENC_AUTHTAG_SIZE;
        int ret;
 
-       ret = -EINVAL;
        if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
-               goto error;
+               return -EINVAL;
 
        /* Set an arbitrary quota */
        prep->quotalen = 16;
@@ -157,13 +222,12 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                 *
                 * File content is stored encrypted with randomly generated key.
                 */
-               size_t enclen = datalen + ENC_AUTHTAG_SIZE;
                loff_t pos = 0;
 
-               data = kmalloc(enclen, GFP_KERNEL);
-               if (!data)
+               buf = big_key_alloc_buffer(enclen);
+               if (!buf)
                        return -ENOMEM;
-               memcpy(data, prep->data, datalen);
+               memcpy(buf->virt, prep->data, datalen);
 
                /* generate random key */
                enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
@@ -176,7 +240,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                        goto err_enckey;
 
                /* encrypt aligned data */
-               ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey);
+               ret = big_key_crypt(BIG_KEY_ENC, buf, datalen, enckey);
                if (ret)
                        goto err_enckey;
 
@@ -187,7 +251,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                        goto err_enckey;
                }
 
-               written = kernel_write(file, data, enclen, &pos);
+               written = kernel_write(file, buf->virt, enclen, &pos);
                if (written != enclen) {
                        ret = written;
                        if (written >= 0)
@@ -202,7 +266,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                *path = file->f_path;
                path_get(path);
                fput(file);
-               kzfree(data);
+               big_key_free_buffer(buf);
        } else {
                /* Just store the data in a buffer */
                void *data = kmalloc(datalen, GFP_KERNEL);
@@ -220,7 +284,7 @@ err_fput:
 err_enckey:
        kzfree(enckey);
 error:
-       kzfree(data);
+       big_key_free_buffer(buf);
        return ret;
 }
 
@@ -298,15 +362,15 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
                return datalen;
 
        if (datalen > BIG_KEY_FILE_THRESHOLD) {
+               struct big_key_buf *buf;
                struct path *path = (struct path *)&key->payload.data[big_key_path];
                struct file *file;
-               u8 *data;
                u8 *enckey = (u8 *)key->payload.data[big_key_data];
                size_t enclen = datalen + ENC_AUTHTAG_SIZE;
                loff_t pos = 0;
 
-               data = kmalloc(enclen, GFP_KERNEL);
-               if (!data)
+               buf = big_key_alloc_buffer(enclen);
+               if (!buf)
                        return -ENOMEM;
 
                file = dentry_open(path, O_RDONLY, current_cred());
@@ -316,26 +380,26 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
                }
 
                /* read file to kernel and decrypt */
-               ret = kernel_read(file, data, enclen, &pos);
+               ret = kernel_read(file, buf->virt, enclen, &pos);
                if (ret >= 0 && ret != enclen) {
                        ret = -EIO;
                        goto err_fput;
                }
 
-               ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey);
+               ret = big_key_crypt(BIG_KEY_DEC, buf, enclen, enckey);
                if (ret)
                        goto err_fput;
 
                ret = datalen;
 
                /* copy decrypted data to user */
-               if (copy_to_user(buffer, data, datalen) != 0)
+               if (copy_to_user(buffer, buf->virt, datalen) != 0)
                        ret = -EFAULT;
 
 err_fput:
                fput(file);
 error:
-               kzfree(data);
+               big_key_free_buffer(buf);
        } else {
                ret = datalen;
                if (copy_to_user(buffer, key->payload.data[big_key_data],
index 8644d86..b4d7b62 100644 (file)
@@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net)
 static struct pernet_operations selinux_net_ops = {
        .init = selinux_nf_register,
        .exit = selinux_nf_unregister,
+       .async = true,
 };
 
 static int __init selinux_nf_ip_init(void)
index e36d178..3f29c03 100644 (file)
@@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net)
 static struct pernet_operations smack_net_ops = {
        .init = smack_nf_register,
        .exit = smack_nf_unregister,
+       .async = true,
 };
 
 static int __init smack_nf_ip_init(void)
index cd6932e..9094f4b 100644 (file)
@@ -655,10 +655,11 @@ int tomoyo_socket_listen_permission(struct socket *sock)
                return 0;
        {
                const int error = sock->ops->getname(sock, (struct sockaddr *)
-                                                    &addr, &addr_len, 0);
+                                                    &addr, 0);
 
-               if (error)
+               if (error < 0)
                        return error;
+               addr_len = error;
        }
        address.protocol = type;
        address.operation = TOMOYO_NETWORK_LISTEN;
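This hunk and the x25_getname() one above adapt to the same convention:
socket ->getname() now returns the address length (or a negative errno)
instead of filling in a length pointer. A minimal caller sketch:

	int len = sock->ops->getname(sock, (struct sockaddr *)&addr, 0);

	if (len < 0)
		return len;	/* propagate the error */
	addr_len = len;		/* success: length of the address written */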
index f8a64e1..baa5f8e 100644 (file)
@@ -5,7 +5,6 @@
 
 config AC97_BUS_NEW
        tristate
-       select AC97
        help
          This is the new AC97 bus type, successor of AC97_BUS. The ported
          drivers which benefit from the AC97 automatic probing should "select"
index 60db327..04d4db4 100644 (file)
@@ -1003,7 +1003,7 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
 {
        struct snd_seq_client *client = file->private_data;
        int written = 0, len;
-       int err = -EINVAL;
+       int err;
        struct snd_seq_event event;
 
        if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT))
@@ -1018,11 +1018,15 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
 
        /* allocate the pool now if the pool is not allocated yet */ 
        if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) {
-               if (snd_seq_pool_init(client->pool) < 0)
+               mutex_lock(&client->ioctl_mutex);
+               err = snd_seq_pool_init(client->pool);
+               mutex_unlock(&client->ioctl_mutex);
+               if (err < 0)
                        return -ENOMEM;
        }
 
        /* only process whole events */
+       err = -EINVAL;
        while (count >= sizeof(struct snd_seq_event)) {
                /* Read in the event header from the user */
                len = sizeof(event);
index 2347588..ce28f7c 100644 (file)
@@ -3465,6 +3465,19 @@ static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec,
                spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
 }
 
+static void alc269_fixup_pincfg_U7x7_headset_mic(struct hda_codec *codec,
+                                                const struct hda_fixup *fix,
+                                                int action)
+{
+       unsigned int cfg_headphone = snd_hda_codec_get_pincfg(codec, 0x21);
+       unsigned int cfg_headset_mic = snd_hda_codec_get_pincfg(codec, 0x19);
+
+       if (cfg_headphone && cfg_headset_mic == 0x411111f0)
+               snd_hda_codec_set_pincfg(codec, 0x19,
+                       (cfg_headphone & ~AC_DEFCFG_DEVICE) |
+                       (AC_JACK_MIC_IN << AC_DEFCFG_DEVICE_SHIFT));
+}
+
 static void alc269_fixup_hweq(struct hda_codec *codec,
                               const struct hda_fixup *fix, int action)
 {
@@ -4972,6 +4985,28 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec,
        }
 }
 
+static void alc_fixup_tpt470_dock(struct hda_codec *codec,
+                                 const struct hda_fixup *fix, int action)
+{
+       static const struct hda_pintbl pincfgs[] = {
+               { 0x17, 0x21211010 }, /* dock headphone */
+               { 0x19, 0x21a11010 }, /* dock mic */
+               { }
+       };
+       struct alc_spec *spec = codec->spec;
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
+               /* Enable DOCK device */
+               snd_hda_codec_write(codec, 0x17, 0,
+                           AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
+               /* Enable DOCK device */
+               snd_hda_codec_write(codec, 0x19, 0,
+                           AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
+               snd_hda_apply_pincfgs(codec, pincfgs);
+       }
+}
+
 static void alc_shutup_dell_xps13(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
@@ -5351,6 +5386,7 @@ enum {
        ALC269_FIXUP_LIFEBOOK_EXTMIC,
        ALC269_FIXUP_LIFEBOOK_HP_PIN,
        ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT,
+       ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC,
        ALC269_FIXUP_AMIC,
        ALC269_FIXUP_DMIC,
        ALC269VB_FIXUP_AMIC,
@@ -5446,6 +5482,7 @@ enum {
        ALC700_FIXUP_INTEL_REFERENCE,
        ALC274_FIXUP_DELL_BIND_DACS,
        ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+       ALC298_FIXUP_TPT470_DOCK,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5556,6 +5593,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
        },
+       [ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc269_fixup_pincfg_U7x7_headset_mic,
+       },
        [ALC269_FIXUP_AMIC] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -6271,6 +6312,12 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC274_FIXUP_DELL_BIND_DACS
        },
+       [ALC298_FIXUP_TPT470_DOCK] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_tpt470_dock,
+               .chained = true,
+               .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6321,6 +6368,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
        SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
        SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+       SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
+       SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -6422,6 +6471,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT),
        SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
+       SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
        SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
@@ -6450,8 +6500,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x222d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+       SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x224b, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -6472,7 +6530,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
+       SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+       SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
        SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
        SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
@@ -6734,6 +6797,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0xb7a60130},
                {0x14, 0x90170110},
                {0x21, 0x02211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x14, 0x90170110},
+               {0x14, 0x01011020},
+               {0x21, 0x0221101f}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC256_STANDARD_PINS),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC,
@@ -6803,6 +6871,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0x90a60120},
                {0x14, 0x90170110},
                {0x21, 0x0321101f}),
+       SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0xb7a60130},
+               {0x14, 0x90170110},
+               {0x21, 0x04211020}),
        SND_HDA_PIN_QUIRK(0x10ec0290, 0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1,
                ALC290_STANDARD_PINS,
                {0x15, 0x04211040},
index 9afb8ab..06b2262 100644 (file)
@@ -347,17 +347,20 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request,
                            int validx, int *value_ret)
 {
        struct snd_usb_audio *chip = cval->head.mixer->chip;
-       unsigned char buf[4 + 3 * sizeof(__u32)]; /* enough space for one range */
+       /* enough space for one range */
+       unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)];
        unsigned char *val;
-       int idx = 0, ret, size;
+       int idx = 0, ret, val_size, size;
        __u8 bRequest;
 
+       val_size = uac2_ctl_value_size(cval->val_type);
+
        if (request == UAC_GET_CUR) {
                bRequest = UAC2_CS_CUR;
-               size = uac2_ctl_value_size(cval->val_type);
+               size = val_size;
        } else {
                bRequest = UAC2_CS_RANGE;
-               size = sizeof(buf);
+               size = sizeof(__u16) + 3 * val_size;
        }
 
        memset(buf, 0, sizeof(buf));
@@ -390,16 +393,17 @@ error:
                val = buf + sizeof(__u16);
                break;
        case UAC_GET_MAX:
-               val = buf + sizeof(__u16) * 2;
+               val = buf + sizeof(__u16) + val_size;
                break;
        case UAC_GET_RES:
-               val = buf + sizeof(__u16) * 3;
+               val = buf + sizeof(__u16) + val_size * 2;
                break;
        default:
                return -EINVAL;
        }
 
-       *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, sizeof(__u16)));
+       *value_ret = convert_signed_value(cval,
+                                         snd_usb_combine_bytes(val, val_size));
 
        return 0;
 }
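The offset arithmetic above assumes the UAC2 GET_RANGE payload layout: a
16-bit wNumSubRanges count followed by MIN/MAX/RES fields that are each
val_size bytes wide, rather than the fixed 16-bit fields the old code
assumed. A hypothetical helper expressing the same computation:

	/* offset of field 0 (MIN), 1 (MAX) or 2 (RES) in the first subrange */
	static unsigned char *uac2_range_field(unsigned char *buf,
					       int field_idx, int val_size)
	{
		return buf + sizeof(__u16) + field_idx * val_size;
	}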
index b9c9a19..3cbfae6 100644 (file)
@@ -352,6 +352,15 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs,
                ep = 0x86;
                iface = usb_ifnum_to_if(dev, 2);
 
+               if (!iface || iface->num_altsetting == 0)
+                       return -EINVAL;
+
+               alts = &iface->altsetting[1];
+               goto add_sync_ep;
+       case USB_ID(0x1397, 0x0002):
+               ep = 0x81;
+               iface = usb_ifnum_to_if(dev, 1);
+
                if (!iface || iface->num_altsetting == 0)
                        return -EINVAL;
 
index a66ef57..ea8f3de 100644 (file)
@@ -1363,8 +1363,11 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
 
-       /* Amanero Combo384 USB interface with native DSD support */
-       case USB_ID(0x16d0, 0x071a):
+       /* Amanero Combo384 USB-based DACs with native DSD support */
+       case USB_ID(0x16d0, 0x071a):  /* Amanero - Combo384 */
+       case USB_ID(0x2ab6, 0x0004):  /* T+A DAC8DSD-V2.0, MP1000E-V2.0, MP2000R-V2.0, MP2500R-V2.0, MP3100HV-V2.0 */
+       case USB_ID(0x2ab6, 0x0005):  /* T+A USB HD Audio 1 */
+       case USB_ID(0x2ab6, 0x0006):  /* T+A USB HD Audio 2 */
                if (fp->altsetting == 2) {
                        switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
                        case 0x199:
index 637b726..833ed9a 100644 (file)
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_TIDR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
 #define KVM_REG_PPC_PSSCR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h
deleted file mode 100644 (file)
index 7251209..0000000
+++ /dev/null
@@ -1,412 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/unistd.h"
- */
-
-#ifndef _UAPI_ASM_S390_UNISTD_H_
-#define _UAPI_ASM_S390_UNISTD_H_
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_exit                 1
-#define __NR_fork                 2
-#define __NR_read                 3
-#define __NR_write                4
-#define __NR_open                 5
-#define __NR_close                6
-#define __NR_restart_syscall     7
-#define __NR_creat                8
-#define __NR_link                 9
-#define __NR_unlink              10
-#define __NR_execve              11
-#define __NR_chdir               12
-#define __NR_mknod               14
-#define __NR_chmod               15
-#define __NR_lseek               19
-#define __NR_getpid              20
-#define __NR_mount               21
-#define __NR_umount              22
-#define __NR_ptrace              26
-#define __NR_alarm               27
-#define __NR_pause               29
-#define __NR_utime               30
-#define __NR_access              33
-#define __NR_nice                34
-#define __NR_sync                36
-#define __NR_kill                37
-#define __NR_rename              38
-#define __NR_mkdir               39
-#define __NR_rmdir               40
-#define __NR_dup                 41
-#define __NR_pipe                42
-#define __NR_times               43
-#define __NR_brk                 45
-#define __NR_signal              48
-#define __NR_acct                51
-#define __NR_umount2             52
-#define __NR_ioctl               54
-#define __NR_fcntl               55
-#define __NR_setpgid             57
-#define __NR_umask               60
-#define __NR_chroot              61
-#define __NR_ustat               62
-#define __NR_dup2                63
-#define __NR_getppid             64
-#define __NR_getpgrp             65
-#define __NR_setsid              66
-#define __NR_sigaction           67
-#define __NR_sigsuspend          72
-#define __NR_sigpending          73
-#define __NR_sethostname         74
-#define __NR_setrlimit           75
-#define __NR_getrusage           77
-#define __NR_gettimeofday        78
-#define __NR_settimeofday        79
-#define __NR_symlink             83
-#define __NR_readlink            85
-#define __NR_uselib              86
-#define __NR_swapon              87
-#define __NR_reboot              88
-#define __NR_readdir             89
-#define __NR_mmap                90
-#define __NR_munmap              91
-#define __NR_truncate            92
-#define __NR_ftruncate           93
-#define __NR_fchmod              94
-#define __NR_getpriority         96
-#define __NR_setpriority         97
-#define __NR_statfs              99
-#define __NR_fstatfs            100
-#define __NR_socketcall         102
-#define __NR_syslog             103
-#define __NR_setitimer          104
-#define __NR_getitimer          105
-#define __NR_stat               106
-#define __NR_lstat              107
-#define __NR_fstat              108
-#define __NR_lookup_dcookie     110
-#define __NR_vhangup            111
-#define __NR_idle               112
-#define __NR_wait4              114
-#define __NR_swapoff            115
-#define __NR_sysinfo            116
-#define __NR_ipc                117
-#define __NR_fsync              118
-#define __NR_sigreturn          119
-#define __NR_clone              120
-#define __NR_setdomainname      121
-#define __NR_uname              122
-#define __NR_adjtimex           124
-#define __NR_mprotect           125
-#define __NR_sigprocmask        126
-#define __NR_create_module      127
-#define __NR_init_module        128
-#define __NR_delete_module      129
-#define __NR_get_kernel_syms    130
-#define __NR_quotactl           131
-#define __NR_getpgid            132
-#define __NR_fchdir             133
-#define __NR_bdflush            134
-#define __NR_sysfs              135
-#define __NR_personality        136
-#define __NR_afs_syscall        137 /* Syscall for Andrew File System */
-#define __NR_getdents           141
-#define __NR_flock              143
-#define __NR_msync              144
-#define __NR_readv              145
-#define __NR_writev             146
-#define __NR_getsid             147
-#define __NR_fdatasync          148
-#define __NR__sysctl            149
-#define __NR_mlock              150
-#define __NR_munlock            151
-#define __NR_mlockall           152
-#define __NR_munlockall         153
-#define __NR_sched_setparam             154
-#define __NR_sched_getparam             155
-#define __NR_sched_setscheduler         156
-#define __NR_sched_getscheduler         157
-#define __NR_sched_yield                158
-#define __NR_sched_get_priority_max     159
-#define __NR_sched_get_priority_min     160
-#define __NR_sched_rr_get_interval      161
-#define __NR_nanosleep          162
-#define __NR_mremap             163
-#define __NR_query_module       167
-#define __NR_poll               168
-#define __NR_nfsservctl         169
-#define __NR_prctl              172
-#define __NR_rt_sigreturn       173
-#define __NR_rt_sigaction       174
-#define __NR_rt_sigprocmask     175
-#define __NR_rt_sigpending      176
-#define __NR_rt_sigtimedwait    177
-#define __NR_rt_sigqueueinfo    178
-#define __NR_rt_sigsuspend      179
-#define __NR_pread64            180
-#define __NR_pwrite64           181
-#define __NR_getcwd             183
-#define __NR_capget             184
-#define __NR_capset             185
-#define __NR_sigaltstack        186
-#define __NR_sendfile           187
-#define __NR_getpmsg           188
-#define __NR_putpmsg           189
-#define __NR_vfork             190
-#define __NR_pivot_root         217
-#define __NR_mincore            218
-#define __NR_madvise            219
-#define __NR_getdents64                220
-#define __NR_readahead         222
-#define __NR_setxattr          224
-#define __NR_lsetxattr         225
-#define __NR_fsetxattr         226
-#define __NR_getxattr          227
-#define __NR_lgetxattr         228
-#define __NR_fgetxattr         229
-#define __NR_listxattr         230
-#define __NR_llistxattr                231
-#define __NR_flistxattr                232
-#define __NR_removexattr       233
-#define __NR_lremovexattr      234
-#define __NR_fremovexattr      235
-#define __NR_gettid            236
-#define __NR_tkill             237
-#define __NR_futex             238
-#define __NR_sched_setaffinity 239
-#define __NR_sched_getaffinity 240
-#define __NR_tgkill            241
-/* Number 242 is reserved for tux */
-#define __NR_io_setup          243
-#define __NR_io_destroy                244
-#define __NR_io_getevents      245
-#define __NR_io_submit         246
-#define __NR_io_cancel         247
-#define __NR_exit_group                248
-#define __NR_epoll_create      249
-#define __NR_epoll_ctl         250
-#define __NR_epoll_wait                251
-#define __NR_set_tid_address   252
-#define __NR_fadvise64         253
-#define __NR_timer_create      254
-#define __NR_timer_settime     255
-#define __NR_timer_gettime     256
-#define __NR_timer_getoverrun  257
-#define __NR_timer_delete      258
-#define __NR_clock_settime     259
-#define __NR_clock_gettime     260
-#define __NR_clock_getres      261
-#define __NR_clock_nanosleep   262
-/* Number 263 is reserved for vserver */
-#define __NR_statfs64          265
-#define __NR_fstatfs64         266
-#define __NR_remap_file_pages  267
-#define __NR_mbind             268
-#define __NR_get_mempolicy     269
-#define __NR_set_mempolicy     270
-#define __NR_mq_open           271
-#define __NR_mq_unlink         272
-#define __NR_mq_timedsend      273
-#define __NR_mq_timedreceive   274
-#define __NR_mq_notify         275
-#define __NR_mq_getsetattr     276
-#define __NR_kexec_load                277
-#define __NR_add_key           278
-#define __NR_request_key       279
-#define __NR_keyctl            280
-#define __NR_waitid            281
-#define __NR_ioprio_set                282
-#define __NR_ioprio_get                283
-#define __NR_inotify_init      284
-#define __NR_inotify_add_watch 285
-#define __NR_inotify_rm_watch  286
-#define __NR_migrate_pages     287
-#define __NR_openat            288
-#define __NR_mkdirat           289
-#define __NR_mknodat           290
-#define __NR_fchownat          291
-#define __NR_futimesat         292
-#define __NR_unlinkat          294
-#define __NR_renameat          295
-#define __NR_linkat            296
-#define __NR_symlinkat         297
-#define __NR_readlinkat                298
-#define __NR_fchmodat          299
-#define __NR_faccessat         300
-#define __NR_pselect6          301
-#define __NR_ppoll             302
-#define __NR_unshare           303
-#define __NR_set_robust_list   304
-#define __NR_get_robust_list   305
-#define __NR_splice            306
-#define __NR_sync_file_range   307
-#define __NR_tee               308
-#define __NR_vmsplice          309
-#define __NR_move_pages                310
-#define __NR_getcpu            311
-#define __NR_epoll_pwait       312
-#define __NR_utimes            313
-#define __NR_fallocate         314
-#define __NR_utimensat         315
-#define __NR_signalfd          316
-#define __NR_timerfd           317
-#define __NR_eventfd           318
-#define __NR_timerfd_create    319
-#define __NR_timerfd_settime   320
-#define __NR_timerfd_gettime   321
-#define __NR_signalfd4         322
-#define __NR_eventfd2          323
-#define __NR_inotify_init1     324
-#define __NR_pipe2             325
-#define __NR_dup3              326
-#define __NR_epoll_create1     327
-#define        __NR_preadv             328
-#define        __NR_pwritev            329
-#define __NR_rt_tgsigqueueinfo 330
-#define __NR_perf_event_open   331
-#define __NR_fanotify_init     332
-#define __NR_fanotify_mark     333
-#define __NR_prlimit64         334
-#define __NR_name_to_handle_at 335
-#define __NR_open_by_handle_at 336
-#define __NR_clock_adjtime     337
-#define __NR_syncfs            338
-#define __NR_setns             339
-#define __NR_process_vm_readv  340
-#define __NR_process_vm_writev 341
-#define __NR_s390_runtime_instr 342
-#define __NR_kcmp              343
-#define __NR_finit_module      344
-#define __NR_sched_setattr     345
-#define __NR_sched_getattr     346
-#define __NR_renameat2         347
-#define __NR_seccomp           348
-#define __NR_getrandom         349
-#define __NR_memfd_create      350
-#define __NR_bpf               351
-#define __NR_s390_pci_mmio_write       352
-#define __NR_s390_pci_mmio_read                353
-#define __NR_execveat          354
-#define __NR_userfaultfd       355
-#define __NR_membarrier                356
-#define __NR_recvmmsg          357
-#define __NR_sendmmsg          358
-#define __NR_socket            359
-#define __NR_socketpair                360
-#define __NR_bind              361
-#define __NR_connect           362
-#define __NR_listen            363
-#define __NR_accept4           364
-#define __NR_getsockopt                365
-#define __NR_setsockopt                366
-#define __NR_getsockname       367
-#define __NR_getpeername       368
-#define __NR_sendto            369
-#define __NR_sendmsg           370
-#define __NR_recvfrom          371
-#define __NR_recvmsg           372
-#define __NR_shutdown          373
-#define __NR_mlock2            374
-#define __NR_copy_file_range   375
-#define __NR_preadv2           376
-#define __NR_pwritev2          377
-#define __NR_s390_guarded_storage      378
-#define __NR_statx             379
-#define __NR_s390_sthyi                380
-#define NR_syscalls 381
-
-/* 
- * There are some system calls that are not present on 64 bit, some
- * have a different name although they do the same (e.g. __NR_chown32
- * is __NR_chown on 64 bit).
- */
-#ifndef __s390x__
-
-#define __NR_time               13
-#define __NR_lchown             16
-#define __NR_setuid             23
-#define __NR_getuid             24
-#define __NR_stime              25
-#define __NR_setgid             46
-#define __NR_getgid             47
-#define __NR_geteuid            49
-#define __NR_getegid            50
-#define __NR_setreuid           70
-#define __NR_setregid           71
-#define __NR_getrlimit          76
-#define __NR_getgroups          80
-#define __NR_setgroups          81
-#define __NR_fchown             95
-#define __NR_ioperm            101
-#define __NR_setfsuid          138
-#define __NR_setfsgid          139
-#define __NR__llseek           140
-#define __NR__newselect        142
-#define __NR_setresuid         164
-#define __NR_getresuid         165
-#define __NR_setresgid         170
-#define __NR_getresgid         171
-#define __NR_chown             182
-#define __NR_ugetrlimit                191     /* SuS compliant getrlimit */
-#define __NR_mmap2             192
-#define __NR_truncate64                193
-#define __NR_ftruncate64       194
-#define __NR_stat64            195
-#define __NR_lstat64           196
-#define __NR_fstat64           197
-#define __NR_lchown32          198
-#define __NR_getuid32          199
-#define __NR_getgid32          200
-#define __NR_geteuid32         201
-#define __NR_getegid32         202
-#define __NR_setreuid32                203
-#define __NR_setregid32                204
-#define __NR_getgroups32       205
-#define __NR_setgroups32       206
-#define __NR_fchown32          207
-#define __NR_setresuid32       208
-#define __NR_getresuid32       209
-#define __NR_setresgid32       210
-#define __NR_getresgid32       211
-#define __NR_chown32           212
-#define __NR_setuid32          213
-#define __NR_setgid32          214
-#define __NR_setfsuid32                215
-#define __NR_setfsgid32                216
-#define __NR_fcntl64           221
-#define __NR_sendfile64                223
-#define __NR_fadvise64_64      264
-#define __NR_fstatat64         293
-
-#else
-
-#define __NR_select            142
-#define __NR_getrlimit         191     /* SuS compliant getrlimit */
-#define __NR_lchown            198
-#define __NR_getuid            199
-#define __NR_getgid            200
-#define __NR_geteuid           201
-#define __NR_getegid           202
-#define __NR_setreuid                  203
-#define __NR_setregid                  204
-#define __NR_getgroups         205
-#define __NR_setgroups         206
-#define __NR_fchown            207
-#define __NR_setresuid         208
-#define __NR_getresuid         209
-#define __NR_setresgid         210
-#define __NR_getresgid         211
-#define __NR_chown             212
-#define __NR_setuid            213
-#define __NR_setgid            214
-#define __NR_setfsuid                  215
-#define __NR_setfsgid                  216
-#define __NR_newfstatat                293
-
-#endif
-
-#endif /* _UAPI_ASM_S390_UNISTD_H_ */
index 1d9199e..0dfe4d3 100644 (file)
 
 #define X86_FEATURE_MBA                        ( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW          ( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV                        ( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB           ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 
index e4ceee7..67ca6c6 100644 (file)
@@ -21,7 +21,7 @@ MAP COMMANDS
 =============
 
 |      **bpftool** **prog { show | list }** [*PROG*]
-|      **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
+|      **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
 |      **bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |      **bpftool** **prog pin** *PROG* *FILE*
 |      **bpftool** **prog load** *OBJ* *FILE*
@@ -39,12 +39,18 @@ DESCRIPTION
                  Output will start with program ID followed by program type and
                  zero or more named attributes (depending on kernel version).
 
-       **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }]
-                 Dump eBPF instructions of the program from the kernel.
-                 If *FILE* is specified image will be written to a file,
-                 otherwise it will be disassembled and printed to stdout.
+       **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }]
+                 Dump eBPF instructions of the program from the kernel. By
+                 default, the instructions are disassembled and printed to
+                 standard output in human-readable format. In this case,
+                 **opcodes** controls whether raw opcodes are printed as well.
 
-                 **opcodes** controls if raw opcodes will be printed.
+                 If **file** is specified, the binary image will instead be
+                 written to *FILE*.
+
+                 If **visual** is specified, a control flow graph (CFG) will
+                 be built instead, and the eBPF instructions will be presented
+                 within that CFG, in DOT format, on standard output.
 
        **bpftool prog dump jited**  *PROG* [{ **file** *FILE* | **opcodes** }]
                  Dump jited image (host machine code) of the program.
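
As an illustration, the visual mode is meant to be piped into Graphviz; a
hypothetical invocation (the program ID is made up) would be:

    # bpftool prog dump xlated id 40 visual | dot -Tpng -o cfg.png
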
index 08719c5..490811b 100644 (file)
@@ -147,7 +147,7 @@ _bpftool()
 
     # Deal with simplest keywords
     case $prev in
-        help|key|opcodes)
+        help|key|opcodes|visual)
             return 0
             ;;
         tag)
@@ -223,11 +223,16 @@ _bpftool()
                             return 0
                             ;;
                     *)
-                            _bpftool_once_attr 'file'
+                        _bpftool_once_attr 'file'
+                        if _bpftool_search_list 'xlated'; then
+                            COMPREPLY+=( $( compgen -W 'opcodes visual' -- \
+                                "$cur" ) )
+                        else
                             COMPREPLY+=( $( compgen -W 'opcodes' -- \
                                 "$cur" ) )
-                            return 0
-                            ;;
+                        fi
+                        return 0
+                        ;;
                     esac
                     ;;
                 pin)
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
new file mode 100644 (file)
index 0000000..f30b3a4
--- /dev/null
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License
+ * Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cfg.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+struct cfg {
+       struct list_head funcs;
+       int func_num;
+};
+
+struct func_node {
+       struct list_head l;
+       struct list_head bbs;
+       struct bpf_insn *start;
+       struct bpf_insn *end;
+       int idx;
+       int bb_num;
+};
+
+struct bb_node {
+       struct list_head l;
+       struct list_head e_prevs;
+       struct list_head e_succs;
+       struct bpf_insn *head;
+       struct bpf_insn *tail;
+       int idx;
+};
+
+#define EDGE_FLAG_EMPTY                0x0
+#define EDGE_FLAG_FALLTHROUGH  0x1
+#define EDGE_FLAG_JUMP         0x2
+struct edge_node {
+       struct list_head l;
+       struct bb_node *src;
+       struct bb_node *dst;
+       int flags;
+};
+
+#define ENTRY_BLOCK_INDEX      0
+#define EXIT_BLOCK_INDEX       1
+#define NUM_FIXED_BLOCKS       2
+#define func_prev(func)                list_prev_entry(func, l)
+#define func_next(func)                list_next_entry(func, l)
+#define bb_prev(bb)            list_prev_entry(bb, l)
+#define bb_next(bb)            list_next_entry(bb, l)
+#define entry_bb(func)         func_first_bb(func)
+#define exit_bb(func)          func_last_bb(func)
+#define cfg_first_func(cfg)    \
+       list_first_entry(&cfg->funcs, struct func_node, l)
+#define cfg_last_func(cfg)     \
+       list_last_entry(&cfg->funcs, struct func_node, l)
+#define func_first_bb(func)    \
+       list_first_entry(&func->bbs, struct bb_node, l)
+#define func_last_bb(func)     \
+       list_last_entry(&func->bbs, struct bb_node, l)
+
+static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
+{
+       struct func_node *new_func, *func;
+
+       list_for_each_entry(func, &cfg->funcs, l) {
+               if (func->start == insn)
+                       return func;
+               else if (func->start > insn)
+                       break;
+       }
+
+       func = func_prev(func);
+       new_func = calloc(1, sizeof(*new_func));
+       if (!new_func) {
+               p_err("OOM when allocating FUNC node");
+               return NULL;
+       }
+       new_func->start = insn;
+       new_func->idx = cfg->func_num;
+       list_add(&new_func->l, &func->l);
+       cfg->func_num++;
+
+       return new_func;
+}
+
+static struct bb_node *func_append_bb(struct func_node *func,
+                                     struct bpf_insn *insn)
+{
+       struct bb_node *new_bb, *bb;
+
+       list_for_each_entry(bb, &func->bbs, l) {
+               if (bb->head == insn)
+                       return bb;
+               else if (bb->head > insn)
+                       break;
+       }
+
+       bb = bb_prev(bb);
+       new_bb = calloc(1, sizeof(*new_bb));
+       if (!new_bb) {
+               p_err("OOM when allocating BB node");
+               return NULL;
+       }
+       new_bb->head = insn;
+       INIT_LIST_HEAD(&new_bb->e_prevs);
+       INIT_LIST_HEAD(&new_bb->e_succs);
+       list_add(&new_bb->l, &bb->l);
+
+       return new_bb;
+}
+
+static struct bb_node *func_insert_dummy_bb(struct list_head *after)
+{
+       struct bb_node *bb;
+
+       bb = calloc(1, sizeof(*bb));
+       if (!bb) {
+               p_err("OOM when allocating BB node");
+               return NULL;
+       }
+
+       INIT_LIST_HEAD(&bb->e_prevs);
+       INIT_LIST_HEAD(&bb->e_succs);
+       list_add(&bb->l, after);
+
+       return bb;
+}
+
+static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
+                               struct bpf_insn *end)
+{
+       struct func_node *func, *last_func;
+
+       func = cfg_append_func(cfg, cur);
+       if (!func)
+               return true;
+
+       for (; cur < end; cur++) {
+               if (cur->code != (BPF_JMP | BPF_CALL))
+                       continue;
+               if (cur->src_reg != BPF_PSEUDO_CALL)
+                       continue;
+               func = cfg_append_func(cfg, cur + cur->off + 1);
+               if (!func)
+                       return true;
+       }
+
+       last_func = cfg_last_func(cfg);
+       last_func->end = end - 1;
+       func = cfg_first_func(cfg);
+       list_for_each_entry_from(func, &last_func->l, l) {
+               func->end = func_next(func)->start - 1;
+       }
+
+       return false;
+}
+
+static bool func_partition_bb_head(struct func_node *func)
+{
+       struct bpf_insn *cur, *end;
+       struct bb_node *bb;
+
+       cur = func->start;
+       end = func->end;
+       INIT_LIST_HEAD(&func->bbs);
+       bb = func_append_bb(func, cur);
+       if (!bb)
+               return true;
+
+       for (; cur <= end; cur++) {
+               if (BPF_CLASS(cur->code) == BPF_JMP) {
+                       u8 opcode = BPF_OP(cur->code);
+
+                       if (opcode == BPF_EXIT || opcode == BPF_CALL)
+                               continue;
+
+                       bb = func_append_bb(func, cur + cur->off + 1);
+                       if (!bb)
+                               return true;
+
+                       if (opcode != BPF_JA) {
+                               bb = func_append_bb(func, cur + 1);
+                               if (!bb)
+                                       return true;
+                       }
+               }
+       }
+
+       return false;
+}
+
+static void func_partition_bb_tail(struct func_node *func)
+{
+       unsigned int bb_idx = NUM_FIXED_BLOCKS;
+       struct bb_node *bb, *last;
+
+       last = func_last_bb(func);
+       last->tail = func->end;
+       bb = func_first_bb(func);
+       list_for_each_entry_from(bb, &last->l, l) {
+               bb->tail = bb_next(bb)->head - 1;
+               bb->idx = bb_idx++;
+       }
+
+       last->idx = bb_idx++;
+       func->bb_num = bb_idx;
+}
+
+static bool func_add_special_bb(struct func_node *func)
+{
+       struct bb_node *bb;
+
+       bb = func_insert_dummy_bb(&func->bbs);
+       if (!bb)
+               return true;
+       bb->idx = ENTRY_BLOCK_INDEX;
+
+       bb = func_insert_dummy_bb(&func_last_bb(func)->l);
+       if (!bb)
+               return true;
+       bb->idx = EXIT_BLOCK_INDEX;
+
+       return false;
+}
+
+static bool func_partition_bb(struct func_node *func)
+{
+       if (func_partition_bb_head(func))
+               return true;
+
+       func_partition_bb_tail(func);
+
+       return false;
+}
+
+static struct bb_node *func_search_bb_with_head(struct func_node *func,
+                                               struct bpf_insn *insn)
+{
+       struct bb_node *bb;
+
+       list_for_each_entry(bb, &func->bbs, l) {
+               if (bb->head == insn)
+                       return bb;
+       }
+
+       return NULL;
+}
+
+static struct edge_node *new_edge(struct bb_node *src, struct bb_node *dst,
+                                 int flags)
+{
+       struct edge_node *e;
+
+       e = calloc(1, sizeof(*e));
+       if (!e) {
+               p_err("OOM when allocating edge node");
+               return NULL;
+       }
+
+       if (src)
+               e->src = src;
+       if (dst)
+               e->dst = dst;
+
+       e->flags |= flags;
+
+       return e;
+}
+
+static bool func_add_bb_edges(struct func_node *func)
+{
+       struct bpf_insn *insn;
+       struct edge_node *e;
+       struct bb_node *bb;
+
+       bb = entry_bb(func);
+       e = new_edge(bb, bb_next(bb), EDGE_FLAG_FALLTHROUGH);
+       if (!e)
+               return true;
+       list_add_tail(&e->l, &bb->e_succs);
+
+       bb = exit_bb(func);
+       e = new_edge(bb_prev(bb), bb, EDGE_FLAG_FALLTHROUGH);
+       if (!e)
+               return true;
+       list_add_tail(&e->l, &bb->e_prevs);
+
+       bb = entry_bb(func);
+       bb = bb_next(bb);
+       list_for_each_entry_from(bb, &exit_bb(func)->l, l) {
+               e = new_edge(bb, NULL, EDGE_FLAG_EMPTY);
+               if (!e)
+                       return true;
+               e->src = bb;
+
+               insn = bb->tail;
+               if (BPF_CLASS(insn->code) != BPF_JMP ||
+                   BPF_OP(insn->code) == BPF_EXIT) {
+                       e->dst = bb_next(bb);
+                       e->flags |= EDGE_FLAG_FALLTHROUGH;
+                       list_add_tail(&e->l, &bb->e_succs);
+                       continue;
+               } else if (BPF_OP(insn->code) == BPF_JA) {
+                       e->dst = func_search_bb_with_head(func,
+                                                         insn + insn->off + 1);
+                       e->flags |= EDGE_FLAG_JUMP;
+                       list_add_tail(&e->l, &bb->e_succs);
+                       continue;
+               }
+
+               e->dst = bb_next(bb);
+               e->flags |= EDGE_FLAG_FALLTHROUGH;
+               list_add_tail(&e->l, &bb->e_succs);
+
+               e = new_edge(bb, NULL, EDGE_FLAG_JUMP);
+               if (!e)
+                       return true;
+               e->src = bb;
+               e->dst = func_search_bb_with_head(func, insn + insn->off + 1);
+               list_add_tail(&e->l, &bb->e_succs);
+       }
+
+       return false;
+}
+
+static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
+{
+       int cnt = len / sizeof(*insn);
+       struct func_node *func;
+
+       INIT_LIST_HEAD(&cfg->funcs);
+
+       if (cfg_partition_funcs(cfg, insn, insn + cnt))
+               return true;
+
+       list_for_each_entry(func, &cfg->funcs, l) {
+               if (func_partition_bb(func) || func_add_special_bb(func))
+                       return true;
+
+               if (func_add_bb_edges(func))
+                       return true;
+       }
+
+       return false;
+}
+
+static void cfg_destroy(struct cfg *cfg)
+{
+       struct func_node *func, *func2;
+
+       list_for_each_entry_safe(func, func2, &cfg->funcs, l) {
+               struct bb_node *bb, *bb2;
+
+               list_for_each_entry_safe(bb, bb2, &func->bbs, l) {
+                       struct edge_node *e, *e2;
+
+                       list_for_each_entry_safe(e, e2, &bb->e_prevs, l) {
+                               list_del(&e->l);
+                               free(e);
+                       }
+
+                       list_for_each_entry_safe(e, e2, &bb->e_succs, l) {
+                               list_del(&e->l);
+                               free(e);
+                       }
+
+                       list_del(&bb->l);
+                       free(bb);
+               }
+
+               list_del(&func->l);
+               free(func);
+       }
+}
+
+static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+{
+       const char *shape;
+
+       if (bb->idx == ENTRY_BLOCK_INDEX || bb->idx == EXIT_BLOCK_INDEX)
+               shape = "Mdiamond";
+       else
+               shape = "record";
+
+       printf("\tfn_%d_bb_%d [shape=%s,style=filled,label=\"",
+              func->idx, bb->idx, shape);
+
+       if (bb->idx == ENTRY_BLOCK_INDEX) {
+               printf("ENTRY");
+       } else if (bb->idx == EXIT_BLOCK_INDEX) {
+               printf("EXIT");
+       } else {
+               unsigned int start_idx;
+               struct dump_data dd = {};
+
+               printf("{");
+               kernel_syms_load(&dd);
+               start_idx = bb->head - func->start;
+               dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx);
+               kernel_syms_destroy(&dd);
+               printf("}");
+       }
+
+       printf("\"];\n\n");
+}
+
+static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
+{
+       const char *style = "\"solid,bold\"";
+       const char *color = "black";
+       int func_idx = func->idx;
+       struct edge_node *e;
+       int weight = 10;
+
+       if (list_empty(&bb->e_succs))
+               return;
+
+       list_for_each_entry(e, &bb->e_succs, l) {
+               printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=%s, color=%s, weight=%d, constraint=true",
+                      func_idx, e->src->idx, func_idx, e->dst->idx,
+                      style, color, weight);
+               printf("];\n");
+       }
+}
+
+static void func_output_bb_def(struct func_node *func)
+{
+       struct bb_node *bb;
+
+       list_for_each_entry(bb, &func->bbs, l) {
+               draw_bb_node(func, bb);
+       }
+}
+
+static void func_output_edges(struct func_node *func)
+{
+       int func_idx = func->idx;
+       struct bb_node *bb;
+
+       list_for_each_entry(bb, &func->bbs, l) {
+               draw_bb_succ_edges(func, bb);
+       }
+
+       /* Add an invisible edge from ENTRY to EXIT; this improves
+        * the graph layout.
+        */
+       printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=\"invis\", constraint=true];\n",
+              func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX);
+}
+
+static void cfg_dump(struct cfg *cfg)
+{
+       struct func_node *func;
+
+       printf("digraph \"DOT graph for eBPF program\" {\n");
+       list_for_each_entry(func, &cfg->funcs, l) {
+               printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
+                      func->idx, func->idx);
+               func_output_bb_def(func);
+               func_output_edges(func);
+               printf("}\n");
+       }
+       printf("}\n");
+}
+
+void dump_xlated_cfg(void *buf, unsigned int len)
+{
+       struct bpf_insn *insn = buf;
+       struct cfg cfg;
+
+       memset(&cfg, 0, sizeof(cfg));
+       if (cfg_build(&cfg, insn, len))
+               return;
+
+       cfg_dump(&cfg);
+
+       cfg_destroy(&cfg);
+}
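
For reference, the output of cfg_dump() above has roughly the following shape
(reconstructed from the printf calls; a single function with one real basic
block is assumed, and the instruction dump inside the record label is elided):

    digraph "DOT graph for eBPF program" {
    subgraph "cluster_0" {
        style="dashed";
        color="black";
        label="func_0 ()";
        fn_0_bb_0 [shape=Mdiamond,style=filled,label="ENTRY"];
        fn_0_bb_2 [shape=record,style=filled,label="{0: ...}"];
        fn_0_bb_1 [shape=Mdiamond,style=filled,label="EXIT"];
        fn_0_bb_0:s -> fn_0_bb_2:n [style="solid,bold", color=black, weight=10, constraint=true];
        fn_0_bb_2:s -> fn_0_bb_1:n [style="solid,bold", color=black, weight=10, constraint=true];
        fn_0_bb_0:s -> fn_0_bb_1:n [style="invis", constraint=true];
    }
    }
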
diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h
new file mode 100644 (file)
index 0000000..2cc9bd9
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License
+ * Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_CFG_H
+#define __BPF_TOOL_CFG_H
+
+void dump_xlated_cfg(void *buf, unsigned int len);
+
+#endif /* __BPF_TOOL_CFG_H */
index 3a0396d..1ec852d 100644 (file)
@@ -46,6 +46,9 @@
 
 #include "main.h"
 
+#define BATCH_LINE_LEN_MAX 65536
+#define BATCH_ARG_NB_MAX 4096
+
 const char *bin_name;
 static int last_argc;
 static char **last_argv;
@@ -157,6 +160,54 @@ void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
        }
 }
 
+/* Split command line into argument vector. */
+static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb)
+{
+       static const char ws[] = " \t\r\n";
+       char *cp = line;
+       int n_argc = 0;
+
+       while (*cp) {
+               /* Skip leading whitespace. */
+               cp += strspn(cp, ws);
+
+               if (*cp == '\0')
+                       break;
+
+               if (n_argc >= (maxargs - 1)) {
+                       p_err("too many arguments to command %d", cmd_nb);
+                       return -1;
+               }
+
+               /* Word begins with quote. */
+               if (*cp == '\'' || *cp == '"') {
+                       char quote = *cp++;
+
+                       n_argv[n_argc++] = cp;
+                       /* Find ending quote. */
+                       cp = strchr(cp, quote);
+                       if (!cp) {
+                               p_err("unterminated quoted string in command %d",
+                                     cmd_nb);
+                               return -1;
+                       }
+               } else {
+                       n_argv[n_argc++] = cp;
+
+                       /* Find end of word. */
+                       cp += strcspn(cp, ws);
+                       if (*cp == '\0')
+                               break;
+               }
+
+               /* Separate words. */
+               *cp++ = 0;
+       }
+       n_argv[n_argc] = NULL;
+
+       return n_argc;
+}
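
A self-contained sketch of how the new tokenizer behaves on a quoted argument
(the function body is condensed from the patch and the sample command is made
up):

    #include <stdio.h>
    #include <string.h>

    static int make_args_demo(char *line, char *n_argv[], int maxargs)
    {
            static const char ws[] = " \t\r\n";
            char *cp = line;
            int n_argc = 0;

            while (*cp) {
                    cp += strspn(cp, ws);        /* skip leading whitespace */
                    if (*cp == '\0')
                            break;
                    if (n_argc >= maxargs - 1)
                            return -1;           /* too many arguments */
                    if (*cp == '\'' || *cp == '"') {
                            char quote = *cp++;

                            n_argv[n_argc++] = cp;
                            cp = strchr(cp, quote);
                            if (!cp)
                                    return -1;   /* unterminated quote */
                    } else {
                            n_argv[n_argc++] = cp;
                            cp += strcspn(cp, ws);
                            if (*cp == '\0')
                                    break;
                    }
                    *cp++ = '\0';                /* terminate the word */
            }
            n_argv[n_argc] = NULL;
            return n_argc;
    }

    int main(void)
    {
            char line[] = "prog pin id 40 '/sys/fs/bpf/my prog'";
            char *argv[8];
            int i, n = make_args_demo(line, argv, 8);

            for (i = 0; i < n; i++)
                    printf("argv[%d] = \"%s\"\n", i, argv[i]);
            return 0;    /* argv[4] comes out as "/sys/fs/bpf/my prog" */
    }
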
+
 static int do_batch(int argc, char **argv);
 
 static const struct cmd cmds[] = {
@@ -171,11 +222,12 @@ static const struct cmd cmds[] = {
 
 static int do_batch(int argc, char **argv)
 {
+       char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX];
+       char *n_argv[BATCH_ARG_NB_MAX];
        unsigned int lines = 0;
-       char *n_argv[4096];
-       char buf[65536];
        int n_argc;
        FILE *fp;
+       char *cp;
        int err;
        int i;
 
@@ -191,7 +243,10 @@ static int do_batch(int argc, char **argv)
        }
        NEXT_ARG();
 
-       fp = fopen(*argv, "r");
+       if (!strcmp(*argv, "-"))
+               fp = stdin;
+       else
+               fp = fopen(*argv, "r");
        if (!fp) {
                p_err("Can't open file (%s): %s", *argv, strerror(errno));
                return -1;
@@ -200,27 +255,45 @@ static int do_batch(int argc, char **argv)
        if (json_output)
                jsonw_start_array(json_wtr);
        while (fgets(buf, sizeof(buf), fp)) {
+               cp = strchr(buf, '#');
+               if (cp)
+                       *cp = '\0';
+
                if (strlen(buf) == sizeof(buf) - 1) {
                        errno = E2BIG;
                        break;
                }
 
-               n_argc = 0;
-               n_argv[n_argc] = strtok(buf, " \t\n");
-
-               while (n_argv[n_argc]) {
-                       n_argc++;
-                       if (n_argc == ARRAY_SIZE(n_argv)) {
-                               p_err("line %d has too many arguments, skip",
+               /* Append continuation lines if any (coming after a line ending
+                * with '\' in the batch file).
+                */
+               while ((cp = strstr(buf, "\\\n")) != NULL) {
+                       if (!fgets(contline, sizeof(contline), fp) ||
+                           strlen(contline) == 0) {
+                               p_err("missing continuation line on command %d",
                                      lines);
-                               n_argc = 0;
-                               break;
+                               err = -1;
+                               goto err_close;
+                       }
+
+                       cp = strchr(contline, '#');
+                       if (cp)
+                               *cp = '\0';
+
+                       if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
+                               p_err("command %d is too long", lines);
+                               err = -1;
+                               goto err_close;
                        }
-                       n_argv[n_argc] = strtok(NULL, " \t\n");
+                       buf[strlen(buf) - 2] = '\0';
+                       strcat(buf, contline);
                }
 
+               n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
                if (!n_argc)
                        continue;
+               if (n_argc < 0)
+                       goto err_close;
 
                if (json_output) {
                        jsonw_start_object(json_wtr);
@@ -244,14 +317,15 @@ static int do_batch(int argc, char **argv)
        }
 
        if (errno && errno != ENOENT) {
-               perror("reading batch file failed");
+               p_err("reading batch file failed: %s", strerror(errno));
                err = -1;
        } else {
-               p_info("processed %d lines", lines);
+               p_info("processed %d commands", lines);
                err = 0;
        }
 err_close:
-       fclose(fp);
+       if (fp != stdin)
+               fclose(fp);
 
        if (json_output)
                jsonw_end_array(json_wtr);
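
Put together, the do_batch() changes above accept batch files like the
following hypothetical one ('#' comments, a trailing '\' continuation, quoted
arguments, and "bpftool batch file -" to read commands from stdin):

    # pin program 40, then inspect one map
    prog pin id 40 "/sys/fs/bpf/prog 40"
    map show \
        id 7
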
index e8e2baa..f7a8108 100644 (file)
@@ -47,8 +47,9 @@
 #include <bpf.h>
 #include <libbpf.h>
 
+#include "cfg.h"
 #include "main.h"
-#include "disasm.h"
+#include "xlated_dumper.h"
 
 static const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_UNSPEC]          = "unspec",
@@ -407,259 +408,6 @@ static int do_show(int argc, char **argv)
        return err;
 }
 
-#define SYM_MAX_NAME   256
-
-struct kernel_sym {
-       unsigned long address;
-       char name[SYM_MAX_NAME];
-};
-
-struct dump_data {
-       unsigned long address_call_base;
-       struct kernel_sym *sym_mapping;
-       __u32 sym_count;
-       char scratch_buff[SYM_MAX_NAME];
-};
-
-static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
-{
-       return ((struct kernel_sym *)sym_a)->address -
-              ((struct kernel_sym *)sym_b)->address;
-}
-
-static void kernel_syms_load(struct dump_data *dd)
-{
-       struct kernel_sym *sym;
-       char buff[256];
-       void *tmp, *address;
-       FILE *fp;
-
-       fp = fopen("/proc/kallsyms", "r");
-       if (!fp)
-               return;
-
-       while (!feof(fp)) {
-               if (!fgets(buff, sizeof(buff), fp))
-                       break;
-               tmp = realloc(dd->sym_mapping,
-                             (dd->sym_count + 1) *
-                             sizeof(*dd->sym_mapping));
-               if (!tmp) {
-out:
-                       free(dd->sym_mapping);
-                       dd->sym_mapping = NULL;
-                       fclose(fp);
-                       return;
-               }
-               dd->sym_mapping = tmp;
-               sym = &dd->sym_mapping[dd->sym_count];
-               if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
-                       continue;
-               sym->address = (unsigned long)address;
-               if (!strcmp(sym->name, "__bpf_call_base")) {
-                       dd->address_call_base = sym->address;
-                       /* sysctl kernel.kptr_restrict was set */
-                       if (!sym->address)
-                               goto out;
-               }
-               if (sym->address)
-                       dd->sym_count++;
-       }
-
-       fclose(fp);
-
-       qsort(dd->sym_mapping, dd->sym_count,
-             sizeof(*dd->sym_mapping), kernel_syms_cmp);
-}
-
-static void kernel_syms_destroy(struct dump_data *dd)
-{
-       free(dd->sym_mapping);
-}
-
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
-                                            unsigned long key)
-{
-       struct kernel_sym sym = {
-               .address = key,
-       };
-
-       return dd->sym_mapping ?
-              bsearch(&sym, dd->sym_mapping, dd->sym_count,
-                      sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
-}
-
-static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
-{
-       va_list args;
-
-       va_start(args, fmt);
-       vprintf(fmt, args);
-       va_end(args);
-}
-
-static const char *print_call_pcrel(struct dump_data *dd,
-                                   struct kernel_sym *sym,
-                                   unsigned long address,
-                                   const struct bpf_insn *insn)
-{
-       if (sym)
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "%+d#%s", insn->off, sym->name);
-       else
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "%+d#0x%lx", insn->off, address);
-       return dd->scratch_buff;
-}
-
-static const char *print_call_helper(struct dump_data *dd,
-                                    struct kernel_sym *sym,
-                                    unsigned long address)
-{
-       if (sym)
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "%s", sym->name);
-       else
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "0x%lx", address);
-       return dd->scratch_buff;
-}
-
-static const char *print_call(void *private_data,
-                             const struct bpf_insn *insn)
-{
-       struct dump_data *dd = private_data;
-       unsigned long address = dd->address_call_base + insn->imm;
-       struct kernel_sym *sym;
-
-       sym = kernel_syms_search(dd, address);
-       if (insn->src_reg == BPF_PSEUDO_CALL)
-               return print_call_pcrel(dd, sym, address, insn);
-       else
-               return print_call_helper(dd, sym, address);
-}
-
-static const char *print_imm(void *private_data,
-                            const struct bpf_insn *insn,
-                            __u64 full_imm)
-{
-       struct dump_data *dd = private_data;
-
-       if (insn->src_reg == BPF_PSEUDO_MAP_FD)
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "map[id:%u]", insn->imm);
-       else
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "0x%llx", (unsigned long long)full_imm);
-       return dd->scratch_buff;
-}
-
-static void dump_xlated_plain(struct dump_data *dd, void *buf,
-                             unsigned int len, bool opcodes)
-{
-       const struct bpf_insn_cbs cbs = {
-               .cb_print       = print_insn,
-               .cb_call        = print_call,
-               .cb_imm         = print_imm,
-               .private_data   = dd,
-       };
-       struct bpf_insn *insn = buf;
-       bool double_insn = false;
-       unsigned int i;
-
-       for (i = 0; i < len / sizeof(*insn); i++) {
-               if (double_insn) {
-                       double_insn = false;
-                       continue;
-               }
-
-               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
-               printf("% 4d: ", i);
-               print_bpf_insn(&cbs, NULL, insn + i, true);
-
-               if (opcodes) {
-                       printf("       ");
-                       fprint_hex(stdout, insn + i, 8, " ");
-                       if (double_insn && i < len - 1) {
-                               printf(" ");
-                               fprint_hex(stdout, insn + i + 1, 8, " ");
-                       }
-                       printf("\n");
-               }
-       }
-}
-
-static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
-{
-       unsigned int l = strlen(fmt);
-       char chomped_fmt[l];
-       va_list args;
-
-       va_start(args, fmt);
-       if (l > 0) {
-               strncpy(chomped_fmt, fmt, l - 1);
-               chomped_fmt[l - 1] = '\0';
-       }
-       jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
-       va_end(args);
-}
-
-static void dump_xlated_json(struct dump_data *dd, void *buf,
-                            unsigned int len, bool opcodes)
-{
-       const struct bpf_insn_cbs cbs = {
-               .cb_print       = print_insn_json,
-               .cb_call        = print_call,
-               .cb_imm         = print_imm,
-               .private_data   = dd,
-       };
-       struct bpf_insn *insn = buf;
-       bool double_insn = false;
-       unsigned int i;
-
-       jsonw_start_array(json_wtr);
-       for (i = 0; i < len / sizeof(*insn); i++) {
-               if (double_insn) {
-                       double_insn = false;
-                       continue;
-               }
-               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
-               jsonw_start_object(json_wtr);
-               jsonw_name(json_wtr, "disasm");
-               print_bpf_insn(&cbs, NULL, insn + i, true);
-
-               if (opcodes) {
-                       jsonw_name(json_wtr, "opcodes");
-                       jsonw_start_object(json_wtr);
-
-                       jsonw_name(json_wtr, "code");
-                       jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
-
-                       jsonw_name(json_wtr, "src_reg");
-                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
-
-                       jsonw_name(json_wtr, "dst_reg");
-                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
-
-                       jsonw_name(json_wtr, "off");
-                       print_hex_data_json((uint8_t *)(&insn[i].off), 2);
-
-                       jsonw_name(json_wtr, "imm");
-                       if (double_insn && i < len - 1)
-                               print_hex_data_json((uint8_t *)(&insn[i].imm),
-                                                   12);
-                       else
-                               print_hex_data_json((uint8_t *)(&insn[i].imm),
-                                                   4);
-                       jsonw_end_object(json_wtr);
-               }
-               jsonw_end_object(json_wtr);
-       }
-       jsonw_end_array(json_wtr);
-}
-
 static int do_dump(int argc, char **argv)
 {
        struct bpf_prog_info info = {};
@@ -668,6 +416,7 @@ static int do_dump(int argc, char **argv)
        unsigned int buf_size;
        char *filepath = NULL;
        bool opcodes = false;
+       bool visual = false;
        unsigned char *buf;
        __u32 *member_len;
        __u64 *member_ptr;
@@ -706,6 +455,9 @@ static int do_dump(int argc, char **argv)
        } else if (is_prefix(*argv, "opcodes")) {
                opcodes = true;
                NEXT_ARG();
+       } else if (is_prefix(*argv, "visual")) {
+               visual = true;
+               NEXT_ARG();
        }
 
        if (argc) {
@@ -774,27 +526,33 @@ static int do_dump(int argc, char **argv)
                              n < 0 ? strerror(errno) : "short write");
                        goto err_free;
                }
-       } else {
-               if (member_len == &info.jited_prog_len) {
-                       const char *name = NULL;
-
-                       if (info.ifindex) {
-                               name = ifindex_to_bfd_name_ns(info.ifindex,
-                                                             info.netns_dev,
-                                                             info.netns_ino);
-                               if (!name)
-                                       goto err_free;
-                       }
 
-                       disasm_print_insn(buf, *member_len, opcodes, name);
-               } else {
-                       kernel_syms_load(&dd);
-                       if (json_output)
-                               dump_xlated_json(&dd, buf, *member_len, opcodes);
-                       else
-                               dump_xlated_plain(&dd, buf, *member_len, opcodes);
-                       kernel_syms_destroy(&dd);
+               if (json_output)
+                       jsonw_null(json_wtr);
+       } else if (member_len == &info.jited_prog_len) {
+               const char *name = NULL;
+
+               if (info.ifindex) {
+                       name = ifindex_to_bfd_name_ns(info.ifindex,
+                                                     info.netns_dev,
+                                                     info.netns_ino);
+                       if (!name)
+                               goto err_free;
                }
+
+               disasm_print_insn(buf, *member_len, opcodes, name);
+       } else if (visual) {
+               if (json_output)
+                       jsonw_null(json_wtr);
+               else
+                       dump_xlated_cfg(buf, *member_len);
+       } else {
+               kernel_syms_load(&dd);
+               if (json_output)
+                       dump_xlated_json(&dd, buf, *member_len, opcodes);
+               else
+                       dump_xlated_plain(&dd, buf, *member_len, opcodes);
+               kernel_syms_destroy(&dd);
        }
 
        free(buf);
@@ -848,7 +606,7 @@ static int do_help(int argc, char **argv)
 
        fprintf(stderr,
                "Usage: %s %s { show | list } [PROG]\n"
-               "       %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
+               "       %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
                "       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
                "       %s %s pin   PROG FILE\n"
                "       %s %s load  OBJ  FILE\n"
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
new file mode 100644 (file)
index 0000000..20da835
--- /dev/null
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License
+ * Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "disasm.h"
+#include "json_writer.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+       return ((struct kernel_sym *)sym_a)->address -
+              ((struct kernel_sym *)sym_b)->address;
+}
+
+void kernel_syms_load(struct dump_data *dd)
+{
+       struct kernel_sym *sym;
+       char buff[256];
+       void *tmp, *address;
+       FILE *fp;
+
+       fp = fopen("/proc/kallsyms", "r");
+       if (!fp)
+               return;
+
+       while (!feof(fp)) {
+               if (!fgets(buff, sizeof(buff), fp))
+                       break;
+               tmp = realloc(dd->sym_mapping,
+                             (dd->sym_count + 1) *
+                             sizeof(*dd->sym_mapping));
+               if (!tmp) {
+out:
+                       free(dd->sym_mapping);
+                       dd->sym_mapping = NULL;
+                       fclose(fp);
+                       return;
+               }
+               dd->sym_mapping = tmp;
+               sym = &dd->sym_mapping[dd->sym_count];
+               if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+                       continue;
+               sym->address = (unsigned long)address;
+               if (!strcmp(sym->name, "__bpf_call_base")) {
+                       dd->address_call_base = sym->address;
+                       /* sysctl kernel.kptr_restrict was set */
+                       if (!sym->address)
+                               goto out;
+               }
+               if (sym->address)
+                       dd->sym_count++;
+       }
+
+       fclose(fp);
+
+       qsort(dd->sym_mapping, dd->sym_count,
+             sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+void kernel_syms_destroy(struct dump_data *dd)
+{
+       free(dd->sym_mapping);
+}
+
+static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+                                            unsigned long key)
+{
+       struct kernel_sym sym = {
+               .address = key,
+       };
+
+       return dd->sym_mapping ?
+              bsearch(&sym, dd->sym_mapping, dd->sym_count,
+                      sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
+}
+
+static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       vprintf(fmt, args);
+       va_end(args);
+}
+
+static void
+print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+       char buf[64], *p;
+       va_list args;
+
+       va_start(args, fmt);
+       vsnprintf(buf, sizeof(buf), fmt, args);
+       va_end(args);
+
+       p = buf;
+       while (*p != '\0') {
+               if (*p == '\n') {
+                       memmove(p + 3, p, strlen(buf) + 1 - (p - buf));
+                       /* Align each instruction dump row left. */
+                       *p++ = '\\';
+                       *p++ = 'l';
+                       /* Output multiline concatenation. */
+                       *p++ = '\\';
+               } else if (*p == '<' || *p == '>' || *p == '|' || *p == '&') {
+                       memmove(p + 1, p, strlen(buf) + 1 - (p - buf));
+                       /* Escape special character. */
+                       *p++ = '\\';
+               }
+
+               p++;
+       }
+
+       printf("%s", buf);
+}
+
+static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+       unsigned int l = strlen(fmt);
+       char chomped_fmt[l];
+       va_list args;
+
+       va_start(args, fmt);
+       if (l > 0) {
+               strncpy(chomped_fmt, fmt, l - 1);
+               chomped_fmt[l - 1] = '\0';
+       }
+       jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
+       va_end(args);
+}
+
+static const char *print_call_pcrel(struct dump_data *dd,
+                                   struct kernel_sym *sym,
+                                   unsigned long address,
+                                   const struct bpf_insn *insn)
+{
+       if (sym)
+               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+                        "%+d#%s", insn->off, sym->name);
+       else
+               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+                        "%+d#0x%lx", insn->off, address);
+       return dd->scratch_buff;
+}
+
+static const char *print_call_helper(struct dump_data *dd,
+                                    struct kernel_sym *sym,
+                                    unsigned long address)
+{
+       if (sym)
+               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+                        "%s", sym->name);
+       else
+               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+                        "0x%lx", address);
+       return dd->scratch_buff;
+}
+
+static const char *print_call(void *private_data,
+                             const struct bpf_insn *insn)
+{
+       struct dump_data *dd = private_data;
+       unsigned long address = dd->address_call_base + insn->imm;
+       struct kernel_sym *sym;
+
+       sym = kernel_syms_search(dd, address);
+       if (insn->src_reg == BPF_PSEUDO_CALL)
+               return print_call_pcrel(dd, sym, address, insn);
+       else
+               return print_call_helper(dd, sym, address);
+}
+
+static const char *print_imm(void *private_data,
+                            const struct bpf_insn *insn,
+                            __u64 full_imm)
+{
+       struct dump_data *dd = private_data;
+
+       if (insn->src_reg == BPF_PSEUDO_MAP_FD)
+               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+                        "map[id:%u]", insn->imm);
+       else
+               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+                        "0x%llx", (unsigned long long)full_imm);
+       return dd->scratch_buff;
+}
+
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+                     bool opcodes)
+{
+       const struct bpf_insn_cbs cbs = {
+               .cb_print       = print_insn_json,
+               .cb_call        = print_call,
+               .cb_imm         = print_imm,
+               .private_data   = dd,
+       };
+       struct bpf_insn *insn = buf;
+       bool double_insn = false;
+       unsigned int i;
+
+       jsonw_start_array(json_wtr);
+       for (i = 0; i < len / sizeof(*insn); i++) {
+               if (double_insn) {
+                       double_insn = false;
+                       continue;
+               }
+               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+               jsonw_start_object(json_wtr);
+               jsonw_name(json_wtr, "disasm");
+               print_bpf_insn(&cbs, NULL, insn + i, true);
+
+               if (opcodes) {
+                       jsonw_name(json_wtr, "opcodes");
+                       jsonw_start_object(json_wtr);
+
+                       jsonw_name(json_wtr, "code");
+                       jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
+
+                       jsonw_name(json_wtr, "src_reg");
+                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
+
+                       jsonw_name(json_wtr, "dst_reg");
+                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
+
+                       jsonw_name(json_wtr, "off");
+                       print_hex_data_json((uint8_t *)(&insn[i].off), 2);
+
+                       jsonw_name(json_wtr, "imm");
+                       if (double_insn && i < len - 1)
+                               print_hex_data_json((uint8_t *)(&insn[i].imm),
+                                                   12);
+                       else
+                               print_hex_data_json((uint8_t *)(&insn[i].imm),
+                                                   4);
+                       jsonw_end_object(json_wtr);
+               }
+               jsonw_end_object(json_wtr);
+       }
+       jsonw_end_array(json_wtr);
+}
+
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+                      bool opcodes)
+{
+       const struct bpf_insn_cbs cbs = {
+               .cb_print       = print_insn,
+               .cb_call        = print_call,
+               .cb_imm         = print_imm,
+               .private_data   = dd,
+       };
+       struct bpf_insn *insn = buf;
+       bool double_insn = false;
+       unsigned int i;
+
+       for (i = 0; i < len / sizeof(*insn); i++) {
+               if (double_insn) {
+                       double_insn = false;
+                       continue;
+               }
+
+               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+               printf("% 4d: ", i);
+               print_bpf_insn(&cbs, NULL, insn + i, true);
+
+               if (opcodes) {
+                       printf("       ");
+                       fprint_hex(stdout, insn + i, 8, " ");
+                       if (double_insn && i < len - 1) {
+                               printf(" ");
+                               fprint_hex(stdout, insn + i + 1, 8, " ");
+                       }
+                       printf("\n");
+               }
+       }
+}
+
+void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+                          unsigned int start_idx)
+{
+       const struct bpf_insn_cbs cbs = {
+               .cb_print       = print_insn_for_graph,
+               .cb_call        = print_call,
+               .cb_imm         = print_imm,
+               .private_data   = dd,
+       };
+       struct bpf_insn *insn_start = buf_start;
+       struct bpf_insn *insn_end = buf_end;
+       struct bpf_insn *cur = insn_start;
+
+       for (; cur <= insn_end; cur++) {
+               printf("% 4d: ", (int)(cur - insn_start + start_idx));
+               print_bpf_insn(&cbs, NULL, cur, true);
+               if (cur != insn_end)
+                       printf(" | ");
+       }
+}
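
kernel_syms_load() above appends one entry per /proc/kallsyms line, qsort()s the array by address, and kernel_syms_search() later resolves an address with bsearch() over the same comparator. A condensed, self-contained sketch of that load/sort/search pattern (fixed-size fields, minimal error handling; note the comparator here returns -1/0/1 rather than a raw difference):

    #include <stdio.h>
    #include <stdlib.h>

    struct sym {
            unsigned long addr;
            char name[256];
    };

    static int sym_cmp(const void *a, const void *b)
    {
            unsigned long x = ((const struct sym *)a)->addr;
            unsigned long y = ((const struct sym *)b)->addr;

            return x < y ? -1 : x > y;
    }

    int main(void)
    {
            struct sym *tab = NULL, key = { 0 }, *hit;
            size_t count = 0;
            char line[512];
            FILE *fp = fopen("/proc/kallsyms", "r");

            if (!fp)
                    return 1;
            while (fgets(line, sizeof(line), fp)) {
                    void *tmp = realloc(tab, (count + 1) * sizeof(*tab));

                    if (!tmp)
                            break;
                    tab = tmp;
                    /* format: "address type name"; skip the type column */
                    if (sscanf(line, "%lx %*c %255s",
                               &tab[count].addr, tab[count].name) != 2)
                            continue;
                    if (tab[count].addr)    /* 0 => kptr_restrict hides it */
                            count++;
            }
            fclose(fp);
            qsort(tab, count, sizeof(*tab), sym_cmp);

            if (count) {
                    key.addr = tab[count / 2].addr;
                    hit = bsearch(&key, tab, count, sizeof(*tab), sym_cmp);
                    if (hit)
                            printf("0x%lx = %s\n", hit->addr, hit->name);
            }
            free(tab);
            return 0;
    }
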
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
new file mode 100644 (file)
index 0000000..51c935d
--- /dev/null
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_XLATED_DUMPER_H
+#define __BPF_TOOL_XLATED_DUMPER_H
+
+#define SYM_MAX_NAME   256
+
+struct kernel_sym {
+       unsigned long address;
+       char name[SYM_MAX_NAME];
+};
+
+struct dump_data {
+       unsigned long address_call_base;
+       struct kernel_sym *sym_mapping;
+       __u32 sym_count;
+       char scratch_buff[SYM_MAX_NAME];
+};
+
+void kernel_syms_load(struct dump_data *dd);
+void kernel_syms_destroy(struct dump_data *dd);
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+                     bool opcodes);
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+                      bool opcodes);
+void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
+                          unsigned int start_index);
+
+#endif
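
struct dump_data carries a single scratch_buff that print_call() and print_imm() format into and then return, so each returned string is only valid until the next callback. A tiny sketch of that reusable-scratch-buffer convention (names here are hypothetical):

    #include <stdio.h>

    /* Helpers format into a caller-owned buffer and return a pointer to it;
     * the string is overwritten by the next call. */
    struct scratch {
            char buff[256];
    };

    static const char *fmt_map(struct scratch *s, unsigned int id)
    {
            snprintf(s->buff, sizeof(s->buff), "map[id:%u]", id);
            return s->buff;
    }

    int main(void)
    {
            struct scratch s;

            printf("%s\n", fmt_map(&s, 7));   /* prints: map[id:7] */
            return 0;
    }

The dumper can follow this convention safely because print_bpf_insn() consumes each returned string while formatting the current instruction, before the next callback runs.
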
index 860fa15..ffca068 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for cgroup tools
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra
 
 all: cgroup_event_listener
index 805a2c0..240eda0 100644 (file)
@@ -12,8 +12,6 @@ endif
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
 
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
 CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
 
 ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
index 1139d71..5db5e62 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Hyper-V tools
 
-CC = $(CROSS_COMPILE)gcc
 WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
 
index a08e7a4..332ed2f 100644 (file)
@@ -12,8 +12,6 @@ endif
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
 
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
 CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
 
 ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer
index ac3c650..536ee4f 100644 (file)
@@ -86,6 +86,62 @@ enum i915_mocs_table_index {
        I915_MOCS_CACHED,
 };
 
+/*
+ * Different engines serve different roles, and there may be more than one
+ * engine serving each role. enum drm_i915_gem_engine_class provides a
+ * classification of the role of the engine, which may be used when requesting
+ * operations to be performed on a certain subset of engines, or for providing
+ * information about that group.
+ */
+enum drm_i915_gem_engine_class {
+       I915_ENGINE_CLASS_RENDER        = 0,
+       I915_ENGINE_CLASS_COPY          = 1,
+       I915_ENGINE_CLASS_VIDEO         = 2,
+       I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
+
+       I915_ENGINE_CLASS_INVALID       = -1
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+       I915_SAMPLE_BUSY = 0,
+       I915_SAMPLE_WAIT = 1,
+       I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+       (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+       ((class) << I915_PMU_CLASS_SHIFT | \
+       (instance) << I915_PMU_SAMPLE_BITS | \
+       (sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS            __I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY         __I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255        /* table size 2k - maximum due to use
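
The __I915_PMU_ENGINE() macro added above packs (class, instance, sample) into one perf event config value: the sample type in the low 4 bits, the engine instance in the next 8, and the engine class above those. A quick standalone check of the encoding, reusing the definitions from the hunk:

    #include <stdio.h>

    #define I915_PMU_SAMPLE_BITS (4)
    #define I915_PMU_SAMPLE_INSTANCE_BITS (8)
    #define I915_PMU_CLASS_SHIFT \
            (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)

    #define __I915_PMU_ENGINE(class, instance, sample) \
            ((class) << I915_PMU_CLASS_SHIFT | \
            (instance) << I915_PMU_SAMPLE_BITS | \
            (sample))

    int main(void)
    {
            /* render engine (class 0), instance 0, BUSY sample (0) => 0x0 */
            printf("busy(0,0) = 0x%x\n", __I915_PMU_ENGINE(0, 0, 0));
            /* video engine (class 2), instance 1, WAIT sample (1) => 0x2011 */
            printf("wait(2,1) = 0x%x\n", __I915_PMU_ENGINE(2, 1, 1));
            return 0;
    }
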
@@ -450,6 +506,27 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
 
+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen supports contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform, but from CNL onwards it
+ * may vary from part to part.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+
 typedef struct drm_i915_getparam {
        __s32 param;
        /*
index 8616131..6d94477 100644 (file)
@@ -163,6 +163,7 @@ enum {
        IFLA_IF_NETNSID,
        IFLA_CARRIER_UP_COUNT,
        IFLA_CARRIER_DOWN_COUNT,
+       IFLA_NEW_IFINDEX,
        __IFLA_MAX
 };
 
index 8fb90a0..0fb5ef9 100644 (file)
@@ -1362,6 +1362,96 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
 #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP      _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+       __u64 addr;
+       __u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+       /* Guest initialization commands */
+       KVM_SEV_INIT = 0,
+       KVM_SEV_ES_INIT,
+       /* Guest launch commands */
+       KVM_SEV_LAUNCH_START,
+       KVM_SEV_LAUNCH_UPDATE_DATA,
+       KVM_SEV_LAUNCH_UPDATE_VMSA,
+       KVM_SEV_LAUNCH_SECRET,
+       KVM_SEV_LAUNCH_MEASURE,
+       KVM_SEV_LAUNCH_FINISH,
+       /* Guest migration commands (outgoing) */
+       KVM_SEV_SEND_START,
+       KVM_SEV_SEND_UPDATE_DATA,
+       KVM_SEV_SEND_UPDATE_VMSA,
+       KVM_SEV_SEND_FINISH,
+       /* Guest migration commands (incoming) */
+       KVM_SEV_RECEIVE_START,
+       KVM_SEV_RECEIVE_UPDATE_DATA,
+       KVM_SEV_RECEIVE_UPDATE_VMSA,
+       KVM_SEV_RECEIVE_FINISH,
+       /* Guest status and debug commands */
+       KVM_SEV_GUEST_STATUS,
+       KVM_SEV_DBG_DECRYPT,
+       KVM_SEV_DBG_ENCRYPT,
+       /* Guest certificates commands */
+       KVM_SEV_CERT_EXPORT,
+
+       KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+       __u32 id;
+       __u64 data;
+       __u32 error;
+       __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 dh_uaddr;
+       __u32 dh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+       __u64 uaddr;
+       __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+       __u64 uaddr;
+       __u32 len;
+};
+
+struct kvm_sev_guest_status {
+       __u32 handle;
+       __u32 policy;
+       __u32 state;
+};
+
+struct kvm_sev_dbg {
+       __u64 src_uaddr;
+       __u64 dst_uaddr;
+       __u32 len;
+};
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
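
All of the SEV sub-commands above funnel through the single KVM_MEMORY_ENCRYPT_OP ioctl on a VM file descriptor, with struct kvm_sev_cmd naming the sub-command and pointing at its payload. A hedged sketch of issuing KVM_SEV_GUEST_STATUS (assumes an SEV-capable host, /dev/sev, and kernel headers matching this patch; a real flow would issue KVM_SEV_INIT and the launch commands first, and error handling is trimmed):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            int vm = ioctl(kvm, KVM_CREATE_VM, 0);
            int sev = open("/dev/sev", O_RDWR);
            struct kvm_sev_guest_status status = { 0 };
            struct kvm_sev_cmd cmd = {
                    .id = KVM_SEV_GUEST_STATUS,
                    .data = (unsigned long)&status,
                    .sev_fd = sev,
            };

            /* Fails on non-SEV hosts; shown only to illustrate how
             * struct kvm_sev_cmd is wired up to the VM fd. */
            if (ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &cmd) < 0)
                    perror("KVM_MEMORY_ENCRYPT_OP");
            else
                    printf("handle=%u policy=%#x state=%u\n",
                           status.handle, status.policy, status.state);
            return 0;
    }
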
index 5f758c4..b572d94 100644 (file)
@@ -2,7 +2,6 @@
 PREFIX ?= /usr
 SBINDIR ?= sbin
 INSTALL ?= install
-CC = $(CROSS_COMPILE)gcc
 
 TARGET = freefall
 
index c379af0..7b6bed1 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for LEDs tools
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra -g -I../../include/uapi
 
 all: uledmon led_hw_brightness_mon
index 97073d6..5bbbf28 100644 (file)
@@ -1060,11 +1060,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
                prog->insns = new_insn;
                prog->main_prog_cnt = prog->insns_cnt;
                prog->insns_cnt = new_cnt;
+               pr_debug("added %zd insn from %s to prog %s\n",
+                        text->insns_cnt, text->section_name,
+                        prog->section_name);
        }
        insn = &prog->insns[relo->insn_idx];
        insn->imm += prog->main_prog_cnt - relo->insn_idx;
-       pr_debug("added %zd insn from %s to prog %s\n",
-                text->insns_cnt, text->section_name, prog->section_name);
        return 0;
 }
 
index b00b189..a8cb69a 100644 (file)
@@ -852,8 +852,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
  *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
  *    writing, there are 11 occurrences of it in the allmodconfig kernel.
  *
+ *    As of GCC 7 there are quite a few more of these, and the 'in between'
+ *    code is significant. Especially with KASAN enabled, some of the code
+ *    between the mov and jmpq uses .rodata itself, which can confuse things.
+ *
  *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
  *    ensure the same register is used in the mov and jump instructions.
+ *
+ *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
  */
 static struct rela *find_switch_table(struct objtool_file *file,
                                      struct symbol *func,
@@ -875,12 +881,25 @@ static struct rela *find_switch_table(struct objtool_file *file,
                                                text_rela->addend + 4);
                if (!rodata_rela)
                        return NULL;
+
                file->ignore_unreachables = true;
                return rodata_rela;
        }
 
        /* case 3 */
-       func_for_each_insn_continue_reverse(file, func, insn) {
+       /*
+        * Backward search using the @first_jump_src links: these help avoid
+        * much of the 'in between' code, so we are less likely to be
+        * confused by it.
+        */
+       for (insn = list_prev_entry(insn, list);
+
+            &insn->list != &file->insn_list &&
+            insn->sec == func->sec &&
+            insn->offset >= func->offset;
+
+            insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
                if (insn->type == INSN_JUMP_DYNAMIC)
                        break;
 
@@ -910,14 +929,32 @@ static struct rela *find_switch_table(struct objtool_file *file,
        return NULL;
 }
 
+
 static int add_func_switch_tables(struct objtool_file *file,
                                  struct symbol *func)
 {
-       struct instruction *insn, *prev_jump = NULL;
+       struct instruction *insn, *last = NULL, *prev_jump = NULL;
        struct rela *rela, *prev_rela = NULL;
        int ret;
 
        func_for_each_insn(file, func, insn) {
+               if (!last)
+                       last = insn;
+
+               /*
+                * Store back-pointers for unconditional forward jumps such
+                * that find_switch_table() can back-track using those and
+                * avoid some potentially confusing code.
+                */
+               if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+                   insn->offset > last->offset &&
+                   insn->jump_dest->offset > insn->offset &&
+                   !insn->jump_dest->first_jump_src) {
+
+                       insn->jump_dest->first_jump_src = insn;
+                       last = insn->jump_dest;
+               }
+
                if (insn->type != INSN_JUMP_DYNAMIC)
                        continue;
 
@@ -1899,13 +1936,19 @@ static bool ignore_unreachable_insn(struct instruction *insn)
                if (is_kasan_insn(insn) || is_ubsan_insn(insn))
                        return true;
 
-               if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-                       insn = insn->jump_dest;
-                       continue;
+               if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+                       if (insn->jump_dest &&
+                           insn->jump_dest->func == insn->func) {
+                               insn = insn->jump_dest;
+                               continue;
+                       }
+
+                       break;
                }
 
                if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
                        break;
+
                insn = list_next_entry(insn, list);
        }
 
index dbadb30..23a1d06 100644 (file)
@@ -47,6 +47,7 @@ struct instruction {
        bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
        struct symbol *call_dest;
        struct instruction *jump_dest;
+       struct instruction *first_jump_src;
        struct list_head alts;
        struct symbol *func;
        struct stack_op stack_op;
index f0796a4..90bb4aa 100644 (file)
@@ -30,6 +30,10 @@ OPTIONS for 'convert'
 -i::
        Specify input perf data file path.
 
+-f::
+--force::
+       Don't complain, do it.
+
 -v::
 --verbose::
         Be more verbose (show counter open errors, etc).
index 9b0351d..0123280 100644 (file)
@@ -146,12 +146,6 @@ define allow-override
     $(eval $(1) = $(2)))
 endef
 
-# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,LD,$(CROSS_COMPILE)ld)
-$(call allow-override,CXX,$(CROSS_COMPILE)g++)
-
 LD += $(EXTRA_LDFLAGS)
 
 HOSTCC  ?= gcc
index 48228de..dfa6e31 100644 (file)
@@ -10,15 +10,19 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 
 out    := $(OUTPUT)arch/s390/include/generated/asm
 header := $(out)/syscalls_64.c
-sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
-       $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@
+       @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+        (diff -B $(sysdef) $(syskrn) >/dev/null) \
+        || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+       $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
 
 clean::
        $(call QUIET_CLEAN, s390) $(RM) $(header)
index 7fa0d0a..72ecbb6 100755 (executable)
@@ -3,25 +3,23 @@
 #
 # Generate system call table for perf
 #
-#
-# Copyright IBM Corp. 2017
+# Copyright IBM Corp. 2017, 2018
 # Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 #
 
-gcc=$1
-input=$2
+SYSCALL_TBL=$1
 
-if ! test -r $input; then
+if ! test -r $SYSCALL_TBL; then
        echo "Could not read input file" >&2
        exit 1
 fi
 
 create_table()
 {
-       local max_nr
+       local max_nr nr abi sc discard
 
        echo 'static const char *syscalltbl_s390_64[] = {'
-       while read sc nr; do
+       while read nr abi sc discard; do
                printf '\t[%d] = "%s",\n' $nr $sc
                max_nr=$nr
        done
@@ -29,8 +27,6 @@ create_table()
        echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
 }
 
-
-$gcc -m64 -E -dM -x c  $input         \
-       |sed -ne 's/^#define __NR_//p' \
-       |sort -t' ' -k2 -nu            \
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL    \
+       |sort -k1 -n                                    \
        |create_table
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
new file mode 100644 (file)
index 0000000..b38d484
--- /dev/null
@@ -0,0 +1,390 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1    common    exit                    sys_exit                        sys_exit
+2    common    fork                    sys_fork                        sys_fork
+3    common    read                    sys_read                        compat_sys_s390_read
+4    common    write                   sys_write                       compat_sys_s390_write
+5    common    open                    sys_open                        compat_sys_open
+6    common    close                   sys_close                       sys_close
+7    common    restart_syscall         sys_restart_syscall             sys_restart_syscall
+8    common    creat                   sys_creat                       compat_sys_creat
+9    common    link                    sys_link                        compat_sys_link
+10   common    unlink                  sys_unlink                      compat_sys_unlink
+11   common    execve                  sys_execve                      compat_sys_execve
+12   common    chdir                   sys_chdir                       compat_sys_chdir
+13   32                time                    -                               compat_sys_time
+14   common    mknod                   sys_mknod                       compat_sys_mknod
+15   common    chmod                   sys_chmod                       compat_sys_chmod
+16   32                lchown                  -                               compat_sys_s390_lchown16
+19   common    lseek                   sys_lseek                       compat_sys_lseek
+20   common    getpid                  sys_getpid                      sys_getpid
+21   common    mount                   sys_mount                       compat_sys_mount
+22   common    umount                  sys_oldumount                   compat_sys_oldumount
+23   32                setuid                  -                               compat_sys_s390_setuid16
+24   32                getuid                  -                               compat_sys_s390_getuid16
+25   32                stime                   -                               compat_sys_stime
+26   common    ptrace                  sys_ptrace                      compat_sys_ptrace
+27   common    alarm                   sys_alarm                       sys_alarm
+29   common    pause                   sys_pause                       sys_pause
+30   common    utime                   sys_utime                       compat_sys_utime
+33   common    access                  sys_access                      compat_sys_access
+34   common    nice                    sys_nice                        sys_nice
+36   common    sync                    sys_sync                        sys_sync
+37   common    kill                    sys_kill                        sys_kill
+38   common    rename                  sys_rename                      compat_sys_rename
+39   common    mkdir                   sys_mkdir                       compat_sys_mkdir
+40   common    rmdir                   sys_rmdir                       compat_sys_rmdir
+41   common    dup                     sys_dup                         sys_dup
+42   common    pipe                    sys_pipe                        compat_sys_pipe
+43   common    times                   sys_times                       compat_sys_times
+45   common    brk                     sys_brk                         compat_sys_brk
+46   32                setgid                  -                               compat_sys_s390_setgid16
+47   32                getgid                  -                               compat_sys_s390_getgid16
+48   common    signal                  sys_signal                      compat_sys_signal
+49   32                geteuid                 -                               compat_sys_s390_geteuid16
+50   32                getegid                 -                               compat_sys_s390_getegid16
+51   common    acct                    sys_acct                        compat_sys_acct
+52   common    umount2                 sys_umount                      compat_sys_umount
+54   common    ioctl                   sys_ioctl                       compat_sys_ioctl
+55   common    fcntl                   sys_fcntl                       compat_sys_fcntl
+57   common    setpgid                 sys_setpgid                     sys_setpgid
+60   common    umask                   sys_umask                       sys_umask
+61   common    chroot                  sys_chroot                      compat_sys_chroot
+62   common    ustat                   sys_ustat                       compat_sys_ustat
+63   common    dup2                    sys_dup2                        sys_dup2
+64   common    getppid                 sys_getppid                     sys_getppid
+65   common    getpgrp                 sys_getpgrp                     sys_getpgrp
+66   common    setsid                  sys_setsid                      sys_setsid
+67   common    sigaction               sys_sigaction                   compat_sys_sigaction
+70   32                setreuid                -                               compat_sys_s390_setreuid16
+71   32                setregid                -                               compat_sys_s390_setregid16
+72   common    sigsuspend              sys_sigsuspend                  compat_sys_sigsuspend
+73   common    sigpending              sys_sigpending                  compat_sys_sigpending
+74   common    sethostname             sys_sethostname                 compat_sys_sethostname
+75   common    setrlimit               sys_setrlimit                   compat_sys_setrlimit
+76   32                getrlimit               -                               compat_sys_old_getrlimit
+77   common    getrusage               sys_getrusage                   compat_sys_getrusage
+78   common    gettimeofday            sys_gettimeofday                compat_sys_gettimeofday
+79   common    settimeofday            sys_settimeofday                compat_sys_settimeofday
+80   32                getgroups               -                               compat_sys_s390_getgroups16
+81   32                setgroups               -                               compat_sys_s390_setgroups16
+83   common    symlink                 sys_symlink                     compat_sys_symlink
+85   common    readlink                sys_readlink                    compat_sys_readlink
+86   common    uselib                  sys_uselib                      compat_sys_uselib
+87   common    swapon                  sys_swapon                      compat_sys_swapon
+88   common    reboot                  sys_reboot                      compat_sys_reboot
+89   common    readdir                 -                               compat_sys_old_readdir
+90   common    mmap                    sys_old_mmap                    compat_sys_s390_old_mmap
+91   common    munmap                  sys_munmap                      compat_sys_munmap
+92   common    truncate                sys_truncate                    compat_sys_truncate
+93   common    ftruncate               sys_ftruncate                   compat_sys_ftruncate
+94   common    fchmod                  sys_fchmod                      sys_fchmod
+95   32                fchown                  -                               compat_sys_s390_fchown16
+96   common    getpriority             sys_getpriority                 sys_getpriority
+97   common    setpriority             sys_setpriority                 sys_setpriority
+99   common    statfs                  sys_statfs                      compat_sys_statfs
+100  common    fstatfs                 sys_fstatfs                     compat_sys_fstatfs
+101  32                ioperm                  -                               -
+102  common    socketcall              sys_socketcall                  compat_sys_socketcall
+103  common    syslog                  sys_syslog                      compat_sys_syslog
+104  common    setitimer               sys_setitimer                   compat_sys_setitimer
+105  common    getitimer               sys_getitimer                   compat_sys_getitimer
+106  common    stat                    sys_newstat                     compat_sys_newstat
+107  common    lstat                   sys_newlstat                    compat_sys_newlstat
+108  common    fstat                   sys_newfstat                    compat_sys_newfstat
+110  common    lookup_dcookie          sys_lookup_dcookie              compat_sys_lookup_dcookie
+111  common    vhangup                 sys_vhangup                     sys_vhangup
+112  common    idle                    -                               -
+114  common    wait4                   sys_wait4                       compat_sys_wait4
+115  common    swapoff                 sys_swapoff                     compat_sys_swapoff
+116  common    sysinfo                 sys_sysinfo                     compat_sys_sysinfo
+117  common    ipc                     sys_s390_ipc                    compat_sys_s390_ipc
+118  common    fsync                   sys_fsync                       sys_fsync
+119  common    sigreturn               sys_sigreturn                   compat_sys_sigreturn
+120  common    clone                   sys_clone                       compat_sys_clone
+121  common    setdomainname           sys_setdomainname               compat_sys_setdomainname
+122  common    uname                   sys_newuname                    compat_sys_newuname
+124  common    adjtimex                sys_adjtimex                    compat_sys_adjtimex
+125  common    mprotect                sys_mprotect                    compat_sys_mprotect
+126  common    sigprocmask             sys_sigprocmask                 compat_sys_sigprocmask
+127  common    create_module           -                               -
+128  common    init_module             sys_init_module                 compat_sys_init_module
+129  common    delete_module           sys_delete_module               compat_sys_delete_module
+130  common    get_kernel_syms         -                               -
+131  common    quotactl                sys_quotactl                    compat_sys_quotactl
+132  common    getpgid                 sys_getpgid                     sys_getpgid
+133  common    fchdir                  sys_fchdir                      sys_fchdir
+134  common    bdflush                 sys_bdflush                     compat_sys_bdflush
+135  common    sysfs                   sys_sysfs                       compat_sys_sysfs
+136  common    personality             sys_s390_personality            sys_s390_personality
+137  common    afs_syscall             -                               -
+138  32                setfsuid                -                               compat_sys_s390_setfsuid16
+139  32                setfsgid                -                               compat_sys_s390_setfsgid16
+140  32                _llseek                 -                               compat_sys_llseek
+141  common    getdents                sys_getdents                    compat_sys_getdents
+142  32                _newselect              -                               compat_sys_select
+142  64                select                  sys_select                      -
+143  common    flock                   sys_flock                       sys_flock
+144  common    msync                   sys_msync                       compat_sys_msync
+145  common    readv                   sys_readv                       compat_sys_readv
+146  common    writev                  sys_writev                      compat_sys_writev
+147  common    getsid                  sys_getsid                      sys_getsid
+148  common    fdatasync               sys_fdatasync                   sys_fdatasync
+149  common    _sysctl                 sys_sysctl                      compat_sys_sysctl
+150  common    mlock                   sys_mlock                       compat_sys_mlock
+151  common    munlock                 sys_munlock                     compat_sys_munlock
+152  common    mlockall                sys_mlockall                    sys_mlockall
+153  common    munlockall              sys_munlockall                  sys_munlockall
+154  common    sched_setparam          sys_sched_setparam              compat_sys_sched_setparam
+155  common    sched_getparam          sys_sched_getparam              compat_sys_sched_getparam
+156  common    sched_setscheduler      sys_sched_setscheduler          compat_sys_sched_setscheduler
+157  common    sched_getscheduler      sys_sched_getscheduler          sys_sched_getscheduler
+158  common    sched_yield             sys_sched_yield                 sys_sched_yield
+159  common    sched_get_priority_max  sys_sched_get_priority_max      sys_sched_get_priority_max
+160  common    sched_get_priority_min  sys_sched_get_priority_min      sys_sched_get_priority_min
+161  common    sched_rr_get_interval   sys_sched_rr_get_interval       compat_sys_sched_rr_get_interval
+162  common    nanosleep               sys_nanosleep                   compat_sys_nanosleep
+163  common    mremap                  sys_mremap                      compat_sys_mremap
+164  32                setresuid               -                               compat_sys_s390_setresuid16
+165  32                getresuid               -                               compat_sys_s390_getresuid16
+167  common    query_module            -                               -
+168  common    poll                    sys_poll                        compat_sys_poll
+169  common    nfsservctl              -                               -
+170  32                setresgid               -                               compat_sys_s390_setresgid16
+171  32                getresgid               -                               compat_sys_s390_getresgid16
+172  common    prctl                   sys_prctl                       compat_sys_prctl
+173  common    rt_sigreturn            sys_rt_sigreturn                compat_sys_rt_sigreturn
+174  common    rt_sigaction            sys_rt_sigaction                compat_sys_rt_sigaction
+175  common    rt_sigprocmask          sys_rt_sigprocmask              compat_sys_rt_sigprocmask
+176  common    rt_sigpending           sys_rt_sigpending               compat_sys_rt_sigpending
+177  common    rt_sigtimedwait         sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait
+178  common    rt_sigqueueinfo         sys_rt_sigqueueinfo             compat_sys_rt_sigqueueinfo
+179  common    rt_sigsuspend           sys_rt_sigsuspend               compat_sys_rt_sigsuspend
+180  common    pread64                 sys_pread64                     compat_sys_s390_pread64
+181  common    pwrite64                sys_pwrite64                    compat_sys_s390_pwrite64
+182  32                chown                   -                               compat_sys_s390_chown16
+183  common    getcwd                  sys_getcwd                      compat_sys_getcwd
+184  common    capget                  sys_capget                      compat_sys_capget
+185  common    capset                  sys_capset                      compat_sys_capset
+186  common    sigaltstack             sys_sigaltstack                 compat_sys_sigaltstack
+187  common    sendfile                sys_sendfile64                  compat_sys_sendfile
+188  common    getpmsg                 -                               -
+189  common    putpmsg                 -                               -
+190  common    vfork                   sys_vfork                       sys_vfork
+191  32                ugetrlimit              -                               compat_sys_getrlimit
+191  64                getrlimit               sys_getrlimit                   -
+192  32                mmap2                   -                               compat_sys_s390_mmap2
+193  32                truncate64              -                               compat_sys_s390_truncate64
+194  32                ftruncate64             -                               compat_sys_s390_ftruncate64
+195  32                stat64                  -                               compat_sys_s390_stat64
+196  32                lstat64                 -                               compat_sys_s390_lstat64
+197  32                fstat64                 -                               compat_sys_s390_fstat64
+198  32                lchown32                -                               compat_sys_lchown
+198  64                lchown                  sys_lchown                      -
+199  32                getuid32                -                               sys_getuid
+199  64                getuid                  sys_getuid                      -
+200  32                getgid32                -                               sys_getgid
+200  64                getgid                  sys_getgid                      -
+201  32                geteuid32               -                               sys_geteuid
+201  64                geteuid                 sys_geteuid                     -
+202  32                getegid32               -                               sys_getegid
+202  64                getegid                 sys_getegid                     -
+203  32                setreuid32              -                               sys_setreuid
+203  64                setreuid                sys_setreuid                    -
+204  32                setregid32              -                               sys_setregid
+204  64                setregid                sys_setregid                    -
+205  32                getgroups32             -                               compat_sys_getgroups
+205  64                getgroups               sys_getgroups                   -
+206  32                setgroups32             -                               compat_sys_setgroups
+206  64                setgroups               sys_setgroups                   -
+207  32                fchown32                -                               sys_fchown
+207  64                fchown                  sys_fchown                      -
+208  32                setresuid32             -                               sys_setresuid
+208  64                setresuid               sys_setresuid                   -
+209  32                getresuid32             -                               compat_sys_getresuid
+209  64                getresuid               sys_getresuid                   -
+210  32                setresgid32             -                               sys_setresgid
+210  64                setresgid               sys_setresgid                   -
+211  32                getresgid32             -                               compat_sys_getresgid
+211  64                getresgid               sys_getresgid                   -
+212  32                chown32                 -                               compat_sys_chown
+212  64                chown                   sys_chown                       -
+213  32                setuid32                -                               sys_setuid
+213  64                setuid                  sys_setuid                      -
+214  32                setgid32                -                               sys_setgid
+214  64                setgid                  sys_setgid                      -
+215  32                setfsuid32              -                               sys_setfsuid
+215  64                setfsuid                sys_setfsuid                    -
+216  32                setfsgid32              -                               sys_setfsgid
+216  64                setfsgid                sys_setfsgid                    -
+217  common    pivot_root              sys_pivot_root                  compat_sys_pivot_root
+218  common    mincore                 sys_mincore                     compat_sys_mincore
+219  common    madvise                 sys_madvise                     compat_sys_madvise
+220  common    getdents64              sys_getdents64                  compat_sys_getdents64
+221  32                fcntl64                 -                               compat_sys_fcntl64
+222  common    readahead               sys_readahead                   compat_sys_s390_readahead
+223  32                sendfile64              -                               compat_sys_sendfile64
+224  common    setxattr                sys_setxattr                    compat_sys_setxattr
+225  common    lsetxattr               sys_lsetxattr                   compat_sys_lsetxattr
+226  common    fsetxattr               sys_fsetxattr                   compat_sys_fsetxattr
+227  common    getxattr                sys_getxattr                    compat_sys_getxattr
+228  common    lgetxattr               sys_lgetxattr                   compat_sys_lgetxattr
+229  common    fgetxattr               sys_fgetxattr                   compat_sys_fgetxattr
+230  common    listxattr               sys_listxattr                   compat_sys_listxattr
+231  common    llistxattr              sys_llistxattr                  compat_sys_llistxattr
+232  common    flistxattr              sys_flistxattr                  compat_sys_flistxattr
+233  common    removexattr             sys_removexattr                 compat_sys_removexattr
+234  common    lremovexattr            sys_lremovexattr                compat_sys_lremovexattr
+235  common    fremovexattr            sys_fremovexattr                compat_sys_fremovexattr
+236  common    gettid                  sys_gettid                      sys_gettid
+237  common    tkill                   sys_tkill                       sys_tkill
+238  common    futex                   sys_futex                       compat_sys_futex
+239  common    sched_setaffinity       sys_sched_setaffinity           compat_sys_sched_setaffinity
+240  common    sched_getaffinity       sys_sched_getaffinity           compat_sys_sched_getaffinity
+241  common    tgkill                  sys_tgkill                      sys_tgkill
+243  common    io_setup                sys_io_setup                    compat_sys_io_setup
+244  common    io_destroy              sys_io_destroy                  compat_sys_io_destroy
+245  common    io_getevents            sys_io_getevents                compat_sys_io_getevents
+246  common    io_submit               sys_io_submit                   compat_sys_io_submit
+247  common    io_cancel               sys_io_cancel                   compat_sys_io_cancel
+248  common    exit_group              sys_exit_group                  sys_exit_group
+249  common    epoll_create            sys_epoll_create                sys_epoll_create
+250  common    epoll_ctl               sys_epoll_ctl                   compat_sys_epoll_ctl
+251  common    epoll_wait              sys_epoll_wait                  compat_sys_epoll_wait
+252  common    set_tid_address         sys_set_tid_address             compat_sys_set_tid_address
+253  common    fadvise64               sys_fadvise64_64                compat_sys_s390_fadvise64
+254  common    timer_create            sys_timer_create                compat_sys_timer_create
+255  common    timer_settime           sys_timer_settime               compat_sys_timer_settime
+256  common    timer_gettime           sys_timer_gettime               compat_sys_timer_gettime
+257  common    timer_getoverrun        sys_timer_getoverrun            sys_timer_getoverrun
+258  common    timer_delete            sys_timer_delete                sys_timer_delete
+259  common    clock_settime           sys_clock_settime               compat_sys_clock_settime
+260  common    clock_gettime           sys_clock_gettime               compat_sys_clock_gettime
+261  common    clock_getres            sys_clock_getres                compat_sys_clock_getres
+262  common    clock_nanosleep         sys_clock_nanosleep             compat_sys_clock_nanosleep
+264  32                fadvise64_64            -                               compat_sys_s390_fadvise64_64
+265  common    statfs64                sys_statfs64                    compat_sys_statfs64
+266  common    fstatfs64               sys_fstatfs64                   compat_sys_fstatfs64
+267  common    remap_file_pages        sys_remap_file_pages            compat_sys_remap_file_pages
+268  common    mbind                   sys_mbind                       compat_sys_mbind
+269  common    get_mempolicy           sys_get_mempolicy               compat_sys_get_mempolicy
+270  common    set_mempolicy           sys_set_mempolicy               compat_sys_set_mempolicy
+271  common    mq_open                 sys_mq_open                     compat_sys_mq_open
+272  common    mq_unlink               sys_mq_unlink                   compat_sys_mq_unlink
+273  common    mq_timedsend            sys_mq_timedsend                compat_sys_mq_timedsend
+274  common    mq_timedreceive         sys_mq_timedreceive             compat_sys_mq_timedreceive
+275  common    mq_notify               sys_mq_notify                   compat_sys_mq_notify
+276  common    mq_getsetattr           sys_mq_getsetattr               compat_sys_mq_getsetattr
+277  common    kexec_load              sys_kexec_load                  compat_sys_kexec_load
+278  common    add_key                 sys_add_key                     compat_sys_add_key
+279  common    request_key             sys_request_key                 compat_sys_request_key
+280  common    keyctl                  sys_keyctl                      compat_sys_keyctl
+281  common    waitid                  sys_waitid                      compat_sys_waitid
+282  common    ioprio_set              sys_ioprio_set                  sys_ioprio_set
+283  common    ioprio_get              sys_ioprio_get                  sys_ioprio_get
+284  common    inotify_init            sys_inotify_init                sys_inotify_init
+285  common    inotify_add_watch       sys_inotify_add_watch           compat_sys_inotify_add_watch
+286  common    inotify_rm_watch        sys_inotify_rm_watch            sys_inotify_rm_watch
+287  common    migrate_pages           sys_migrate_pages               compat_sys_migrate_pages
+288  common    openat                  sys_openat                      compat_sys_openat
+289  common    mkdirat                 sys_mkdirat                     compat_sys_mkdirat
+290  common    mknodat                 sys_mknodat                     compat_sys_mknodat
+291  common    fchownat                sys_fchownat                    compat_sys_fchownat
+292  common    futimesat               sys_futimesat                   compat_sys_futimesat
+293  32                fstatat64               -                               compat_sys_s390_fstatat64
+293  64                newfstatat              sys_newfstatat                  -
+294  common    unlinkat                sys_unlinkat                    compat_sys_unlinkat
+295  common    renameat                sys_renameat                    compat_sys_renameat
+296  common    linkat                  sys_linkat                      compat_sys_linkat
+297  common    symlinkat               sys_symlinkat                   compat_sys_symlinkat
+298  common    readlinkat              sys_readlinkat                  compat_sys_readlinkat
+299  common    fchmodat                sys_fchmodat                    compat_sys_fchmodat
+300  common    faccessat               sys_faccessat                   compat_sys_faccessat
+301  common    pselect6                sys_pselect6                    compat_sys_pselect6
+302  common    ppoll                   sys_ppoll                       compat_sys_ppoll
+303  common    unshare                 sys_unshare                     compat_sys_unshare
+304  common    set_robust_list         sys_set_robust_list             compat_sys_set_robust_list
+305  common    get_robust_list         sys_get_robust_list             compat_sys_get_robust_list
+306  common    splice                  sys_splice                      compat_sys_splice
+307  common    sync_file_range         sys_sync_file_range             compat_sys_s390_sync_file_range
+308  common    tee                     sys_tee                         compat_sys_tee
+309  common    vmsplice                sys_vmsplice                    compat_sys_vmsplice
+310  common    move_pages              sys_move_pages                  compat_sys_move_pages
+311  common    getcpu                  sys_getcpu                      compat_sys_getcpu
+312  common    epoll_pwait             sys_epoll_pwait                 compat_sys_epoll_pwait
+313  common    utimes                  sys_utimes                      compat_sys_utimes
+314  common    fallocate               sys_fallocate                   compat_sys_s390_fallocate
+315  common    utimensat               sys_utimensat                   compat_sys_utimensat
+316  common    signalfd                sys_signalfd                    compat_sys_signalfd
+317  common    timerfd                 -                               -
+318  common    eventfd                 sys_eventfd                     sys_eventfd
+319  common    timerfd_create          sys_timerfd_create              sys_timerfd_create
+320  common    timerfd_settime         sys_timerfd_settime             compat_sys_timerfd_settime
+321  common    timerfd_gettime         sys_timerfd_gettime             compat_sys_timerfd_gettime
+322  common    signalfd4               sys_signalfd4                   compat_sys_signalfd4
+323  common    eventfd2                sys_eventfd2                    sys_eventfd2
+324  common    inotify_init1           sys_inotify_init1               sys_inotify_init1
+325  common    pipe2                   sys_pipe2                       compat_sys_pipe2
+326  common    dup3                    sys_dup3                        sys_dup3
+327  common    epoll_create1           sys_epoll_create1               sys_epoll_create1
+328  common    preadv                  sys_preadv                      compat_sys_preadv
+329  common    pwritev                 sys_pwritev                     compat_sys_pwritev
+330  common    rt_tgsigqueueinfo       sys_rt_tgsigqueueinfo           compat_sys_rt_tgsigqueueinfo
+331  common    perf_event_open         sys_perf_event_open             compat_sys_perf_event_open
+332  common    fanotify_init           sys_fanotify_init               sys_fanotify_init
+333  common    fanotify_mark           sys_fanotify_mark               compat_sys_fanotify_mark
+334  common    prlimit64               sys_prlimit64                   compat_sys_prlimit64
+335  common    name_to_handle_at       sys_name_to_handle_at           compat_sys_name_to_handle_at
+336  common    open_by_handle_at       sys_open_by_handle_at           compat_sys_open_by_handle_at
+337  common    clock_adjtime           sys_clock_adjtime               compat_sys_clock_adjtime
+338  common    syncfs                  sys_syncfs                      sys_syncfs
+339  common    setns                   sys_setns                       sys_setns
+340  common    process_vm_readv        sys_process_vm_readv            compat_sys_process_vm_readv
+341  common    process_vm_writev       sys_process_vm_writev           compat_sys_process_vm_writev
+342  common    s390_runtime_instr      sys_s390_runtime_instr          sys_s390_runtime_instr
+343  common    kcmp                    sys_kcmp                        compat_sys_kcmp
+344  common    finit_module            sys_finit_module                compat_sys_finit_module
+345  common    sched_setattr           sys_sched_setattr               compat_sys_sched_setattr
+346  common    sched_getattr           sys_sched_getattr               compat_sys_sched_getattr
+347  common    renameat2               sys_renameat2                   compat_sys_renameat2
+348  common    seccomp                 sys_seccomp                     compat_sys_seccomp
+349  common    getrandom               sys_getrandom                   compat_sys_getrandom
+350  common    memfd_create            sys_memfd_create                compat_sys_memfd_create
+351  common    bpf                     sys_bpf                         compat_sys_bpf
+352  common    s390_pci_mmio_write     sys_s390_pci_mmio_write         compat_sys_s390_pci_mmio_write
+353  common    s390_pci_mmio_read      sys_s390_pci_mmio_read          compat_sys_s390_pci_mmio_read
+354  common    execveat                sys_execveat                    compat_sys_execveat
+355  common    userfaultfd             sys_userfaultfd                 sys_userfaultfd
+356  common    membarrier              sys_membarrier                  sys_membarrier
+357  common    recvmmsg                sys_recvmmsg                    compat_sys_recvmmsg
+358  common    sendmmsg                sys_sendmmsg                    compat_sys_sendmmsg
+359  common    socket                  sys_socket                      sys_socket
+360  common    socketpair              sys_socketpair                  compat_sys_socketpair
+361  common    bind                    sys_bind                        compat_sys_bind
+362  common    connect                 sys_connect                     compat_sys_connect
+363  common    listen                  sys_listen                      sys_listen
+364  common    accept4                 sys_accept4                     compat_sys_accept4
+365  common    getsockopt              sys_getsockopt                  compat_sys_getsockopt
+366  common    setsockopt              sys_setsockopt                  compat_sys_setsockopt
+367  common    getsockname             sys_getsockname                 compat_sys_getsockname
+368  common    getpeername             sys_getpeername                 compat_sys_getpeername
+369  common    sendto                  sys_sendto                      compat_sys_sendto
+370  common    sendmsg                 sys_sendmsg                     compat_sys_sendmsg
+371  common    recvfrom                sys_recvfrom                    compat_sys_recvfrom
+372  common    recvmsg                 sys_recvmsg                     compat_sys_recvmsg
+373  common    shutdown                sys_shutdown                    sys_shutdown
+374  common    mlock2                  sys_mlock2                      compat_sys_mlock2
+375  common    copy_file_range         sys_copy_file_range             compat_sys_copy_file_range
+376  common    preadv2                 sys_preadv2                     compat_sys_preadv2
+377  common    pwritev2                sys_pwritev2                    compat_sys_pwritev2
+378  common    s390_guarded_storage    sys_s390_guarded_storage        compat_sys_s390_guarded_storage
+379  common    statx                   sys_statx                       compat_sys_statx
+380  common    s390_sthyi              sys_s390_sthyi                  compat_sys_s390_sthyi
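Each row above follows the syscall.tbl format: number, ABI (common/64/32), name, 64-bit entry point, and compat entry point ('-' when absent). Assuming a header generator along the lines of the other architectures' syscalltbl scripts (illustrative only, not part of this hunk), a row such as the statx one would expand to roughly:

    /* Hypothetical generated output for "379 common statx ..." */
    #define __NR_statx 379            /* uapi unistd header */

    /* 64-bit syscall table slot: */
    [379] = sys_statx,
    /* 31-bit compat syscall table slot: */
    [379] = compat_sys_statx,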
index c0815a3..539c3d4 100644 (file)
@@ -2245,7 +2245,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
        c2c_browser__update_nr_entries(browser);
 
        while (1) {
-               key = hist_browser__run(browser, "? - help");
+               key = hist_browser__run(browser, "? - help", true);
 
                switch (key) {
                case 's':
@@ -2314,7 +2314,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
        c2c_browser__update_nr_entries(browser);
 
        while (1) {
-               key = hist_browser__run(browser, "? - help");
+               key = hist_browser__run(browser, "? - help", true);
 
                switch (key) {
                case 'q':
index 42a52dc..4ad5dc6 100644 (file)
@@ -530,7 +530,8 @@ static int report__browse_hists(struct report *rep)
        case 1:
                ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
                                                    rep->min_percent,
-                                                   &session->header.env);
+                                                   &session->header.env,
+                                                   true);
                /*
                 * Usually "ret" is the last pressed key, and we only
                 * care if the key notifies us to switch data file.
index c6ccda5..b7c823b 100644 (file)
@@ -283,8 +283,9 @@ static void perf_top__print_sym_table(struct perf_top *top)
 
        printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 
-       if (hists->stats.nr_lost_warned !=
-           hists->stats.nr_events[PERF_RECORD_LOST]) {
+       if (!top->record_opts.overwrite &&
+           (hists->stats.nr_lost_warned !=
+           hists->stats.nr_events[PERF_RECORD_LOST])) {
                hists->stats.nr_lost_warned =
                              hists->stats.nr_events[PERF_RECORD_LOST];
                color_fprintf(stdout, PERF_COLOR_RED,
@@ -611,7 +612,8 @@ static void *display_thread_tui(void *arg)
 
        perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
                                      top->min_percent,
-                                     &top->session->header.env);
+                                     &top->session->header.env,
+                                     !top->record_opts.overwrite);
 
        done = 1;
        return NULL;
@@ -807,15 +809,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 {
+       struct record_opts *opts = &top->record_opts;
+       struct perf_evlist *evlist = top->evlist;
        struct perf_sample sample;
        struct perf_evsel *evsel;
+       struct perf_mmap *md;
        struct perf_session *session = top->session;
        union perf_event *event;
        struct machine *machine;
+       u64 end, start;
        int ret;
 
-       while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
-               ret = perf_evlist__parse_sample(top->evlist, event, &sample);
+       md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
+       if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+               return;
+
+       while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+               ret = perf_evlist__parse_sample(evlist, event, &sample);
                if (ret) {
                        pr_err("Can't parse sample, err = %d\n", ret);
                        goto next_event;
@@ -869,16 +879,120 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
                } else
                        ++session->evlist->stats.nr_unknown_events;
 next_event:
-               perf_evlist__mmap_consume(top->evlist, idx);
+               perf_mmap__consume(md, opts->overwrite);
        }
+
+       perf_mmap__read_done(md);
 }
 
 static void perf_top__mmap_read(struct perf_top *top)
 {
+       bool overwrite = top->record_opts.overwrite;
+       struct perf_evlist *evlist = top->evlist;
+       unsigned long long start, end;
        int i;
 
+       start = rdclock();
+       if (overwrite)
+               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+
        for (i = 0; i < top->evlist->nr_mmaps; i++)
                perf_top__mmap_read_idx(top, i);
+
+       if (overwrite) {
+               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+       }
+       end = rdclock();
+
+       if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
+               ui__warning("Too slow to read ring buffer.\n"
+                           "Please try increasing the period (-c) or\n"
+                           "decreasing the freq (-F) or\n"
+                           "limiting the number of CPUs (-C)\n");
+}
+
+/*
+ * Check the per-event overwrite term.
+ * perf top requires a consistent overwrite term across all events.
+ * - No event has a per-event term
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions/"
+ *   Nothing changes; return 0.
+ * - All events have the same per-event term
+ *   E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/"
+ *   Use the per-event setting to replace opts->overwrite if
+ *   they differ, then return 0.
+ * - Events have different per-event terms
+ *   E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
+ *   Return -1.
+ * - Some events set a per-event term, but others do not
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
+ *   Return -1.
+ */
+static int perf_top__overwrite_check(struct perf_top *top)
+{
+       struct record_opts *opts = &top->record_opts;
+       struct perf_evlist *evlist = top->evlist;
+       struct perf_evsel_config_term *term;
+       struct list_head *config_terms;
+       struct perf_evsel *evsel;
+       int set, overwrite = -1;
+
+       evlist__for_each_entry(evlist, evsel) {
+               set = -1;
+               config_terms = &evsel->config_terms;
+               list_for_each_entry(term, config_terms, list) {
+                       if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+                               set = term->val.overwrite ? 1 : 0;
+               }
+
+               /* no term for current and previous event (likely) */
+               if ((overwrite < 0) && (set < 0))
+                       continue;
+
+               /* has term for both current and previous event, compare */
+               if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
+                       return -1;
+
+               /* no term for current event but has term for previous one */
+               if ((overwrite >= 0) && (set < 0))
+                       return -1;
+
+               /* has term for current event */
+               if ((overwrite < 0) && (set >= 0)) {
+                       /* if it's first event, set overwrite */
+                       if (evsel == perf_evlist__first(evlist))
+                               overwrite = set;
+                       else
+                               return -1;
+               }
+       }
+
+       if ((overwrite >= 0) && (opts->overwrite != overwrite))
+               opts->overwrite = overwrite;
+
+       return 0;
+}
+
+static int perf_top_overwrite_fallback(struct perf_top *top,
+                                      struct perf_evsel *evsel)
+{
+       struct record_opts *opts = &top->record_opts;
+       struct perf_evlist *evlist = top->evlist;
+       struct perf_evsel *counter;
+
+       if (!opts->overwrite)
+               return 0;
+
+       /* only fall back when first event fails */
+       if (evsel != perf_evlist__first(evlist))
+               return 0;
+
+       evlist__for_each_entry(evlist, counter)
+               counter->attr.write_backward = false;
+       opts->overwrite = false;
+       ui__warning("fall back to non-overwrite mode\n");
+       return 1;
 }
 
 static int perf_top__start_counters(struct perf_top *top)
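To make the four cases above concrete, here is how perf_top__overwrite_check() treats a few event lists (a sketch; the command lines are illustrative, not taken from this patch):

    /*
     * perf top -e cpu/cpu-cycles/,cpu/instructions/
     *        -> 0   (no per-event terms; the default overwrite mode is kept)
     * perf top -e cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/
     *        -> 0   (consistent terms; opts->overwrite becomes false)
     * perf top -e cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/
     *        -> -1  (conflicting terms)
     * perf top -e cpu/cpu-cycles/,cpu/instructions,no-overwrite/
     *        -> -1  (only some events set a term)
     */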
@@ -888,12 +1002,33 @@ static int perf_top__start_counters(struct perf_top *top)
        struct perf_evlist *evlist = top->evlist;
        struct record_opts *opts = &top->record_opts;
 
+       if (perf_top__overwrite_check(top)) {
+               ui__error("perf top only supports a consistent per-event "
+                         "overwrite setting for all events\n");
+               goto out_err;
+       }
+
        perf_evlist__config(evlist, opts, &callchain_param);
 
        evlist__for_each_entry(evlist, counter) {
 try_again:
                if (perf_evsel__open(counter, top->evlist->cpus,
                                     top->evlist->threads) < 0) {
+
+                       /*
+                        * Handle the overwrite fallback specially:
+                        * perf top is the only tool that enables
+                        * overwrite mode by default, supports both
+                        * overwrite and non-overwrite mode, and
+                        * requires a consistent mode for all events.
+                        *
+                        * This may move to generic code once more
+                        * tools share this behavior.
+                        */
+                       if (perf_missing_features.write_backward &&
+                           perf_top_overwrite_fallback(top, counter))
+                               goto try_again;
+
                        if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
@@ -1033,7 +1168,7 @@ static int __cmd_top(struct perf_top *top)
 
                perf_top__mmap_read(top);
 
-               if (hits == top->samples)
+               if (opts->overwrite || (hits == top->samples))
                        ret = perf_evlist__poll(top->evlist, 100);
 
                if (resize) {
@@ -1127,6 +1262,7 @@ int cmd_top(int argc, const char **argv)
                                .uses_mmap   = true,
                        },
                        .proc_map_timeout    = 500,
+                       .overwrite      = 1,
                },
                .max_stack           = sysctl_perf_event_max_stack,
                .sym_pcnt_filter     = 5,
index 51abdb0..790ec25 100755 (executable)
@@ -33,7 +33,6 @@ arch/s390/include/uapi/asm/kvm.h
 arch/s390/include/uapi/asm/kvm_perf.h
 arch/s390/include/uapi/asm/ptrace.h
 arch/s390/include/uapi/asm/sie.h
-arch/s390/include/uapi/asm/unistd.h
 arch/arm/include/uapi/asm/kvm.h
 arch/arm64/include/uapi/asm/kvm.h
 arch/alpha/include/uapi/asm/errno.h
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
new file mode 100644 (file)
index 0000000..3b62087
--- /dev/null
@@ -0,0 +1,27 @@
+[
+  {
+    "EventCode": "0x7A",
+    "EventName": "BR_INDIRECT_SPEC",
+    "BriefDescription": "Branch speculatively executed - Indirect branch"
+  },
+  {
+    "EventCode": "0xC9",
+    "EventName": "BR_COND",
+    "BriefDescription": "Conditional branch executed"
+  },
+  {
+    "EventCode": "0xCA",
+    "EventName": "BR_INDIRECT_MISPRED",
+    "BriefDescription": "Indirect branch mispredicted"
+  },
+  {
+    "EventCode": "0xCB",
+    "EventName": "BR_INDIRECT_MISPRED_ADDR",
+    "BriefDescription": "Indirect branch mispredicted because of address miscompare"
+  },
+  {
+    "EventCode": "0xCC",
+    "EventName": "BR_COND_MISPRED",
+    "BriefDescription": "Conditional branch mispredicted"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
new file mode 100644 (file)
index 0000000..480d9f7
--- /dev/null
@@ -0,0 +1,22 @@
+[
+  {
+    "EventCode": "0x60",
+    "EventName": "BUS_ACCESS_LD",
+    "BriefDescription": "Bus access - Read"
+  },
+  {
+    "EventCode": "0x61",
+    "EventName": "BUS_ACCESS_ST",
+    "BriefDescription": "Bus access - Write"
+  },
+  {
+    "EventCode": "0xC0",
+    "EventName": "EXT_MEM_REQ",
+    "BriefDescription": "External memory request"
+  },
+  {
+    "EventCode": "0xC1",
+    "EventName": "EXT_MEM_REQ_NC",
+    "BriefDescription": "Non-cacheable external memory request"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
new file mode 100644 (file)
index 0000000..11baad6
--- /dev/null
@@ -0,0 +1,27 @@
+[
+  {
+    "EventCode": "0xC2",
+    "EventName": "PREFETCH_LINEFILL",
+    "BriefDescription": "Linefill because of prefetch"
+  },
+  {
+    "EventCode": "0xC3",
+    "EventName": "PREFETCH_LINEFILL_DROP",
+    "BriefDescription": "Instruction Cache Throttle occurred"
+  },
+  {
+    "EventCode": "0xC4",
+    "EventName": "READ_ALLOC_ENTER",
+    "BriefDescription": "Entering read allocate mode"
+  },
+  {
+    "EventCode": "0xC5",
+    "EventName": "READ_ALLOC",
+    "BriefDescription": "Read allocate mode"
+  },
+  {
+    "EventCode": "0xC8",
+    "EventName": "EXT_SNOOP",
+    "BriefDescription": "SCU Snooped data from another CPU for this CPU"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
new file mode 100644 (file)
index 0000000..480d9f7
--- /dev/null
@@ -0,0 +1,22 @@
+[
+  {
+    "EventCode": "0x60",
+    "EventName": "BUS_ACCESS_LD",
+    "BriefDescription": "Bus access - Read"
+  },
+  {
+    "EventCode": "0x61",
+    "EventName": "BUS_ACCESS_ST",
+    "BriefDescription": "Bus access - Write"
+  },
+  {
+    "EventCode": "0xC0",
+    "EventName": "EXT_MEM_REQ",
+    "BriefDescription": "External memory request"
+  },
+  {
+    "EventCode": "0xC1",
+    "EventName": "EXT_MEM_REQ_NC",
+    "BriefDescription": "Non-cacheable external memory request"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
new file mode 100644 (file)
index 0000000..73a2240
--- /dev/null
@@ -0,0 +1,32 @@
+[
+  {
+    "EventCode": "0x86",
+    "EventName": "EXC_IRQ",
+    "BriefDescription": "Exception taken, IRQ"
+  },
+  {
+    "EventCode": "0x87",
+    "EventName": "EXC_FIQ",
+    "BriefDescription": "Exception taken, FIQ"
+  },
+  {
+    "EventCode": "0xC6",
+    "EventName": "PRE_DECODE_ERR",
+    "BriefDescription": "Pre-decode error"
+  },
+  {
+    "EventCode": "0xD0",
+    "EventName": "L1I_CACHE_ERR",
+    "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
+  },
+  {
+    "EventCode": "0xD1",
+    "EventName": "L1D_CACHE_ERR",
+    "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
+  },
+  {
+    "EventCode": "0xD2",
+    "EventName": "TLB_ERR",
+    "BriefDescription": "TLB memory error"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
new file mode 100644 (file)
index 0000000..3149fb9
--- /dev/null
@@ -0,0 +1,52 @@
+[
+  {
+    "EventCode": "0xC7",
+    "EventName": "STALL_SB_FULL",
+    "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
+  },
+  {
+    "EventCode": "0xE0",
+    "EventName": "OTHER_IQ_DEP_STALL",
+    "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
+  },
+  {
+    "EventCode": "0xE1",
+    "EventName": "IC_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
+  },
+  {
+    "EventCode": "0xE2",
+    "EventName": "IUTLB_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
+  },
+  {
+    "EventCode": "0xE3",
+    "EventName": "DECODE_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
+  },
+  {
+    "EventCode": "0xE4",
+    "EventName": "OTHER_INTERLOCK_STALL",
+    "BriefDescription": "Cycles there is an interlock other than  Advanced SIMD/Floating-point instructions or load/store instruction"
+  },
+  {
+    "EventCode": "0xE5",
+    "EventName": "AGU_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
+  },
+  {
+    "EventCode": "0xE6",
+    "EventName": "SIMD_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
+  },
+  {
+    "EventCode": "0xE7",
+    "EventName": "LD_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
+  },
+  {
+    "EventCode": "0xE8",
+    "EventName": "ST_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
+  }
+]
index 219d675..e61c9ca 100644 (file)
@@ -13,3 +13,4 @@
 #
 #Family-model,Version,Filename,EventType
 0x00000000420f5160,v1,cavium,core
+0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core
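Note that the existing Cavium row is a literal match while the new row is an extended regular expression: the trailing [[:xdigit:]] matches a Cortex-A53 MIDR value with any revision digit. A minimal standalone sketch of that match, assuming (as the bracket expression suggests) the field is compared as an extended regex against the CPU identifier string; the MIDR value below is an example, not from this patch:

    #include <regex.h>
    #include <stdio.h>

    int main(void)
    {
            regex_t re;
            const char *pat  = "0x00000000410fd03[[:xdigit:]]";
            const char *midr = "0x00000000410fd034";  /* e.g. Cortex-A53 r0p4 */

            if (regcomp(&re, pat, REG_EXTENDED))
                    return 1;
            printf("%s\n", regexec(&re, midr, 0, NULL, 0) ? "no match" : "match");
            regfree(&re);
            return 0;
    }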
index 4035d43..e0b1b41 100644 (file)
@@ -31,10 +31,12 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
        int i;
 
        for (i = 0; i < evlist->nr_mmaps; i++) {
+               struct perf_mmap *map = &evlist->overwrite_mmap[i];
                union perf_event *event;
+               u64 start, end;
 
-               perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
-               while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
+               perf_mmap__read_init(map, true, &start, &end);
+               while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
                        const u32 type = event->header.type;
 
                        switch (type) {
@@ -49,6 +51,7 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
                                return TEST_FAIL;
                        }
                }
+               perf_mmap__read_done(map);
        }
        return TEST_OK;
 }
index 8b3da21..c446c89 100755 (executable)
@@ -22,10 +22,23 @@ trace_libc_inet_pton_backtrace() {
        expected[4]="rtt min.*"
        expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
        expected[6]=".*inet_pton[[:space:]]\($libc\)$"
-       expected[7]="getaddrinfo[[:space:]]\($libc\)$"
-       expected[8]=".*\(.*/bin/ping.*\)$"
-
-       perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
+       case "$(uname -m)" in
+       s390x)
+               eventattr='call-graph=dwarf'
+               expected[7]="gaih_inet[[:space:]]\(inlined\)$"
+               expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
+               expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
+               expected[10]="__libc_start_main[[:space:]]\($libc\)$"
+               expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
+               ;;
+       *)
+               eventattr='max-stack=3'
+               expected[7]="getaddrinfo[[:space:]]\($libc\)$"
+               expected[8]=".*\(.*/bin/ping.*\)$"
+               ;;
+       esac
+
+       perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
                echo $line
                echo "$line" | egrep -q "${expected[$idx]}"
                if [ $? -ne 0 ] ; then
@@ -33,7 +46,7 @@ trace_libc_inet_pton_backtrace() {
                        exit 1
                fi
                let idx+=1
-               [ $idx -eq 9 ] && break
+               [ -z "${expected[$idx]}" ] && break
        done
 }
 
index 68146f4..6495ee5 100644 (file)
@@ -608,7 +608,8 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
        return browser->title ? browser->title(browser, bf, size) : 0;
 }
 
-int hist_browser__run(struct hist_browser *browser, const char *help)
+int hist_browser__run(struct hist_browser *browser, const char *help,
+                     bool warn_lost_event)
 {
        int key;
        char title[160];
@@ -638,8 +639,9 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
                        nr_entries = hist_browser__nr_entries(browser);
                        ui_browser__update_nr_entries(&browser->b, nr_entries);
 
-                       if (browser->hists->stats.nr_lost_warned !=
-                           browser->hists->stats.nr_events[PERF_RECORD_LOST]) {
+                       if (warn_lost_event &&
+                           (browser->hists->stats.nr_lost_warned !=
+                           browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
                                browser->hists->stats.nr_lost_warned =
                                        browser->hists->stats.nr_events[PERF_RECORD_LOST];
                                ui_browser__warn_lost_events(&browser->b);
@@ -2763,7 +2765,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                                    bool left_exits,
                                    struct hist_browser_timer *hbt,
                                    float min_pcnt,
-                                   struct perf_env *env)
+                                   struct perf_env *env,
+                                   bool warn_lost_event)
 {
        struct hists *hists = evsel__hists(evsel);
        struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
@@ -2844,7 +2847,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
                nr_options = 0;
 
-               key = hist_browser__run(browser, helpline);
+               key = hist_browser__run(browser, helpline,
+                                       warn_lost_event);
 
                if (browser->he_selection != NULL) {
                        thread = hist_browser__selected_thread(browser);
@@ -3184,7 +3188,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
 
 static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
                                int nr_events, const char *help,
-                               struct hist_browser_timer *hbt)
+                               struct hist_browser_timer *hbt,
+                               bool warn_lost_event)
 {
        struct perf_evlist *evlist = menu->b.priv;
        struct perf_evsel *pos;
@@ -3203,7 +3208,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
                case K_TIMER:
                        hbt->timer(hbt->arg);
 
-                       if (!menu->lost_events_warned && menu->lost_events) {
+                       if (!menu->lost_events_warned &&
+                           menu->lost_events &&
+                           warn_lost_event) {
                                ui_browser__warn_lost_events(&menu->b);
                                menu->lost_events_warned = true;
                        }
@@ -3224,7 +3231,8 @@ browse_hists:
                        key = perf_evsel__hists_browse(pos, nr_events, help,
                                                       true, hbt,
                                                       menu->min_pcnt,
-                                                      menu->env);
+                                                      menu->env,
+                                                      warn_lost_event);
                        ui_browser__show_title(&menu->b, title);
                        switch (key) {
                        case K_TAB:
@@ -3282,7 +3290,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
                                           int nr_entries, const char *help,
                                           struct hist_browser_timer *hbt,
                                           float min_pcnt,
-                                          struct perf_env *env)
+                                          struct perf_env *env,
+                                          bool warn_lost_event)
 {
        struct perf_evsel *pos;
        struct perf_evsel_menu menu = {
@@ -3309,13 +3318,15 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
                        menu.b.width = line_len;
        }
 
-       return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
+       return perf_evsel_menu__run(&menu, nr_entries, help,
+                                   hbt, warn_lost_event);
 }
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
                                  struct hist_browser_timer *hbt,
                                  float min_pcnt,
-                                 struct perf_env *env)
+                                 struct perf_env *env,
+                                 bool warn_lost_event)
 {
        int nr_entries = evlist->nr_entries;
 
@@ -3325,7 +3336,7 @@ single_entry:
 
                return perf_evsel__hists_browse(first, nr_entries, help,
                                                false, hbt, min_pcnt,
-                                               env);
+                                               env, warn_lost_event);
        }
 
        if (symbol_conf.event_group) {
@@ -3342,5 +3353,6 @@ single_entry:
        }
 
        return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
-                                              hbt, min_pcnt, env);
+                                              hbt, min_pcnt, env,
+                                              warn_lost_event);
 }
index ba43177..9428bee 100644 (file)
@@ -28,7 +28,8 @@ struct hist_browser {
 
 struct hist_browser *hist_browser__new(struct hists *hists);
 void hist_browser__delete(struct hist_browser *browser);
-int hist_browser__run(struct hist_browser *browser, const char *help);
+int hist_browser__run(struct hist_browser *browser, const char *help,
+                     bool warn_lost_event);
 void hist_browser__init(struct hist_browser *browser,
                        struct hists *hists);
 #endif /* _PERF_UI_BROWSER_HISTS_H_ */
index ac35cd2..e5fc14e 100644 (file)
@@ -715,28 +715,11 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
        return perf_mmap__read_forward(md);
 }
 
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
-{
-       struct perf_mmap *md = &evlist->mmap[idx];
-
-       /*
-        * No need to check messup for backward ring buffer:
-        * We can always read arbitrary long data from a backward
-        * ring buffer unless we forget to pause it before reading.
-        */
-       return perf_mmap__read_backward(md);
-}
-
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
        return perf_evlist__mmap_read_forward(evlist, idx);
 }
 
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
-{
-       perf_mmap__read_catchup(&evlist->mmap[idx]);
-}
-
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 {
        perf_mmap__consume(&evlist->mmap[idx], false);
index 75f8e0a..336b838 100644 (file)
@@ -133,10 +133,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
 
 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
                                                 int idx);
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
-                                                 int idx);
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
-
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
 
 int perf_evlist__open(struct perf_evlist *evlist);
index ff359c9..ef35168 100644 (file)
 
 #include "sane_ctype.h"
 
-static struct {
-       bool sample_id_all;
-       bool exclude_guest;
-       bool mmap2;
-       bool cloexec;
-       bool clockid;
-       bool clockid_wrong;
-       bool lbr_flags;
-       bool write_backward;
-       bool group_read;
-} perf_missing_features;
+struct perf_missing_features perf_missing_features;
 
 static clockid_t clockid;
 
index 846e416..a7487c6 100644 (file)
@@ -149,6 +149,20 @@ union u64_swap {
        u32 val32[2];
 };
 
+struct perf_missing_features {
+       bool sample_id_all;
+       bool exclude_guest;
+       bool mmap2;
+       bool cloexec;
+       bool clockid;
+       bool clockid_wrong;
+       bool lbr_flags;
+       bool write_backward;
+       bool group_read;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
 struct cpu_map;
 struct target;
 struct thread_map;
index f6630cb..02721b5 100644 (file)
@@ -430,7 +430,8 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
                                  struct hist_browser_timer *hbt,
                                  float min_pcnt,
-                                 struct perf_env *env);
+                                 struct perf_env *env,
+                                 bool warn_lost_event);
 int script_browse(const char *script_opt);
 #else
 static inline
@@ -438,7 +439,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
                                  const char *help __maybe_unused,
                                  struct hist_browser_timer *hbt __maybe_unused,
                                  float min_pcnt __maybe_unused,
-                                 struct perf_env *env __maybe_unused)
+                                 struct perf_env *env __maybe_unused,
+                                 bool warn_lost_event __maybe_unused)
 {
        return 0;
 }
index 05076e6..91531a7 100644 (file)
@@ -22,29 +22,27 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
 
 /* When check_messup is true, 'end' must point to a good entry */
 static union perf_event *perf_mmap__read(struct perf_mmap *map,
-                                        u64 start, u64 end, u64 *prev)
+                                        u64 *startp, u64 end)
 {
        unsigned char *data = map->base + page_size;
        union perf_event *event = NULL;
-       int diff = end - start;
+       int diff = end - *startp;
 
        if (diff >= (int)sizeof(event->header)) {
                size_t size;
 
-               event = (union perf_event *)&data[start & map->mask];
+               event = (union perf_event *)&data[*startp & map->mask];
                size = event->header.size;
 
-               if (size < sizeof(event->header) || diff < (int)size) {
-                       event = NULL;
-                       goto broken_event;
-               }
+               if (size < sizeof(event->header) || diff < (int)size)
+                       return NULL;
 
                /*
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
-               if ((start & map->mask) + size != ((start + size) & map->mask)) {
-                       unsigned int offset = start;
+               if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+                       unsigned int offset = *startp;
                        unsigned int len = min(sizeof(*event), size), cpy;
                        void *dst = map->event_copy;
 
@@ -59,20 +57,19 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
                        event = (union perf_event *)map->event_copy;
                }
 
-               start += size;
+               *startp += size;
        }
 
-broken_event:
-       if (prev)
-               *prev = start;
-
        return event;
 }
 
+/*
+ * legacy interface for mmap read.
+ * Don't use it. Use perf_mmap__read_event().
+ */
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 {
        u64 head;
-       u64 old = map->prev;
 
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -82,13 +79,26 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 
        head = perf_mmap__read_head(map);
 
-       return perf_mmap__read(map, old, head, &map->prev);
+       return perf_mmap__read(map, &map->prev, head);
 }
 
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
+/*
+ * Read events from the ring buffer one by one.
+ * Each call returns one event.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while ((event = perf_mmap__read_event()) != NULL) {
+ *     // process the event
+ *     perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+                                       bool overwrite,
+                                       u64 *startp, u64 end)
 {
-       u64 head, end;
-       u64 start = map->prev;
+       union perf_event *event;
 
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -96,40 +106,19 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
        if (!refcount_read(&map->refcnt))
                return NULL;
 
-       head = perf_mmap__read_head(map);
-       if (!head)
+       if (startp == NULL)
                return NULL;
 
-       /*
-        * 'head' pointer starts from 0. Kernel minus sizeof(record) form
-        * it each time when kernel writes to it, so in fact 'head' is
-        * negative. 'end' pointer is made manually by adding the size of
-        * the ring buffer to 'head' pointer, means the validate data can
-        * read is the whole ring buffer. If 'end' is positive, the ring
-        * buffer has not fully filled, so we must adjust 'end' to 0.
-        *
-        * However, since both 'head' and 'end' is unsigned, we can't
-        * simply compare 'end' against 0. Here we compare '-head' and
-        * the size of the ring buffer, where -head is the number of bytes
-        * kernel write to the ring buffer.
-        */
-       if (-head < (u64)(map->mask + 1))
-               end = 0;
-       else
-               end = head + map->mask + 1;
-
-       return perf_mmap__read(map, start, end, &map->prev);
-}
+       /* non-overwrite mode doesn't pause the ring buffer */
+       if (!overwrite)
+               end = perf_mmap__read_head(map);
 
-void perf_mmap__read_catchup(struct perf_mmap *map)
-{
-       u64 head;
+       event = perf_mmap__read(map, startp, end);
 
-       if (!refcount_read(&map->refcnt))
-               return;
+       if (!overwrite)
+               map->prev = *startp;
 
-       head = perf_mmap__read_head(map);
-       map->prev = head;
+       return event;
 }
 
 static bool perf_mmap__empty(struct perf_mmap *map)
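Putting the new API together, a consumer loop following the usage comment above looks like this (a minimal sketch modeled on perf_top__mmap_read_idx(); error handling elided, and `md` and `overwrite` assumed set up by the caller):

    u64 start, end;
    union perf_event *event;

    if (perf_mmap__read_init(md, overwrite, &start, &end) < 0)
            return;  /* e.g. -EAGAIN: ring buffer empty */

    while ((event = perf_mmap__read_event(md, overwrite, &start, end)) != NULL) {
            /* ... parse and deliver the event ... */
            perf_mmap__consume(md, overwrite);
    }

    perf_mmap__read_done(md);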
@@ -267,41 +256,60 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6
        return -1;
 }
 
-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
-                   void *to, int push(void *to, void *buf, size_t size))
+/*
+ * Report the start and end of the available data in the ring buffer.
+ */
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+                        u64 *startp, u64 *endp)
 {
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
-       u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
-       void *buf;
-       int rc = 0;
 
-       start = overwrite ? head : old;
-       end = overwrite ? old : head;
+       *startp = overwrite ? head : old;
+       *endp = overwrite ? old : head;
 
-       if (start == end)
-               return 0;
+       if (*startp == *endp)
+               return -EAGAIN;
 
-       size = end - start;
+       size = *endp - *startp;
        if (size > (unsigned long)(md->mask) + 1) {
                if (!overwrite) {
                        WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
 
                        md->prev = head;
                        perf_mmap__consume(md, overwrite);
-                       return 0;
+                       return -EAGAIN;
                }
 
                /*
                 * Backward ring buffer is full. We still have a chance to read
                 * most of data from it.
                 */
-               if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
-                       return -1;
+               if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
+                       return -EINVAL;
        }
 
+       return 0;
+}
+
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
+                   void *to, int push(void *to, void *buf, size_t size))
+{
+       u64 head = perf_mmap__read_head(md);
+       u64 end, start;
+       unsigned char *data = md->base + page_size;
+       unsigned long size;
+       void *buf;
+       int rc = 0;
+
+       rc = perf_mmap__read_init(md, overwrite, &start, &end);
+       if (rc < 0)
+               return (rc == -EAGAIN) ? 0 : -1;
+
+       size = end - start;
+
        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
@@ -327,3 +335,14 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 out:
        return rc;
 }
+
+/*
+ * Mandatory for overwrite mode.
+ * Overwrite mode reads backward, so the last perf_mmap__read()
+ * leaves the tail in map->prev. Reset map->prev to head, which
+ * is where the next read cycle will end.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+       map->prev = perf_mmap__read_head(map);
+}
index e43d7b5..ec7d3a2 100644 (file)
@@ -65,8 +65,6 @@ void perf_mmap__put(struct perf_mmap *map);
 
 void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
 
-void perf_mmap__read_catchup(struct perf_mmap *md);
-
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
        struct perf_event_mmap_page *pc = mm->base;
@@ -87,11 +85,17 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
+
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+                                       bool overwrite,
+                                       u64 *startp, u64 end);
 
 int perf_mmap__push(struct perf_mmap *md, bool backward,
                    void *to, int push(void *to, void *buf, size_t size));
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+                        u64 *startp, u64 *endp);
+void perf_mmap__read_done(struct perf_mmap *map);
 #endif /*__PERF_MMAP_H */
index 443892d..1019bbc 100644 (file)
@@ -340,35 +340,15 @@ size_t hex_width(u64 v)
        return n;
 }
 
-static int hex(char ch)
-{
-       if ((ch >= '0') && (ch <= '9'))
-               return ch - '0';
-       if ((ch >= 'a') && (ch <= 'f'))
-               return ch - 'a' + 10;
-       if ((ch >= 'A') && (ch <= 'F'))
-               return ch - 'A' + 10;
-       return -1;
-}
-
 /*
  * While we find nice hex chars, build a long_val.
  * Return number of chars processed.
  */
 int hex2u64(const char *ptr, u64 *long_val)
 {
-       const char *p = ptr;
-       *long_val = 0;
-
-       while (*p) {
-               const int hex_val = hex(*p);
+       char *p;
 
-               if (hex_val < 0)
-                       break;
-
-               *long_val = (*long_val << 4) | hex_val;
-               p++;
-       }
+       *long_val = strtoull(ptr, &p, 16);
 
        return p - ptr;
 }
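One behavioral note on the strtoull() rewrite: unlike the removed open-coded hex() loop, strtoull() with base 16 also skips leading whitespace and accepts an optional sign and "0x" prefix, so the count of characters processed can differ for such inputs. A quick sketch of the common case:

    u64 val;
    int n;

    n = hex2u64("deadbeef rest-of-line", &val);
    /* n == 8, val == 0xdeadbeefULL: parsing stops at the space */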
index a1883bb..2cccbba 100644 (file)
@@ -56,9 +56,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
 # to compile vs uClibc, that can be done here as well.
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)gcc
-STRIP = $(CROSS_COMPILE)strip
 HOSTCC = gcc
 
 # check if compiler option is supported
index fcb3ed0..dd61446 100644 (file)
@@ -42,6 +42,24 @@ EXTRA_WARNINGS += -Wformat
 
 CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
 
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+$(call allow-override,CXX,$(CROSS_COMPILE)g++)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+
 ifeq ($(CC_NO_CLANG), 1)
 EXTRA_WARNINGS += -Wstrict-aliasing=3
 endif
index 90615e1..815d155 100644 (file)
@@ -11,8 +11,6 @@ endif
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
 
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
 CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
 
 ALL_TARGETS := spidev_test spidev_fdx
index cc15af2..9cf83f8 100644 (file)
@@ -11,3 +11,4 @@ test_progs
 test_tcpbpf_user
 test_verifier_log
 feature
+test_libbpf_open
index 5c43c18..8567a85 100644 (file)
@@ -35,12 +35,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open
 
 include ../lib.mk
 
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
 
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
 .PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644 (file)
index 0000000..9dac9b3
--- /dev/null
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+static __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+       struct rlimit rlim_old, rlim_new = {
+               .rlim_cur       = RLIM_INFINITY,
+               .rlim_max       = RLIM_INFINITY,
+       };
+
+       getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+       /* For the sake of running the test cases, we temporarily
+        * set the rlimit to infinity so the kernel reports errors
+        * from the actual test cases instead of noise from hitting
+        * memlock limits. The limit is per-process, not global,
+        * hence no destructor is really needed here.
+        */
+       if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+               perror("Unable to lift memlock rlimit");
+               /* Try a lower limit (the old one plus 1 MB), but
+                * expect potential test case failures from this!
+                */
+               rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+               rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+               setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+       }
+}
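Because the helper runs as an ELF constructor, a test only has to include the header; the limit is lifted before main() is entered, as the conversions below show. A minimal sketch (run_tests() is a placeholder, not part of this patch):

    #include "bpf_rlimit.h"

    int main(void)
    {
            /* RLIMIT_MEMLOCK was already raised by bpf_rlimit_ctor()
             * before main() ran -- no setrlimit() boilerplate needed.
             */
            return run_tests();  /* placeholder test driver */
    }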
index ff8bd7e..6b1b302 100644 (file)
@@ -9,8 +9,6 @@
 #include <stddef.h>
 #include <stdbool.h>
 
-#include <sys/resource.h>
-
 #include <linux/unistd.h>
 #include <linux/filter.h>
 #include <linux/bpf_perf_event.h>
@@ -19,6 +17,7 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
 
 #ifndef ARRAY_SIZE
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
@@ -702,9 +701,6 @@ static int do_test(unsigned int from, unsigned int to)
 int main(int argc, char **argv)
 {
        unsigned int from = 0, to = ARRAY_SIZE(tests);
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
 
        if (argc == 3) {
                unsigned int l = atoi(argv[argc - 2]);
index 3489cc2..9c8b50b 100644 (file)
 #include <errno.h>
 #include <assert.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
 #include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
 
 #define DEV_CGROUP_PROG "./dev_cgroup.o"
 
 
 int main(int argc, char **argv)
 {
-       struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
        struct bpf_object *obj;
        int error = EXIT_FAILURE;
        int prog_fd, cgroup_fd;
        __u32 prog_cnt;
 
-       if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
-               perror("Unable to lift memlock rlimit");
-
        if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
                          &obj, &prog_fd)) {
                printf("Failed to load DEV_CGROUP program\n");
index 2be87e9..147e34c 100644 (file)
 #include <unistd.h>
 #include <arpa/inet.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <bpf/bpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 struct tlpm_node {
        struct tlpm_node *next;
@@ -736,17 +737,11 @@ static void test_lpm_multi_thread(void)
 
 int main(void)
 {
-       struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
-       int i, ret;
+       int i;
 
        /* we want predictable, pseudo random tests */
        srand(0xf00ba1);
 
-       /* allow unlimited locked memory */
-       ret = setrlimit(RLIMIT_MEMLOCK, &limit);
-       if (ret < 0)
-               perror("Unable to lift memlock rlimit");
-
        test_lpm_basic();
        test_lpm_order();
 
@@ -755,11 +750,8 @@ int main(void)
                test_lpm_map(i);
 
        test_lpm_ipaddr();
-
        test_lpm_delete();
-
        test_lpm_get_next_key();
-
        test_lpm_multi_thread();
 
        printf("test_lpm: OK\n");
index 8c10c91..781c7de 100644 (file)
 #include <time.h>
 
 #include <sys/wait.h>
-#include <sys/resource.h>
 
 #include <bpf/bpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 #define LOCAL_FREE_TARGET      (128)
 #define PERCPU_FREE_TARGET     (4)
@@ -613,7 +614,6 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
 
 int main(int argc, char **argv)
 {
-       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
                             BPF_MAP_TYPE_LRU_PERCPU_HASH};
        int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
@@ -621,8 +621,6 @@ int main(int argc, char **argv)
 
        setbuf(stdout, NULL);
 
-       assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
        nr_cpus = bpf_num_possible_cpus();
        assert(nr_cpus != -1);
        printf("nr_cpus:%d\n\n", nr_cpus);
index 436c4c7..1238733 100644 (file)
 #include <stdlib.h>
 
 #include <sys/wait.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 static int map_flags;
 
@@ -126,6 +127,8 @@ static void test_hashmap_sizes(int task, void *data)
                        fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
                                            2, map_flags);
                        if (fd < 0) {
+                               if (errno == ENOMEM)
+                                       return;
                                printf("Failed to create hashmap key=%d value=%d '%s'\n",
                                       i, j, strerror(errno));
                                exit(1);
@@ -1124,10 +1127,6 @@ static void run_all_tests(void)
 
 int main(void)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
-
        map_flags = 0;
        run_all_tests();
 
index b549308..27ad540 100644 (file)
@@ -26,7 +26,6 @@ typedef __u16 __sum16;
 
 #include <sys/ioctl.h>
 #include <sys/wait.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <fcntl.h>
 
@@ -34,9 +33,11 @@ typedef __u16 __sum16;
 #include <linux/err.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+
 #include "test_iptunnel_common.h"
 #include "bpf_util.h"
 #include "bpf_endian.h"
+#include "bpf_rlimit.h"
 
 static int error_cnt, pass_cnt;
 
@@ -965,10 +966,6 @@ out:
 
 int main(void)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
-
        test_pkt_access();
        test_xdp();
        test_l4lb_all();
index 8b20189..6272c78 100644 (file)
@@ -12,7 +12,6 @@
 #include <assert.h>
 
 #include <sys/socket.h>
-#include <sys/resource.h>
 
 #include <linux/filter.h>
 #include <linux/bpf.h>
@@ -21,6 +20,7 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
 
 static struct bpf_insn prog[BPF_MAXINSNS];
 
@@ -184,11 +184,9 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
 
 int main(void)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
        uint32_t tests = 0;
        int i, fd_map;
 
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
        fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
                                sizeof(int), 1, BPF_F_NO_PREALLOC);
        assert(fd_map > 0);
index 57119ad..3e645ee 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 #include <linux/ip.h>
-#include <linux/in6.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
index 95a370f..84ab516 100644 (file)
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <sys/ioctl.h>
+#include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 #include <linux/perf_event.h>
 #include "test_tcpbpf.h"
 
index c0f16e9..9eb05f3 100644 (file)
@@ -24,7 +24,6 @@
 #include <limits.h>
 
 #include <sys/capability.h>
-#include <sys/resource.h>
 
 #include <linux/unistd.h>
 #include <linux/filter.h>
@@ -41,7 +40,7 @@
 #  define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
 # endif
 #endif
-
+#include "bpf_rlimit.h"
 #include "../../../include/linux/filter.h"
 
 #ifndef ARRAY_SIZE
@@ -57,6 +56,9 @@
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS     (1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT           (1 << 1)
 
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
 struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
@@ -2586,6 +2588,90 @@ static struct bpf_test tests[] = {
                .result_unpriv = REJECT,
                .result = ACCEPT,
        },
+       {
+               "runtime/jit: tail_call within bounds, prog once",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 0),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 42,
+       },
+       {
+               "runtime/jit: tail_call within bounds, prog loop",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 1),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 41,
+       },
+       {
+               "runtime/jit: tail_call within bounds, no prog",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 2),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "runtime/jit: tail_call out of bounds",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 256),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "runtime/jit: pass negative index to tail_call",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, -1),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "runtime/jit: pass > 32bit index to tail_call",
+               .insns = {
+                       BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 2 },
+               .result = ACCEPT,
+               .retval = 42,
+       },
        {
                "stack pointer arithmetic",
                .insns = {
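
The new runtime/jit cases lean on bpf_tail_call()'s fall-through semantics: a failed tail call (empty slot, index out of bounds) simply continues with the next instruction, so the retval comes from the code after the call, while a successful call never returns, so the retval comes from the target program (42 for the slot-0 dummy, 41 for the self-calling slot-1 dummy once the kernel's tail-call limit stops the loop). The "> 32bit index" case returns 42 because the slot index is reduced to 32 bits before the lookup, which a quick shell check illustrates:

    # bpf_tail_call() takes the slot index as a 32-bit value, so the
    # 64-bit 0x100000000 wraps around to slot 0:
    echo $(( 0x100000000 & 0xffffffff ))    # prints 0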
@@ -11137,6 +11223,95 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_TRACEPOINT,
        },
+       {
+               "jit: lsh, rsh, arsh by 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_MOV64_IMM(BPF_REG_1, 0xff),
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+                       BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+                       BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: mov32 for ldimm64, 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+                       BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: mov32 for ldimm64, 2",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+                       BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: various mul tests",
+               .insns = {
+                       BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+                       BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+                       BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+                       BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+                       BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+                       BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+                       BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+                       BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -11161,16 +11336,61 @@ static int create_map(uint32_t size_value, uint32_t max_elem)
        return fd;
 }
 
+static int create_prog_dummy1(void)
+{
+       struct bpf_insn prog[] = {
+               BPF_MOV64_IMM(BPF_REG_0, 42),
+               BPF_EXIT_INSN(),
+       };
+
+       return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+                               ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(int mfd, int idx)
+{
+       struct bpf_insn prog[] = {
+               BPF_MOV64_IMM(BPF_REG_3, idx),
+               BPF_LD_MAP_FD(BPF_REG_2, mfd),
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                            BPF_FUNC_tail_call),
+               BPF_MOV64_IMM(BPF_REG_0, 41),
+               BPF_EXIT_INSN(),
+       };
+
+       return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+                               ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
 static int create_prog_array(void)
 {
-       int fd;
+       int p1key = 0, p2key = 1;
+       int mfd, p1fd, p2fd;
 
-       fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
-                           sizeof(int), 4, 0);
-       if (fd < 0)
+       mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+                            sizeof(int), 4, 0);
+       if (mfd < 0) {
                printf("Failed to create prog array '%s'!\n", strerror(errno));
+               return -1;
+       }
 
-       return fd;
+       p1fd = create_prog_dummy1();
+       p2fd = create_prog_dummy2(mfd, p2key);
+       if (p1fd < 0 || p2fd < 0)
+               goto out;
+       if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+               goto out;
+       if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+               goto out;
+       close(p2fd);
+       close(p1fd);
+
+       return mfd;
+out:
+       close(p2fd);
+       close(p1fd);
+       close(mfd);
+       return -1;
 }
 
 static int create_map_in_map(void)
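
create_prog_array() now returns a prog array with two live entries: slot 0 holds a program that just returns 42, and slot 1 a program that tail-calls its own slot before falling through to return 41. If bpftool is available, a fixture like this can be inspected by hand; the id below is a placeholder, not part of the patch:

    # List maps to find the prog_array id, then dump its slots
    # (<ID> stands in for the id printed by the first command).
    bpftool map show
    bpftool map dump id <ID>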
@@ -11291,7 +11511,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                        goto fail_log;
                }
                if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
-                       printf("FAIL\nUnexpected error message!\n");
+                       printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+                             expected_err, bpf_vlog);
                        goto fail_log;
                }
        }
@@ -11375,9 +11596,20 @@ out:
        return ret;
 }
 
+static void get_unpriv_disabled(void)
+{
+       char buf[2];
+       FILE *fd;
+
+       fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+       if (!fd)
+               return;
+       if (fgets(buf, 2, fd) == buf && atoi(buf))
+               unpriv_disabled = true;
+       fclose(fd);
+}
+
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
-       int i, passes = 0, errors = 0;
+       int i, passes = 0, errors = 0, skips = 0;
 
        for (i = from; i < to; i++) {
                struct bpf_test *test = &tests[i];
@@ -11385,7 +11617,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
                /* Program types that are not supported by non-root we
                 * skip right away.
                 */
-               if (!test->prog_type) {
+               if (!test->prog_type && unpriv_disabled) {
+                       printf("#%d/u %s SKIP\n", i, test->descr);
+                       skips++;
+               } else if (!test->prog_type) {
                        if (!unpriv)
                                set_admin(false);
                        printf("#%d/u %s ", i, test->descr);
@@ -11394,20 +11629,22 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
                                set_admin(true);
                }
 
-               if (!unpriv) {
+               if (unpriv) {
+                       printf("#%d/p %s SKIP\n", i, test->descr);
+                       skips++;
+               } else {
                        printf("#%d/p %s ", i, test->descr);
                        do_test_single(test, false, &passes, &errors);
                }
        }
 
-       printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+       printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+              skips, errors);
        return errors ? EXIT_FAILURE : EXIT_SUCCESS;
 }
 
 int main(int argc, char **argv)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-       struct rlimit rlim = { 1 << 20, 1 << 20 };
        unsigned int from = 0, to = ARRAY_SIZE(tests);
        bool unpriv = !is_admin();
 
@@ -11428,6 +11665,12 @@ int main(int argc, char **argv)
                }
        }
 
-       setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
+       get_unpriv_disabled();
+       if (unpriv && unpriv_disabled) {
+               printf("Cannot run as unprivileged user with sysctl %s.\n",
+                      UNPRIV_SYSCTL);
+               return EXIT_FAILURE;
+       }
+
        return do_test(unpriv, from, to);
 }
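
Unprivileged runs are now gated on the kernel.unprivileged_bpf_disabled sysctl instead of failing noisily, with per-test SKIP lines and a skip count in the summary. The knob can be checked before attempting an unprivileged run, for example:

    # Non-zero means unprivileged BPF loads are disabled and the
    # unprivileged pass of test_verifier will refuse to run.
    cat /proc/sys/kernel/unprivileged_bpf_disabled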
index e9626cf..8d6918c 100644 (file)
@@ -4,7 +4,6 @@
 #include <string.h>
 #include <unistd.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -12,6 +11,8 @@
 
 #include <bpf/bpf.h>
 
+#include "bpf_rlimit.h"
+
 #define LOG_SIZE (1 << 20)
 
 #define err(str...)    printf("ERROR: " str)
@@ -133,16 +134,11 @@ static void test_log_bad(char *log, size_t log_len, int log_level)
 
 int main(int argc, char **argv)
 {
-       struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
        char full_log[LOG_SIZE];
        char log[LOG_SIZE];
        size_t want_len;
        int i;
 
-       /* allow unlimited locked memory to have more consistent error code */
-       if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
-               perror("Unable to lift memlock rlimit");
-
        memset(log, 1, LOG_SIZE);
 
        /* Test incorrect attr */
index a5276a9..0862e6f 100644 (file)
@@ -5,6 +5,7 @@ CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_tests.sh
+TEST_FILES := run_fuse_test.sh
 TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
index d7c30d3..229a038 100644 (file)
@@ -5,7 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755 (executable)
index 0000000..06b1d7c
--- /dev/null
@@ -0,0 +1,375 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+       local rc=$1
+       local expected=$2
+       local msg="$3"
+
+       if [ ${rc} -eq ${expected} ]; then
+               nsuccess=$((nsuccess+1))
+               printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+       else
+               nfail=$((nfail+1))
+               printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+               if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+                       echo
+                       echo "hit enter to continue, 'q' to quit"
+                       read a
+                       [ "$a" = "q" ] && exit 1
+               fi
+       fi
+}
+
+log_section()
+{
+       echo
+       echo "######################################################################"
+       echo "TEST SECTION: $*"
+       echo "######################################################################"
+}
+
+log_subsection()
+{
+       echo
+       echo "#########################################"
+       echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+       echo
+       echo "COMMAND: $*"
+       eval $*
+}
+
+get_linklocal()
+{
+       local dev=$1
+       local addr
+
+       addr=$(ip -6 -br addr show dev ${dev} | \
+       awk '{
+               for (i = 3; i <= NF; ++i) {
+                       if ($i ~ /^fe80/)
+                               print $i
+               }
+       }'
+       )
+       addr=${addr/\/*}
+
+       [ -z "$addr" ] && return 1
+
+       echo $addr
+
+       return 0
+}
+
+################################################################################
+#
+
+setup()
+{
+       echo
+       echo "########################################"
+       echo "Configuring interfaces"
+
+       set -e
+
+       # create namespace
+       ip netns add ${PEER_NS}
+       ip -netns ${PEER_NS} li set lo up
+
+       # add vrf table
+       ip li add ${VRF} type vrf table ${VRF_TABLE}
+       ip li set ${VRF} up
+       ip ro add table ${VRF_TABLE} unreachable default
+       ip -6 ro add table ${VRF_TABLE} unreachable default
+
+       # create test interfaces
+       ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+       ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+       ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+       ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+       # enslave vrf interfaces
+       for n in 5 7; do
+               ip li set ${NETIFS[p${n}]} vrf ${VRF}
+       done
+
+       # add addresses
+       for n in 1 3 5 7; do
+               ip li set ${NETIFS[p${n}]} up
+               ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+               ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+       done
+
+       # move peer interfaces to namespace and add addresses
+       for n in 2 4 6 8; do
+               ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+               ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+               ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+       done
+
+       set +e
+
+       # let DAD complete - assume default of 1 probe
+       sleep 1
+}
+
+cleanup()
+{
+       # make sure we start from a clean slate
+       ip netns del ${PEER_NS} 2>/dev/null
+       for n in 1 3 5 7; do
+               ip link del ${NETIFS[p${n}]} 2>/dev/null
+       done
+       ip link del ${VRF} 2>/dev/null
+       ip ro flush table ${VRF_TABLE}
+       ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+       local table="$1"
+       local prefix="$2"
+       local gw="$3"
+       local dev="$4"
+       local exp_rc="$5"
+       local desc="$6"
+
+       # dev arg may be empty
+       [ -n "${dev}" ] && dev="dev ${dev}"
+
+       run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+       log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+       # - unicast connected, unicast recursive
+       #
+       log_subsection "default VRF - main table"
+
+       run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+       run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+       log_subsection "VRF ${VRF}"
+
+       run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+       run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+       log_subsection "VRF device, PBR table"
+
+       run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+       run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+}
+
+invalid_onlink_ipv4()
+{
+       run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+               "Invalid gw - local unicast address"
+
+       run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+               "Invalid gw - local unicast address, VRF"
+
+       run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+       run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+               "Gateway resolves to wrong nexthop device"
+
+       run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+               "Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+       local table="$1"
+       local prefix="$2"
+       local gw="$3"
+       local dev="$4"
+       local exp_rc="$5"
+       local desc="$6"
+
+       # dev arg may be empty
+       [ -n "${dev}" ] && dev="dev ${dev}"
+
+       run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+       log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+       # - unicast connected, unicast recursive, v4-mapped
+       #
+       log_subsection "default VRF - main table"
+
+       run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+       run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+       run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+       log_subsection "VRF ${VRF}"
+
+       run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+       run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+       run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+       log_subsection "VRF device, PBR table"
+
+       run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+       run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+       run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+}
+
+invalid_onlink_ipv6()
+{
+       local lladdr
+
+       lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+       run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+               "Invalid gw - local unicast address"
+       run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+               "Invalid gw - local linklocal address"
+       run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+               "Invalid gw - multicast address"
+
+       lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+       run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+               "Invalid gw - local unicast address, VRF"
+       run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+               "Invalid gw - local linklocal address, VRF"
+       run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+               "Invalid gw - multicast address, VRF"
+
+       run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+               "No nexthop device given"
+
+       # default VRF validation is done against LOCAL table
+       # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+       #       "Gateway resolves to wrong nexthop device"
+
+       run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+               "Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+       log_section "IPv4 onlink"
+       log_subsection "Valid onlink commands"
+       valid_onlink_ipv4
+       log_subsection "Invalid onlink commands"
+       invalid_onlink_ipv4
+
+       log_section "IPv6 onlink"
+       log_subsection "Valid onlink commands"
+       valid_onlink_ipv6
+       invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+       printf "\nTests passed: %3d\n" ${nsuccess}
+       printf "Tests failed: %3d\n"   ${nfail}
+fi
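
For reference, the feature under test can be exercised directly: the onlink flag tells the kernel to treat the gateway as reachable on the given device even when no connected route vouches for it. Illustrative commands matching the addressing scheme above (veth1 and the addresses follow the script; adjust to your setup):

    ip route add 169.254.101.1/32 via 169.254.1.254 dev veth1 onlink
    ip -6 route add 2001:db8:101::1/128 via 2001:db8:11::64 dev veth1 onlink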
index a9154ee..b617985 100755 (executable)
 
 ret=0
 
-check_err()
-{
-       if [ $ret -eq 0 ]; then
-               ret=$1
-       fi
-}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 
-check_fail()
+log_test()
 {
-       if [ $1 -eq 0 ]; then
+       local rc=$1
+       local expected=$2
+       local msg="$3"
+
+       if [ ${rc} -eq ${expected} ]; then
+               printf "        %-60s  [ OK ]\n" "${msg}"
+       else
                ret=1
+               printf "        %-60s  [FAIL]\n" "${msg}"
+               if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+                       echo
+                       echo "hit enter to continue, 'q' to quit"
+                       read a
+                       [ "$a" = "q" ] && exit 1
+               fi
        fi
 }
 
-netns_create()
+setup()
 {
-       local testns=$1
+       set -e
+       ip netns add testns
+       ip -netns testns link set dev lo up
+
+       ip -netns testns link add dummy0 type dummy
+       ip -netns testns link set dev dummy0 up
+       ip -netns testns address add 198.51.100.1/24 dev dummy0
+       ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+       set +e
 
-       ip netns add $testns
-       ip netns exec $testns ip link set dev lo up
 }
 
-fib_unreg_unicast_test()
+cleanup()
 {
-       ret=0
-
-       netns_create "testns"
-
-       ip netns exec testns ip link add dummy0 type dummy
-       ip netns exec testns ip link set dev dummy0 up
+       ip -netns testns link del dev dummy0 &> /dev/null
+       ip netns del testns
+}
 
-       ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+fib_unreg_unicast_test()
+{
+       echo
+       echo "Single path route test"
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-       check_err $?
+       setup
 
-       ip netns exec testns ip link del dev dummy0
-       check_err $?
+       echo "    Start point"
+       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-       check_fail $?
+       set -e
+       ip -netns testns link del dev dummy0
+       set +e
 
-       ip netns del testns
+       echo "    Nexthop device deleted"
+       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       log_test $? 2 "IPv4 fibmatch - no route"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       log_test $? 2 "IPv6 fibmatch - no route"
 
-       if [ $ret -ne 0 ]; then
-               echo "FAIL: unicast route test"
-               return 1
-       fi
-       echo "PASS: unicast route test"
+       cleanup
 }
 
 fib_unreg_multipath_test()
 {
-       ret=0
-
-       netns_create "testns"
 
-       ip netns exec testns ip link add dummy0 type dummy
-       ip netns exec testns ip link set dev dummy0 up
+       echo
+       echo "Multipath route test"
 
-       ip netns exec testns ip link add dummy1 type dummy
-       ip netns exec testns ip link set dev dummy1 up
+       setup
 
-       ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+       set -e
+       ip -netns testns link add dummy1 type dummy
+       ip -netns testns link set dev dummy1 up
+       ip -netns testns address add 192.0.2.1/24 dev dummy1
+       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
 
-       ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
-       ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
-       ip netns exec testns ip route add 203.0.113.0/24 \
+       ip -netns testns route add 203.0.113.0/24 \
                nexthop via 198.51.100.2 dev dummy0 \
                nexthop via 192.0.2.2 dev dummy1
-       ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+       ip -netns testns -6 route add 2001:db8:3::/64 \
                nexthop via 2001:db8:1::2 dev dummy0 \
                nexthop via 2001:db8:2::2 dev dummy1
+       set +e
+
+       echo "    Start point"
+       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
-       check_err $?
+       set -e
+       ip -netns testns link del dev dummy0
+       set +e
 
-       ip netns exec testns ip link del dev dummy0
-       check_err $?
+       echo "    One nexthop device deleted"
+       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       log_test $? 2 "IPv4 - multipath route removed on delete"
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
        # In IPv6 we do not flush the entire multipath route.
-       check_err $?
+       log_test $? 0 "IPv6 - multipath down to single path"
 
-       ip netns exec testns ip link del dev dummy1
+       set -e
+       ip -netns testns link del dev dummy1
+       set +e
 
-       ip netns del testns
+       echo "    Second nexthop device deleted"
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       log_test $? 2 "IPv6 - no route"
 
-       if [ $ret -ne 0 ]; then
-               echo "FAIL: multipath route test"
-               return 1
-       fi
-       echo "PASS: multipath route test"
+       cleanup
 }
 
 fib_unreg_test()
 {
-       echo "Running netdev unregister tests"
-
        fib_unreg_unicast_test
        fib_unreg_multipath_test
 }
 
 fib_down_unicast_test()
 {
-       ret=0
-
-       netns_create "testns"
-
-       ip netns exec testns ip link add dummy0 type dummy
-       ip netns exec testns ip link set dev dummy0 up
-
-       ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+       echo
+       echo "Single path, admin down"
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-       check_err $?
+       setup
 
-       ip netns exec testns ip link set dev dummy0 down
-       check_err $?
+       echo "    Start point"
+       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-       check_fail $?
+       set -e
+       ip -netns testns link set dev dummy0 down
+       set +e
 
-       ip netns exec testns ip link del dev dummy0
+       echo "    Route deleted on down"
+       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       log_test $? 2 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       log_test $? 2 "IPv6 fibmatch"
 
-       ip netns del testns
-
-       if [ $ret -ne 0 ]; then
-               echo "FAIL: unicast route test"
-               return 1
-       fi
-       echo "PASS: unicast route test"
+       cleanup
 }
 
 fib_down_multipath_test_do()
@@ -161,242 +162,229 @@ fib_down_multipath_test_do()
        local down_dev=$1
        local up_dev=$2
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 \
+       ip -netns testns route get fibmatch 203.0.113.1 \
                oif $down_dev &> /dev/null
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+       log_test $? 2 "IPv4 fibmatch on down device"
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
                oif $down_dev &> /dev/null
-       check_fail $?
+       log_test $? 2 "IPv6 fibmatch on down device"
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 \
+       ip -netns testns route get fibmatch 203.0.113.1 \
                oif $up_dev &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+       log_test $? 0 "IPv4 fibmatch on up device"
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
                oif $up_dev &> /dev/null
-       check_err $?
+       log_test $? 0 "IPv6 fibmatch on up device"
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+       ip -netns testns route get fibmatch 203.0.113.1 | \
                grep $down_dev | grep -q "dead linkdown"
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+       log_test $? 0 "IPv4 flags on down device"
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
                grep $down_dev | grep -q "dead linkdown"
-       check_err $?
+       log_test $? 0 "IPv6 flags on down device"
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+       ip -netns testns route get fibmatch 203.0.113.1 | \
                grep $up_dev | grep -q "dead linkdown"
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+       log_test $? 1 "IPv4 flags on up device"
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
                grep $up_dev | grep -q "dead linkdown"
-       check_fail $?
+       log_test $? 1 "IPv6 flags on up device"
 }
 
 fib_down_multipath_test()
 {
-       ret=0
+       echo
+       echo "Admin down multipath"
 
-       netns_create "testns"
+       setup
 
-       ip netns exec testns ip link add dummy0 type dummy
-       ip netns exec testns ip link set dev dummy0 up
+       set -e
+       ip -netns testns link add dummy1 type dummy
+       ip -netns testns link set dev dummy1 up
 
-       ip netns exec testns ip link add dummy1 type dummy
-       ip netns exec testns ip link set dev dummy1 up
+       ip -netns testns address add 192.0.2.1/24 dev dummy1
+       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
 
-       ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
-
-       ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
-       ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
-       ip netns exec testns ip route add 203.0.113.0/24 \
+       ip -netns testns route add 203.0.113.0/24 \
                nexthop via 198.51.100.2 dev dummy0 \
                nexthop via 192.0.2.2 dev dummy1
-       ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+       ip -netns testns -6 route add 2001:db8:3::/64 \
                nexthop via 2001:db8:1::2 dev dummy0 \
                nexthop via 2001:db8:2::2 dev dummy1
+       set +e
+
+       echo "    Verify start point"
+       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
-       check_err $?
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip link set dev dummy0 down
-       check_err $?
+       set -e
+       ip -netns testns link set dev dummy0 down
+       set +e
 
+       echo "    One device down, one up"
        fib_down_multipath_test_do "dummy0" "dummy1"
 
-       ip netns exec testns ip link set dev dummy0 up
-       check_err $?
-       ip netns exec testns ip link set dev dummy1 down
-       check_err $?
+       set -e
+       ip -netns testns link set dev dummy0 up
+       ip -netns testns link set dev dummy1 down
+       set +e
 
+       echo "    Other device down and up"
        fib_down_multipath_test_do "dummy1" "dummy0"
 
-       ip netns exec testns ip link set dev dummy0 down
-       check_err $?
+       set -e
+       ip -netns testns link set dev dummy0 down
+       set +e
 
-       ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
-       check_fail $?
+       echo "    Both devices down"
+       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       log_test $? 2 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       log_test $? 2 "IPv6 fibmatch"
 
-       ip netns exec testns ip link del dev dummy1
-       ip netns exec testns ip link del dev dummy0
-
-       ip netns del testns
-
-       if [ $ret -ne 0 ]; then
-               echo "FAIL: multipath route test"
-               return 1
-       fi
-       echo "PASS: multipath route test"
+       ip -netns testns link del dev dummy1
+       cleanup
 }
 
 fib_down_test()
 {
-       echo "Running netdev down tests"
-
        fib_down_unicast_test
        fib_down_multipath_test
 }
 
+# Local routes should not be affected when carrier changes.
 fib_carrier_local_test()
 {
-       ret=0
+       echo
+       echo "Local carrier tests - single path"
 
-       # Local routes should not be affected when carrier changes.
-       netns_create "testns"
+       setup
 
-       ip netns exec testns ip link add dummy0 type dummy
-       ip netns exec testns ip link set dev dummy0 up
+       set -e
+       ip -netns testns link set dev dummy0 carrier on
+       set +e
 
-       ip netns exec testns ip link set dev dummy0 carrier on
+       echo "    Start point"
+       ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
-
-       ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
-       check_err $?
-
-       ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+       ip -netns testns route get fibmatch 198.51.100.1 | \
                grep -q "linkdown"
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+       log_test $? 1 "IPv4 - no linkdown flag"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
                grep -q "linkdown"
-       check_fail $?
+       log_test $? 1 "IPv6 - no linkdown flag"
 
-       ip netns exec testns ip link set dev dummy0 carrier off
+       set -e
+       ip -netns testns link set dev dummy0 carrier off
+       sleep 1
+       set +e
 
-       ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
-       check_err $?
+       echo "    Carrier off on nexthop"
+       ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+       ip -netns testns route get fibmatch 198.51.100.1 | \
                grep -q "linkdown"
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+       log_test $? 1 "IPv4 - linkdown flag set"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
                grep -q "linkdown"
-       check_fail $?
+       log_test $? 1 "IPv6 - linkdown flag set"
 
-       ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+       set -e
+       ip -netns testns address add 192.0.2.1/24 dev dummy0
+       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+       set +e
 
-       ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null
-       check_err $?
+       echo "    Route to local address with carrier down"
+       ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 192.0.2.1 | \
+       ip -netns testns route get fibmatch 192.0.2.1 | \
                grep -q "linkdown"
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \
+       log_test $? 1 "IPv4 linkdown flag set"
+       ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \
                grep -q "linkdown"
-       check_fail $?
+       log_test $? 1 "IPv6 linkdown flag set"
 
-       ip netns exec testns ip link del dev dummy0
-
-       ip netns del testns
-
-       if [ $ret -ne 0 ]; then
-               echo "FAIL: local route carrier test"
-               return 1
-       fi
-       echo "PASS: local route carrier test"
+       cleanup
 }
 
 fib_carrier_unicast_test()
 {
        ret=0
 
-       netns_create "testns"
-
-       ip netns exec testns ip link add dummy0 type dummy
-       ip netns exec testns ip link set dev dummy0 up
+       echo
+       echo "Single path route carrier test"
 
-       ip netns exec testns ip link set dev dummy0 carrier on
+       setup
 
-       ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+       set -e
+       ip -netns testns link set dev dummy0 carrier on
+       set +e
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-       check_err $?
+       echo "    Start point"
+       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+       ip -netns testns route get fibmatch 198.51.100.2 | \
                grep -q "linkdown"
-       check_fail $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+       log_test $? 1 "IPv4 no linkdown flag"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
                grep -q "linkdown"
-       check_fail $?
+       log_test $? 1 "IPv6 no linkdown flag"
 
-       ip netns exec testns ip link set dev dummy0 carrier off
+       set -e
+       ip -netns testns link set dev dummy0 carrier off
+       set +e
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-       check_err $?
+       echo "    Carrier down"
+       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+       ip -netns testns route get fibmatch 198.51.100.2 | \
                grep -q "linkdown"
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+       log_test $? 0 "IPv4 linkdown flag set"
+       ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
                grep -q "linkdown"
-       check_err $?
+       log_test $? 0 "IPv6 linkdown flag set"
 
-       ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
-       ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+       set -e
+       ip -netns testns address add 192.0.2.1/24 dev dummy0
+       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+       set +e
 
-       ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null
-       check_err $?
+       echo "    Second address added with carrier down"
+       ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null
+       log_test $? 0 "IPv4 fibmatch"
+       ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+       log_test $? 0 "IPv6 fibmatch"
 
-       ip netns exec testns ip route get fibmatch 192.0.2.2 | \
+       ip -netns testns route get fibmatch 192.0.2.2 | \
                grep -q "linkdown"
-       check_err $?
-       ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \
+       log_test $? 0 "IPv4 linkdown flag set"
+       ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \
                grep -q "linkdown"
-       check_err $?
+       log_test $? 0 "IPv6 linkdown flag set"
 
-       ip netns exec testns ip link del dev dummy0
-
-       ip netns del testns
-
-       if [ $ret -ne 0 ]; then
-               echo "FAIL: unicast route carrier test"
-               return 1
-       fi
-       echo "PASS: unicast route carrier test"
+       cleanup
 }
 
 fib_carrier_test()
 {
-       echo "Running netdev carrier change tests"
-
        fib_carrier_local_test
        fib_carrier_unicast_test
 }
@@ -424,6 +412,9 @@ if [ $? -ne 0 ]; then
        exit 0
 fi
 
+# start clean
+cleanup &> /dev/null
+
 fib_test
 
 exit $ret
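
Both rewritten scripts honour the PAUSE_ON_FAIL environment variable, which stops at a failing check so the namespace can be inspected before cleanup runs:

    # Stop on the first failing fibmatch check for manual inspection.
    PAUSE_ON_FAIL=yes ./fib_tests.sh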
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644 (file)
index 0000000..a793eef
--- /dev/null
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644 (file)
index 0000000..4a0964c
--- /dev/null
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces cannot be used with actual switching
+ASICs, as their ports cannot be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+                             br0
+                              +
+               vrf-h1         |           vrf-h2
+                 +        +---+----+        +
+                 |        |        |        |
+    192.0.2.1/24 +        +        +        + 192.0.2.2/24
+               swp1     swp2     swp3     swp4
+                 +        +        +        +
+                 |        |        |        |
+                 +--------+        +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested, without noise from
+other systems.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-port LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loop back more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+  of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+  RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+  multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
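
On a software-only setup, the looped switch ports can be provided by veth pairs; a minimal sketch matching the veth0..veth7 naming used by the sample configuration added below:

    # Create four veth pairs so p1 loops to p2, p3 to p4, and so on.
    for i in 0 2 4 6; do
            ip link add veth$i type veth peer name veth$((i+1))
    done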
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755 (executable)
index 0000000..75d9224
--- /dev/null
@@ -0,0 +1,88 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+       # 10 Seconds ageing time.
+       ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+               mcast_snooping 0
+
+       ip link set dev $swp1 master br0
+       ip link set dev $swp2 master br0
+
+       ip link set dev br0 up
+       ip link set dev $swp1 up
+       ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+       ip link set dev $swp2 down
+       ip link set dev $swp1 down
+
+       ip link del dev br0
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
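
Interface names come from forwarding.config by default, but lib.sh also accepts them as positional arguments, filling NETIFS[p1..pN] in order; for example:

    # Run the VLAN-aware bridge test on explicitly named interfaces.
    ./bridge_vlan_aware.sh veth0 veth1 veth2 veth3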
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644 (file)
index 0000000..5cd2aed
--- /dev/null
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644 (file)
index 0000000..ab235c1
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
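
lib.sh sources forwarding.config from the current directory when it exists (the file is gitignored above), so the sample is meant to be copied and edited:

    # Seed a local configuration from the sample, then adjust NETIFS etc.
    cp forwarding.config.sample forwarding.config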
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644 (file)
index 0000000..d0af521
--- /dev/null
@@ -0,0 +1,540 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+
+if [[ -f forwarding.config ]]; then
+       source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+       tc -j &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: iproute2 too old; tc is missing JSON support"
+               exit 1
+       fi
+
+       tc filter help 2>&1 | grep block &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: iproute2 too old; tc is missing shared block support"
+               exit 1
+       fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+       echo "SKIP: need root privileges"
+       exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+       check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+       echo "SKIP: jq not installed"
+       exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+       echo "SKIP: $MZ not installed"
+       exit 0
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+       echo "SKIP: importer does not define \"NUM_NETIFS\""
+       exit 0
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+       if [[ "$count" -eq "0" ]]; then
+               unset NETIFS
+               declare -A NETIFS
+       fi
+       count=$((count + 1))
+       NETIFS[p$count]="$1"
+       shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+       ip link show dev ${NETIFS[p$i]} &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: could not find all required interfaces"
+               exit 0
+       fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+       local err=$1
+       local msg=$2
+
+       if [[ $RET -eq 0 && $err -ne 0 ]]; then
+               RET=$err
+               retmsg=$msg
+       fi
+}
+
+check_fail()
+{
+       local err=$1
+       local msg=$2
+
+       if [[ $RET -eq 0 && $err -eq 0 ]]; then
+               RET=1
+               retmsg=$msg
+       fi
+}
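+
+# Typical usage, as in the tests below: run a command, record the first
+# failure and let log_test() report it, e.g. (some_command is illustrative):
+#
+#   some_command
+#   check_err $? "some_command failed"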
+
+log_test()
+{
+       local test_name=$1
+       local opt_str=$2
+
+       if [[ $# -eq 2 ]]; then
+               opt_str="($opt_str)"
+       fi
+
+       if [[ $RET -ne 0 ]]; then
+               EXIT_STATUS=1
+               printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
+               if [[ ! -z "$retmsg" ]]; then
+                       printf "\t%s\n" "$retmsg"
+               fi
+               if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+                       echo "Hit enter to continue, 'q' to quit"
+                       read a
+                       [ "$a" = "q" ] && exit 1
+               fi
+               return 1
+       fi
+
+       printf "TEST: %-60s  [PASS]\n" "$test_name $opt_str"
+       return 0
+}
+
+log_info()
+{
+       local msg=$1
+
+       echo "INFO: $msg"
+}
+
+setup_wait()
+{
+       for i in $(eval echo {1..$NUM_NETIFS}); do
+               while true; do
+                       ip link show dev ${NETIFS[p$i]} up \
+                               | grep 'state UP' &> /dev/null
+                       if [[ $? -ne 0 ]]; then
+                               sleep 1
+                       else
+                               break
+                       fi
+               done
+       done
+
+       # Make sure links are ready.
+       sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+       if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+               echo "Pausing before cleanup, hit any key to continue"
+               read
+       fi
+}
+
+vrf_prepare()
+{
+       ip -4 rule add pref 32765 table local
+       ip -4 rule del pref 0
+       ip -6 rule add pref 32765 table local
+       ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+       ip -6 rule add pref 0 table local
+       ip -6 rule del pref 32765
+       ip -4 rule add pref 0 table local
+       ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_tb_id_assign()
+{
+       local vrf_name=$1
+
+       __last_tb_id=$((__last_tb_id + 1))
+       __TB_IDS[$vrf_name]=$__last_tb_id
+       return $__last_tb_id
+}
+
+__vrf_tb_id_lookup()
+{
+       local vrf_name=$1
+
+       return ${__TB_IDS[$vrf_name]}
+}
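+
+# Note: the table ID is passed back through the return status ($?), which
+# limits it to the 1-255 range and hence to at most 255 VRFs per test run.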
+
+vrf_create()
+{
+       local vrf_name=$1
+       local tb_id
+
+       __vrf_tb_id_assign $vrf_name
+       tb_id=$?
+
+       ip link add dev $vrf_name type vrf table $tb_id
+       ip -4 route add table $tb_id unreachable default metric 4278198272
+       ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+       local vrf_name=$1
+       local tb_id
+
+       __vrf_tb_id_lookup $vrf_name
+       tb_id=$?
+
+       ip -6 route del table $tb_id unreachable default metric 4278198272
+       ip -4 route del table $tb_id unreachable default metric 4278198272
+       ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+       local if_name=$1
+       local add_del=$2
+       local array
+
+       shift
+       shift
+       array=("${@}")
+
+       for addrstr in "${array[@]}"; do
+               ip address $add_del $addrstr dev $if_name
+       done
+}
+
+simple_if_init()
+{
+       local if_name=$1
+       local vrf_name
+       local array
+
+       shift
+       vrf_name=v$if_name
+       array=("${@}")
+
+       vrf_create $vrf_name
+       ip link set dev $if_name master $vrf_name
+       ip link set dev $vrf_name up
+       ip link set dev $if_name up
+
+       __addr_add_del $if_name add "${array[@]}"
+}
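+
+# Example from the tc tests below; any number of addresses may follow the
+# interface name:
+#
+#   simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+#   ...
+#   simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24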
+
+simple_if_fini()
+{
+       local if_name=$1
+       local vrf_name
+       local array
+
+       shift
+       vrf_name=v$if_name
+       array=("${@}")
+
+       __addr_add_del $if_name del "${array[@]}"
+
+       ip link set dev $if_name down
+       vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+       local if_name=$1
+
+       ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+       local if_name=$1
+
+       ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+       local if_name=$1
+
+       ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+       local bridge=$1
+       local ageing_time
+
+       # The ageing time is reported in centiseconds; divide by 100 to
+       # convert to seconds.
+       ageing_time=$(ip -j -d link show dev $bridge \
+                     | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+       echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+       ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+       ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+       sysctl -q -w net.ipv4.conf.all.forwarding=1
+       sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+       sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+       sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+       for i in $(eval echo {1..$NUM_NETIFS}); do
+               ethtool -k ${NETIFS[p$i]} \
+                       | grep "hw-tc-offload: on" &> /dev/null
+               if [[ $? -ne 0 ]]; then
+                       return 1
+               fi
+       done
+
+       return 0
+}
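+
+# Callers first run their tests with software-only ("skip_hw") filters and
+# re-run them with "skip_sw" when this check passes, e.g.:
+#
+#   tc_offload_check
+#   if [[ $? -ne 0 ]]; then
+#           log_info "Could not test offloaded functionality"
+#   else
+#           tcflags="skip_sw"
+#           ...
+#   fi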
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+       local if_name=$1
+       local dip=$2
+       local vrf_name
+
+       RET=0
+
+       vrf_name=$(master_name_get $if_name)
+       ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+       check_err $?
+       log_test "ping"
+}
+
+ping6_test()
+{
+       local if_name=$1
+       local dip=$2
+       local vrf_name
+
+       RET=0
+
+       vrf_name=$(master_name_get $if_name)
+       ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+       check_err $?
+       log_test "ping6"
+}
+
+learning_test()
+{
+       local bridge=$1
+       local br_port1=$2       # Connected to `host1_if`.
+       local host1_if=$3
+       local host2_if=$4
+       local mac=de:ad:be:ef:13:37
+       local ageing_time
+
+       RET=0
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       # Disable unknown unicast flooding on `br_port1` to make sure
+       # packets are only forwarded through the port after a matching
+       # FDB entry was installed.
+       bridge link set dev $br_port1 flood off
+
+       tc qdisc add dev $host1_if ingress
+       tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+               flower dst_mac $mac action drop
+
+       $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+       sleep 1
+
+       tc -j -s filter show dev $host1_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       check_fail $? "Packet reached second host when should not"
+
+       $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+       sleep 1
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_err $? "Did not find FDB record when should"
+
+       $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+       sleep 1
+
+       tc -j -s filter show dev $host1_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       check_err $? "Packet did not reach second host when should"
+
+       # Wait for 10 seconds after the ageing time to make sure FDB
+       # record was aged-out.
+       ageing_time=$(bridge_ageing_time_get $bridge)
+       sleep $((ageing_time + 10))
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       bridge link set dev $br_port1 learning off
+
+       $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+       sleep 1
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       bridge link set dev $br_port1 learning on
+
+       tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+       tc qdisc del dev $host1_if ingress
+
+       bridge link set dev $br_port1 flood on
+
+       log_test "FDB learning"
+}
+
+flood_test_do()
+{
+       local should_flood=$1
+       local mac=$2
+       local ip=$3
+       local host1_if=$4
+       local host2_if=$5
+       local err=0
+
+       # Add an ACL on `host2_if` which will tell us whether the packet
+       # was flooded to it or not.
+       tc qdisc add dev $host2_if ingress
+       tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+               flower dst_mac $mac action drop
+
+       $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+       sleep 1
+
+       tc -j -s filter show dev $host2_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       if [[ $? -ne 0 && $should_flood == "true" || \
+             $? -eq 0 && $should_flood == "false" ]]; then
+               err=1
+       fi
+
+       tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+       tc qdisc del dev $host2_if ingress
+
+       return $err
+}
+
+flood_unicast_test()
+{
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+       local mac=de:ad:be:ef:13:37
+       local ip=192.0.2.100
+
+       RET=0
+
+       bridge link set dev $br_port flood off
+
+       flood_test_do false $mac $ip $host1_if $host2_if
+       check_err $? "Packet flooded when should not"
+
+       bridge link set dev $br_port flood on
+
+       flood_test_do true $mac $ip $host1_if $host2_if
+       check_err $? "Packet was not flooded when should"
+
+       log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+       local mac=01:00:5e:00:00:01
+       local ip=239.0.0.1
+
+       RET=0
+
+       bridge link set dev $br_port mcast_flood off
+
+       flood_test_do false $mac $ip $host1_if $host2_if
+       check_err $? "Packet flooded when should not"
+
+       bridge link set dev $br_port mcast_flood on
+
+       flood_test_do true $mac $ip $host1_if $host2_if
+       check_err $? "Packet was not flooded when should"
+
+       log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+       # `br_port` is connected to `host2_if`
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+
+       flood_unicast_test $br_port $host1_if $host2_if
+       flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755 (executable)
index 0000000..cc6a14a
--- /dev/null
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+       vrf_create "vrf-h1"
+       ip link set dev $h1 master vrf-h1
+
+       ip link set dev vrf-h1 up
+       ip link set dev $h1 up
+
+       ip address add 192.0.2.2/24 dev $h1
+       ip address add 2001:db8:1::2/64 dev $h1
+
+       ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+       ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-h1
+       ip route del 198.51.100.0/24 vrf vrf-h1
+
+       ip address del 2001:db8:1::2/64 dev $h1
+       ip address del 192.0.2.2/24 dev $h1
+
+       ip link set dev $h1 down
+       vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+       vrf_create "vrf-h2"
+       ip link set dev $h2 master vrf-h2
+
+       ip link set dev vrf-h2 up
+       ip link set dev $h2 up
+
+       ip address add 198.51.100.2/24 dev $h2
+       ip address add 2001:db8:2::2/64 dev $h2
+
+       ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+       ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-h2
+       ip route del 192.0.2.0/24 vrf vrf-h2
+
+       ip address del 2001:db8:2::2/64 dev $h2
+       ip address del 198.51.100.2/24 dev $h2
+
+       ip link set dev $h2 down
+       vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+       ip link set dev $rp1 up
+       ip link set dev $rp2 up
+
+       ip address add 192.0.2.1/24 dev $rp1
+       ip address add 2001:db8:1::1/64 dev $rp1
+
+       ip address add 198.51.100.1/24 dev $rp2
+       ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+       ip address del 2001:db8:2::1/64 dev $rp2
+       ip address del 198.51.100.1/24 dev $rp2
+
+       ip address del 2001:db8:1::1/64 dev $rp1
+       ip address del 192.0.2.1/24 dev $rp1
+
+       ip link set dev $rp2 down
+       ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp1=${NETIFS[p2]}
+
+       rp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router_create
+
+       forwarding_enable
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755 (executable)
index 0000000..3bc3510
--- /dev/null
@@ -0,0 +1,376 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+       vrf_create "vrf-h1"
+       ip link set dev $h1 master vrf-h1
+
+       ip link set dev vrf-h1 up
+       ip link set dev $h1 up
+
+       ip address add 192.0.2.2/24 dev $h1
+       ip address add 2001:db8:1::2/64 dev $h1
+
+       ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+       ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-h1
+       ip route del 198.51.100.0/24 vrf vrf-h1
+
+       ip address del 2001:db8:1::2/64 dev $h1
+       ip address del 192.0.2.2/24 dev $h1
+
+       ip link set dev $h1 down
+       vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+       vrf_create "vrf-h2"
+       ip link set dev $h2 master vrf-h2
+
+       ip link set dev vrf-h2 up
+       ip link set dev $h2 up
+
+       ip address add 198.51.100.2/24 dev $h2
+       ip address add 2001:db8:2::2/64 dev $h2
+
+       ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+       ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-h2
+       ip route del 192.0.2.0/24 vrf vrf-h2
+
+       ip address del 2001:db8:2::2/64 dev $h2
+       ip address del 198.51.100.2/24 dev $h2
+
+       ip link set dev $h2 down
+       vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+       vrf_create "vrf-r1"
+       ip link set dev $rp11 master vrf-r1
+       ip link set dev $rp12 master vrf-r1
+       ip link set dev $rp13 master vrf-r1
+
+       ip link set dev vrf-r1 up
+       ip link set dev $rp11 up
+       ip link set dev $rp12 up
+       ip link set dev $rp13 up
+
+       ip address add 192.0.2.1/24 dev $rp11
+       ip address add 2001:db8:1::1/64 dev $rp11
+
+       ip address add 169.254.2.12/24 dev $rp12
+       ip address add fe80:2::12/64 dev $rp12
+
+       ip address add 169.254.3.13/24 dev $rp13
+       ip address add fe80:3::13/64 dev $rp13
+
+       ip route add 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       ip route add 2001:db8:2::/64 vrf vrf-r1 \
+               nexthop via fe80:2::22 dev $rp12 \
+               nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-r1
+       ip route del 198.51.100.0/24 vrf vrf-r1
+
+       ip address del fe80:3::13/64 dev $rp13
+       ip address del 169.254.3.13/24 dev $rp13
+
+       ip address del fe80:2::12/64 dev $rp12
+       ip address del 169.254.2.12/24 dev $rp12
+
+       ip address del 2001:db8:1::1/64 dev $rp11
+       ip address del 192.0.2.1/24 dev $rp11
+
+       ip link set dev $rp13 down
+       ip link set dev $rp12 down
+       ip link set dev $rp11 down
+
+       vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+       vrf_create "vrf-r2"
+       ip link set dev $rp21 master vrf-r2
+       ip link set dev $rp22 master vrf-r2
+       ip link set dev $rp23 master vrf-r2
+
+       ip link set dev vrf-r2 up
+       ip link set dev $rp21 up
+       ip link set dev $rp22 up
+       ip link set dev $rp23 up
+
+       ip address add 198.51.100.1/24 dev $rp21
+       ip address add 2001:db8:2::1/64 dev $rp21
+
+       ip address add 169.254.2.22/24 dev $rp22
+       ip address add fe80:2::22/64 dev $rp22
+
+       ip address add 169.254.3.23/24 dev $rp23
+       ip address add fe80:3::23/64 dev $rp23
+
+       ip route add 192.0.2.0/24 vrf vrf-r2 \
+               nexthop via 169.254.2.12 dev $rp22 \
+               nexthop via 169.254.3.13 dev $rp23
+       ip route add 2001:db8:1::/64 vrf vrf-r2 \
+               nexthop via fe80:2::12 dev $rp22 \
+               nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-r2
+       ip route del 192.0.2.0/24 vrf vrf-r2
+
+       ip address del fe80:3::23/64 dev $rp23
+       ip address del 169.254.3.23/24 dev $rp23
+
+       ip address del fe80:2::22/64 dev $rp22
+       ip address del 169.254.2.22/24 dev $rp22
+
+       ip address del 2001:db8:2::1/64 dev $rp21
+       ip address del 198.51.100.1/24 dev $rp21
+
+       ip link set dev $rp23 down
+       ip link set dev $rp22 down
+       ip link set dev $rp21 down
+
+       vrf_destroy "vrf-r2"
+}
+
+multipath_eval()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local packets_rp12=$4
+       local packets_rp13=$5
+       local weights_ratio packets_ratio diff
+
+       RET=0
+
+       # Compute the expected ratio up front so it can be reported even
+       # when one of the links carried no packets at all.
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+                      | bc -l)
+       else
+               weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+                      | bc -l)
+       fi
+
+       if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+              check_err 1 "Packet difference is 0"
+              log_test "Multipath"
+              log_info "Expected ratio $weights_ratio"
+              return
+       fi
+
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+                      | bc -l)
+       else
+               packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+                      | bc -l)
+       fi
+
+       diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+       diff=${diff#-}
+
+       test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+       check_err $? "Too large discrepancy between expected and measured ratios"
+       log_test "$desc"
+       log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
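+
+# Worked example: with weights 2:1 the expected ratio is 2.00. If rp12
+# carried 210 packets and rp13 carried 100, the measured ratio is 2.10
+# and the relative error is 0.10 / 2.00 = 0.05 <= 0.1, so the test passes.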
+
+multipath4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+       local hash_policy
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+               nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+              -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       # Restore settings.
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_l4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+       local hash_policy
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
+       sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+              nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+              -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 \
+              nexthop via fe80:3::23 dev $rp13
+
+       sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+              nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       # Generate 16384 echo requests, each with a random flow label.
+       for _ in $(seq 1 16384); do
+              ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+       done
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 \
+              nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+       log_info "Running IPv4 multipath tests"
+       multipath4_test "ECMP" 1 1
+       multipath4_test "Weighted MP 2:1" 2 1
+       multipath4_test "Weighted MP 11:45" 11 45
+
+       log_info "Running IPv6 multipath tests"
+       multipath6_test "ECMP" 1 1
+       multipath6_test "Weighted MP 2:1" 2 1
+       multipath6_test "Weighted MP 11:45" 11 45
+
+       log_info "Running IPv6 L4 hash multipath tests"
+       multipath6_l4_test "ECMP" 1 1
+       multipath6_l4_test "Weighted MP 2:1" 2 1
+       multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp11=${NETIFS[p2]}
+
+       rp12=${NETIFS[p3]}
+       rp22=${NETIFS[p4]}
+
+       rp13=${NETIFS[p5]}
+       rp23=${NETIFS[p6]}
+
+       rp21=${NETIFS[p7]}
+       h2=${NETIFS[p8]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router1_create
+       router2_create
+
+       forwarding_enable
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router2_destroy
+       router1_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755 (executable)
index 0000000..8ab5cf0
--- /dev/null
@@ -0,0 +1,195 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
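+# Tests run first with software-only ("skip_hw") filters; when
+# tc_offload_check passes at the bottom of this script, tcflags is
+# switched to "skip_sw" and the tests are repeated against hardware.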
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+       simple_if_init $swp1 192.0.2.2/24
+       tc qdisc add dev $swp1 clsact
+
+       simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+       simple_if_fini $swp2 192.0.2.1/24
+
+       tc qdisc del dev $swp1 clsact
+       simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_redirect_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched without redirect rule inserted"
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+               dev $swp2
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_err $? "Did not match incoming redirected packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "mirred egress redirect ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+       RET=0
+
+       tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+               skip_hw dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 102 1
+       check_err $? "Packet was not dropped"
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action ok
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_err $? "Did not see trapped packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+       RET=0
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               skip_hw dst_ip 192.0.2.2 action drop
+       tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+               dev $swp2
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_fail $? "Saw packet without trap rule inserted"
+
+       tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action trap
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 102 1
+       check_err $? "Packet was not trapped"
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_err $? "Did not see trapped packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+       tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       swp1origmac=$(mac_get $swp1)
+       swp2origmac=$(mac_get $swp2)
+       ip link set $swp1 address $h2mac
+       ip link set $swp2 address $h1mac
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+
+       ip link set $swp2 address $swp2origmac
+       ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_redirect_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       gact_drop_and_ok_test
+       mirred_egress_redirect_test
+       gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755 (executable)
index 0000000..2fd1522
--- /dev/null
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower $tcflags dst_mac $h2mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 1101 1
+       check_fail $? "matched on filter in unreachable chain"
+
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower
+
+       log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower $tcflags dst_mac $h2mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_mac $h2mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_mac $h2mac action goto chain 1
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_err $? "Did not match on correct filter with goto chain action"
+
+       tc_check_packets "dev $h2 ingress" 1101 1
+       check_err $? "Did not match on correct filter in chain 1"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower
+
+       log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       h2=${NETIFS[p2]}
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       unreachable_chain_test
+       gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644 (file)
index 0000000..9d3b64a
--- /dev/null
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+       local id=$1
+       local handle=$2
+       local count=$3
+       local ret
+
+       output="$(tc -j -s filter show $id)"
+       # Capture tc's exit status before invoking jq: jq itself returns 0
+       # even when its input is the empty string ("").
+       ret=$?
+       if [[ $ret -ne 0 ]]; then
+               return $ret
+       fi
+       echo $output | \
+               jq -e ".[] \
+               | select(.options.handle == $handle) \
+               | select(.options.actions[0].stats.packets == $count)" \
+               &> /dev/null
+       return $?
+}
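+
+# Example, as used by the tc tests: expect exactly one hit on handle 101
+# of the ingress filters of $h2:
+#
+#   tc_check_packets "dev $h2 ingress" 101 1
+#   check_err $? "Did not match on correct filter"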
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755 (executable)
index 0000000..032b882
--- /dev/null
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+       local dummy_mac=de:ad:be:ef:aa:aa
+
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_mac $dummy_mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_mac $h2mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+       local dummy_mac=de:ad:be:ef:aa:aa
+
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags src_mac $dummy_mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags src_mac $h1mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 198.51.100.2 action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+       tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags dst_ip 192.0.2.0/24 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_err $? "Did not match on correct filter with mask"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+       log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags src_ip 198.51.100.1 action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags src_ip 192.0.2.1 action drop
+       tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags src_ip 192.0.2.0/24 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_err $? "Did not match on correct filter with mask"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+       log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       h2=${NETIFS[p2]}
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       match_dst_mac_test
+       match_src_mac_test
+       match_dst_ip_test
+       match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755 (executable)
index 0000000..077b980
--- /dev/null
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+       simple_if_init $swp1 192.0.2.2/24
+       tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+       simple_if_init $swp2 192.0.2.2/24
+       tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+       tc qdisc del dev $swp2 clsact
+       simple_if_fini $swp2 192.0.2.2/24
+
+       tc qdisc del dev $swp1 clsact
+       simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+       RET=0
+
+       tc filter add block 22 protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "block 22" 101 1
+       check_err $? "Did not match first incoming packet on a block"
+
+       $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "block 22" 101 2
+       check_err $? "Did not match second incoming packet on a block"
+
+       tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+       log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       swmac=$(mac_get $swp1)
+       swp2origmac=$(mac_get $swp2)
+       ip link set $swp2 address $swmac
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+
+       ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       shared_block_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
new file mode 100755 (executable)
index 0000000..f57a2ea
--- /dev/null
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
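+#
+# Usage example (as in the packet socket test runner further below):
+#
+#   ./in_netns.sh ./psock_fanout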
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+       ip netns add "${NETNS}"
+       ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+       ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+"$@"
+exit "$?"
index e11fe84..406cc70 100644 (file)
@@ -14,6 +14,9 @@
  * - SOCK_DGRAM
  * - SOCK_RAW
  *
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
  * Start this program on two connected hosts, one in send mode and
  * the other with option '-r' to put it in receiver mode.
  *
@@ -53,6 +56,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <linux/rds.h>
 
 #ifndef SO_EE_ORIGIN_ZEROCOPY
 #define SO_EE_ORIGIN_ZEROCOPY          5
@@ -164,17 +168,39 @@ static int do_accept(int fd)
        return fd;
 }
 
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+       struct cmsghdr *cm;
+
+       if (!msg->msg_control)
+               error(1, errno, "NULL msg_control");
+       cm = (void *)msg->msg_control;
+       cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+       cm->cmsg_level = SOL_RDS;
+       cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+       memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
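+
+/* Each zerocopy send attaches a monotonically increasing cookie; completed
+ * cookies are returned by the kernel in an RDS_CMSG_ZCOPY_COMPLETION cmsg
+ * (see do_recvmsg_completion() below).
+ */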
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
 {
        int ret, len, i, flags;
+       static uint32_t cookie;
+       char ckbuf[CMSG_SPACE(sizeof(cookie))];
 
        len = 0;
        for (i = 0; i < msg->msg_iovlen; i++)
                len += msg->msg_iov[i].iov_len;
 
        flags = MSG_DONTWAIT;
-       if (do_zerocopy)
+       if (do_zerocopy) {
                flags |= MSG_ZEROCOPY;
+               if (domain == PF_RDS) {
+                       /* Zero the cmsg buffer itself; zeroing the pointer
+                        * field would be a no-op as it is reassigned below.
+                        */
+                       memset(ckbuf, 0, sizeof(ckbuf));
+                       msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+                       msg->msg_control = (struct cmsghdr *)ckbuf;
+                       add_zcopy_cookie(msg, ++cookie);
+               }
+       }
 
        ret = sendmsg(fd, msg, flags);
        if (ret == -1 && errno == EAGAIN)
@@ -190,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
                if (do_zerocopy && ret)
                        expected_completions++;
        }
+       if (do_zerocopy && domain == PF_RDS) {
+               msg->msg_control = NULL;
+               msg->msg_controllen = 0;
+       }
 
        return true;
 }
@@ -216,7 +246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg)
                msg->msg_iov[0].iov_len = payload_len + extra_len;
                extra_len = 0;
 
-               do_sendmsg(fd, msg, do_zerocopy);
+               do_sendmsg(fd, msg, do_zerocopy,
+                          (cfg_dst_addr.ss_family == AF_INET ?
+                           PF_INET : PF_INET6));
        }
 
        do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
@@ -300,14 +332,65 @@ static int do_setup_tx(int domain, int type, int protocol)
        if (cfg_zerocopy)
                do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
 
-       if (domain != PF_PACKET)
+       if (domain != PF_PACKET && domain != PF_RDS)
                if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
                        error(1, errno, "connect");
 
+       if (domain == PF_RDS) {
+               if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+                       error(1, errno, "bind");
+       }
+
        return fd;
 }
 
-static bool do_recv_completion(int fd)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+       int i;
+
+       if (ck->num > RDS_MAX_ZCOOKIES)
+               error(1, 0, "Returned %d cookies, max expected %d\n",
+                     ck->num, RDS_MAX_ZCOOKIES);
+       for (i = 0; i < ck->num; i++)
+               if (cfg_verbose >= 2)
+                       fprintf(stderr, "%d\n", ck->cookies[i]);
+       return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+       char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+       struct rds_zcopy_cookies *ck;
+       struct cmsghdr *cmsg;
+       struct msghdr msg;
+       bool ret = false;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_control = cmsgbuf;
+       msg.msg_controllen = sizeof(cmsgbuf);
+
+       if (recvmsg(fd, &msg, MSG_DONTWAIT))
+               return ret;
+
+       if (msg.msg_flags & MSG_CTRUNC)
+               error(1, errno, "recvmsg notification: truncated");
+
+       for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+               if (cmsg->cmsg_level == SOL_RDS &&
+                   cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+                       ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+                       completions += do_process_zerocopy_cookies(ck);
+                       ret = true;
+                       break;
+               }
+               error(0, 0, "ignoring cmsg at level %d type %d\n",
+                           cmsg->cmsg_level, cmsg->cmsg_type);
+       }
+       return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
 {
        struct sock_extended_err *serr;
        struct msghdr msg = {};
@@ -316,6 +399,9 @@ static bool do_recv_completion(int fd)
        int ret, zerocopy;
        char control[100];
 
+       if (domain == PF_RDS)
+               return do_recvmsg_completion(fd);
+
        msg.msg_control = control;
        msg.msg_controllen = sizeof(control);
 
@@ -337,6 +423,7 @@ static bool do_recv_completion(int fd)
                      cm->cmsg_level, cm->cmsg_type);
 
        serr = (void *) CMSG_DATA(cm);
+
        if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
                error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
        if (serr->ee_errno != 0)
@@ -371,20 +458,20 @@ static bool do_recv_completion(int fd)
 }
 
 /* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
 {
-       while (do_recv_completion(fd)) {}
+       while (do_recv_completion(fd, domain)) {}
 }
 
 /* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
 {
        int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
 
        while (completions < expected_completions &&
               gettimeofday_ms() < tstop) {
-               if (do_poll(fd, POLLERR))
-                       do_recv_completions(fd);
+               if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+                       do_recv_completions(fd, domain);
        }
 
        if (completions < expected_completions)
@@ -444,6 +531,13 @@ static void do_tx(int domain, int type, int protocol)
                msg.msg_iovlen++;
        }
 
+       if (domain == PF_RDS) {
+               msg.msg_name = &cfg_dst_addr;
+               msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
+                                   sizeof(struct sockaddr_in) :
+                                   sizeof(struct sockaddr_in6));
+       }
+
        iov[2].iov_base = payload;
        iov[2].iov_len = cfg_payload_len;
        msg.msg_iovlen++;
@@ -454,17 +548,17 @@ static void do_tx(int domain, int type, int protocol)
                if (cfg_cork)
                        do_sendmsg_corked(fd, &msg);
                else
-                       do_sendmsg(fd, &msg, cfg_zerocopy);
+                       do_sendmsg(fd, &msg, cfg_zerocopy, domain);
 
                while (!do_poll(fd, POLLOUT)) {
                        if (cfg_zerocopy)
-                               do_recv_completions(fd);
+                               do_recv_completions(fd, domain);
                }
 
        } while (gettimeofday_ms() < tstop);
 
        if (cfg_zerocopy)
-               do_recv_remaining_completions(fd);
+               do_recv_remaining_completions(fd, domain);
 
        if (close(fd))
                error(1, errno, "close");
@@ -610,6 +704,7 @@ static void parse_opts(int argc, char **argv)
                                    40 /* max tcp options */;
        int c;
        char *daddr = NULL, *saddr = NULL;
+       char *cfg_test;
 
        cfg_payload_len = max_payload_len;
 
@@ -667,6 +762,14 @@ static void parse_opts(int argc, char **argv)
                        break;
                }
        }
+
+       cfg_test = argv[argc - 1];
+       if (strcmp(cfg_test, "rds") == 0) {
+               if (!daddr)
+                       error(1, 0, "-D <server addr> required for PF_RDS\n");
+               if (!cfg_rx && !saddr)
+                       error(1, 0, "-S <client addr> required for PF_RDS\n");
+       }
        setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
        setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
 
@@ -699,6 +802,8 @@ int main(int argc, char **argv)
                do_test(cfg_family, SOCK_STREAM, 0);
        else if (!strcmp(cfg_test, "udp"))
                do_test(cfg_family, SOCK_DGRAM, 0);
+       else if (!strcmp(cfg_test, "rds"))
+               do_test(PF_RDS, SOCK_SEQPACKET, 0);
        else
                error(1, 0, "unknown cfg_test %s", cfg_test);
 
index 989f917..bd9b963 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/if_packet.h>
+#include <net/if.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #include <netinet/udp.h>
  * @return -1 if mode is bad, a valid socket otherwise */
 static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 {
+       struct sockaddr_ll addr = {0};
        int fd, val;
 
-       fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+       fd = socket(PF_PACKET, SOCK_RAW, 0);
        if (fd < 0) {
                perror("socket packet");
                exit(1);
        }
 
+       pair_udp_setfilter(fd);
+
+       addr.sll_family = AF_PACKET;
+       addr.sll_protocol = htons(ETH_P_IP);
+       addr.sll_ifindex = if_nametoindex("lo");
+       if (addr.sll_ifindex == 0) {
+               perror("if_nametoindex");
+               exit(1);
+       }
+       if (bind(fd, (void *) &addr, sizeof(addr))) {
+               perror("bind packet");
+               exit(1);
+       }
+
        val = (((int) typeflags) << 16) | group_id;
        if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
                if (close(fd)) {
@@ -90,7 +106,6 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
                return -1;
        }
 
-       pair_udp_setfilter(fd);
        return fd;
 }
 
@@ -128,6 +143,8 @@ static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
 
 static void sock_fanout_set_ebpf(int fd)
 {
+       static char log_buf[65536];
+
        const int len_off = __builtin_offsetof(struct __sk_buff, len);
        struct bpf_insn prog[] = {
                { BPF_ALU64 | BPF_MOV | BPF_X,   6, 1, 0, 0 },
@@ -140,7 +157,6 @@ static void sock_fanout_set_ebpf(int fd)
                { BPF_ALU   | BPF_MOV | BPF_K,   0, 0, 0, 0 },
                { BPF_JMP   | BPF_EXIT,          0, 0, 0, 0 }
        };
-       char log_buf[512];
        union bpf_attr attr;
        int pfd;
 
@@ -228,7 +244,7 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[])
 
        if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
            (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
-               fprintf(stderr, "ERROR: incorrect queue lengths\n");
+               fprintf(stderr, "warning: incorrect queue lengths\n");
                return 1;
        }
 
@@ -347,7 +363,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
        uint8_t type = typeflags & 0xFF;
        int fds[2], fds_udp[2][2], ret;
 
-       fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
+       fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+               typeflags, PORT_BASE, PORT_BASE + port_off);
 
        fds[0] = sock_fanout_open(typeflags, 0);
        fds[1] = sock_fanout_open(typeflags, 0);
@@ -418,7 +435,7 @@ int main(int argc, char **argv)
        const int expect_cpu1[2][2]     = { { 0, 20 },  { 0, 20 } };
        const int expect_bpf[2][2]      = { { 15, 5 },  { 15, 20 } };
        const int expect_uniqueid[2][2] = { { 20, 20},  { 20, 20 } };
-       int port_off = 2, tries = 5, ret;
+       int port_off = 2, tries = 20, ret;
 
        test_control_single();
        test_control_group();
@@ -427,10 +444,14 @@ int main(int argc, char **argv)
        /* find a set of ports that do not collide onto the same socket */
        ret = test_datapath(PACKET_FANOUT_HASH, port_off,
                            expect_hash[0], expect_hash[1]);
-       while (ret && tries--) {
+       while (ret) {
                fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
                ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
                                    expect_hash[0], expect_hash[1]);
+               if (!--tries) {
+                       fprintf(stderr, "too many collisions\n");
+                       return 1;
+               }
        }
 
        ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
index a622eee..e6f4852 100755 (executable)
@@ -517,6 +517,7 @@ kci_test_gretap()
        ip link help gretap 2>&1 | grep -q "^Usage:"
        if [ $? -ne 0 ];then
                echo "SKIP: gretap: iproute2 too old"
+               ip netns del "$testns"
                return 1
        fi
 
@@ -543,6 +544,7 @@ kci_test_gretap()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: gretap"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: gretap"
@@ -565,6 +567,7 @@ kci_test_ip6gretap()
        ip link help ip6gretap 2>&1 | grep -q "^Usage:"
        if [ $? -ne 0 ];then
                echo "SKIP: ip6gretap: iproute2 too old"
+               ip netns del "$testns"
                return 1
        fi
 
@@ -591,6 +594,7 @@ kci_test_ip6gretap()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: ip6gretap"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: ip6gretap"
@@ -655,6 +659,7 @@ kci_test_erspan()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: erspan"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: erspan"
@@ -720,6 +725,7 @@ kci_test_ip6erspan()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: ip6erspan"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: ip6erspan"
index 21fe149..bea079e 100755 (executable)
@@ -9,7 +9,7 @@ fi
 echo "--------------------"
 echo "running psock_fanout test"
 echo "--------------------"
-./psock_fanout
+./in_netns.sh ./psock_fanout
 if [ $? -ne 0 ]; then
        echo "[FAIL]"
 else
@@ -19,7 +19,7 @@ fi
 echo "--------------------"
 echo "running psock_tpacket test"
 echo "--------------------"
-./psock_tpacket
+./in_netns.sh ./psock_tpacket
 if [ $? -ne 0 ]; then
        echo "[FAIL]"
 else
index 39fd362..0f2698f 100644 (file)
@@ -57,7 +57,7 @@ volatile int gotsig;
 
 void sighandler(int sig, siginfo_t *info, void *ctx)
 {
-       struct ucontext *ucp = ctx;
+       ucontext_t *ucp = ctx;
 
        if (!testing) {
                signal(sig, SIG_DFL);
index 0b457e8..5df6099 100644 (file)
@@ -141,6 +141,15 @@ struct seccomp_data {
 #define SECCOMP_FILTER_FLAG_LOG 2
 #endif
 
+#ifndef PTRACE_SECCOMP_GET_METADATA
+#define PTRACE_SECCOMP_GET_METADATA    0x420d
+
+struct seccomp_metadata {
+       __u64 filter_off;       /* Input: which filter */
+       __u64 flags;            /* Output: filter's flags */
+};
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -2845,6 +2854,58 @@ TEST(get_action_avail)
        EXPECT_EQ(errno, EOPNOTSUPP);
 }
 
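+/*
+ * The child installs two seccomp filters, one with
+ * SECCOMP_FILTER_FLAG_LOG and one without; the tracer then reads each
+ * filter's flags back with PTRACE_SECCOMP_GET_METADATA and checks them.
+ */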
+TEST(get_metadata)
+{
+       pid_t pid;
+       int pipefd[2];
+       char buf;
+       struct seccomp_metadata md;
+
+       ASSERT_EQ(0, pipe(pipefd));
+
+       pid = fork();
+       ASSERT_GE(pid, 0);
+       if (pid == 0) {
+               struct sock_filter filter[] = {
+                       BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+               };
+               struct sock_fprog prog = {
+                       .len = (unsigned short)ARRAY_SIZE(filter),
+                       .filter = filter,
+               };
+
+               /* one with log, one without */
+               ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
+                                    SECCOMP_FILTER_FLAG_LOG, &prog));
+               ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+
+               ASSERT_EQ(0, close(pipefd[0]));
+               ASSERT_EQ(1, write(pipefd[1], "1", 1));
+               ASSERT_EQ(0, close(pipefd[1]));
+
+               while (1)
+                       sleep(100);
+       }
+
+       ASSERT_EQ(0, close(pipefd[1]));
+       ASSERT_EQ(1, read(pipefd[0], &buf, 1));
+
+       ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
+       ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+
+       md.filter_off = 0;
+       ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+       EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
+       EXPECT_EQ(md.filter_off, 0);
+
+       md.filter_off = 1;
+       ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+       EXPECT_EQ(md.flags, 0);
+       EXPECT_EQ(md.filter_off, 1);
+
+       ASSERT_EQ(0, kill(pid, SIGKILL));
+}
+
 /*
  * TODO:
  * - add microbenchmarks
index 970ff29..3a03367 100644 (file)
@@ -14,11 +14,11 @@ REQUIREMENTS
 
 *  The kernel must have network namespace support
 
-*   The kernel must have veth support available, as a veth pair is created
+*  The kernel must have veth support available, as a veth pair is created
    prior to running the tests.
 
-*  All tc-related features must be built in or available as modules.
-   To check what is required in current setup run:
+*  All tc-related features being tested must be built in or available as
+   modules.  To check what is required in the current setup, run:
    ./tdc.py -c
 
    Note:
@@ -44,10 +44,13 @@ using the -p option when running tdc:
 RUNNING TDC
 -----------
 
-To use tdc, root privileges are required. tdc will not run otherwise.
+To use tdc, root privileges are required.  This is because the
+commands being tested must be run as root.  The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
 
-All tests are executed inside a network namespace to prevent conflicts
-within the host.
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
 
 Running tdc without any arguments will run all tests. Refer to the section
 on command line arguments for more information, or run:
@@ -59,6 +62,33 @@ output captured from the failing test will be printed immediately following
 the failed test in the TAP output.
 
 
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future).  A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+  - setup
+  - execute
+  - verify
+  - teardown
+
+The setup and teardown stages can run zero or more commands.  The setup
+stage prepares anything the test needs.  The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next.  These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command.  The execute stage
+tests the return code against one or more acceptable values.  The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
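+In rough Python terms (an illustrative sketch only, not the actual
+tdc.py code; the helper names are made up), one test case flows
+through the stages like this:
+
+    import re
+    import subprocess
+
+    def run(cmd):
+        p = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+        return p.returncode, p.stdout
+
+    def run_case(tc):
+        for cmd in tc['setup']:              # setup commands must succeed
+            assert run(cmd)[0] == 0
+        rc, _ = run(tc['cmdUnderTest'])      # execute: test the return code
+        assert rc == int(tc['expExitCode'])
+        rc, out = run(tc['verifyCmd'])       # verify: return code and stdout
+        assert rc == 0
+        assert len(re.findall(tc['matchPattern'], out)) == int(tc['matchCount'])
+        for cmd in tc['teardown']:           # teardown must succeed
+            assert run(cmd)[0] == 0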
+
 USER-DEFINED CONSTANTS
 ----------------------
 
@@ -70,23 +100,132 @@ executed as part of the test. More will be added as test cases require.
 Example:
        $TC qdisc add dev $DEV1 ingress
 
+The NAMES values are substituted into the commands in the test cases.
+
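+Internally the substitution is done with Python's string.Template; a
+small sketch (the NAMES values here are made up):
+
+    from string import Template
+
+    NAMES = {'TC': '/sbin/tc', 'DEV1': 'v0p1'}
+    cmd = Template('$TC qdisc add dev $DEV1 ingress').safe_substitute(NAMES)
+    # cmd is now '/sbin/tc qdisc add dev v0p1 ingress'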
 
 COMMAND LINE ARGUMENTS
 ----------------------
 
 Run tdc.py -h to see the full list of available arguments.
 
--p PATH           Specify the tc executable located at PATH to be used on this
-                  test run
--c                Show the available test case categories in this test file
--c CATEGORY       Run only tests that belong to CATEGORY
--f FILE           Read test cases from the JSON file named FILE
--l [CATEGORY]     List all test cases in the JSON file. If CATEGORY is
-                  specified, list test cases matching that category.
--s ID             Show the test case matching ID
--e ID             Execute the test case identified by ID
--i                Generate unique ID numbers for test cases with no existing
-                  ID number
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+              [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v]
+              [-d DEVICE] [-n NS] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -p PATH, --path PATH  The full path to the tc executable to use
+  -v, --verbose         Show the commands that are being run
+  -d DEVICE, --device DEVICE
+                        Execute the test case in flower category
+
+selection:
+  select which test cases: files plus directories; filtered by categories
+  plus testids
+
+  -D DIR [DIR ...], --directory DIR [DIR ...]
+                        Collect tests from the specified directory(ies)
+                        (default [tc-tests])
+  -f FILE [FILE ...], --file FILE [FILE ...]
+                        Run tests from the specified file(s)
+  -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+                        Run tests only from the specified category/ies, or if
+                        no category/ies is/are specified, list known
+                        categories.
+  -e ID [ID ...], --execute ID [ID ...]
+                        Execute the specified test cases with specified IDs
+
+action:
+  select action to perform on selected test cases
+
+  -l, --list            List all test cases, or those only within the
+                        specified category
+  -s, --show            Display the selected test cases
+  -i, --id              Generate ID numbers for new test cases
+
+netns:
+  options for nsPlugin (run commands in net namespace)
+
+  -n NS, --namespace NS
+                        Run commands in namespace NS
+
+valgrind:
+  options for valgrindPlugin (run command under test under Valgrind)
+
+  -V, --valgrind        Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib.  They are executed from
+the directory plugins.  Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx  1 bjb  bjb    27 Oct  4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx  1 bjb  bjb    25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x  1 bjb  bjb     0 Sep 29 15:56 __init__.py
+
+Each plugin is a subclass of TdcPlugin (defined in TdcPlugin.py) and
+must be called "SubPlugin" so tdc can find it.  The plugins are
+distinguished from each other in the python program by their module
+names.
+
+This base class supplies "hooks" to run extra functions.  These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during the setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed.  This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+  - pre  (pre-suite)
+  - setup
+  - command
+  - verify
+  - teardown
+  - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py.  To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins.  It will be detected at run time and invoked at the
+appropriate times.  A minimal sketch follows the list below.  There
+are a few examples in the plugin-lib directory:
+
+  - rootPlugin.py:
+      implements the enforcement of running as root
+  - nsPlugin.py:
+      sets up a network namespace and runs all commands in that namespace
+  - valgrindPlugin.py:
+      runs each command in the execute stage under valgrind,
+      and checks for leaks.
+      This plugin outputs two results for each test in the test file:
+      the existing result for whether the test passed or failed, and an
+      extra result for whether the command leaked memory.
+      (This one is a preliminary version; it may not work quite right yet,
+      but the overall template is there and it should only need tweaks.)
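+
+A minimal plugin, sketched here for illustration (the logging behaviour
+is made up; the structure matches the plugins above):
+
+    from TdcPlugin import TdcPlugin
+
+    class SubPlugin(TdcPlugin):
+        def __init__(self):
+            self.sub_class = 'example/SubPlugin'
+            super().__init__()
+
+        def adjust_command(self, stage, command):
+            super().adjust_command(stage, command)
+            # for example, log every command in the execute stage
+            if stage == 'execute':
+                print('about to run: {}'.format(command))
+            return command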
 
 
 ACKNOWLEDGEMENTS
index 6a266d8..c406985 100644 (file)
@@ -5,6 +5,27 @@ tc Testing Suite To-Do list:
 
 - Add support for multiple versions of tc to run successively
 
-- Improve error messages when tdc aborts its run
+- Improve error messages when tdc aborts its run.  Partially done - still
+  need to better handle problems in pre- and post-suite.
 
-- Allow tdc to write its results to file
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+  Maybe use python logger module for this too.
+
+- A better implementation of the "hooks".  Currently, every plugin
+  will attempt to run a function at every hook point.  Could be
+  changed so that plugin __init__ methods will register functions to
+  be run in the various predefined times.  Then if a plugin does not
+  require action at a specific point, no penalty will be paid for
+  trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+  and a way to configure a test suite,
+  to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644 (file)
index 0000000..3ee9a6d
--- /dev/null
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+class TdcPlugin:
+    def __init__(self):
+        super().__init__()
+        print(' -- {}.__init__'.format(self.sub_class))
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        self.testcount = testcount
+        self.testidlist = testidlist
+        if self.args.verbose > 1:
+            print(' -- {}.pre_suite'.format(self.sub_class))
+
+    def post_suite(self, index):
+        '''run commands after test_runner completes the test loop
+        index is the last ordinal number of test that was attempted'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_suite'.format(self.sub_class))
+
+    def pre_case(self, test_ordinal, testid):
+        '''run commands before test_runner does one test'''
+        if self.args.verbose > 1:
+            print(' -- {}.pre_case'.format(self.sub_class))
+        self.args.testid = testid
+        self.args.test_ordinal = test_ordinal
+
+    def post_case(self):
+        '''run commands after test_runner does one test'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_case'.format(self.sub_class))
+
+    def pre_execute(self):
+        '''run command before test-runner does the execute step'''
+        if self.args.verbose > 1:
+            print(' -- {}.pre_execute'.format(self.sub_class))
+
+    def post_execute(self):
+        '''run command after test-runner does the execute step'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_execute'.format(self.sub_class))
+
+    def adjust_command(self, stage, command):
+        '''adjust the command'''
+        if self.args.verbose > 1:
+            print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+        # if stage == 'pre':
+        #     pass
+        # elif stage == 'setup':
+        #     pass
+        # elif stage == 'execute':
+        #     pass
+        # elif stage == 'verify':
+        #     pass
+        # elif stage == 'teardown':
+        #     pass
+        # elif stage == 'post':
+        #     pass
+        # else:
+        #     pass
+
+        return command
+
+    def add_args(self, parser):
+        '''Get the plugin args from the command line'''
+        self.argparser = parser
+        return self.argparser
+
+    def check_args(self, args, remaining):
+        '''Check that the args are set correctly'''
+        self.args = args
+        if self.args.verbose > 1:
+            print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644 (file)
index 0000000..c18f88d
--- /dev/null
@@ -0,0 +1,104 @@
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - bjb@mojatatu.com
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+    pre        (the pre-suite stage)
+    prepare
+    execute
+    verify
+    teardown
+    post       (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+    def __init__(self)
+    def pre_suite(self, testcount, testidlist)     # see "PRE_SUITE" below
+    def post_suite(self, ordinal)                  # see "SKIPPING" below
+    def pre_case(self, test_ordinal, testid)       # see "PRE_CASE" below
+    def post_case(self)
+    def pre_execute(self)
+    def post_execute(self)
+    def adjust_command(self, stage, command)       # see "ADJUST" below
+    def add_args(self, parser)                     # see "ADD_ARGS" below
+    def check_args(self, args, remaining)          # see "CHECK_ARGS" below
+
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run).  This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped.  The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output.  The tdc.py script will do that for the test suite as
+defined in the test case.  If the plugin is being used to run extra
+tests on each test (eg, checking for memory leaks in associated
+co-processes), that extra TAP output can be generated in the
+post_suite method using the info passed in to the pre_suite method.
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test to be attempted.  It can use this info when outputting
+the TAP output for the extra test cases.
+
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id.  Useful for outputting the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed.  The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+    'pre'
+    'setup'
+    'command'
+    'verify'
+    'teardown'
+    'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it.  Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
+
+
+CHECK_ARGS
+
+The check_args method is there so the plugin can do validation on
+the args, if needed.  If there is a problem, an Exception should
+be raised, with a string that explains the problem.
+
+eg:  raise Exception('plugin xxx, arg -y is wrong, fix it')
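+
+Putting ADD_ARGS and CHECK_ARGS together, an illustrative sketch (the
+option name and the validation rule are made up):
+
+    from TdcPlugin import TdcPlugin
+
+    class SubPlugin(TdcPlugin):
+        def __init__(self):
+            self.sub_class = 'example/SubPlugin'
+            super().__init__()
+
+        def add_args(self, parser):
+            super().add_args(parser)
+            self.argparser_group = self.argparser.add_argument_group(
+                'example', 'options for the example plugin')
+            self.argparser_group.add_argument(
+                '-y', '--yval', type=int, default=0, help='a sample option')
+            return self.argparser
+
+        def check_args(self, args, remaining):
+            super().check_args(args, remaining)
+            if args.yval < 0:
+                raise Exception('plugin example, arg -y is wrong, fix it')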
index 0043833..17b267d 100644 (file)
@@ -12,14 +12,18 @@ template.json for the required JSON format for test cases.
 Include the 'id' field, but do not assign a value. Running tdc with the -i
 option will generate a unique ID for that test case.
 
-tdc will recursively search the 'tc' subdirectory for .json files.  Any
-test case files you create in these directories will automatically be included.
-If you wish to store your custom test cases elsewhere, be sure to run tdc
-with the -f argument and the path to your file.
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files.  Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
 
-Be aware of required escape characters in the JSON data - particularly when
-defining the match pattern. Refer to the tctests.json file for examples when
-in doubt.
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt.  The match pattern is a python regular
+expression, but because it is embedded in JSON it must be written
+using JSON string escaping.
 
 
 TEST CASE STRUCTURE
@@ -69,7 +73,8 @@ SETUP/TEARDOWN ERRORS
 If an error is detected during the setup/teardown process, execution of the
 tests will immediately stop with an error message and the namespace in which
 the tests are run will be destroyed. This is to prevent inaccurate results
-in the test cases.
+in the test cases.  tdc will output a series of TAP results for the skipped
+tests.
 
 Repeated failures of the setup/teardown may indicate a problem with the test
 case, or possibly even a bug in one of the commands that are not being tested.
@@ -79,3 +84,17 @@ so that it doesn't halt the script for an error that doesn't matter. Turn the
 individual command into a list, with the command being first, followed by all
 acceptable exit codes for the command.
 
+Example:
+
+A pair of setup commands.  The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action reclassify index 65536"
+        ],
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644 (file)
index 0000000..aa8a266
--- /dev/null
@@ -0,0 +1,27 @@
+tdc.py will look for plugins in a directory named plugins under the cwd.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+    __init__.py
+    10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+    20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+    30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+    Check if the uid is root.  If not, bail out.
+
+valgrindPlugin
+    Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+    This plugin will write files to the cwd, called vgnd-xxx.log.  These will contain
+    the valgrind output for test xxx.  Any file matching the glob 'vgnd-*.log' will be
+    deleted at the end of the run.
+
+nsPlugin
+    Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644 (file)
index 0000000..a194b1a
--- /dev/null
@@ -0,0 +1,141 @@
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'ns/SubPlugin'
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        super().pre_suite(testcount, testidlist)
+
+        if self.args.namespace:
+            self._ns_create()
+
+    def post_suite(self, index):
+        '''run commands after test_runner completes the test loop'''
+        super().post_suite(index)
+        if self.args.verbose:
+            print('{}.post_suite'.format(self.sub_class))
+
+        if self.args.namespace:
+            self._ns_destroy()
+
+    def add_args(self, parser):
+        super().add_args(parser)
+        self.argparser_group = self.argparser.add_argument_group(
+            'netns',
+            'options for nsPlugin (run commands in net namespace)')
+        self.argparser_group.add_argument(
+            '-n', '--namespace', action='store_true',
+            help='Run commands in namespace')
+        return self.argparser
+
+    def adjust_command(self, stage, command):
+        super().adjust_command(stage, command)
+        cmdform = 'list'
+        cmdlist = list()
+
+        if not self.args.namespace:
+            return command
+
+        if self.args.verbose:
+            print('{}.adjust_command'.format(self.sub_class))
+
+        if not isinstance(command, list):
+            cmdform = 'str'
+            cmdlist = command.split()
+        else:
+            cmdlist = command
+        if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+            if self.args.verbose:
+                print('adjust_command:  stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
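+            # the insert(0, ...) calls run in reverse order, so the
+            # result is: ip netns exec <NS> <original command>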
+            cmdlist.insert(0, self.args.NAMES['NS'])
+            cmdlist.insert(0, 'exec')
+            cmdlist.insert(0, 'netns')
+            cmdlist.insert(0, 'ip')
+        else:
+            pass
+
+        if cmdform == 'str':
+            command = ' '.join(cmdlist)
+        else:
+            command = cmdlist
+
+        if self.args.verbose:
+            print('adjust_command:  return command [{}]'.format(command))
+        return command
+
+    def _ns_create(self):
+        '''
+        Create the network namespace in which the tests will be run and set up
+        the required network devices for it.
+        '''
+        if self.args.namespace:
+            cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link set $DEV0 up'
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            if self.args.device:
+                cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+                self._exec_cmd('pre', cmd)
+                cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+                self._exec_cmd('pre', cmd)
+
+    def _ns_destroy(self):
+        '''
+        Destroy the network namespace for testing (and any associated network
+        devices as well)
+        '''
+        if self.args.namespace:
+            cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('post', cmd)
+
+    def _exec_cmd(self, stage, command):
+        '''
+        Perform any required modifications on an executable command, then run
+        it in a subprocess and return the results.
+        '''
+        if '$' in command:
+            command = self._replace_keywords(command)
+
+        command = self.adjust_command(stage, command)
+        if self.args.verbose:
+            print('_exec_cmd:  command "{}"'.format(command))
+        proc = subprocess.Popen(command,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=ENVIR)
+        (rawout, serr) = proc.communicate()
+
+        if proc.returncode != 0 and len(serr) > 0:
+            foutput = serr.decode("utf-8")
+        else:
+            foutput = rawout.decode("utf-8")
+
+        proc.stdout.close()
+        proc.stderr.close()
+        return proc, foutput
+
+    def _replace_keywords(self, cmd):
+        """
+        For a given executable command, substitute any known
+        variables contained within NAMES with the correct values
+        """
+        tcmd = Template(cmd)
+        subcmd = tcmd.safe_substitute(self.args.NAMES)
+        return subcmd
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644 (file)
index 0000000..e36775b
--- /dev/null
@@ -0,0 +1,19 @@
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'root/SubPlugin'
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        # run commands before test_runner goes into a test loop
+        super().pre_suite(testcount, testidlist)
+
+        if os.geteuid():
+            print('This script must be run with root privileges', file=sys.stderr)
+            exit(1)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644 (file)
index 0000000..477a7bd
--- /dev/null
@@ -0,0 +1,142 @@
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+    return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'valgrind/SubPlugin'
+        self.tap = ''
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        super().pre_suite(testcount, testidlist)
+        if self.args.verbose > 1:
+            print('{}.pre_suite'.format(self.sub_class))
+        if self.args.valgrind:
+            self._add_to_tap('1..{}\n'.format(self.testcount))
+
+    def post_suite(self, index):
+        '''run commands after test_runner completes the test loop'''
+        super().post_suite(index)
+        self._add_to_tap('\n|---\n')
+        if self.args.verbose > 1:
+            print('{}.post_suite'.format(self.sub_class))
+        print('{}'.format(self.tap))
+        if self.args.verbose < 4:
+            subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+    def add_args(self, parser):
+        super().add_args(parser)
+        self.argparser_group = self.argparser.add_argument_group(
+            'valgrind',
+            'options for valgrindPlugin (run command under test under Valgrind)')
+
+        self.argparser_group.add_argument(
+            '-V', '--valgrind', action='store_true',
+            help='Run commands under valgrind')
+
+        return self.argparser
+
+    def adjust_command(self, stage, command):
+        super().adjust_command(stage, command)
+        cmdform = 'list'
+        cmdlist = list()
+
+        if not self.args.valgrind:
+            return command
+
+        if self.args.verbose > 1:
+            print('{}.adjust_command'.format(self.sub_class))
+
+        if not isinstance(command, list):
+            cmdform = 'str'
+            cmdlist = command.split()
+        else:
+            cmdlist = command
+
+        if stage == 'execute':
+            if self.args.verbose > 1:
+                print('adjust_command:  stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+                      format(stage, command, cmdlist))
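+            # the insert(0, ...) calls run in reverse order, so the result is:
+            #   valgrind -v --log-file=vgnd-<testid>.log --leak-check=full
+            #     --show-leak-kinds=definite,indirect --track-origins=yes <cmd>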
+            cmdlist.insert(0, '--track-origins=yes')
+            cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+            cmdlist.insert(0, '--leak-check=full')
+            cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+            cmdlist.insert(0, '-v')  # ask for summary of non-leak errors
+            cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
+        else:
+            pass
+
+        if cmdform == 'str':
+            command = ' '.join(cmdlist)
+        else:
+            command = cmdlist
+
+        if self.args.verbose > 1:
+            print('adjust_command:  return command [{}]'.format(command))
+        return command
+
+    def post_execute(self):
+        if not self.args.valgrind:
+            return
+
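+        # scrape the block/error counts out of valgrind's summary lines,
+        # which look like e.g. "definitely lost: 2,848 bytes in 12 blocks"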
+        self.definitely_lost_re = re.compile(
+            r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+        self.indirectly_lost_re = re.compile(
+            r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+        self.possibly_lost_re = re.compile(
+            r'possibly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+        self.non_leak_error_re = re.compile(
+            r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
+
+        def_num = 0
+        ind_num = 0
+        pos_num = 0
+        nle_num = 0
+
+        # what about concurrent test runs?  Maybe force them to be in different directories?
+        with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+            content = vfd.read()
+            def_mo = self.definitely_lost_re.search(content)
+            ind_mo = self.indirectly_lost_re.search(content)
+            pos_mo = self.possibly_lost_re.search(content)
+            nle_mo = self.non_leak_error_re.search(content)
+
+            if def_mo:
+                def_num = int(def_mo.group(2))
+            if ind_mo:
+                ind_num = int(ind_mo.group(2))
+            if pos_mo:
+                pos_num = int(pos_mo.group(2))
+            if nle_mo:
+                nle_num = int(nle_mo.group(1))
+
+        mem_results = ''
+        if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+            mem_results += 'not '
+
+        mem_results += 'ok {} - {}-mem # {}\n'.format(
+            self.args.test_ordinal, self.args.testid, 'memory leak check')
+        self._add_to_tap(mem_results)
+        if mem_results.startswith('not '):
+            print('{}'.format(content))
+            self._add_to_tap(content)
+
+    def _add_to_tap(self, more_tap_output):
+        self.tap += more_tap_output
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
index fc373fd..241eea3 100755 (executable)
@@ -11,16 +11,96 @@ import re
 import os
 import sys
 import argparse
+import importlib
 import json
 import subprocess
+import time
+import traceback
 from collections import OrderedDict
 from string import Template
 
 from tdc_config import *
 from tdc_helper import *
 
-
-USE_NS = True
+import TdcPlugin
+
+
+class PluginMgrTestFail(Exception):
+    def __init__(self, stage, output, message):
+        self.stage = stage
+        self.output = output
+        self.message = message
+
+class PluginMgr:
+    def __init__(self, argparser):
+        super().__init__()
+        self.plugins = {}
+        self.plugin_instances = []
+        self.args = []
+        self.argparser = argparser
+
+        # TODO, put plugins in order
+        plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
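+        # import every plugins/*.py file (skipping __init__.py and
+        # editor leftovers) and instantiate its SubPlugin class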
+        for dirpath, dirnames, filenames in os.walk(plugindir):
+            for fn in filenames:
+                if (fn.endswith('.py') and
+                    not fn == '__init__.py' and
+                    not fn.startswith('#') and
+                    not fn.startswith('.#')):
+                    mn = fn[0:-3]
+                    foo = importlib.import_module('plugins.' + mn)
+                    self.plugins[mn] = foo
+                    self.plugin_instances.append(foo.SubPlugin())
+
+    def call_pre_suite(self, testcount, testidlist):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.pre_suite(testcount, testidlist)
+
+    def call_post_suite(self, index):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_suite(index)
+
+    def call_pre_case(self, test_ordinal, testid):
+        for pgn_inst in self.plugin_instances:
+            try:
+                pgn_inst.pre_case(test_ordinal, testid)
+            except Exception as ee:
+                print('exception {} in call to pre_case for {} plugin'.
+                      format(ee, pgn_inst.__class__))
+                print('test_ordinal is {}'.format(test_ordinal))
+                print('testid is {}'.format(testid))
+                raise
+
+    def call_post_case(self):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_case()
+
+    def call_pre_execute(self):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.pre_execute()
+
+    def call_post_execute(self):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_execute()
+
+    def call_add_args(self, parser):
+        for pgn_inst in self.plugin_instances:
+            parser = pgn_inst.add_args(parser)
+        return parser
+
+    def call_check_args(self, args, remaining):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.check_args(args, remaining)
+
+    def call_adjust_command(self, stage, command):
+        for pgn_inst in self.plugin_instances:
+            command = pgn_inst.adjust_command(stage, command)
+        return command
+
+    def _make_argparser(self, args):
+        self.argparser = argparse.ArgumentParser(
+            description='Linux TC unit tests')
 
 
 def replace_keywords(cmd):
@@ -33,21 +113,24 @@ def replace_keywords(cmd):
     return subcmd
 
 
-def exec_cmd(command, nsonly=True):
+def exec_cmd(args, pm, stage, command):
     """
     Perform any required modifications on an executable command, then run
     it in a subprocess and return the results.
     """
-    if (USE_NS and nsonly):
-        command = 'ip netns exec $NS ' + command
-
+    if len(command.strip()) == 0:
+        return None, None
     if '$' in command:
         command = replace_keywords(command)
 
+    command = pm.call_adjust_command(stage, command)
+    if args.verbose > 0:
+        print('command "{}"'.format(command))
     proc = subprocess.Popen(command,
         shell=True,
         stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
+        stderr=subprocess.PIPE,
+        env=ENVIR)
     (rawout, serr) = proc.communicate()
 
     if proc.returncode != 0 and len(serr) > 0:
@@ -60,36 +143,93 @@ def exec_cmd(command, nsonly=True):
     return proc, foutput
 
 
-def prepare_env(cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
     """
-    Execute the setup/teardown commands for a test case. Optionally
-    terminate test execution if the command fails.
+    Execute the setup/teardown commands for a test case.
+    Optionally terminate test execution if the command fails.
     """
+    if args.verbose > 0:
+        print('{}'.format(prefix))
     for cmdinfo in cmdlist:
-        if (type(cmdinfo) == list):
+        if isinstance(cmdinfo, list):
             exit_codes = cmdinfo[1:]
             cmd = cmdinfo[0]
         else:
             exit_codes = [0]
             cmd = cmdinfo
 
-        if (len(cmd) == 0):
+        if not cmd:
             continue
 
-        (proc, foutput) = exec_cmd(cmd)
+        (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+
+        if proc and (proc.returncode not in exit_codes):
+            print('', file=sys.stderr)
+            print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+                  file=sys.stderr)
+            print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+                  file=sys.stderr)
+            print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+            print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+            print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+            raise PluginMgrTestFail(
+                stage, output,
+                '"{}" did not complete successfully'.format(prefix))
+
+def run_one_test(pm, args, index, tidx):
+    result = True
+    tresult = ""
+    tap = ""
+    if args.verbose > 0:
+        print("\t====================\n=====> ", end="")
+    print("Test " + tidx["id"] + ": " + tidx["name"])
+
+    pm.call_pre_case(index, tidx['id'])
+    prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+    if (args.verbose > 0):
+        print('-----> execute stage')
+    pm.call_pre_execute()
+    (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+    exit_code = p.returncode
+    pm.call_post_execute()
+
+    if (exit_code != int(tidx["expExitCode"])):
+        result = False
+        print("exit:", exit_code, int(tidx["expExitCode"]))
+        print(procout)
+    else:
+        if args.verbose > 0:
+            print('-----> verify stage')
+        match_pattern = re.compile(
+            str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+        (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+        if procout:
+            match_index = re.findall(match_pattern, procout)
+            if len(match_index) != int(tidx["matchCount"]):
+                result = False
+        elif int(tidx["matchCount"]) != 0:
+            result = False
+
+    if not result:
+        tresult += 'not '
+    tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
+    tap += tresult
+
+    if result == False:
+        if procout:
+            tap += procout
+        else:
+            tap += 'No output!\n'
 
-        if proc.returncode not in exit_codes:
-            print
-            print("Could not execute:")
-            print(cmd)
-            print("\nError message:")
-            print(foutput)
-            print("\nAborting test run.")
-            ns_destroy()
-            exit(1)
+    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+    pm.call_post_case()
 
+    index += 1
 
-def test_runner(filtered_tests, args):
+    return tap
+
+def test_runner(pm, args, filtered_tests):
     """
     Driver function for the unit tests.
 
@@ -101,75 +241,86 @@ def test_runner(filtered_tests, args):
     testlist = filtered_tests
     tcount = len(testlist)
     index = 1
-    tap = str(index) + ".." + str(tcount) + "\n"
-
+    tap = ''
+    badtest = None
+    stage = None
+    emergency_exit = False
+    emergency_exit_message = ''
+
+    if args.notap:
+        if args.verbose:
+            tap = 'notap requested:  omitting test plan\n'
+    else:
+        tap = str(index) + ".." + str(tcount) + "\n"
+    try:
+        pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+    except Exception as ee:
+        ex_type, ex, ex_tb = sys.exc_info()
+        print('Exception {} {} (caught in pre_suite).'.
+              format(ex_type, ex))
+        # when the extra print statements are uncommented,
+        # the traceback does not appear between them
+        # (it appears way earlier in the tdc.py output)
+        # so don't bother ...
+        # print('--------------------(')
+        # print('traceback')
+        traceback.print_tb(ex_tb)
+        # print('--------------------)')
+        emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+        emergency_exit = True
+        stage = 'pre-SUITE'
+
+    if emergency_exit:
+        pm.call_post_suite(index)
+        return emergency_exit_message
+    if args.verbose > 1:
+        print('give test rig 2 seconds to stabilize')
+    time.sleep(2)
     for tidx in testlist:
-        result = True
-        tresult = ""
         if "flower" in tidx["category"] and args.device == None:
+            if args.verbose > 1:
+                print('Not executing test {} {} because DEV2 not defined'.
+                      format(tidx['id'], tidx['name']))
             continue
-        print("Test " + tidx["id"] + ": " + tidx["name"])
-        prepare_env(tidx["setup"])
-        (p, procout) = exec_cmd(tidx["cmdUnderTest"])
-        exit_code = p.returncode
-
-        if (exit_code != int(tidx["expExitCode"])):
-            result = False
-            print("exit:", exit_code, int(tidx["expExitCode"]))
-            print(procout)
-        else:
-            match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL)
-            (p, procout) = exec_cmd(tidx["verifyCmd"])
-            match_index = re.findall(match_pattern, procout)
-            if len(match_index) != int(tidx["matchCount"]):
-                result = False
-
-        if result == True:
-            tresult += "ok "
-        else:
-            tresult += "not ok "
-        tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n"
-
-        if result == False:
-            tap += procout
-
-        prepare_env(tidx["teardown"])
+        try:
+            badtest = tidx  # in case it goes bad
+            tap += run_one_test(pm, args, index, tidx)
+        except PluginMgrTestFail as pmtf:
+            ex_type, ex, ex_tb = sys.exc_info()
+            stage = pmtf.stage
+            message = pmtf.message
+            output = pmtf.output
+            print(message)
+            print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+                  format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+            print('---------------')
+            print('traceback')
+            traceback.print_tb(ex_tb)
+            print('---------------')
+            if stage == 'teardown':
+                print('accumulated output for this test:')
+                if pmtf.output:
+                    print(pmtf.output)
+            print('---------------')
+            break
         index += 1
 
-    return tap
-
-
-def ns_create():
-    """
-    Create the network namespace in which the tests will be run and set up
-    the required network devices for it.
-    """
-    if (USE_NS):
-        cmd = 'ip netns add $NS'
-        exec_cmd(cmd, False)
-        cmd = 'ip link add $DEV0 type veth peer name $DEV1'
-        exec_cmd(cmd, False)
-        cmd = 'ip link set $DEV1 netns $NS'
-        exec_cmd(cmd, False)
-        cmd = 'ip link set $DEV0 up'
-        exec_cmd(cmd, False)
-        cmd = 'ip -n $NS link set $DEV1 up'
-        exec_cmd(cmd, False)
-        cmd = 'ip link set $DEV2 netns $NS'
-        exec_cmd(cmd, False)
-        cmd = 'ip -n $NS link set $DEV2 up'
-        exec_cmd(cmd, False)
-
-
-def ns_destroy():
-    """
-    Destroy the network namespace for testing (and any associated network
-    devices as well)
-    """
-    if (USE_NS):
-        cmd = 'ip netns delete $NS'
-        exec_cmd(cmd, False)
+    # if we failed in setup or teardown,
+    # fill in the remaining tests with ok-skipped
+    count = index
+    if not args.notap:
+        tap += 'about to flush the tap output if tests need to be skipped\n'
+        if tcount + 1 != index:
+            for tidx in testlist[index - 1:]:
+                msg = 'skipped - previous {} failed'.format(stage)
+                tap += 'ok {} - {} # {} {} {}\n'.format(
+                    count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+                count += 1
+
+        tap += 'done flushing skipped test tap output\n'
+    pm.call_post_suite(index)
 
+    return tap
 
 def has_blank_ids(idlist):
     """
@@ -209,41 +360,67 @@ def set_args(parser):
     """
     Set the command line arguments for tdc.
     """
-    parser.add_argument('-p', '--path', type=str,
-                        help='The full path to the tc executable to use')
-    parser.add_argument('-c', '--category', type=str, nargs='?', const='+c',
-                        help='Run tests only from the specified category, or if no category is specified, list known categories.')
-    parser.add_argument('-f', '--file', type=str,
-                        help='Run tests from the specified file')
-    parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY',
-                        help='List all test cases, or those only within the specified category')
-    parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID',
-                        help='Display the test case with specified id')
-    parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID',
-                        help='Execute the single test case with specified ID')
-    parser.add_argument('-i', '--id', action='store_true', dest='gen_id',
-                        help='Generate ID numbers for new test cases')
+    parser.add_argument(
+        '-p', '--path', type=str,
+        help='The full path to the tc executable to use')
+    sg = parser.add_argument_group(
+        'selection', 'select which test cases: ' +
+        'files plus directories; filtered by categories plus testids')
+    ag = parser.add_argument_group(
+        'action', 'select action to perform on selected test cases')
+
+    sg.add_argument(
+        '-D', '--directory', nargs='+', metavar='DIR',
+        help='Collect tests from the specified directory(ies) ' +
+        '(default [tc-tests])')
+    sg.add_argument(
+        '-f', '--file', nargs='+', metavar='FILE',
+        help='Run tests from the specified file(s)')
+    sg.add_argument(
+        '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+        help='Run tests only from the specified category/ies, ' +
+        'or if no category/ies is/are specified, list known categories.')
+    sg.add_argument(
+        '-e', '--execute', nargs='+', metavar='ID',
+        help='Execute the specified test cases with specified IDs')
+    ag.add_argument(
+        '-l', '--list', action='store_true',
+        help='List all test cases, or those only within the specified category')
+    ag.add_argument(
+        '-s', '--show', action='store_true', dest='showID',
+        help='Display the selected test cases')
+    ag.add_argument(
+        '-i', '--id', action='store_true', dest='gen_id',
+        help='Generate ID numbers for new test cases')
+    parser.add_argument(
+        '-v', '--verbose', action='count', default=0,
+        help='Show the commands that are being run')
+    parser.add_argument(
+        '-N', '--notap', action='store_true',
+        help='Suppress tap results for command under test')
     parser.add_argument('-d', '--device',
                         help='Execute the test case in flower category')
     return parser
 
 
-def check_default_settings(args):
+def check_default_settings(args, remaining, pm):
     """
-    Process any arguments overriding the default settings, and ensure the
-    settings are correct.
+    Process any arguments overriding the default settings,
+    and ensure the settings are correct.
     """
     # Allow for overriding specific settings
     global NAMES
 
     if args.path != None:
-         NAMES['TC'] = args.path
+        NAMES['TC'] = args.path
     if args.device != None:
-         NAMES['DEV2'] = args.device
+        NAMES['DEV2'] = args.device
     if not os.path.isfile(NAMES['TC']):
         print("The specified tc path " + NAMES['TC'] + " does not exist.")
         exit(1)
 
+    pm.call_check_args(args, remaining)
+
 
 def get_id_list(alltests):
     """
@@ -277,7 +454,7 @@ def generate_case_ids(alltests):
     for c in alltests:
         if (c["id"] == ""):
             while True:
-                newid = str('%04x' % random.randrange(16**4))
+                newid = str('{:04x}'.format(random.randrange(16**4)))
                 if (does_id_exist(alltests, newid)):
                     continue
                 else:
@@ -300,40 +477,107 @@ def generate_case_ids(alltests):
         json.dump(testlist, outfile, indent=4)
         outfile.close()
 
+def filter_tests_by_id(args, testlist):
+    '''
+    Remove tests from testlist that are not in the named id list.
+    If id list is empty, return empty list.
+    '''
+    newlist = list()
+    if testlist and args.execute:
+        target_ids = args.execute
+
+        if isinstance(target_ids, list) and (len(target_ids) > 0):
+            newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+    return newlist
+
+def filter_tests_by_category(args, testlist):
+    '''
+    Remove tests from testlist that are not in a named category.
+    '''
+    answer = list()
+    if args.category and testlist:
+        test_ids = list()
+        for catg in set(args.category):
+            if catg == '+c':
+                continue
+            print('considering category {}'.format(catg))
+            for tc in testlist:
+                if catg in tc['category'] and tc['id'] not in test_ids:
+                    answer.append(tc)
+                    test_ids.append(tc['id'])
+
+    return answer
 
 def get_test_cases(args):
     """
     If a test case file is specified, retrieve tests from that file.
     Otherwise, glob for all json files in subdirectories and load from
     each one.
+    Also, if requested, filter by category, and add tests matching
+    certain ids.
     """
     import fnmatch
-    if args.file != None:
-        if not os.path.isfile(args.file):
-            print("The specified test case file " + args.file + " does not exist.")
-            exit(1)
-        flist = [args.file]
-    else:
-        flist = []
-        for root, dirnames, filenames in os.walk('tc-tests'):
+
+    flist = []
+    testdirs = ['tc-tests']
+
+    if args.file:
+        # at least one file was specified - remove the default directory
+        testdirs = []
+
+        for ff in args.file:
+            if not os.path.isfile(ff):
+                print("IGNORING file " + ff + "\n\tBECAUSE it does not exist.")
+            else:
+                flist.append(os.path.abspath(ff))
+
+    if args.directory:
+        testdirs = args.directory
+
+    for testdir in testdirs:
+        for root, dirnames, filenames in os.walk(testdir):
             for filename in fnmatch.filter(filenames, '*.json'):
-                flist.append(os.path.join(root, filename))
-    alltests = list()
+                candidate = os.path.abspath(os.path.join(root, filename))
+                if candidate not in flist:
+                    flist.append(candidate)
+
+    alltestcases = list()
     for casefile in flist:
-        alltests = alltests + (load_from_file(casefile))
-    return alltests
+        alltestcases = alltestcases + (load_from_file(casefile))
 
+    allcatlist = get_test_categories(alltestcases)
+    allidlist = get_id_list(alltestcases)
 
-def set_operation_mode(args):
+    testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+    idtestcases = filter_tests_by_id(args, alltestcases)
+    cattestcases = filter_tests_by_category(args, alltestcases)
+
+    cat_ids = [x['id'] for x in cattestcases]
+    if args.execute:
+        if args.category:
+            alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+        else:
+            alltestcases = idtestcases
+    else:
+        if cat_ids:
+            alltestcases = cattestcases
+        else:
+            # just accept the existing value of alltestcases,
+            # which has been filtered by file/directory
+            pass
+
+    return allcatlist, allidlist, testcases_by_cats, alltestcases
+
+
+def set_operation_mode(pm, args):
     """
     Load the test case data and process remaining arguments to determine
     what the script should do for this run, and call the appropriate
     function.
     """
-    alltests = get_test_cases(args)
+    ucat, idlist, testcases, alltests = get_test_cases(args)
 
     if args.gen_id:
-        idlist = get_id_list(alltests)
         if (has_blank_ids(idlist)):
             alltests = generate_case_ids(alltests)
         else:
@@ -347,70 +591,29 @@ def set_operation_mode(args):
         print("Please correct them before continuing.")
         exit(1)
 
-    ucat = get_test_categories(alltests)
-
     if args.showID:
-        show_test_case_by_id(alltests, args.showID[0])
+        for atest in alltests:
+            print_test_case(atest)
         exit(0)
 
-    if args.execute:
-        target_id = args.execute[0]
-    else:
-        target_id = ""
-
-    if args.category:
-        if (args.category == '+c'):
-            print("Available categories:")
-            print_sll(ucat)
-            exit(0)
-        else:
-            target_category = args.category
-    else:
-        target_category = ""
-
-
-    testcases = get_categorized_testlist(alltests, ucat)
+    if isinstance(args.category, list) and (len(args.category) == 0):
+        print("Available categories:")
+        print_sll(ucat)
+        exit(0)
 
     if args.list:
-        if (args.list == "++"):
+        if args.list:
             list_test_cases(alltests)
             exit(0)
-        elif(len(args.list) > 0):
-            if (args.list not in ucat):
-                print("Unknown category " + args.list)
-                print("Available categories:")
-                print_sll(ucat)
-                exit(1)
-            list_test_cases(testcases[args.list])
-            exit(0)
-
-    if (os.geteuid() != 0):
-        print("This script must be run with root privileges.\n")
-        exit(1)
-
-    ns_create()
-
-    if (len(target_category) == 0):
-        if (len(target_id) > 0):
-            alltests = list(filter(lambda x: target_id in x['id'], alltests))
-            if (len(alltests) == 0):
-                print("Cannot find a test case with ID matching " + target_id)
-                exit(1)
-        catresults = test_runner(alltests, args)
-        print("All test results: " + "\n\n" + catresults)
-    elif (len(target_category) > 0):
-        if (target_category == "flower") and args.device == None:
-            print("Please specify a NIC device (-d) to run category flower")
-            exit(1)
-        if (target_category not in ucat):
-            print("Specified category is not present in this file.")
-            exit(1)
-        else:
-            catresults = test_runner(testcases[target_category], args)
-            print("Category " + target_category + "\n\n" + catresults)
-
-    ns_destroy()
 
+    if len(alltests):
+        catresults = test_runner(pm, args, alltests)
+    else:
+        catresults = 'No tests found\n'
+    if args.notap:
+        print('TAP output suppression requested\n')
+    else:
+        print('All test results: \n\n{}'.format(catresults))
 
 def main():
     """
@@ -419,10 +622,15 @@ def main():
     """
     parser = args_parse()
     parser = set_args(parser)
+    pm = PluginMgr(parser)
+    parser = pm.call_add_args(parser)
     (args, remaining) = parser.parse_known_args()
-    check_default_settings(args)
+    args.NAMES = NAMES
+    check_default_settings(args, remaining, pm)
+    if args.verbose > 2:
+        print('args is {}'.format(args))
 
-    set_operation_mode(args)
+    set_operation_mode(pm, args)
 
     exit(0)
 
index 707c6bf..52fa539 100755 (executable)
@@ -49,13 +49,13 @@ index = 0
 for i in range(0x100):
     for j in range(0x100):
         for k in range(0x100):
-            mac = ("%02x:%02x:%02x" % (i, j, k))
+            mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
             src_mac = "e4:11:00:" + mac
             dst_mac = "e4:12:00:" + mac
-            cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
-                   "src_mac %s dst_mac %s action drop %s" %
+            cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+                   "src_mac {} dst_mac {} action drop {}".format
                    (device, prio, skip, src_mac, dst_mac, share_action))
-            file.write("%s\n" % cmd)
+            file.write("{}\n".format(cmd))
             index += 1
             if index >= number:
                 file.close()
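The hunk above only swaps %-style formatting for str.format(); the generated filter commands are byte-for-byte identical. A quick check of the equivalence (values are arbitrary):

    i, j, k = 0xe4, 0x11, 0x00
    old = "%02x:%02x:%02x" % (i, j, k)
    new = "{:02x}:{:02x}:{:02x}".format(i, j, k)
    assert old == new == "e4:11:00"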
index db38112..9f35c96 100644 (file)
@@ -57,20 +57,11 @@ def print_sll(items):
 
 def print_test_case(tcase):
     """ Pretty-printing of a given test case. """
+    print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
     for k in tcase.keys():
         if (isinstance(tcase[k], list)):
             print(k + ":")
             print_list(tcase[k])
         else:
-            print(k + ": " + tcase[k])
-
-
-def show_test_case_by_id(testlist, caseID):
-    """ Find the specified test case to pretty-print. """
-    if not any(d.get('id', None) == caseID for d in testlist):
-        print("That ID does not exist.")
-        exit(1)
-    else:
-        print_test_case(next((d for d in testlist if d['id'] == caseID)))
-
-
+            if not ((k == 'id') or (k == 'name')):
+                print(k + ": " + str(tcase[k]))
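With the id/name pair promoted into the banner, print_test_case() skips those keys in the per-field loop. A simplified standalone sketch of the resulting behaviour (the print_list handling for list-valued fields is omitted here):

    def print_test_case(tcase):
        print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
        for k in tcase:
            if k not in ('id', 'name'):
                print(k + ": " + str(tcase[k]))

    print_test_case({'id': 'd052', 'name': 'Add filter',
                     'category': ['filter'], 'expExitCode': 0})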
index 10ca46d..d744991 100644 (file)
@@ -5,16 +5,26 @@ include ../lib.mk
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-                       check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+                       check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
                        protection_keys test_vdso test_vsyscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32-bit support to be enabled even on 64-bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
-TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
 BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
@@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
 
-UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-
 define gen-target-rule-32
 $(1) $(1)_32: $(OUTPUT)/$(1)_32
 .PHONY: $(1) $(1)_32
@@ -40,12 +46,14 @@ endef
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
 $(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
 all: all_64
 TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
 $(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
 endif
 
index ec0f6b4..9c0325e 100644 (file)
@@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si)
        return si->si_upper;
 }
 #else
+
+/*
+ * This deals with old version of _sigfault in some distros:
+ *
+
+old _sigfault:
+       struct {
+               void *si_addr;
+       } _sigfault;
+
+new _sigfault:
+       struct {
+               void __user *_addr;
+               int _trapno;
+               short _addr_lsb;
+               union {
+                       struct {
+                               void __user *_lower;
+                               void __user *_upper;
+                       } _addr_bnd;
+                       __u32 _pkey;
+               };
+       } _sigfault;
+ *
+ */
+
 static inline void **__si_bounds_hack(siginfo_t *si)
 {
        void *sigfault = &si->_sifields._sigfault;
        void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
-       void **__si_lower = end_sigfault;
+       int *trapno = (int *)end_sigfault;
+       /* skip _trapno and _addr_lsb */
+       void **__si_lower = (void **)(trapno + 2);
 
        return __si_lower;
 }
@@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si)
 
 static inline void *__si_bounds_upper(siginfo_t *si)
 {
-       return (*__si_bounds_hack(si)) + sizeof(void *);
+       return *(__si_bounds_hack(si) + 1);
 }
 #endif
 
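The arithmetic behind the hack: the old _sigfault ends after its single pointer (offset 8), _trapno and _addr_lsb occupy bytes 8..14, and the pointer union forces alignment up to offset 16, which is exactly (int *)(base + 8) + 2. A ctypes sketch to sanity-check those offsets on x86-64 (field names mirror the kernel struct; the union is collapsed to its struct member for brevity):

    import ctypes

    class AddrBnd(ctypes.Structure):
        _fields_ = [("_lower", ctypes.c_void_p),
                    ("_upper", ctypes.c_void_p)]

    class Sigfault(ctypes.Structure):
        _fields_ = [("_addr",     ctypes.c_void_p),  # old layout ends at 8
                    ("_trapno",   ctypes.c_int),     # bytes 8..12
                    ("_addr_lsb", ctypes.c_short),   # 12..14, then padding
                    ("_addr_bnd", AddrBnd)]          # aligned up to 16

    print(Sigfault._addr_bnd.offset)  # 16, matching (int *)(base + 8) + 2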
index bc1b073..f15aa5a 100644 (file)
@@ -393,34 +393,6 @@ pid_t fork_lazy_child(void)
        return forkret;
 }
 
-void davecmp(void *_a, void *_b, int len)
-{
-       int i;
-       unsigned long *a = _a;
-       unsigned long *b = _b;
-
-       for (i = 0; i < len / sizeof(*a); i++) {
-               if (a[i] == b[i])
-                       continue;
-
-               dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
-       }
-}
-
-void dumpit(char *f)
-{
-       int fd = open(f, O_RDONLY);
-       char buf[100];
-       int nr_read;
-
-       dprintf2("maps fd: %d\n", fd);
-       do {
-               nr_read = read(fd, &buf[0], sizeof(buf));
-               write(1, buf, nr_read);
-       } while (nr_read > 0);
-       close(fd);
-}
-
 #define PKEY_DISABLE_ACCESS    0x1
 #define PKEY_DISABLE_WRITE     0x2
 
index a48da95..ddfdd63 100644 (file)
@@ -119,7 +119,9 @@ static void check_result(void)
 
 int main()
 {
+#ifdef CAN_BUILD_32
        int tmp;
+#endif
 
        sethandler(SIGTRAP, sigtrap, 0);
 
@@ -139,12 +141,13 @@ int main()
                      : : "c" (post_nop) : "r11");
        check_result();
 #endif
-
+#ifdef CAN_BUILD_32
        printf("[RUN]\tSet TF and check int80\n");
        set_eflags(get_eflags() | X86_EFLAGS_TF);
        asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
                        : INT80_CLOBBERS);
        check_result();
+#endif
 
        /*
         * This test is particularly interesting if fast syscalls use
index bf0d687..64f11c8 100644 (file)
@@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp)
                        vdso_size += PAGE_SIZE;
                }
 
+#ifdef __i386__
                /* Glibc is likely to explode now - exit with raw syscall */
                asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+               syscall(SYS_exit, ret);
+#endif
        } else {
                int status;
 
index 29973cd..2352590 100644 (file)
 # endif
 #endif
 
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
 int nerrs = 0;
 
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
 #ifdef __x86_64__
-# define VSYS(x) (x)
+       FILE *maps;
+       char line[MAPS_LINE_LEN];
+       bool found = false;
+
+       maps = fopen("/proc/self/maps", "r");
+       if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+               return NULL;
+
+       while (fgets(line, MAPS_LINE_LEN, maps)) {
+               char r, x;
+               void *start, *end;
+               char name[MAPS_LINE_LEN];
+
+               /* sscanf() is safe here as sizeof(name) >= sizeof(line) */
+               if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+                          &start, &end, &r, &x, name) != 5)
+                       continue;
+
+               if (strcmp(name, "[vsyscall]"))
+                       continue;
+
+               /* assume entries are OK, as we test vDSO here not vsyscall */
+               found = true;
+               break;
+       }
+
+       fclose(maps);
+
+       if (!found) {
+               printf("Warning: failed to find vsyscall getcpu\n");
+               return NULL;
+       }
+       return (void *) (0xffffffffff600800);
 #else
-# define VSYS(x) 0
+       return NULL;
 #endif
+}
 
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
-getcpu_t vdso_getcpu;
 
-void fill_function_pointers()
+static void fill_function_pointers()
 {
        void *vdso = dlopen("linux-vdso.so.1",
                            RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
@@ -54,6 +93,8 @@ void fill_function_pointers()
        vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
        if (!vdso_getcpu)
                printf("Warning: failed to find getcpu in vDSO\n");
+
+       vgetcpu = (getcpu_t) vsyscall_getcpu();
 }
 
 static long sys_getcpu(unsigned * cpu, unsigned * node,
index 7a744fa..be81621 100644 (file)
@@ -33,6 +33,9 @@
 # endif
 #endif
 
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
                       int flags)
 {
@@ -98,7 +101,7 @@ static int init_vsys(void)
 #ifdef __x86_64__
        int nerrs = 0;
        FILE *maps;
-       char line[128];
+       char line[MAPS_LINE_LEN];
        bool found = false;
 
        maps = fopen("/proc/self/maps", "r");
@@ -108,10 +111,12 @@ static int init_vsys(void)
                return 0;
        }
 
-       while (fgets(line, sizeof(line), maps)) {
+       while (fgets(line, MAPS_LINE_LEN, maps)) {
                char r, x;
                void *start, *end;
-               char name[128];
+               char name[MAPS_LINE_LEN];
+
+               /* sscanf() is safe here as sizeof(name) >= sizeof(line) */
                if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
                           &start, &end, &r, &x, name) != 5)
                        continue;
index 4e65060..01d758d 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for USB tools
 
-CC = $(CROSS_COMPILE)gcc
 PTHREAD_LIBS = -lpthread
 WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g -I../include
index be320b9..20f6cf0 100644 (file)
@@ -6,7 +6,6 @@ TARGETS=page-types slabinfo page_owner_sort
 LIB_DIR = ../lib/api
 LIBS = $(LIB_DIR)/libapi.a
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra -I../lib/
 LDFLAGS = $(LIBS)
 
index e664f11..e0e8723 100644 (file)
@@ -2,7 +2,6 @@ PREFIX ?= /usr
 SBINDIR ?= sbin
 INSTALL ?= install
 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
-CC = $(CROSS_COMPILE)gcc
 
 TARGET = dell-smbios-example